diff options
author | Jan C <jan@ruken.pw> | 2016-03-29 16:08:41 (UTC) |
---|---|---|
committer | Jan C <jan@ruken.pw> | 2016-03-29 16:08:41 (UTC) |
commit | b601ea312d2c08560fcb70312536e6ba61d09bf4 (patch) | |
tree | 6490675fc6ee77ceacf1b760eea1d5380cbb0f7c /tools/charcrawler/crawler/crawler.go | |
parent | e0e59e9ca817f1e3c2091cf1b8ac370419fec8aa (diff) | |
parent | ae4f7f89849c4e94b388aabdb28d8f9f110df3c2 (diff) |
Merge branch 'master' of ssh://projekte.fagott.pw/grilist
Diffstat (limited to 'tools/charcrawler/crawler/crawler.go')
-rw-r--r-- | tools/charcrawler/crawler/crawler.go | 70 |
1 files changed, 70 insertions, 0 deletions
diff --git a/tools/charcrawler/crawler/crawler.go b/tools/charcrawler/crawler/crawler.go new file mode 100644 index 0000000..3d27ea2 --- /dev/null +++ b/tools/charcrawler/crawler/crawler.go | |||
@@ -0,0 +1,70 @@ | |||
1 | package crawler | ||
2 | |||
3 | import ( | ||
4 | "encoding/json" | ||
5 | "errors" | ||
6 | "io/ioutil" | ||
7 | "log" | ||
8 | "os" | ||
9 | "strconv" | ||
10 | "strings" | ||
11 | "time" | ||
12 | ) | ||
13 | |||
type (
	// CharacterData is the loosely typed record a Crawler produces for one
	// character: a map from field name to an arbitrary value.
	CharacterData map[string]interface{}

	// Crawler is a single character source that can be polled id by id.
	Crawler interface {
		// Name returns the identifier of the crawler.
		Name() string
		// Crawl fetches the character with the given id.
		Crawl(id int) (CharacterData, error)
	}
)

// Instances is the package-level list of registered crawlers.
var Instances []Crawler
22 | |||
// CrawlError is the sentinel a Crawler returns when crawling itself failed.
var CrawlError = errors.New("Error while crawling")

// CharacterNotFound is the sentinel a Crawler returns when the requested
// character id does not exist.
var CharacterNotFound = errors.New("Character not found")

// Banned is the sentinel a Crawler returns when the source has blocked it.
var Banned = errors.New("Crawler banned from source")
28 | |||
29 | func Start(c Crawler) { | ||
30 | name := c.Name() | ||
31 | os.MkdirAll("data/"+name, 0755) | ||
32 | log.Printf("Starting Crawler %s...", name) | ||
33 | ticker := time.NewTicker(time.Second * 1) | ||
34 | current := 1 | ||
35 | if save, err := ioutil.ReadFile(name + ".txt"); err == nil { | ||
36 | s := strings.TrimSpace(string(save)) | ||
37 | if i, err := strconv.Atoi(s); err == nil { | ||
38 | current = i | ||
39 | } | ||
40 | } | ||
41 | faultCounter := 0 | ||
42 | for range ticker.C { | ||
43 | if faultCounter > 100 { | ||
44 | faultCounter = 0 | ||
45 | log.Printf("[%s] Exiting after 100 fails", name) | ||
46 | break | ||
47 | } | ||
48 | log.Printf("[%s] Crawling %d", name, current) | ||
49 | char, err := c.Crawl(current) | ||
50 | switch err { | ||
51 | case CharacterNotFound: | ||
52 | log.Printf("[%s] Char %d not found!", name, current) | ||
53 | faultCounter++ | ||
54 | case CrawlError: | ||
55 | panic(err) | ||
56 | case Banned: | ||
57 | panic(err) | ||
58 | default: | ||
59 | cData, _ := json.Marshal(char) | ||
60 | ioutil.WriteFile("data/"+name+"/"+strconv.Itoa(current)+".json", cData, 0755) | ||
61 | } | ||
62 | |||
63 | current++ | ||
64 | ioutil.WriteFile(name+".txt", []byte(strconv.Itoa(current)), os.ModePerm) | ||
65 | } | ||
66 | } | ||
67 | |||
68 | func init() { | ||
69 | Instances = append(Instances, new(ACDCrawler), new(ACDAnimeCrawler)) | ||
70 | } | ||