Diffstat (limited to 'tools/charcrawler/crawler/crawler.go')
-rw-r--r--   tools/charcrawler/crawler/crawler.go   70
1 file changed, 70 insertions(+), 0 deletions(-)
diff --git a/tools/charcrawler/crawler/crawler.go b/tools/charcrawler/crawler/crawler.go
new file mode 100644
index 0000000..3d27ea2
--- /dev/null
+++ b/tools/charcrawler/crawler/crawler.go
@@ -0,0 +1,70 @@
package crawler

import (
	"encoding/json"
	"errors"
	"io/ioutil"
	"log"
	"os"
	"strconv"
	"strings"
	"time"
)

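// CharacterData is the raw, source-specific field map scraped for one character.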
type CharacterData map[string]interface{}

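// Crawler fetches a single character from a source site by its numeric id.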
type Crawler interface {
	Name() string
	Crawl(id int) (CharacterData, error)
}

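// Instances holds every registered crawler; Start is meant to be run once per entry.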
var Instances []Crawler

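// Sentinel errors a Crawl implementation can return.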
var (
	CrawlError        = errors.New("Error while crawling")
	CharacterNotFound = errors.New("Character not found")
	Banned            = errors.New("Crawler banned from source")
)

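// Start crawls one id per second with c, writes each result to
// data/<name>/<id>.json and keeps the next id in <name>.txt so an
// interrupted run can resume where it left off.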
func Start(c Crawler) {
	name := c.Name()
	if err := os.MkdirAll("data/"+name, 0755); err != nil {
		log.Printf("[%s] Could not create data directory: %v", name, err)
		return
	}
	log.Printf("Starting Crawler %s...", name)
	ticker := time.NewTicker(time.Second)
	defer ticker.Stop()
	// Resume from the last id saved in <name>.txt, if present.
	current := 1
	if save, err := ioutil.ReadFile(name + ".txt"); err == nil {
		s := strings.TrimSpace(string(save))
		if i, err := strconv.Atoi(s); err == nil {
			current = i
		}
	}
	faultCounter := 0
	for range ticker.C {
		if faultCounter >= 100 {
			log.Printf("[%s] Exiting after %d consecutive misses", name, faultCounter)
			break
		}
		log.Printf("[%s] Crawling %d", name, current)
		char, err := c.Crawl(current)
		switch err {
		case CharacterNotFound:
			log.Printf("[%s] Char %d not found!", name, current)
			faultCounter++
		case CrawlError, Banned:
			// Crawl errors and bans are treated as fatal for the whole process.
			panic(err)
		case nil:
			// Persist the crawled character as data/<name>/<id>.json.
			cData, jsonErr := json.Marshal(char)
			if jsonErr != nil {
				log.Printf("[%s] Could not encode char %d: %v", name, current, jsonErr)
			} else if writeErr := ioutil.WriteFile("data/"+name+"/"+strconv.Itoa(current)+".json", cData, 0755); writeErr != nil {
				log.Printf("[%s] Could not write char %d: %v", name, current, writeErr)
			}
			faultCounter = 0 // a successful hit resets the consecutive-miss counter
		default:
			// Unexpected error: log it and continue with the next id.
			log.Printf("[%s] Unexpected error for %d: %v", name, current, err)
		}

		current++
		// Remember the next id so a restarted crawler resumes where it left off.
		ioutil.WriteFile(name+".txt", []byte(strconv.Itoa(current)), os.ModePerm)
	}
}

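// init registers the available crawlers in Instances.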
func init() {
	Instances = append(Instances, new(ACDCrawler), new(ACDAnimeCrawler))
}