aboutsummaryrefslogtreecommitdiff
path: root/tools/charcrawler/crawler/crawler.go
diff options
context:
space:
mode:
Diffstat (limited to 'tools/charcrawler/crawler/crawler.go')
-rw-r--r--tools/charcrawler/crawler/crawler.go67
1 files changed, 67 insertions, 0 deletions
diff --git a/tools/charcrawler/crawler/crawler.go b/tools/charcrawler/crawler/crawler.go
new file mode 100644
index 0000000..337bc76
--- /dev/null
+++ b/tools/charcrawler/crawler/crawler.go
@@ -0,0 +1,67 @@
1package crawler
2
3import (
4 "encoding/json"
5 "errors"
6 "io/ioutil"
7 "log"
8 "os"
9 "strconv"
10 "strings"
11 "time"
12)
13
// CharacterData is the free-form record a Crawler produces for one character.
// It is a string-keyed map of arbitrary values; Start serializes it to JSON
// as-is.
type CharacterData map[string]interface{}
15
16type Crawler interface {
17 Name() string
18 Crawl(id int) (CharacterData, error)
19}
20
21var Instances []Crawler
22
// CrawlError is the sentinel error for a failed crawl. Start panics when a
// Crawler returns it.
var CrawlError = errors.New("Error while crawling")

// CharacterNotFound is the sentinel error for an ID that has no character.
// Start logs it and counts it as a fault.
var CharacterNotFound = errors.New("Character not found")
27
28func Start(c Crawler) {
29 name := c.Name()
30 os.MkdirAll("data/"+name, 0755)
31 log.Printf("Starting Crawler %s...", name)
32 ticker := time.NewTicker(time.Second * 1)
33 current := 1
34 if save, err := ioutil.ReadFile(name + ".txt"); err == nil {
35 s := strings.TrimSpace(string(save))
36 if i, err := strconv.Atoi(s); err == nil {
37 current = i
38 }
39 }
40 faultCounter := 0
41 for range ticker.C {
42 if faultCounter > 100 {
43 faultCounter = 0
44 log.Printf("[%s] Exiting after 100 fails", name)
45 break
46 }
47 log.Printf("[%s] Crawling %d", name, current)
48 char, err := c.Crawl(current)
49 switch err {
50 case CharacterNotFound:
51 log.Printf("[%s] Char %d not found!", name, current)
52 faultCounter++
53 case CrawlError:
54 panic(err)
55 default:
56 cData, _ := json.Marshal(char)
57 ioutil.WriteFile("data/"+name+"/"+strconv.Itoa(current)+".json", cData, 0755)
58 }
59
60 current++
61 ioutil.WriteFile(name+".txt", []byte(strconv.Itoa(current)), os.ModePerm)
62 }
63}
64
65func init() {
66 Instances = append(Instances, new(ACDCrawler), new(ACDAnimeCrawler))
67}