// Source path: root/tools/charcrawler/crawler/crawler.go
// Blob: 3d27ea2d618f88b28bdfc4389f917568c88a5f97

package crawler

import (
	"encoding/json"
	"errors"
	"io/ioutil"
	"log"
	"os"
	"strconv"
	"strings"
	"time"
)

// CharacterData is the string-keyed record a Crawler returns for a single
// character. Start serializes it to JSON without inspecting its contents.
type CharacterData map[string]interface{}

// Crawler is a source of character data that Start can drive.
type Crawler interface {
	// Name returns the crawler's identifier. Start uses it as the log prefix,
	// as the directory name below data/ and as the base name of the progress
	// file (<name>.txt).
	Name() string
	// Crawl fetches the character with the given id. Start gives special
	// treatment to the errors CharacterNotFound, CrawlError and Banned.
	Crawl(id int) (CharacterData, error)
}

// Instances lists the available crawlers. This package's init function
// appends an ACDCrawler and an ACDAnimeCrawler to it.
var Instances []Crawler

// Sentinel errors that Start checks for on the error returned by
// Crawler.Crawl:
//
//   - CrawlError and Banned make Start panic with that error.
//   - CharacterNotFound is logged and counted as a fault; crawling then
//     continues with the next id.
var (
	CrawlError        = errors.New("Error while crawling")
	CharacterNotFound = errors.New("Character not found")
	Banned            = errors.New("Crawler banned from source")
)

// Start drives crawler c: once per second it crawls the next character id and
// stores the result as JSON in data/<name>/<id>.json, where <name> is c.Name().
//
// The id to crawl next is written to <name>.txt after every attempt, so a
// restarted process resumes where the previous run stopped. If that file is
// missing or does not hold an integer, crawling begins at id 1.
//
// Start returns when the output directory cannot be created, or once 100
// lookups have failed (CharacterNotFound or an unexpected error). It panics
// with the crawler's error when Crawl reports CrawlError or Banned.
func Start(c Crawler) {
	// maxFaults is the number of failed lookups tolerated before giving up.
	const maxFaults = 100

	name := c.Name()
	if err := os.MkdirAll("data/"+name, 0755); err != nil {
		// Without the output directory every later write would fail while the
		// progress file kept advancing, silently skipping characters.
		log.Printf("[%s] Cannot create output directory: %v", name, err)
		return
	}
	log.Printf("Starting Crawler %s...", name)

	ticker := time.NewTicker(time.Second * 1)
	// Release the ticker on every exit path, including the panics below.
	defer ticker.Stop()

	// Resume from the saved position when a valid one exists.
	current := 1
	if save, err := ioutil.ReadFile(name + ".txt"); err == nil {
		if i, err := strconv.Atoi(strings.TrimSpace(string(save))); err == nil {
			current = i
		}
	}

	faultCounter := 0
	for range ticker.C {
		if faultCounter >= maxFaults {
			log.Printf("[%s] Exiting after %d fails", name, maxFaults)
			return
		}

		log.Printf("[%s] Crawling %d", name, current)
		char, err := c.Crawl(current)
		switch {
		case err == nil:
			target := "data/" + name + "/" + strconv.Itoa(current) + ".json"
			if cData, mErr := json.Marshal(char); mErr != nil {
				log.Printf("[%s] Cannot encode char %d: %v", name, current, mErr)
			} else if wErr := ioutil.WriteFile(target, cData, 0644); wErr != nil {
				log.Printf("[%s] Cannot write %s: %v", name, target, wErr)
			}
		case errors.Is(err, CharacterNotFound):
			log.Printf("[%s] Char %d not found!", name, current)
			faultCounter++
		case errors.Is(err, CrawlError), errors.Is(err, Banned):
			// Fatal for the whole run; kept as a panic because callers may
			// rely on the process stopping here.
			panic(err)
		default:
			// Any other error means there is no usable data for this id, so
			// nothing is written and the attempt counts as a fault.
			log.Printf("[%s] Unexpected error for char %d: %v", name, current, err)
			faultCounter++
		}

		// Persist the id of the NEXT character so a restart does not repeat
		// the one that was just processed.
		current++
		if err := ioutil.WriteFile(name+".txt", []byte(strconv.Itoa(current)), 0644); err != nil {
			log.Printf("[%s] Cannot save progress: %v", name, err)
		}
	}
}

// init adds the crawlers defined in this package to Instances: first an
// ACDCrawler, then an ACDAnimeCrawler.
func init() {
	Instances = append(Instances, new(ACDCrawler))
	Instances = append(Instances, new(ACDAnimeCrawler))
}