package crawler

import (
	"encoding/json"
	"errors"
	"io/ioutil"
	"log"
	"os"
	"strconv"
	"strings"
	"time"
)

// CharacterData holds the raw key/value attributes of a crawled character.
type CharacterData map[string]interface{}

// Crawler fetches character data from a single source, one ID at a time.
type Crawler interface {
	Name() string
	Crawl(id int) (CharacterData, error)
}
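
// A Crawler implementation fetches one character per call and maps failures
// to the sentinel errors declared below. A minimal sketch (ExampleCrawler and
// the "example" source name are hypothetical; the real ACDCrawler and
// ACDAnimeCrawler are defined elsewhere in this package):
//
//	type ExampleCrawler struct{}
//
//	func (e *ExampleCrawler) Name() string { return "example" }
//
//	func (e *ExampleCrawler) Crawl(id int) (CharacterData, error) {
//		// Fetch and parse the source page for id here, returning
//		// CharacterNotFound, CrawlError or Banned as appropriate.
//		return CharacterData{"id": id}, nil
//	}
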
// Instances holds every registered crawler; init below appends the built-in
// implementations so callers can simply range over this slice.
var Instances []Crawler

// Sentinel errors a Crawl implementation returns to signal failures to Start.
var (
	CrawlError        = errors.New("Error while crawling")
	CharacterNotFound = errors.New("Character not found")
	Banned            = errors.New("Crawler banned from source")
)

// Start runs c in a loop, crawling one character ID per second and writing
// each result to data/<name>/<id>.json. The current position is saved to
// <name>.txt so a later run resumes where this one stopped; the loop gives up
// once the fault counter passes 100 missing characters.
func Start(c Crawler) {
	name := c.Name()
	os.MkdirAll("data/"+name, 0755)
	log.Printf("Starting Crawler %s...", name)

	ticker := time.NewTicker(time.Second * 1)
	defer ticker.Stop()

	// Resume from the last saved position, if a save file exists.
	current := 1
	if save, err := ioutil.ReadFile(name + ".txt"); err == nil {
		s := strings.TrimSpace(string(save))
		if i, err := strconv.Atoi(s); err == nil {
			current = i
		}
	}

	faultCounter := 0
	for range ticker.C {
		if faultCounter > 100 {
			faultCounter = 0
			log.Printf("[%s] Exiting after 100 fails", name)
			break
		}
		log.Printf("[%s] Crawling %d", name, current)
		char, err := c.Crawl(current)
		switch err {
		case CharacterNotFound:
			// A missing ID just advances the fault counter; crawling continues.
			log.Printf("[%s] Char %d not found!", name, current)
			faultCounter++
		case CrawlError, Banned:
			panic(err)
		default:
			cData, _ := json.Marshal(char)
			ioutil.WriteFile("data/"+name+"/"+strconv.Itoa(current)+".json", cData, 0755)
		}
		// Persist the next position so a restart continues from here.
		current++
		ioutil.WriteFile(name+".txt", []byte(strconv.Itoa(current)), os.ModePerm)
	}
}

func init() {
	Instances = append(Instances, new(ACDCrawler), new(ACDAnimeCrawler))
}
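
// Usage sketch (an assumption, not part of the original package): a main
// program would typically start every registered crawler concurrently, e.g.:
//
//	package main
//
//	import "example.com/yourmodule/crawler" // hypothetical import path
//
//	func main() {
//		for _, c := range crawler.Instances {
//			go crawler.Start(c) // one goroutine per source
//		}
//		select {} // block forever while the crawlers run
//	}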