aboutsummaryrefslogtreecommitdiff
path: root/tools/charcrawler/crawler/acd.go
diff options
context:
space:
mode:
Diffstat (limited to 'tools/charcrawler/crawler/acd.go')
-rw-r--r--tools/charcrawler/crawler/acd.go84
1 files changed, 84 insertions, 0 deletions
diff --git a/tools/charcrawler/crawler/acd.go b/tools/charcrawler/crawler/acd.go
new file mode 100644
index 0000000..9a37b95
--- /dev/null
+++ b/tools/charcrawler/crawler/acd.go
@@ -0,0 +1,84 @@
1package crawler
2
3import (
4 "log"
5 "strconv"
6 "strings"
7
8 "github.com/PuerkitoBio/goquery"
9)
10
// ACDCrawler is a stateless crawler; its methods identify it as "ACD" and
// scrape character pages from animecharactersdatabase.com.
type ACDCrawler struct{}

// Name reports the short identifier of this crawler, which is always "ACD".
func (ACDCrawler) Name() string {
	return "ACD"
}
16
17func (a ACDCrawler) Crawl(id int) (CharacterData, error) {
18 c := make(CharacterData)
19 doc, err := goquery.NewDocument("http://www.animecharactersdatabase.com/character.php?id=" + strconv.Itoa(id))
20 if err != nil {
21 log.Println(err)
22 return nil, CrawlError
23 }
24 text := doc.Text()
25 if strings.Contains(text, "bad character : try") {
26 return nil, CharacterNotFound
27 }
28 if strings.Contains(text, "Your IP has been blocked") {
29 return nil, Banned
30 }
31 dataTr := doc.Find("#besttable").Next().ChildrenFiltered("tbody").ChildrenFiltered("tr")
32 leftSide := dataTr.Children().Eq(0)
33 rightSide := dataTr.Children().Eq(1)
34 imageCols := rightSide.ChildrenFiltered("table").First().Find("tbody > tr > td")
35 val, _ := imageCols.Eq(0).Find("img").Attr("src")
36 c["__thumb"] = val
37 val, _ = imageCols.Eq(1).Find("a").Attr("href")
38 c["__img"] = val
39 leftSide.ChildrenFiltered("table").Eq(1).Find("tr").Each(func(i int, s *goquery.Selection) {
40 c[s.Find("th").Text()] = s.Find("td").Text()
41 })
42 var key string
43 leftSide.
44 ChildrenFiltered("table").
45 Eq(0).
46 Find("td").
47 Eq(1).
48 Find("dl").
49 Children().
50 Each(func(i int, s *goquery.Selection) {
51 switch goquery.NodeName(s) {
52 case "dt":
53 key = s.Text()
54 case "dd":
55 c[key] = s.Text()
56 }
57 })
58 tags := make([]string, 0)
59 leftSide.ChildrenFiltered("div").Eq(0).Find("a").Each(func(i int, s *goquery.Selection) {
60 tags = append(tags, s.Text())
61 })
62 c["__tags"] = tags
63 vas := make([]string, 0)
64 leftSide.ChildrenFiltered("div").Eq(1).Find("a").Each(func(i int, s *goquery.Selection) {
65 vas = append(vas, s.Text())
66 })
67 c["__vas"] = vas
68 leftSide.ChildrenFiltered("dl").Children().Each(func(i int, s *goquery.Selection) {
69 switch goquery.NodeName(s) {
70 case "dt":
71 key = s.Text()
72 case "dd":
73 c[key] = s.Text()
74 }
75 })
76 apps := make([]int, 0)
77 rightSide.Find(".tile3top").Each(func(i int, s *goquery.Selection) {
78 val, _ = s.Find("a").Attr("href")
79 id, _ := strconv.Atoi(strings.Split(val, "=")[1])
80 apps = append(apps, id)
81 })
82 c["__appearances"] = apps
83 return c, nil
84}