diff options
Diffstat (limited to 'tools/charcrawler/crawler/acd.go')
-rw-r--r-- | tools/charcrawler/crawler/acd.go | 81 |
1 files changed, 81 insertions, 0 deletions
diff --git a/tools/charcrawler/crawler/acd.go b/tools/charcrawler/crawler/acd.go new file mode 100644 index 0000000..31efb0d --- /dev/null +++ b/tools/charcrawler/crawler/acd.go | |||
@@ -0,0 +1,81 @@ | |||
1 | package crawler | ||
2 | |||
3 | import ( | ||
4 | "log" | ||
5 | "strconv" | ||
6 | "strings" | ||
7 | |||
8 | "github.com/PuerkitoBio/goquery" | ||
9 | ) | ||
10 | |||
// ACDCrawler is the crawler for animecharactersdatabase.com character pages.
// The type holds no fields, so its zero value is ready to use.
type ACDCrawler struct{}

// Name returns the short identifier of this crawler, "ACD".
func (a ACDCrawler) Name() string {
	const crawlerName = "ACD"
	return crawlerName
}
16 | |||
17 | func (a ACDCrawler) Crawl(id int) (CharacterData, error) { | ||
18 | c := make(CharacterData) | ||
19 | doc, err := goquery.NewDocument("http://www.animecharactersdatabase.com/character.php?id=" + strconv.Itoa(id)) | ||
20 | if err != nil { | ||
21 | log.Println(err) | ||
22 | return nil, CrawlError | ||
23 | } | ||
24 | text := doc.Text() | ||
25 | if strings.Contains(text, "bad character : try") { | ||
26 | return nil, CharacterNotFound | ||
27 | } | ||
28 | dataTr := doc.Find("#besttable").Next().ChildrenFiltered("tbody").ChildrenFiltered("tr") | ||
29 | leftSide := dataTr.Children().Eq(0) | ||
30 | rightSide := dataTr.Children().Eq(1) | ||
31 | imageCols := rightSide.ChildrenFiltered("table").First().Find("tbody > tr > td") | ||
32 | val, _ := imageCols.Eq(0).Find("img").Attr("src") | ||
33 | c["__thumb"] = val | ||
34 | val, _ = imageCols.Eq(1).Find("a").Attr("href") | ||
35 | c["__img"] = val | ||
36 | leftSide.ChildrenFiltered("table").Eq(1).Find("tr").Each(func(i int, s *goquery.Selection) { | ||
37 | c[s.Find("th").Text()] = s.Find("td").Text() | ||
38 | }) | ||
39 | var key string | ||
40 | leftSide. | ||
41 | ChildrenFiltered("table"). | ||
42 | Eq(0). | ||
43 | Find("td"). | ||
44 | Eq(1). | ||
45 | Find("dl"). | ||
46 | Children(). | ||
47 | Each(func(i int, s *goquery.Selection) { | ||
48 | switch goquery.NodeName(s) { | ||
49 | case "dt": | ||
50 | key = s.Text() | ||
51 | case "dd": | ||
52 | c[key] = s.Text() | ||
53 | } | ||
54 | }) | ||
55 | tags := make([]string, 0) | ||
56 | leftSide.ChildrenFiltered("div").Eq(0).Find("a").Each(func(i int, s *goquery.Selection) { | ||
57 | tags = append(tags, s.Text()) | ||
58 | }) | ||
59 | c["__tags"] = tags | ||
60 | vas := make([]string, 0) | ||
61 | leftSide.ChildrenFiltered("div").Eq(1).Find("a").Each(func(i int, s *goquery.Selection) { | ||
62 | vas = append(vas, s.Text()) | ||
63 | }) | ||
64 | c["__vas"] = vas | ||
65 | leftSide.ChildrenFiltered("dl").Children().Each(func(i int, s *goquery.Selection) { | ||
66 | switch goquery.NodeName(s) { | ||
67 | case "dt": | ||
68 | key = s.Text() | ||
69 | case "dd": | ||
70 | c[key] = s.Text() | ||
71 | } | ||
72 | }) | ||
73 | apps := make([]int, 0) | ||
74 | rightSide.Find(".tile3top").Each(func(i int, s *goquery.Selection) { | ||
75 | val, _ = s.Find("a").Attr("href") | ||
76 | id, _ := strconv.Atoi(strings.Split(val, "=")[1]) | ||
77 | apps = append(apps, id) | ||
78 | }) | ||
79 | c["__appearances"] = apps | ||
80 | return c, nil | ||
81 | } | ||