diff options
Diffstat (limited to 'tools/charcrawler/crawler/anidb.go')
-rw-r--r-- | tools/charcrawler/crawler/anidb.go | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/tools/charcrawler/crawler/anidb.go b/tools/charcrawler/crawler/anidb.go new file mode 100644 index 0000000..12bf05b --- /dev/null +++ b/tools/charcrawler/crawler/anidb.go | |||
@@ -0,0 +1,69 @@ | |||
1 | package crawler | ||
2 | |||
import (
	"log"
	"net/http"
	"strconv"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
10 | |||
// AniDBCrawler scrapes character pages from anidb.net.
// It carries no fields, so the zero value is ready to use.
type AniDBCrawler struct{}
12 | |||
13 | func (a AniDBCrawler) Name() string { | ||
14 | return "AniDB" | ||
15 | } | ||
16 | |||
17 | func (a AniDBCrawler) Crawl(id int) (CharacterData, error) { | ||
18 | c := make(CharacterData) | ||
19 | doc, err := goquery.NewDocument("http://anidb.net/perl-bin/animedb.pl?show=character&charid=" + strconv.Itoa(id)) | ||
20 | if err != nil { | ||
21 | log.Println(err) | ||
22 | return nil, CrawlError | ||
23 | } | ||
24 | siteErr, _ := doc.Find(".error p").Html() | ||
25 | if strings.Contains(siteErr, "Unknown character id") { | ||
26 | return nil, CharacterNotFound | ||
27 | } else if strings.Contains(siteErr, "BANNED") { | ||
28 | return nil, CrawlError | ||
29 | } | ||
30 | first := true | ||
31 | links := make([]string, 0) | ||
32 | doc.Find(".characters").Each(func(i int, s *goquery.Selection) { | ||
33 | if first { | ||
34 | s.Find(".g_definitionlist tr").Each(func(i int, s *goquery.Selection) { | ||
35 | keyHtml, _ := s.Find("th").Html() | ||
36 | valueHtml, _ := s.Find("td").Html() | ||
37 | key := strings.TrimSpace(keyHtml) | ||
38 | value := strings.TrimSpace(valueHtml) | ||
39 | c[key] = value | ||
40 | }) | ||
41 | desc, _ := s.Find(".desc").Html() | ||
42 | c["__desc"] = strings.TrimSpace(desc) | ||
43 | imgUrl, _ := s.Find(".image img").Attr("src") | ||
44 | c["__img"] = imgUrl | ||
45 | animes := make([]map[string]string, 0) | ||
46 | s.Find(".animelist tr").Each(func(i int, s *goquery.Selection) { | ||
47 | if s.HasClass("header") { | ||
48 | return | ||
49 | } | ||
50 | anime := make(map[string]string) | ||
51 | relHtml, _ := s.Find(".relation").Html() | ||
52 | nameHtml, _ := s.Find(".name a").Html() | ||
53 | rel := strings.TrimSpace(relHtml) | ||
54 | name := strings.TrimSpace(nameHtml) | ||
55 | anime["rel"] = rel | ||
56 | anime["name"] = name | ||
57 | animes = append(animes, anime) | ||
58 | }) | ||
59 | c["__animes"] = animes | ||
60 | } else { | ||
61 | link, _ := s.Find(".mainname a").Attr("href") | ||
62 | links = append(links, strings.Replace(link, "http://anidb.net/ch", "", 1)) | ||
63 | } | ||
64 | first = false | ||
65 | }) | ||
66 | c["__links"] = links | ||
67 | |||
68 | return c, nil | ||
69 | } | ||