package crawler

import (
	"log"
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// AniDBCrawler scrapes character information from anidb.net.
type AniDBCrawler struct{}

// Name returns the display name of this crawler.
func (a AniDBCrawler) Name() string {
	return "AniDB"
}

// Crawl fetches the AniDB page for the given character id and extracts
// its attribute table, description, image, anime appearances, and the
// ids of related character entries.
func (a AniDBCrawler) Crawl(id int) (CharacterData, error) {
	c := make(CharacterData)

	doc, err := goquery.NewDocument("http://anidb.net/perl-bin/animedb.pl?show=character&charid=" + strconv.Itoa(id))
	if err != nil {
		log.Println(err)
		return nil, CrawlError
	}

	// AniDB reports failures inline rather than via HTTP status codes,
	// so inspect the page's error box before parsing anything else.
	siteErr, _ := doc.Find(".error p").Html()
	if strings.Contains(siteErr, "Unknown character id") {
		return nil, CharacterNotFound
	} else if strings.Contains(siteErr, "BANNED") {
		return nil, CrawlError
	}

	// The first .characters block describes the character itself; any
	// following blocks are related characters, kept only as links.
	first := true
	links := make([]string, 0)
	doc.Find(".characters").Each(func(i int, s *goquery.Selection) {
		if first {
			// Key/value attribute table (name, gender, and so on).
			s.Find(".g_definitionlist tr").Each(func(i int, s *goquery.Selection) {
				keyHtml, _ := s.Find("th").Html()
				valueHtml, _ := s.Find("td").Html()
				key := strings.TrimSpace(keyHtml)
				value := strings.TrimSpace(valueHtml)
				c[key] = value
			})

			desc, _ := s.Find(".desc").Html()
			c["__desc"] = strings.TrimSpace(desc)

			imgUrl, _ := s.Find(".image img").Attr("src")
			c["__img"] = imgUrl

			// Anime the character appears in, each with its relation
			// (main character, secondary cast, and so on).
			animes := make([]map[string]string, 0)
			s.Find(".animelist tr").Each(func(i int, s *goquery.Selection) {
				if s.HasClass("header") {
					return // skip the table header row
				}
				anime := make(map[string]string)
				relHtml, _ := s.Find(".relation").Html()
				nameHtml, _ := s.Find(".name a").Html()
				anime["rel"] = strings.TrimSpace(relHtml)
				anime["name"] = strings.TrimSpace(nameHtml)
				animes = append(animes, anime)
			})
			c["__animes"] = animes
		} else {
			// Related character: keep only its AniDB character id by
			// stripping the URL prefix from the link.
			link, _ := s.Find(".mainname a").Attr("href")
			links = append(links, strings.Replace(link, "http://anidb.net/ch", "", 1))
		}
		first = false
	})
	c["__links"] = links

	return c, nil
}
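
// --- Hypothetical supporting declarations ------------------------------
// The listing above uses CharacterData, CrawlError, and CharacterNotFound
// without defining them, so they presumably live elsewhere in the crawler
// package. A minimal sketch of what they might look like, assuming a map
// with interface{} values (Crawl stores strings, []string, and
// []map[string]string under its keys) and plain sentinel errors. This
// sketch would require "errors" in the import block, and applies only if
// the package does not already define these names.

// CharacterData maps attribute names (plus "__"-prefixed internal keys
// such as "__desc", "__img", "__animes", and "__links") to scraped values.
type CharacterData map[string]interface{}

// Sentinel errors returned by crawlers.
var (
	CrawlError        = errors.New("crawler: request failed or crawler banned")
	CharacterNotFound = errors.New("crawler: unknown character id")
)

// Example usage (also an assumption, not part of the original listing):
//
//	var a AniDBCrawler
//	data, err := a.Crawl(1)
//	switch err {
//	case nil:
//		log.Println(data["__desc"])
//	case CharacterNotFound:
//		// no character with this id exists on AniDB
//	}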