package crawler

import (
	"log"
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// AniDBCrawler scrapes character information from anidb.net character pages.
type AniDBCrawler struct{}

// Name returns the human-readable name of this crawler.
func (a AniDBCrawler) Name() string { return "AniDB" }

// Crawl fetches the AniDB page for the character with the given id and
// extracts its fields into a CharacterData map.
//
// Definition-list rows are stored under their page label; additionally the
// crawler writes these reserved keys:
//   - "__desc":   the character description (HTML, trimmed)
//   - "__img":    the character image URL
//   - "__animes": []map[string]string entries with "rel" and "name"
//   - "__links":  ids of related characters linked from the page
//
// It returns CharacterNotFound when the site reports an unknown id, and
// CrawlError on fetch failure or when the site reports a ban.
func (a AniDBCrawler) Crawl(id int) (CharacterData, error) {
	c := make(CharacterData)

	// NOTE(review): goquery.NewDocument is deprecated and fetches via
	// http.DefaultClient with no timeout; consider an explicit http.Client
	// with a Timeout plus goquery.NewDocumentFromReader.
	doc, err := goquery.NewDocument("http://anidb.net/perl-bin/animedb.pl?show=character&charid=" + strconv.Itoa(id))
	if err != nil {
		log.Println(err)
		return nil, CrawlError
	}

	// AniDB signals problems inside the page body rather than via the HTTP
	// status code, so inspect the error element's text.
	siteErr, _ := doc.Find(".error p").Html()
	if strings.Contains(siteErr, "Unknown character id") {
		return nil, CharacterNotFound
	}
	if strings.Contains(siteErr, "BANNED") {
		return nil, CrawlError
	}

	// Non-nil so the value stays an empty list (not null) if encoded later.
	links := make([]string, 0)

	doc.Find(".characters").Each(func(i int, s *goquery.Selection) {
		if i > 0 {
			// Every .characters block after the first is a related
			// character; keep only its numeric id from the href.
			link, _ := s.Find(".mainname a").Attr("href")
			links = append(links, strings.TrimPrefix(link, "http://anidb.net/ch"))
			return
		}

		// The first .characters block describes the character itself.
		s.Find(".g_definitionlist tr").Each(func(_ int, row *goquery.Selection) {
			keyHTML, _ := row.Find("th").Html()
			valueHTML, _ := row.Find("td").Html()
			c[strings.TrimSpace(keyHTML)] = strings.TrimSpace(valueHTML)
		})

		desc, _ := s.Find(".desc").Html()
		c["__desc"] = strings.TrimSpace(desc)

		imgURL, _ := s.Find(".image img").Attr("src")
		c["__img"] = imgURL

		animes := make([]map[string]string, 0)
		s.Find(".animelist tr").Each(func(_ int, row *goquery.Selection) {
			if row.HasClass("header") {
				return // skip the table header row
			}
			relHTML, _ := row.Find(".relation").Html()
			nameHTML, _ := row.Find(".name a").Html()
			animes = append(animes, map[string]string{
				"rel":  strings.TrimSpace(relHTML),
				"name": strings.TrimSpace(nameHTML),
			})
		})
		c["__animes"] = animes
	})
	c["__links"] = links

	return c, nil
}