package crawler

import (
	"log"
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// ACDCrawler scrapes character pages from animecharactersdatabase.com.
type ACDCrawler struct{}

// Name returns the short identifier of this crawler, "ACD".
func (a ACDCrawler) Name() string {
	return "ACD"
}

// Crawl downloads the character page with the given id and extracts its
// attributes into a CharacterData map.
//
// Besides the free-form key/value pairs found on the page (table rows and
// dt/dd definition lists), the following synthetic keys are always set:
//
//	__thumb        src of the thumbnail image ("" when absent)
//	__img          href of the full-size image link ("" when absent)
//	__tags         []string with the text of every tag link
//	__vas          []string with the text of every link in the second div
//	__appearances  []int with the IDs parsed from the appearance tile links
//
// It returns CrawlError when the page cannot be fetched (the underlying
// error is logged), CharacterNotFound when the page reports a bad character
// id, and Banned when the page reports that the IP has been blocked.
func (a ACDCrawler) Crawl(id int) (CharacterData, error) {
	c := make(CharacterData)

	doc, err := goquery.NewDocument("http://www.animecharactersdatabase.com/character.php?id=" + strconv.Itoa(id))
	if err != nil {
		log.Println(err)
		return nil, CrawlError
	}

	// The site signals these conditions in the page text, so they are
	// detected by searching the rendered text.
	text := doc.Text()
	if strings.Contains(text, "bad character : try") {
		return nil, CharacterNotFound
	}
	if strings.Contains(text, "Your IP has been blocked") {
		return nil, Banned
	}

	// The element following #besttable holds a row whose first cell carries
	// the textual data and whose second cell carries images and appearances.
	dataTr := doc.Find("#besttable").Next().ChildrenFiltered("tbody").ChildrenFiltered("tr")
	leftSide := dataTr.Children().Eq(0)
	rightSide := dataTr.Children().Eq(1)

	// Missing attributes are stored as the empty string on purpose, so the
	// "exists" flag returned by Attr is not needed here.
	imageCols := rightSide.ChildrenFiltered("table").First().Find("tbody > tr > td")
	thumb, _ := imageCols.Eq(0).Find("img").Attr("src")
	c["__thumb"] = thumb
	img, _ := imageCols.Eq(1).Find("a").Attr("href")
	c["__img"] = img

	// Second table on the left: one th/td pair per row.
	leftSide.ChildrenFiltered("table").Eq(1).Find("tr").Each(func(i int, s *goquery.Selection) {
		c[s.Find("th").Text()] = s.Find("td").Text()
	})

	// collectPair walks the children of a definition list: each dt names a
	// key and each dd stores its text under the most recently seen key. The
	// key is shared between both lists processed below.
	var key string
	collectPair := func(i int, s *goquery.Selection) {
		switch goquery.NodeName(s) {
		case "dt":
			key = s.Text()
		case "dd":
			c[key] = s.Text()
		}
	}

	// Definition list nested in the second cell of the first table.
	leftSide.
		ChildrenFiltered("table").
		Eq(0).
		Find("td").
		Eq(1).
		Find("dl").
		Children().
		Each(collectPair)

	tags := make([]string, 0)
	leftSide.ChildrenFiltered("div").Eq(0).Find("a").Each(func(i int, s *goquery.Selection) {
		tags = append(tags, s.Text())
	})
	c["__tags"] = tags

	vas := make([]string, 0)
	leftSide.ChildrenFiltered("div").Eq(1).Find("a").Each(func(i int, s *goquery.Selection) {
		vas = append(vas, s.Text())
	})
	c["__vas"] = vas

	// Definition list that is a direct child of the left cell.
	leftSide.ChildrenFiltered("dl").Children().Each(collectPair)

	apps := make([]int, 0)
	rightSide.Find(".tile3top").Each(func(i int, s *goquery.Selection) {
		href, ok := s.Find("a").Attr("href")
		if !ok {
			return
		}
		// The ID is the segment after the first "=". A link without "="
		// previously caused an index-out-of-range panic; skip it instead.
		parts := strings.Split(href, "=")
		if len(parts) < 2 {
			return
		}
		appID, err := strconv.Atoi(parts[1])
		if err != nil {
			// Not a numeric ID: skip rather than record a bogus 0.
			return
		}
		apps = append(apps, appID)
	})
	c["__appearances"] = apps

	return c, nil
}