aboutsummaryrefslogtreecommitdiff
path: root/tools/charcrawler/crawler/anidb.go
blob: 12bf05b1d4a2f32b5a3823b4b8802fd4ef15d8cd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
package crawler

import (
	"log"
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// AniDBCrawler scrapes character pages from anidb.net. It carries no state,
// so the zero value is ready to use.
type AniDBCrawler struct{}

// Name returns the display name of this crawler, "AniDB".
func (AniDBCrawler) Name() string {
	const name = "AniDB"
	return name
}

// Crawl fetches the AniDB character page for the given character id and
// scrapes it into a CharacterData map.
//
// When the page contains at least one ".characters" element, the first one is
// treated as the requested character and yields:
//   - one entry per row of its ".g_definitionlist" table, keyed by the trimmed
//     inner HTML of the row's <th>, with the trimmed inner HTML of its <td> as
//     the value;
//   - "__desc": the trimmed inner HTML of the ".desc" element;
//   - "__img": the src attribute of the ".image img" element ("" if absent);
//   - "__animes": a []map[string]string holding "rel" and "name" for every
//     ".animelist" row that does not carry the "header" class.
//
// Every further ".characters" element is treated as a related character and
// only contributes its ".mainname a" link, with the "http://anidb.net/ch"
// prefix stripped, to "__links" (a []string that is always present). Related
// entries without an href are skipped.
//
// Crawl returns CharacterNotFound when the page reports an unknown character
// id, and CrawlError when the page cannot be fetched or the site reports that
// the client is banned.
func (a AniDBCrawler) Crawl(id int) (CharacterData, error) {
	// NOTE(review): goquery.NewDocument is marked deprecated upstream in favour
	// of issuing the request via net/http; kept here so that error pages are
	// still parsed regardless of their HTTP status.
	doc, err := goquery.NewDocument("http://anidb.net/perl-bin/animedb.pl?show=character&charid=" + strconv.Itoa(id))
	if err != nil {
		// Callers only receive the CrawlError sentinel, so the underlying cause
		// is logged here to keep it from being lost.
		log.Println(err)
		return nil, CrawlError
	}

	// The error result is deliberately ignored: a missing or unrenderable
	// element yields "", which matches neither marker below.
	siteErr, _ := doc.Find(".error p").Html()
	switch {
	case strings.Contains(siteErr, "Unknown character id"):
		return nil, CharacterNotFound
	case strings.Contains(siteErr, "BANNED"):
		// Logged for the same reason as the fetch failure above: the sentinel
		// alone does not tell the operator why the crawl failed.
		log.Println("AniDB: request for character", id, "rejected: client is banned")
		return nil, CrawlError
	}

	c := make(CharacterData)
	// Kept non-nil even when empty; presumably so the value serializes as an
	// empty list rather than null — confirm against the consumer.
	links := make([]string, 0)

	doc.Find(".characters").Each(func(idx int, char *goquery.Selection) {
		if idx > 0 {
			// Related character: only its link is recorded.
			href, ok := char.Find(".mainname a").Attr("href")
			if !ok {
				// No usable link; skip instead of recording an empty entry.
				return
			}
			links = append(links, strings.TrimPrefix(href, "http://anidb.net/ch"))
			return
		}

		// First match: the requested character itself. Scraping is best
		// effort, so Html/Attr failures simply produce empty values.
		char.Find(".g_definitionlist tr").Each(func(_ int, row *goquery.Selection) {
			keyHTML, _ := row.Find("th").Html()
			valueHTML, _ := row.Find("td").Html()
			c[strings.TrimSpace(keyHTML)] = strings.TrimSpace(valueHTML)
		})

		desc, _ := char.Find(".desc").Html()
		c["__desc"] = strings.TrimSpace(desc)

		imgURL, _ := char.Find(".image img").Attr("src")
		c["__img"] = imgURL

		animes := make([]map[string]string, 0)
		char.Find(".animelist tr").Each(func(_ int, row *goquery.Selection) {
			if row.HasClass("header") {
				return
			}
			relHTML, _ := row.Find(".relation").Html()
			nameHTML, _ := row.Find(".name a").Html()
			animes = append(animes, map[string]string{
				"rel":  strings.TrimSpace(relHTML),
				"name": strings.TrimSpace(nameHTML),
			})
		})
		c["__animes"] = animes
	})
	c["__links"] = links

	return c, nil
}