diff options
author | jan <jan@ruken.pw> | 2016-09-30 23:16:19 (UTC) |
---|---|---|
committer | jan <jan@ruken.pw> | 2016-09-30 23:16:19 (UTC) |
commit | a6b37fa5e1bd505adfae4888896be2a3aa49ec3a (patch) | |
tree | 0a381e0533489a78758b1516680e274f5fd82216 | |
parent | 361ea3a80a60bc6595a7a624b6cc5d71ddc8f6fc (diff) |
parsen von traits und tags, wir koennen den configkram eigentlich wieder komplett entfernen. klappt so nicht wirklich.
-rw-r--r-- | config.yml | 16 | ||||
-rw-r--r-- | src/character.rs | 32 | ||||
-rw-r--r-- | src/config.rs | 25 | ||||
-rw-r--r-- | src/main.rs | 41 | ||||
-rw-r--r-- | src/pre_process.rs | 4 | ||||
-rw-r--r-- | src/tags.rs | 15 | ||||
-rw-r--r-- | src/traits.rs | 13 |
7 files changed, 130 insertions, 16 deletions
@@ -1,11 +1,25 @@ | |||
1 | sections: | 1 | sections: |
2 | name: | 2 | name: |
3 | pattern: (?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD> | 3 | pattern: (?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>.*?Aliases.*?<TD>(.*?)\s?</TD> |
4 | groups: | 4 | groups: |
5 | - romaji | 5 | - romaji |
6 | - japanese | 6 | - japanese |
7 | - aliases | ||
8 | misc: | ||
9 | pattern: (?is)Role</TH>.*?<TD>(.*?)\s?</TD> | ||
10 | groups: | ||
11 | - role | ||
7 | image: | 12 | image: |
8 | pattern: (?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image | 13 | pattern: (?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image |
9 | groups: | 14 | groups: |
10 | - thumb | 15 | - thumb |
11 | - full | 16 | - full |
17 | tags: | ||
18 | pattern: (?is)tagged as</P>.*?<TH>(.*?)</TH> | ||
19 | groups: | ||
20 | - tags_raw | ||
21 | traits: | ||
22 | pattern: (?is)indexed traits</P>.*?<dl>(.*?)</dl>.*?official traits\s?</P>.*?<dl>(.*?)</dl> | ||
23 | groups: | ||
24 | - indexed_raw | ||
25 | - official_raw | ||
diff --git a/src/character.rs b/src/character.rs index 9c548c1..98c4850 100644 --- a/src/character.rs +++ b/src/character.rs | |||
@@ -1,3 +1,31 @@ | |||
1 | use super::tags::Tag; | ||
2 | use super::traits::Trait; | ||
3 | |||
4 | #[derive(Debug)] | ||
5 | pub struct Names { | ||
6 | pub romaji: String, | ||
7 | pub japanese: String, | ||
8 | pub aliases: Vec<String>, | ||
9 | } | ||
10 | |||
11 | #[derive(Debug)] | ||
12 | pub struct Images { | ||
13 | pub thumb: String, | ||
14 | pub full: String, | ||
15 | } | ||
16 | |||
17 | #[derive(Debug)] | ||
18 | pub struct Traits { | ||
19 | pub official: Vec<Trait>, | ||
20 | pub indexed: Vec<Trait>, | ||
21 | } | ||
22 | |||
23 | #[derive(Debug)] | ||
1 | pub struct Character { | 24 | pub struct Character { |
2 | 25 | pub name: Names, | |
3 | } \ No newline at end of file | 26 | pub image: Images, |
27 | pub tags: Vec<Tag>, | ||
28 | pub traits: Traits, | ||
29 | |||
30 | pub role: Option<String> | ||
31 | } | ||
diff --git a/src/config.rs b/src/config.rs index fc8ee03..f491852 100644 --- a/src/config.rs +++ b/src/config.rs | |||
@@ -15,7 +15,7 @@ pub struct Config { | |||
15 | } | 15 | } |
16 | 16 | ||
17 | impl Config { | 17 | impl Config { |
18 | pub fn from_file(p: &str) -> Self { | 18 | pub fn from_file(p: &str, expected: Vec<&'static str>) -> Self { |
19 | let mut f = File::open(p).unwrap(); | 19 | let mut f = File::open(p).unwrap(); |
20 | let mut buf = String::new(); | 20 | let mut buf = String::new(); |
21 | f.read_to_string(&mut buf).unwrap(); | 21 | f.read_to_string(&mut buf).unwrap(); |
@@ -23,8 +23,6 @@ impl Config { | |||
23 | 23 | ||
24 | let doc = &docs[0]; | 24 | let doc = &docs[0]; |
25 | 25 | ||
26 | println!("{:?}", doc); | ||
27 | |||
28 | let mut sections: HashMap<String, SectionConfig> = HashMap::new(); | 26 | let mut sections: HashMap<String, SectionConfig> = HashMap::new(); |
29 | for (name, entry) in doc["sections"].as_hash().unwrap() { | 27 | for (name, entry) in doc["sections"].as_hash().unwrap() { |
30 | sections.insert(name.as_str().unwrap().into(), | 28 | sections.insert(name.as_str().unwrap().into(), |
@@ -39,6 +37,27 @@ impl Config { | |||
39 | }); | 37 | }); |
40 | } | 38 | } |
41 | 39 | ||
40 | |||
41 | for ex in &expected { | ||
42 | if !sections.contains_key(&ex.to_string()) { | ||
43 | panic!("config: section '{}' not found", ex); | ||
44 | } | ||
45 | } | ||
46 | |||
47 | { | ||
48 | let traits = &sections["traits"]; | ||
49 | if !traits.groups.contains(&"indexed_raw".to_string()) { | ||
50 | panic!("config: no group 'indexed_raw' found in section 'traits'"); | ||
51 | } | ||
52 | if !traits.groups.contains(&"official_raw".to_string()) { | ||
53 | panic!("config: no group 'official_raw' found in section 'traits'"); | ||
54 | } | ||
55 | let tags = &sections["tags"]; | ||
56 | if !tags.groups.contains(&"tags_raw".to_string()) { | ||
57 | panic!("config: no group 'tags_raw' found in section 'tags'"); | ||
58 | } | ||
59 | } | ||
60 | |||
42 | Config { sections: sections } | 61 | Config { sections: sections } |
43 | } | 62 | } |
44 | } | 63 | } |
diff --git a/src/main.rs b/src/main.rs index 7d06fe7..ee8c3eb 100644 --- a/src/main.rs +++ b/src/main.rs | |||
@@ -16,15 +16,21 @@ use pre_process::Section; | |||
16 | mod config; | 16 | mod config; |
17 | use config::Config; | 17 | use config::Config; |
18 | 18 | ||
19 | mod character; | ||
20 | use character::{Images, Names, Traits, Character}; | ||
21 | |||
22 | mod tags; | ||
23 | mod traits; | ||
24 | |||
19 | fn main() { | 25 | fn main() { |
20 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); | 26 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); |
21 | let base_path = Path::new(&raw_files); | 27 | let base_path = Path::new(&raw_files); |
22 | 28 | ||
23 | let cfg = Config::from_file("config.yml"); | 29 | let cfg = Config::from_file("config.yml", vec!["name", "image", "misc", "tags", "traits"]); |
24 | 30 | ||
25 | let mut sections: Vec<Section> = vec![]; | 31 | let mut sections: HashMap<String, Section> = HashMap::new(); |
26 | for (name, sec) in &cfg.sections { | 32 | for (name, sec) in &cfg.sections { |
27 | sections.push(Section::new(&name, &sec.pattern, sec.groups.clone())); | 33 | sections.insert(name.clone(), Section::new(&name, &sec.pattern, sec.groups.clone())); |
28 | } | 34 | } |
29 | 35 | ||
30 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 36 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { |
@@ -39,11 +45,30 @@ fn main() { | |||
39 | 45 | ||
40 | pre_process::split_sections(&buf, &mut sections); | 46 | pre_process::split_sections(&buf, &mut sections); |
41 | 47 | ||
42 | let mut char: HashMap<String, HashMap<String, String>> = HashMap::new(); | 48 | { |
43 | for s in &sections { | 49 | let name: &Section = &sections["name".into()]; |
44 | char.insert(s.name.clone(), s.data.clone()); | 50 | let image: &Section = &sections["image".into()]; |
51 | let misc: &Section = &sections["misc".into()]; | ||
52 | println!("{:?}", Character { | ||
53 | name: Names { | ||
54 | romaji: name.data["romaji".into()].clone(), | ||
55 | japanese: name.data["japanese".into()].clone(), | ||
56 | aliases: name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(), | ||
57 | }, | ||
58 | image: Images { | ||
59 | thumb: image.data["thumb".into()].clone(), | ||
60 | full: image.data["full".into()].clone(), | ||
61 | }, | ||
62 | tags: tags::parse(&(&sections["tags".into()] as &Section).data["tags_raw".into()]), | ||
63 | traits: Traits { | ||
64 | official: traits::parse(&(&sections["traits"] as &Section).data["official_raw".into()]), | ||
65 | indexed: traits::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]), | ||
66 | }, | ||
67 | role: match misc.data["role".into()].len() > 0 { | ||
68 | true => Some(misc.data["role".into()].clone()), | ||
69 | false => None | ||
70 | } | ||
71 | }); | ||
45 | } | 72 | } |
46 | |||
47 | println!("{:?}", char); | ||
48 | } | 73 | } |
49 | } | 74 | } |
diff --git a/src/pre_process.rs b/src/pre_process.rs index a4d6c14..273562d 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs | |||
@@ -34,8 +34,8 @@ impl Section { | |||
34 | } | 34 | } |
35 | } | 35 | } |
36 | 36 | ||
37 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { | 37 | pub fn split_sections(d: &str, s: &mut HashMap<String, Section>) { |
38 | for section in s { | 38 | for (_, section) in s { |
39 | for m in section.re.captures_iter(d) { | 39 | for m in section.re.captures_iter(d) { |
40 | assert!(m.len() >= section.keys.len() + 1); | 40 | assert!(m.len() >= section.keys.len() + 1); |
41 | 41 | ||
diff --git a/src/tags.rs b/src/tags.rs new file mode 100644 index 0000000..0fdf815 --- /dev/null +++ b/src/tags.rs | |||
@@ -0,0 +1,15 @@ | |||
1 | use super::regex::Regex; | ||
2 | |||
3 | use std::str::FromStr; | ||
4 | |||
5 | #[derive(Debug)] | ||
6 | pub struct Tag { | ||
7 | pub id: u32, | ||
8 | pub name: String, | ||
9 | } | ||
10 | |||
11 | pub fn parse(s: &str) -> Vec<Tag> { | ||
12 | let reg_tag = Regex::new(r#"(?is)<a href="tags\.php\?id=([0-9]+)">(.*?)</a>"#).unwrap(); | ||
13 | |||
14 | reg_tag.captures_iter(s).map(|c| Tag { id: u32::from_str(c.at(1).unwrap()).unwrap(), name: c.at(2).unwrap().into() }).collect() | ||
15 | } | ||
diff --git a/src/traits.rs b/src/traits.rs new file mode 100644 index 0000000..db1b2eb --- /dev/null +++ b/src/traits.rs | |||
@@ -0,0 +1,13 @@ | |||
1 | use super::regex::Regex; | ||
2 | |||
3 | #[derive(Debug)] | ||
4 | pub struct Trait { | ||
5 | name: String, | ||
6 | value: String, | ||
7 | } | ||
8 | |||
9 | pub fn parse(s: &str) -> Vec<Trait> { | ||
10 | let reg_trait = Regex::new(r#"(?is)<dt.*?>(.*?)</dt>.*?<dd>(.*?)</dd>"#).unwrap(); | ||
11 | |||
12 | reg_trait.captures_iter(s).map(|c| Trait { name: c.at(1).unwrap().into(), value: c.at(2).unwrap().into() }).collect() | ||
13 | } | ||