aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: jan <jan@ruken.pw> 2016-09-30 23:16:19 (UTC)
committer: jan <jan@ruken.pw> 2016-09-30 23:16:19 (UTC)
commit: a6b37fa5e1bd505adfae4888896be2a3aa49ec3a (patch)
tree: 0a381e0533489a78758b1516680e274f5fd82216
parent: 361ea3a80a60bc6595a7a624b6cc5d71ddc8f6fc (diff)
parsen von traits und tags, wir koennen den configkram eigentlich wieder komplett entfernen. klappt so nicht wirklich.
-rw-r--r-- config.yml 16
-rw-r--r-- src/character.rs 32
-rw-r--r-- src/config.rs 25
-rw-r--r-- src/main.rs 41
-rw-r--r-- src/pre_process.rs 4
-rw-r--r-- src/tags.rs 15
-rw-r--r-- src/traits.rs 13
7 files changed, 130 insertions, 16 deletions
diff --git a/config.yml b/config.yml
index 07b5d03..313d8e2 100644
--- a/config.yml
+++ b/config.yml
@@ -1,11 +1,25 @@
1sections: 1sections:
2 name: 2 name:
3 pattern: (?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD> 3 pattern: (?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>.*?Aliases.*?<TD>(.*?)\s?</TD>
4 groups: 4 groups:
5 - romaji 5 - romaji
6 - japanese 6 - japanese
7 - aliases
8 misc:
9 pattern: (?is)Role</TH>.*?<TD>(.*?)\s?</TD>
10 groups:
11 - role
7 image: 12 image:
8 pattern: (?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image 13 pattern: (?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image
9 groups: 14 groups:
10 - thumb 15 - thumb
11 - full 16 - full
17 tags:
18 pattern: (?is)tagged as</P>.*?<TH>(.*?)</TH>
19 groups:
20 - tags_raw
21 traits:
22 pattern: (?is)indexed traits</P>.*?<dl>(.*?)</dl>.*?official traits\s?</P>.*?<dl>(.*?)</dl>
23 groups:
24 - indexed_raw
25 - official_raw
diff --git a/src/character.rs b/src/character.rs
index 9c548c1..98c4850 100644
--- a/src/character.rs
+++ b/src/character.rs
@@ -1,3 +1,31 @@
1use super::tags::Tag;
2use super::traits::Trait;
3
/// The names a character is known by, as captured by the `name` section
/// of the configuration.
#[derive(Debug)]
pub struct Names {
    /// Text captured from the "Romaji Name" table cell.
    pub romaji: String,
    /// Text captured from the "Japanese Name" table cell.
    pub japanese: String,
    /// Entries of the "Aliases" table cell; the caller splits the raw cell
    /// text on `", "` before storing it here.
    pub aliases: Vec<String>,
}
10
/// Image references for a character, as captured by the `image` section
/// of the configuration.
#[derive(Debug)]
pub struct Images {
    /// `src` attribute of the `<img>` element in the image section.
    pub thumb: String,
    /// `href` of the "View Full Size Image" link.
    pub full: String,
}
16
17#[derive(Debug)]
18pub struct Traits {
19 pub official: Vec<Trait>,
20 pub indexed: Vec<Trait>,
21}
22
23#[derive(Debug)]
1pub struct Character { 24pub struct Character {
2 25 pub name: Names,
3} \ No newline at end of file 26 pub image: Images,
27 pub tags: Vec<Tag>,
28 pub traits: Traits,
29
30 pub role: Option<String>
31}
diff --git a/src/config.rs b/src/config.rs
index fc8ee03..f491852 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -15,7 +15,7 @@ pub struct Config {
15} 15}
16 16
17impl Config { 17impl Config {
18 pub fn from_file(p: &str) -> Self { 18 pub fn from_file(p: &str, expected: Vec<&'static str>) -> Self {
19 let mut f = File::open(p).unwrap(); 19 let mut f = File::open(p).unwrap();
20 let mut buf = String::new(); 20 let mut buf = String::new();
21 f.read_to_string(&mut buf).unwrap(); 21 f.read_to_string(&mut buf).unwrap();
@@ -23,8 +23,6 @@ impl Config {
23 23
24 let doc = &docs[0]; 24 let doc = &docs[0];
25 25
26 println!("{:?}", doc);
27
28 let mut sections: HashMap<String, SectionConfig> = HashMap::new(); 26 let mut sections: HashMap<String, SectionConfig> = HashMap::new();
29 for (name, entry) in doc["sections"].as_hash().unwrap() { 27 for (name, entry) in doc["sections"].as_hash().unwrap() {
30 sections.insert(name.as_str().unwrap().into(), 28 sections.insert(name.as_str().unwrap().into(),
@@ -39,6 +37,27 @@ impl Config {
39 }); 37 });
40 } 38 }
41 39
40
41 for ex in &expected {
42 if !sections.contains_key(&ex.to_string()) {
43 panic!("config: section '{}' not found", ex);
44 }
45 }
46
47 {
48 let traits = &sections["traits"];
49 if !traits.groups.contains(&"indexed_raw".to_string()) {
50 panic!("config: no group 'indexed_raw' found in section 'traits'");
51 }
52 if !traits.groups.contains(&"official_raw".to_string()) {
53 panic!("config: no group 'official_raw' found in section 'traits'");
54 }
55 let tags = &sections["tags"];
56 if !tags.groups.contains(&"tags_raw".to_string()) {
57 panic!("config: no group 'tags_raw' found in section 'tags'");
58 }
59 }
60
42 Config { sections: sections } 61 Config { sections: sections }
43 } 62 }
44} 63}
diff --git a/src/main.rs b/src/main.rs
index 7d06fe7..ee8c3eb 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -16,15 +16,21 @@ use pre_process::Section;
16mod config; 16mod config;
17use config::Config; 17use config::Config;
18 18
19mod character;
20use character::{Images, Names, Traits, Character};
21
22mod tags;
23mod traits;
24
19fn main() { 25fn main() {
20 let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); 26 let raw_files = env::var("RAW_FILES").unwrap_or("characters".into());
21 let base_path = Path::new(&raw_files); 27 let base_path = Path::new(&raw_files);
22 28
23 let cfg = Config::from_file("config.yml"); 29 let cfg = Config::from_file("config.yml", vec!["name", "image", "misc", "tags", "traits"]);
24 30
25 let mut sections: Vec<Section> = vec![]; 31 let mut sections: HashMap<String, Section> = HashMap::new();
26 for (name, sec) in &cfg.sections { 32 for (name, sec) in &cfg.sections {
27 sections.push(Section::new(&name, &sec.pattern, sec.groups.clone())); 33 sections.insert(name.clone(), Section::new(&name, &sec.pattern, sec.groups.clone()));
28 } 34 }
29 35
30 for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { 36 for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) {
@@ -39,11 +45,30 @@ fn main() {
39 45
40 pre_process::split_sections(&buf, &mut sections); 46 pre_process::split_sections(&buf, &mut sections);
41 47
42 let mut char: HashMap<String, HashMap<String, String>> = HashMap::new(); 48 {
43 for s in &sections { 49 let name: &Section = &sections["name".into()];
44 char.insert(s.name.clone(), s.data.clone()); 50 let image: &Section = &sections["image".into()];
51 let misc: &Section = &sections["misc".into()];
52 println!("{:?}", Character {
53 name: Names {
54 romaji: name.data["romaji".into()].clone(),
55 japanese: name.data["japanese".into()].clone(),
56 aliases: name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(),
57 },
58 image: Images {
59 thumb: image.data["thumb".into()].clone(),
60 full: image.data["full".into()].clone(),
61 },
62 tags: tags::parse(&(&sections["tags".into()] as &Section).data["tags_raw".into()]),
63 traits: Traits {
64 official: traits::parse(&(&sections["traits"] as &Section).data["official_raw".into()]),
65 indexed: traits::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]),
66 },
67 role: match misc.data["role".into()].len() > 0 {
68 true => Some(misc.data["role".into()].clone()),
69 false => None
70 }
71 });
45 } 72 }
46
47 println!("{:?}", char);
48 } 73 }
49} 74}
diff --git a/src/pre_process.rs b/src/pre_process.rs
index a4d6c14..273562d 100644
--- a/src/pre_process.rs
+++ b/src/pre_process.rs
@@ -34,8 +34,8 @@ impl Section {
34 } 34 }
35} 35}
36 36
37pub fn split_sections(d: &str, s: &mut Vec<Section>) { 37pub fn split_sections(d: &str, s: &mut HashMap<String, Section>) {
38 for section in s { 38 for (_, section) in s {
39 for m in section.re.captures_iter(d) { 39 for m in section.re.captures_iter(d) {
40 assert!(m.len() >= section.keys.len() + 1); 40 assert!(m.len() >= section.keys.len() + 1);
41 41
diff --git a/src/tags.rs b/src/tags.rs
new file mode 100644
index 0000000..0fdf815
--- /dev/null
+++ b/src/tags.rs
@@ -0,0 +1,15 @@
1use super::regex::Regex;
2
3use std::str::FromStr;
4
/// A single tag attached to a character.
#[derive(Debug)]
pub struct Tag {
    /// Numeric id taken from the `id` query parameter of the `tags.php` link.
    pub id: u32,
    /// Link text of the tag anchor.
    pub name: String,
}
10
11pub fn parse(s: &str) -> Vec<Tag> {
12 let reg_tag = Regex::new(r#"(?is)<a href="tags\.php\?id=([0-9]+)">(.*?)</a>"#).unwrap();
13
14 reg_tag.captures_iter(s).map(|c| Tag { id: u32::from_str(c.at(1).unwrap()).unwrap(), name: c.at(2).unwrap().into() }).collect()
15}
diff --git a/src/traits.rs b/src/traits.rs
new file mode 100644
index 0000000..db1b2eb
--- /dev/null
+++ b/src/traits.rs
@@ -0,0 +1,13 @@
1use super::regex::Regex;
2
/// A single name/value trait entry of a character.
///
/// The fields are public, matching `Tag`, so that code outside this module
/// can read the parsed data instead of only debug-printing it.
#[derive(Debug)]
pub struct Trait {
    /// Contents of the `<dt>` element.
    pub name: String,
    /// Contents of the `<dd>` element that follows it.
    pub value: String,
}
8
9pub fn parse(s: &str) -> Vec<Trait> {
10 let reg_trait = Regex::new(r#"(?is)<dt.*?>(.*?)</dt>.*?<dd>(.*?)</dd>"#).unwrap();
11
12 reg_trait.captures_iter(s).map(|c| Trait { name: c.at(1).unwrap().into(), value: c.at(2).unwrap().into() }).collect()
13}