From a6b37fa5e1bd505adfae4888896be2a3aa49ec3a Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 1 Oct 2016 01:16:19 +0200 Subject: parsen von traits und tags, wir koennen den configkram eigentlich wieder komplett entfernen. klappt so nicht wirklich. diff --git a/config.yml b/config.yml index 07b5d03..313d8e2 100644 --- a/config.yml +++ b/config.yml @@ -1,11 +1,25 @@ sections: name: - pattern: (?is)Romaji Name.*?(.*?)\s?.*?Japanese Name.*?(.*?)\s? + pattern: (?is)Romaji Name.*?(.*?)\s?.*?Japanese Name.*?(.*?)\s?.*?Aliases.*?(.*?)\s? groups: - romaji - japanese + - aliases + misc: + pattern: (?is)Role.*?(.*?)\s? + groups: + - role image: pattern: (?is)

.*.*?

View Full Size Image groups: - thumb - full + tags: + pattern: (?is)tagged as

.*?(.*?) + groups: + - tags_raw + traits: + pattern: (?is)indexed traits

.*?
(.*?)
.*?official traits\s?

.*?
(.*?)
+ groups: + - indexed_raw + - official_raw diff --git a/src/character.rs b/src/character.rs index 9c548c1..98c4850 100644 --- a/src/character.rs +++ b/src/character.rs @@ -1,3 +1,31 @@ +use super::tags::Tag; +use super::traits::Trait; + +#[derive(Debug)] +pub struct Names { + pub romaji: String, + pub japanese: String, + pub aliases: Vec<String>, +} + +#[derive(Debug)] +pub struct Images { + pub thumb: String, + pub full: String, +} + +#[derive(Debug)] +pub struct Traits { + pub official: Vec<Trait>, + pub indexed: Vec<Trait>, +} + +#[derive(Debug)] pub struct Character { - -} \ No newline at end of file + pub name: Names, + pub image: Images, + pub tags: Vec<Tag>, + pub traits: Traits, + + pub role: Option<String> +} diff --git a/src/config.rs b/src/config.rs index fc8ee03..f491852 100644 --- a/src/config.rs +++ b/src/config.rs @@ -15,7 +15,7 @@ pub struct Config { } impl Config { - pub fn from_file(p: &str) -> Self { + pub fn from_file(p: &str, expected: Vec<&'static str>) -> Self { let mut f = File::open(p).unwrap(); let mut buf = String::new(); f.read_to_string(&mut buf).unwrap(); @@ -23,8 +23,6 @@ impl Config { let doc = &docs[0]; - println!("{:?}", doc); - let mut sections: HashMap = HashMap::new(); for (name, entry) in doc["sections"].as_hash().unwrap() { sections.insert(name.as_str().unwrap().into(), @@ -39,6 +37,27 @@ impl Config { }); } + + for ex in &expected { + if !sections.contains_key(&ex.to_string()) { + panic!("config: section '{}' not found", ex); + } + } + + { + let traits = &sections["traits"]; + if !traits.groups.contains(&"indexed_raw".to_string()) { + panic!("config: no group 'indexed_raw' found in section 'traits'"); + } + if !traits.groups.contains(&"official_raw".to_string()) { + panic!("config: no group 'official_raw' found in section 'traits'"); + } + let tags = &sections["tags"]; + if !tags.groups.contains(&"tags_raw".to_string()) { + panic!("config: no group 'tags_raw' found in section 'tags'"); + } + } + Config { sections: sections } } } diff --git a/src/main.rs b/src/main.rs 
index 7d06fe7..ee8c3eb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -16,15 +16,21 @@ use pre_process::Section; mod config; use config::Config; +mod character; +use character::{Images, Names, Traits, Character}; + +mod tags; +mod traits; + fn main() { let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); let base_path = Path::new(&raw_files); - let cfg = Config::from_file("config.yml"); + let cfg = Config::from_file("config.yml", vec!["name", "image", "misc", "tags", "traits"]); - let mut sections: Vec
= vec![]; + let mut sections: HashMap = HashMap::new(); for (name, sec) in &cfg.sections { - sections.push(Section::new(&name, &sec.pattern, sec.groups.clone())); + sections.insert(name.clone(), Section::new(&name, &sec.pattern, sec.groups.clone())); } for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { @@ -39,11 +45,30 @@ fn main() { pre_process::split_sections(&buf, &mut sections); - let mut char: HashMap> = HashMap::new(); - for s in &sections { - char.insert(s.name.clone(), s.data.clone()); + { + let name: &Section = &sections["name".into()]; + let image: &Section = &sections["image".into()]; + let misc: &Section = &sections["misc".into()]; + println!("{:?}", Character { + name: Names { + romaji: name.data["romaji".into()].clone(), + japanese: name.data["japanese".into()].clone(), + aliases: name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(), + }, + image: Images { + thumb: image.data["thumb".into()].clone(), + full: image.data["full".into()].clone(), + }, + tags: tags::parse(&(&sections["tags".into()] as &Section).data["tags_raw".into()]), + traits: Traits { + official: traits::parse(&(&sections["traits"] as &Section).data["official_raw".into()]), + indexed: traits::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]), + }, + role: match misc.data["role".into()].len() > 0 { + true => Some(misc.data["role".into()].clone()), + false => None + } + }); } - - println!("{:?}", char); } } diff --git a/src/pre_process.rs b/src/pre_process.rs index a4d6c14..273562d 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs @@ -34,8 +34,8 @@ impl Section { } } -pub fn split_sections(d: &str, s: &mut Vec
) { - for section in s { +pub fn split_sections(d: &str, s: &mut HashMap) { + for (_, section) in s { for m in section.re.captures_iter(d) { assert!(m.len() >= section.keys.len() + 1); diff --git a/src/tags.rs b/src/tags.rs new file mode 100644 index 0000000..0fdf815 --- /dev/null +++ b/src/tags.rs @@ -0,0 +1,15 @@ +use super::regex::Regex; + +use std::str::FromStr; + +#[derive(Debug)] +pub struct Tag { + pub id: u32, + pub name: String, +} + +pub fn parse(s: &str) -> Vec { + let reg_tag = Regex::new(r#"(?is)(.*?)"#).unwrap(); + + reg_tag.captures_iter(s).map(|c| Tag { id: u32::from_str(c.at(1).unwrap()).unwrap(), name: c.at(2).unwrap().into() }).collect() +} diff --git a/src/traits.rs b/src/traits.rs new file mode 100644 index 0000000..db1b2eb --- /dev/null +++ b/src/traits.rs @@ -0,0 +1,13 @@ +use super::regex::Regex; + +#[derive(Debug)] +pub struct Trait { + name: String, + value: String, +} + +pub fn parse(s: &str) -> Vec { + let reg_trait = Regex::new(r#"(?is)(.*?).*?
(.*?)
"#).unwrap(); + + reg_trait.captures_iter(s).map(|c| Trait { name: c.at(1).unwrap().into(), value: c.at(2).unwrap().into() }).collect() +} -- cgit v0.10.1