From ad63b49b94bf4b4596e6420e37d265a57b77d731 Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 1 Oct 2016 12:45:55 +0200 Subject: config entfernt, extra details parsen diff --git a/src/character.rs b/src/character.rs index 98c4850..b82ac18 100644 --- a/src/character.rs +++ b/src/character.rs @@ -1,5 +1,8 @@ use super::tags::Tag; -use super::traits::Trait; +use super::dl_list::DLListItem; +use super::section::Section; + +use std::collections::HashMap; #[derive(Debug)] pub struct Names { @@ -16,8 +19,8 @@ pub struct Images { #[derive(Debug)] pub struct Traits { - pub official: Vec, - pub indexed: Vec, + pub official: Vec, + pub indexed: Vec, } #[derive(Debug)] @@ -26,6 +29,18 @@ pub struct Character { pub image: Images, pub tags: Vec, pub traits: Traits, + pub extra: Vec, pub role: Option } + +pub fn get_sections() -> HashMap { + let mut s: HashMap = HashMap::new(); + + s.insert("name".into(), Section::new("name", r#"(?is)Romaji Name.*?(.*?)\s?.*?Japanese Name.*?(.*?)\s?.*?Aliases.*?(.*?)\s?"#, vec!["romaji", "japanese", "aliases"])); + s.insert("misc".into(), Section::new("misc", r#"(?is)Role.*?(.*?)\s?"#, vec!["role"])); + s.insert("image".into(), Section::new("image", r#"(?is)

.*.*?

View Full Size Image"#, vec!["thumb", "full"])); + s.insert("tags".into(), Section::new("tags", r#"(?is)tagged as

.*?(.*?)"#, vec!["tags_raw"])); + s.insert("traits".into(), Section::new("traits", r#"(?is)indexed traits

.*?
(.*?)
.*?official traits\s?

.*?
(.*?)
"#, vec!["indexed_raw", "official_raw"])); + s +} diff --git a/src/config.rs b/src/config.rs deleted file mode 100644 index f491852..0000000 --- a/src/config.rs +++ /dev/null @@ -1,63 +0,0 @@ -extern crate yaml_rust; -use self::yaml_rust::YamlLoader; - -use std::collections::HashMap; -use std::fs::File; -use std::io::prelude::*; - -pub struct SectionConfig { - pub pattern: String, - pub groups: Vec, -} - -pub struct Config { - pub sections: HashMap, -} - -impl Config { - pub fn from_file(p: &str, expected: Vec<&'static str>) -> Self { - let mut f = File::open(p).unwrap(); - let mut buf = String::new(); - f.read_to_string(&mut buf).unwrap(); - let docs = YamlLoader::load_from_str(&buf).unwrap(); - - let doc = &docs[0]; - - let mut sections: HashMap = HashMap::new(); - for (name, entry) in doc["sections"].as_hash().unwrap() { - sections.insert(name.as_str().unwrap().into(), - SectionConfig { - pattern: entry["pattern"].as_str().unwrap().into(), - groups: entry["groups"] - .as_vec() - .unwrap() - .into_iter() - .map(|v| v.as_str().unwrap().into()) - .collect(), - }); - } - - - for ex in &expected { - if !sections.contains_key(&ex.to_string()) { - panic!("config: section '{}' not found", ex); - } - } - - { - let traits = §ions["traits"]; - if !traits.groups.contains(&"indexed_raw".to_string()) { - panic!("config: no group 'indexed_raw' found in section 'traits'"); - } - if !traits.groups.contains(&"official_raw".to_string()) { - panic!("config: no group 'official_raw' found in section 'traits'"); - } - let tags = §ions["tags"]; - if !tags.groups.contains(&"tags_raw".to_string()) { - panic!("config: no group 'tags_raw' found in section 'tags'"); - } - } - - Config { sections: sections } - } -} diff --git a/src/dl_list.rs b/src/dl_list.rs new file mode 100644 index 0000000..979c332 --- /dev/null +++ b/src/dl_list.rs @@ -0,0 +1,13 @@ +use super::regex::Regex; + +#[derive(Debug)] +pub struct DLListItem { + name: String, + value: String, +} + +pub fn parse(s: &str) -> Vec { + let reg_list_item = Regex::new(r#"(?is)(.*?).*?
(.*?)
"#).unwrap(); + + reg_list_item.captures_iter(s).map(|c| DLListItem { name: c.at(1).unwrap().into(), value: c.at(2).unwrap().into() }).collect() +} diff --git a/src/main.rs b/src/main.rs index ee8c3eb..e76da66 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ extern crate regex; +use regex::Regex; extern crate yaml_rust; extern crate walkdir; @@ -11,27 +12,23 @@ use std::path::Path; use std::collections::HashMap; mod pre_process; -use pre_process::Section; - -mod config; -use config::Config; +mod section; +use section::Section; mod character; use character::{Images, Names, Traits, Character}; mod tags; -mod traits; +mod dl_list; +use dl_list::DLListItem; fn main() { let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); let base_path = Path::new(&raw_files); - let cfg = Config::from_file("config.yml", vec!["name", "image", "misc", "tags", "traits"]); + let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+

.*?
(.*?)
"#).unwrap(); - let mut sections: HashMap = HashMap::new(); - for (name, sec) in &cfg.sections { - sections.insert(name.clone(), Section::new(&name, &sec.pattern, sec.groups.clone())); - } + let mut sections: HashMap = character::get_sections(); for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { let mut f = File::open(entry.path()).expect("could not open file"); @@ -43,7 +40,14 @@ fn main() { let buf = pre_process::strip_irrelevant_content(&buf); - pre_process::split_sections(&buf, &mut sections); + section::process(&buf, &mut sections); + + // find optional extra details + let mut extra_details: Vec = vec![]; + let caps = re_extras.captures(&buf); + if caps.is_some() { + extra_details = dl_list::parse(caps.unwrap().at(1).unwrap()); + } { let name: &Section = §ions["name".into()]; @@ -53,7 +57,10 @@ fn main() { name: Names { romaji: name.data["romaji".into()].clone(), japanese: name.data["japanese".into()].clone(), - aliases: name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(), + aliases: match name.data["aliases".into()].len() > 0 { + true => name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(), + false => vec![] + } }, image: Images { thumb: image.data["thumb".into()].clone(), @@ -61,9 +68,10 @@ fn main() { }, tags: tags::parse(&(§ions["tags".into()] as &Section).data["tags_raw".into()]), traits: Traits { - official: traits::parse(&(§ions["traits"] as &Section).data["official_raw".into()]), - indexed: traits::parse(&(§ions["traits"] as &Section).data["indexed_raw".into()]), + official: dl_list::parse(&(§ions["traits"] as &Section).data["official_raw".into()]), + indexed: dl_list::parse(&(§ions["traits"] as &Section).data["indexed_raw".into()]), }, + extra: extra_details, role: match misc.data["role".into()].len() > 0 { true => Some(misc.data["role".into()].clone()), false => None diff --git a/src/pre_process.rs b/src/pre_process.rs index 273562d..877ddc3 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs @@ -1,7 +1,3 @@ -use super::regex::Regex; - -use std::collections::HashMap; - pub fn strip_irrelevant_content(s: &str) -> String { let mut retn = ""; match s.find(r#"
"#) { @@ -16,34 +12,4 @@ pub fn strip_irrelevant_content(s: &str) -> String { return retn.into(); } -pub struct Section { - pub name: String, - pub re: Regex, - pub keys: Vec, - pub data: HashMap, -} -impl Section { - pub fn new(name: &str, re: &str, groups: Vec) -> Self { - Section { - name: name.into(), - re: Regex::new(re).unwrap(), - keys: groups, - data: HashMap::new(), - } - } -} - -pub fn split_sections(d: &str, s: &mut HashMap) { - for (_, section) in s { - for m in section.re.captures_iter(d) { - assert!(m.len() >= section.keys.len() + 1); - - let mut idx = 0; - for key in §ion.keys { - section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); - idx += 1; - } - } - } -} diff --git a/src/section.rs b/src/section.rs new file mode 100644 index 0000000..7e492b1 --- /dev/null +++ b/src/section.rs @@ -0,0 +1,34 @@ +use super::regex::Regex; +use std::collections::HashMap; + +pub struct Section { + pub name: String, + pub re: Regex, + pub keys: Vec, + pub data: HashMap, +} + +impl Section { + pub fn new(name: &str, re: &str, groups: Vec<&'static str>) -> Self { + Section { + name: name.into(), + re: Regex::new(re).unwrap(), + keys: groups.into_iter().map(|s| s.into()).collect(), + data: HashMap::new(), + } + } +} + +pub fn process(d: &str, s: &mut HashMap) { + for (_, section) in s { + for m in section.re.captures_iter(d) { + assert!(m.len() >= section.keys.len() + 1); + + let mut idx = 0; + for key in §ion.keys { + section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); + idx += 1; + } + } + } +} diff --git a/src/traits.rs b/src/traits.rs deleted file mode 100644 index db1b2eb..0000000 --- a/src/traits.rs +++ /dev/null @@ -1,13 +0,0 @@ -use super::regex::Regex; - -#[derive(Debug)] -pub struct Trait { - name: String, - value: String, -} - -pub fn parse(s: &str) -> Vec { - let reg_trait = Regex::new(r#"(?is)(.*?).*?
(.*?)
"#).unwrap(); - - reg_trait.captures_iter(s).map(|c| Trait { name: c.at(1).unwrap().into(), value: c.at(2).unwrap().into() }).collect() -} -- cgit v0.10.1