diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/config.rs | 44 | ||||
| -rw-r--r-- | src/main.rs | 21 | ||||
| -rw-r--r-- | src/pre_process.rs | 26 |
3 files changed, 71 insertions, 20 deletions
diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..fc8ee03 --- /dev/null +++ b/src/config.rs | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | extern crate yaml_rust; | ||
| 2 | use self::yaml_rust::YamlLoader; | ||
| 3 | |||
| 4 | use std::collections::HashMap; | ||
| 5 | use std::fs::File; | ||
| 6 | use std::io::prelude::*; | ||
| 7 | |||
| 8 | pub struct SectionConfig { | ||
| 9 | pub pattern: String, | ||
| 10 | pub groups: Vec<String>, | ||
| 11 | } | ||
| 12 | |||
| 13 | pub struct Config { | ||
| 14 | pub sections: HashMap<String, SectionConfig>, | ||
| 15 | } | ||
| 16 | |||
| 17 | impl Config { | ||
| 18 | pub fn from_file(p: &str) -> Self { | ||
| 19 | let mut f = File::open(p).unwrap(); | ||
| 20 | let mut buf = String::new(); | ||
| 21 | f.read_to_string(&mut buf).unwrap(); | ||
| 22 | let docs = YamlLoader::load_from_str(&buf).unwrap(); | ||
| 23 | |||
| 24 | let doc = &docs[0]; | ||
| 25 | |||
| 26 | println!("{:?}", doc); | ||
| 27 | |||
| 28 | let mut sections: HashMap<String, SectionConfig> = HashMap::new(); | ||
| 29 | for (name, entry) in doc["sections"].as_hash().unwrap() { | ||
| 30 | sections.insert(name.as_str().unwrap().into(), | ||
| 31 | SectionConfig { | ||
| 32 | pattern: entry["pattern"].as_str().unwrap().into(), | ||
| 33 | groups: entry["groups"] | ||
| 34 | .as_vec() | ||
| 35 | .unwrap() | ||
| 36 | .into_iter() | ||
| 37 | .map(|v| v.as_str().unwrap().into()) | ||
| 38 | .collect(), | ||
| 39 | }); | ||
| 40 | } | ||
| 41 | |||
| 42 | Config { sections: sections } | ||
| 43 | } | ||
| 44 | } | ||
diff --git a/src/main.rs b/src/main.rs index 97b3094..7d06fe7 100644 --- a/src/main.rs +++ b/src/main.rs | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | extern crate regex; | 1 | extern crate regex; |
| 2 | extern crate yaml_rust; | ||
| 2 | 3 | ||
| 3 | extern crate walkdir; | 4 | extern crate walkdir; |
| 4 | use walkdir::WalkDir; | 5 | use walkdir::WalkDir; |
| @@ -12,13 +13,19 @@ use std::collections::HashMap; | |||
| 12 | mod pre_process; | 13 | mod pre_process; |
| 13 | use pre_process::Section; | 14 | use pre_process::Section; |
| 14 | 15 | ||
| 16 | mod config; | ||
| 17 | use config::Config; | ||
| 18 | |||
| 15 | fn main() { | 19 | fn main() { |
| 16 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); | 20 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); |
| 17 | let base_path = Path::new(&raw_files); | 21 | let base_path = Path::new(&raw_files); |
| 18 | 22 | ||
| 23 | let cfg = Config::from_file("config.yml"); | ||
| 24 | |||
| 19 | let mut sections: Vec<Section> = vec![]; | 25 | let mut sections: Vec<Section> = vec![]; |
| 20 | sections.push(Section::new("name", r#"(?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>"#, vec!["romaji".into(), "japanese".into()])); | 26 | for (name, sec) in &cfg.sections { |
| 21 | sections.push(Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb".into(), "full".into()])); | 27 | sections.push(Section::new(&name, &sec.pattern, sec.groups.clone())); |
| 28 | } | ||
| 22 | 29 | ||
| 23 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 30 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { |
| 24 | let mut f = File::open(entry.path()).expect("could not open file"); | 31 | let mut f = File::open(entry.path()).expect("could not open file"); |
| @@ -32,11 +39,11 @@ fn main() { | |||
| 32 | 39 | ||
| 33 | pre_process::split_sections(&buf, &mut sections); | 40 | pre_process::split_sections(&buf, &mut sections); |
| 34 | 41 | ||
| 35 | let mut char: HashMap<String, HashMap<String, String>> = HashMap::new(); | 42 | let mut char: HashMap<String, HashMap<String, String>> = HashMap::new(); |
| 36 | for s in &sections { | 43 | for s in &sections { |
| 37 | char.insert(s.name.clone(), s.data.clone()); | 44 | char.insert(s.name.clone(), s.data.clone()); |
| 38 | } | 45 | } |
| 39 | 46 | ||
| 40 | println!("{:?}", char); | 47 | println!("{:?}", char); |
| 41 | } | 48 | } |
| 42 | } | 49 | } |
diff --git a/src/pre_process.rs b/src/pre_process.rs index 0d8c6be..a4d6c14 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs | |||
| @@ -24,14 +24,14 @@ pub struct Section { | |||
| 24 | } | 24 | } |
| 25 | 25 | ||
| 26 | impl Section { | 26 | impl Section { |
| 27 | pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { | 27 | pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { |
| 28 | Section { | 28 | Section { |
| 29 | name: name.into(), | 29 | name: name.into(), |
| 30 | re: Regex::new(re).unwrap(), | 30 | re: Regex::new(re).unwrap(), |
| 31 | keys: groups, | 31 | keys: groups, |
| 32 | data: HashMap::new(), | 32 | data: HashMap::new(), |
| 33 | } | 33 | } |
| 34 | } | 34 | } |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { | 37 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { |
| @@ -39,11 +39,11 @@ pub fn split_sections(d: &str, s: &mut Vec<Section>) { | |||
| 39 | for m in section.re.captures_iter(d) { | 39 | for m in section.re.captures_iter(d) { |
| 40 | assert!(m.len() >= section.keys.len() + 1); | 40 | assert!(m.len() >= section.keys.len() + 1); |
| 41 | 41 | ||
| 42 | let mut idx = 0; | 42 | let mut idx = 0; |
| 43 | for key in &section.keys { | 43 | for key in &section.keys { |
| 44 | section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); | 44 | section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); |
| 45 | idx += 1; | 45 | idx += 1; |
| 46 | } | 46 | } |
| 47 | } | 47 | } |
| 48 | } | 48 | } |
| 49 | } | 49 | } |
