diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/config.rs | 44 | ||||
-rw-r--r-- | src/main.rs | 21 | ||||
-rw-r--r-- | src/pre_process.rs | 26 |
3 files changed, 71 insertions, 20 deletions
diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..fc8ee03 --- /dev/null +++ b/src/config.rs | |||
@@ -0,0 +1,44 @@ | |||
1 | extern crate yaml_rust; | ||
2 | use self::yaml_rust::YamlLoader; | ||
3 | |||
4 | use std::collections::HashMap; | ||
5 | use std::fs::File; | ||
6 | use std::io::prelude::*; | ||
7 | |||
8 | pub struct SectionConfig { | ||
9 | pub pattern: String, | ||
10 | pub groups: Vec<String>, | ||
11 | } | ||
12 | |||
13 | pub struct Config { | ||
14 | pub sections: HashMap<String, SectionConfig>, | ||
15 | } | ||
16 | |||
17 | impl Config { | ||
18 | pub fn from_file(p: &str) -> Self { | ||
19 | let mut f = File::open(p).unwrap(); | ||
20 | let mut buf = String::new(); | ||
21 | f.read_to_string(&mut buf).unwrap(); | ||
22 | let docs = YamlLoader::load_from_str(&buf).unwrap(); | ||
23 | |||
24 | let doc = &docs[0]; | ||
25 | |||
26 | println!("{:?}", doc); | ||
27 | |||
28 | let mut sections: HashMap<String, SectionConfig> = HashMap::new(); | ||
29 | for (name, entry) in doc["sections"].as_hash().unwrap() { | ||
30 | sections.insert(name.as_str().unwrap().into(), | ||
31 | SectionConfig { | ||
32 | pattern: entry["pattern"].as_str().unwrap().into(), | ||
33 | groups: entry["groups"] | ||
34 | .as_vec() | ||
35 | .unwrap() | ||
36 | .into_iter() | ||
37 | .map(|v| v.as_str().unwrap().into()) | ||
38 | .collect(), | ||
39 | }); | ||
40 | } | ||
41 | |||
42 | Config { sections: sections } | ||
43 | } | ||
44 | } | ||
diff --git a/src/main.rs b/src/main.rs index 97b3094..7d06fe7 100644 --- a/src/main.rs +++ b/src/main.rs | |||
@@ -1,4 +1,5 @@ | |||
1 | extern crate regex; | 1 | extern crate regex; |
2 | extern crate yaml_rust; | ||
2 | 3 | ||
3 | extern crate walkdir; | 4 | extern crate walkdir; |
4 | use walkdir::WalkDir; | 5 | use walkdir::WalkDir; |
@@ -12,13 +13,19 @@ use std::collections::HashMap; | |||
12 | mod pre_process; | 13 | mod pre_process; |
13 | use pre_process::Section; | 14 | use pre_process::Section; |
14 | 15 | ||
16 | mod config; | ||
17 | use config::Config; | ||
18 | |||
15 | fn main() { | 19 | fn main() { |
16 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); | 20 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); |
17 | let base_path = Path::new(&raw_files); | 21 | let base_path = Path::new(&raw_files); |
18 | 22 | ||
23 | let cfg = Config::from_file("config.yml"); | ||
24 | |||
19 | let mut sections: Vec<Section> = vec![]; | 25 | let mut sections: Vec<Section> = vec![]; |
20 | sections.push(Section::new("name", r#"(?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>"#, vec!["romaji".into(), "japanese".into()])); | 26 | for (name, sec) in &cfg.sections { |
21 | sections.push(Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb".into(), "full".into()])); | 27 | sections.push(Section::new(&name, &sec.pattern, sec.groups.clone())); |
28 | } | ||
22 | 29 | ||
23 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 30 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { |
24 | let mut f = File::open(entry.path()).expect("could not open file"); | 31 | let mut f = File::open(entry.path()).expect("could not open file"); |
@@ -32,11 +39,11 @@ fn main() { | |||
32 | 39 | ||
33 | pre_process::split_sections(&buf, &mut sections); | 40 | pre_process::split_sections(&buf, &mut sections); |
34 | 41 | ||
35 | let mut char: HashMap<String, HashMap<String, String>> = HashMap::new(); | 42 | let mut char: HashMap<String, HashMap<String, String>> = HashMap::new(); |
36 | for s in &sections { | 43 | for s in &sections { |
37 | char.insert(s.name.clone(), s.data.clone()); | 44 | char.insert(s.name.clone(), s.data.clone()); |
38 | } | 45 | } |
39 | 46 | ||
40 | println!("{:?}", char); | 47 | println!("{:?}", char); |
41 | } | 48 | } |
42 | } | 49 | } |
diff --git a/src/pre_process.rs b/src/pre_process.rs index 0d8c6be..a4d6c14 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs | |||
@@ -24,14 +24,14 @@ pub struct Section { | |||
24 | } | 24 | } |
25 | 25 | ||
26 | impl Section { | 26 | impl Section { |
27 | pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { | 27 | pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { |
28 | Section { | 28 | Section { |
29 | name: name.into(), | 29 | name: name.into(), |
30 | re: Regex::new(re).unwrap(), | 30 | re: Regex::new(re).unwrap(), |
31 | keys: groups, | 31 | keys: groups, |
32 | data: HashMap::new(), | 32 | data: HashMap::new(), |
33 | } | 33 | } |
34 | } | 34 | } |
35 | } | 35 | } |
36 | 36 | ||
37 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { | 37 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { |
@@ -39,11 +39,11 @@ pub fn split_sections(d: &str, s: &mut Vec<Section>) { | |||
39 | for m in section.re.captures_iter(d) { | 39 | for m in section.re.captures_iter(d) { |
40 | assert!(m.len() >= section.keys.len() + 1); | 40 | assert!(m.len() >= section.keys.len() + 1); |
41 | 41 | ||
42 | let mut idx = 0; | 42 | let mut idx = 0; |
43 | for key in &section.keys { | 43 | for key in &section.keys { |
44 | section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); | 44 | section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); |
45 | idx += 1; | 45 | idx += 1; |
46 | } | 46 | } |
47 | } | 47 | } |
48 | } | 48 | } |
49 | } | 49 | } |