diff options
-rw-r--r-- | Cargo.lock | 7 | ||||
-rw-r--r-- | Cargo.toml | 3 | ||||
-rw-r--r-- | config.yml | 11 | ||||
-rw-r--r-- | src/config.rs | 44 | ||||
-rw-r--r-- | src/main.rs | 21 | ||||
-rw-r--r-- | src/pre_process.rs | 26 |
6 files changed, 91 insertions, 21 deletions
@@ -4,6 +4,7 @@ version = "0.1.0" | |||
4 | dependencies = [ | 4 | dependencies = [ |
5 | "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", | 5 | "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", |
6 | "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", | 6 | "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", |
7 | "yaml-rust 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", | ||
7 | ] | 8 | ] |
8 | 9 | ||
9 | [[package]] | 10 | [[package]] |
@@ -94,6 +95,11 @@ name = "winapi-build" | |||
94 | version = "0.1.1" | 95 | version = "0.1.1" |
95 | source = "registry+https://github.com/rust-lang/crates.io-index" | 96 | source = "registry+https://github.com/rust-lang/crates.io-index" |
96 | 97 | ||
98 | [[package]] | ||
99 | name = "yaml-rust" | ||
100 | version = "0.3.3" | ||
101 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
102 | |||
97 | [metadata] | 103 | [metadata] |
98 | "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" | 104 | "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" |
99 | "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" | 105 | "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" |
@@ -107,3 +113,4 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
107 | "checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780" | 113 | "checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780" |
108 | "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" | 114 | "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" |
109 | "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" | 115 | "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" |
116 | "checksum yaml-rust 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ebfe12f475ad59be6178ebf004d51e682022496535994f8d23fd7ed31084598c" | ||
@@ -5,4 +5,5 @@ authors = ["jan <jan@ruken.pw>"] | |||
5 | 5 | ||
6 | [dependencies] | 6 | [dependencies] |
7 | regex = "0.1" | 7 | regex = "0.1" |
8 | walkdir = "0.1" \ No newline at end of file | 8 | walkdir = "0.1" |
9 | yaml-rust = "0.3" | ||
diff --git a/config.yml b/config.yml new file mode 100644 index 0000000..07b5d03 --- /dev/null +++ b/config.yml | |||
@@ -0,0 +1,11 @@ | |||
1 | sections: | ||
2 | name: | ||
3 | pattern: (?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD> | ||
4 | groups: | ||
5 | - romaji | ||
6 | - japanese | ||
7 | image: | ||
8 | pattern: (?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image | ||
9 | groups: | ||
10 | - thumb | ||
11 | - full | ||
diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..fc8ee03 --- /dev/null +++ b/src/config.rs | |||
@@ -0,0 +1,44 @@ | |||
1 | extern crate yaml_rust; | ||
2 | use self::yaml_rust::YamlLoader; | ||
3 | |||
4 | use std::collections::HashMap; | ||
5 | use std::fs::File; | ||
6 | use std::io::prelude::*; | ||
7 | |||
8 | pub struct SectionConfig { | ||
9 | pub pattern: String, | ||
10 | pub groups: Vec<String>, | ||
11 | } | ||
12 | |||
13 | pub struct Config { | ||
14 | pub sections: HashMap<String, SectionConfig>, | ||
15 | } | ||
16 | |||
17 | impl Config { | ||
18 | pub fn from_file(p: &str) -> Self { | ||
19 | let mut f = File::open(p).unwrap(); | ||
20 | let mut buf = String::new(); | ||
21 | f.read_to_string(&mut buf).unwrap(); | ||
22 | let docs = YamlLoader::load_from_str(&buf).unwrap(); | ||
23 | |||
24 | let doc = &docs[0]; | ||
25 | |||
26 | println!("{:?}", doc); | ||
27 | |||
28 | let mut sections: HashMap<String, SectionConfig> = HashMap::new(); | ||
29 | for (name, entry) in doc["sections"].as_hash().unwrap() { | ||
30 | sections.insert(name.as_str().unwrap().into(), | ||
31 | SectionConfig { | ||
32 | pattern: entry["pattern"].as_str().unwrap().into(), | ||
33 | groups: entry["groups"] | ||
34 | .as_vec() | ||
35 | .unwrap() | ||
36 | .into_iter() | ||
37 | .map(|v| v.as_str().unwrap().into()) | ||
38 | .collect(), | ||
39 | }); | ||
40 | } | ||
41 | |||
42 | Config { sections: sections } | ||
43 | } | ||
44 | } | ||
diff --git a/src/main.rs b/src/main.rs index 97b3094..7d06fe7 100644 --- a/src/main.rs +++ b/src/main.rs | |||
@@ -1,4 +1,5 @@ | |||
1 | extern crate regex; | 1 | extern crate regex; |
2 | extern crate yaml_rust; | ||
2 | 3 | ||
3 | extern crate walkdir; | 4 | extern crate walkdir; |
4 | use walkdir::WalkDir; | 5 | use walkdir::WalkDir; |
@@ -12,13 +13,19 @@ use std::collections::HashMap; | |||
12 | mod pre_process; | 13 | mod pre_process; |
13 | use pre_process::Section; | 14 | use pre_process::Section; |
14 | 15 | ||
16 | mod config; | ||
17 | use config::Config; | ||
18 | |||
15 | fn main() { | 19 | fn main() { |
16 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); | 20 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); |
17 | let base_path = Path::new(&raw_files); | 21 | let base_path = Path::new(&raw_files); |
18 | 22 | ||
23 | let cfg = Config::from_file("config.yml"); | ||
24 | |||
19 | let mut sections: Vec<Section> = vec![]; | 25 | let mut sections: Vec<Section> = vec![]; |
20 | sections.push(Section::new("name", r#"(?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>"#, vec!["romaji".into(), "japanese".into()])); | 26 | for (name, sec) in &cfg.sections { |
21 | sections.push(Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb".into(), "full".into()])); | 27 | sections.push(Section::new(&name, &sec.pattern, sec.groups.clone())); |
28 | } | ||
22 | 29 | ||
23 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 30 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { |
24 | let mut f = File::open(entry.path()).expect("could not open file"); | 31 | let mut f = File::open(entry.path()).expect("could not open file"); |
@@ -32,11 +39,11 @@ fn main() { | |||
32 | 39 | ||
33 | pre_process::split_sections(&buf, &mut sections); | 40 | pre_process::split_sections(&buf, &mut sections); |
34 | 41 | ||
35 | let mut char: HashMap<String, HashMap<String, String>> = HashMap::new(); | 42 | let mut char: HashMap<String, HashMap<String, String>> = HashMap::new(); |
36 | for s in &sections { | 43 | for s in &sections { |
37 | char.insert(s.name.clone(), s.data.clone()); | 44 | char.insert(s.name.clone(), s.data.clone()); |
38 | } | 45 | } |
39 | 46 | ||
40 | println!("{:?}", char); | 47 | println!("{:?}", char); |
41 | } | 48 | } |
42 | } | 49 | } |
diff --git a/src/pre_process.rs b/src/pre_process.rs index 0d8c6be..a4d6c14 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs | |||
@@ -24,14 +24,14 @@ pub struct Section { | |||
24 | } | 24 | } |
25 | 25 | ||
26 | impl Section { | 26 | impl Section { |
27 | pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { | 27 | pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { |
28 | Section { | 28 | Section { |
29 | name: name.into(), | 29 | name: name.into(), |
30 | re: Regex::new(re).unwrap(), | 30 | re: Regex::new(re).unwrap(), |
31 | keys: groups, | 31 | keys: groups, |
32 | data: HashMap::new(), | 32 | data: HashMap::new(), |
33 | } | 33 | } |
34 | } | 34 | } |
35 | } | 35 | } |
36 | 36 | ||
37 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { | 37 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { |
@@ -39,11 +39,11 @@ pub fn split_sections(d: &str, s: &mut Vec<Section>) { | |||
39 | for m in section.re.captures_iter(d) { | 39 | for m in section.re.captures_iter(d) { |
40 | assert!(m.len() >= section.keys.len() + 1); | 40 | assert!(m.len() >= section.keys.len() + 1); |
41 | 41 | ||
42 | let mut idx = 0; | 42 | let mut idx = 0; |
43 | for key in &section.keys { | 43 | for key in &section.keys { |
44 | section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); | 44 | section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); |
45 | idx += 1; | 45 | idx += 1; |
46 | } | 46 | } |
47 | } | 47 | } |
48 | } | 48 | } |
49 | } | 49 | } |