about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Cargo.lock 7
-rw-r--r-- Cargo.toml 3
-rw-r--r-- config.yml 11
-rw-r--r-- src/config.rs 44
-rw-r--r-- src/main.rs 21
-rw-r--r-- src/pre_process.rs 26
6 files changed, 91 insertions, 21 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 335938c..f7516cb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,6 +4,7 @@ version = "0.1.0"
4dependencies = [ 4dependencies = [
5 "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", 5 "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
6 "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", 6 "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
7 "yaml-rust 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
7] 8]
8 9
9[[package]] 10[[package]]
@@ -94,6 +95,11 @@ name = "winapi-build"
94version = "0.1.1" 95version = "0.1.1"
95source = "registry+https://github.com/rust-lang/crates.io-index" 96source = "registry+https://github.com/rust-lang/crates.io-index"
96 97
98[[package]]
99name = "yaml-rust"
100version = "0.3.3"
101source = "registry+https://github.com/rust-lang/crates.io-index"
102
97[metadata] 103[metadata]
98"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" 104"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
99"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" 105"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
@@ -107,3 +113,4 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
107"checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780" 113"checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780"
108"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" 114"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
109"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" 115"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
116"checksum yaml-rust 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ebfe12f475ad59be6178ebf004d51e682022496535994f8d23fd7ed31084598c"
diff --git a/Cargo.toml b/Cargo.toml
index 540d3a1..517a6b1 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,4 +5,5 @@ authors = ["jan <jan@ruken.pw>"]
5 5
6[dependencies] 6[dependencies]
7regex = "0.1" 7regex = "0.1"
8walkdir = "0.1" \ No newline at end of file 8walkdir = "0.1"
9yaml-rust = "0.3"
diff --git a/config.yml b/config.yml
new file mode 100644
index 0000000..07b5d03
--- /dev/null
+++ b/config.yml
@@ -0,0 +1,11 @@
1sections:
2 name:
3 pattern: (?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>
4 groups:
5 - romaji
6 - japanese
7 image:
8 pattern: (?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image
9 groups:
10 - thumb
11 - full
diff --git a/src/config.rs b/src/config.rs
new file mode 100644
index 0000000..fc8ee03
--- /dev/null
+++ b/src/config.rs
@@ -0,0 +1,44 @@
1extern crate yaml_rust;
2use self::yaml_rust::YamlLoader;
3
4use std::collections::HashMap;
5use std::fs::File;
6use std::io::prelude::*;
7
/// Settings for a single named section read from the configuration file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SectionConfig {
    /// Regular-expression source text for this section.
    pub pattern: String,
    /// Names assigned, in order, to the pattern's capture groups.
    pub groups: Vec<String>,
}

/// Parsed configuration: every configured section, keyed by section name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Config {
    /// Section name -> settings for that section.
    pub sections: HashMap<String, SectionConfig>,
}
16
17impl Config {
18 pub fn from_file(p: &str) -> Self {
19 let mut f = File::open(p).unwrap();
20 let mut buf = String::new();
21 f.read_to_string(&mut buf).unwrap();
22 let docs = YamlLoader::load_from_str(&buf).unwrap();
23
24 let doc = &docs[0];
25
26 println!("{:?}", doc);
27
28 let mut sections: HashMap<String, SectionConfig> = HashMap::new();
29 for (name, entry) in doc["sections"].as_hash().unwrap() {
30 sections.insert(name.as_str().unwrap().into(),
31 SectionConfig {
32 pattern: entry["pattern"].as_str().unwrap().into(),
33 groups: entry["groups"]
34 .as_vec()
35 .unwrap()
36 .into_iter()
37 .map(|v| v.as_str().unwrap().into())
38 .collect(),
39 });
40 }
41
42 Config { sections: sections }
43 }
44}
diff --git a/src/main.rs b/src/main.rs
index 97b3094..7d06fe7 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,5 @@
1extern crate regex; 1extern crate regex;
2extern crate yaml_rust;
2 3
3extern crate walkdir; 4extern crate walkdir;
4use walkdir::WalkDir; 5use walkdir::WalkDir;
@@ -12,13 +13,19 @@ use std::collections::HashMap;
12mod pre_process; 13mod pre_process;
13use pre_process::Section; 14use pre_process::Section;
14 15
16mod config;
17use config::Config;
18
15fn main() { 19fn main() {
16 let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); 20 let raw_files = env::var("RAW_FILES").unwrap_or("characters".into());
17 let base_path = Path::new(&raw_files); 21 let base_path = Path::new(&raw_files);
18 22
23 let cfg = Config::from_file("config.yml");
24
19 let mut sections: Vec<Section> = vec![]; 25 let mut sections: Vec<Section> = vec![];
20 sections.push(Section::new("name", r#"(?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>"#, vec!["romaji".into(), "japanese".into()])); 26 for (name, sec) in &cfg.sections {
21 sections.push(Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb".into(), "full".into()])); 27 sections.push(Section::new(&name, &sec.pattern, sec.groups.clone()));
28 }
22 29
23 for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { 30 for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) {
24 let mut f = File::open(entry.path()).expect("could not open file"); 31 let mut f = File::open(entry.path()).expect("could not open file");
@@ -32,11 +39,11 @@ fn main() {
32 39
33 pre_process::split_sections(&buf, &mut sections); 40 pre_process::split_sections(&buf, &mut sections);
34 41
35 let mut char: HashMap<String, HashMap<String, String>> = HashMap::new(); 42 let mut char: HashMap<String, HashMap<String, String>> = HashMap::new();
36 for s in &sections { 43 for s in &sections {
37 char.insert(s.name.clone(), s.data.clone()); 44 char.insert(s.name.clone(), s.data.clone());
38 } 45 }
39 46
40 println!("{:?}", char); 47 println!("{:?}", char);
41 } 48 }
42} 49}
diff --git a/src/pre_process.rs b/src/pre_process.rs
index 0d8c6be..a4d6c14 100644
--- a/src/pre_process.rs
+++ b/src/pre_process.rs
@@ -24,14 +24,14 @@ pub struct Section {
24} 24}
25 25
26impl Section { 26impl Section {
27 pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { 27 pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self {
28 Section { 28 Section {
29 name: name.into(), 29 name: name.into(),
30 re: Regex::new(re).unwrap(), 30 re: Regex::new(re).unwrap(),
31 keys: groups, 31 keys: groups,
32 data: HashMap::new(), 32 data: HashMap::new(),
33 } 33 }
34 } 34 }
35} 35}
36 36
37pub fn split_sections(d: &str, s: &mut Vec<Section>) { 37pub fn split_sections(d: &str, s: &mut Vec<Section>) {
@@ -39,11 +39,11 @@ pub fn split_sections(d: &str, s: &mut Vec<Section>) {
39 for m in section.re.captures_iter(d) { 39 for m in section.re.captures_iter(d) {
40 assert!(m.len() >= section.keys.len() + 1); 40 assert!(m.len() >= section.keys.len() + 1);
41 41
42 let mut idx = 0; 42 let mut idx = 0;
43 for key in &section.keys { 43 for key in &section.keys {
44 section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); 44 section.data.insert(key.clone(), m.at(idx + 1).unwrap().into());
45 idx += 1; 45 idx += 1;
46 } 46 }
47 } 47 }
48 } 48 }
49} 49}