From 361ea3a80a60bc6595a7a624b6cc5d71ddc8f6fc Mon Sep 17 00:00:00 2001 From: jan Date: Fri, 30 Sep 2016 14:30:31 +0200 Subject: jetzt werden die Sektionen aus einer config.yml rausgezogen. diff --git a/Cargo.lock b/Cargo.lock index 335938c..f7516cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,6 +4,7 @@ version = "0.1.0" dependencies = [ "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "yaml-rust 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -94,6 +95,11 @@ name = "winapi-build" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "yaml-rust" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [metadata] "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" @@ -107,3 +113,4 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" +"checksum yaml-rust 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ebfe12f475ad59be6178ebf004d51e682022496535994f8d23fd7ed31084598c" diff --git a/Cargo.toml b/Cargo.toml index 540d3a1..517a6b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,4 +5,5 @@ authors = ["jan "] [dependencies] regex = "0.1" 
-walkdir = "0.1" \ No newline at end of file +walkdir = "0.1" +yaml-rust = "0.3" diff --git a/config.yml b/config.yml new file mode 100644 index 0000000..07b5d03 --- /dev/null +++ b/config.yml @@ -0,0 +1,11 @@ +sections: + name: + pattern: (?is)Romaji Name.*?(.*?)\s?.*?Japanese Name.*?(.*?)\s? + groups: + - romaji + - japanese + image: + pattern: (?is)

.*.*?

View Full Size Image + groups: + - thumb + - full diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..fc8ee03 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,44 @@ +extern crate yaml_rust; +use self::yaml_rust::YamlLoader; + +use std::collections::HashMap; +use std::fs::File; +use std::io::prelude::*; + +pub struct SectionConfig { + pub pattern: String, + pub groups: Vec<String>, +} + +pub struct Config { + pub sections: HashMap<String, SectionConfig>, +} + +impl Config { + pub fn from_file(p: &str) -> Self { + let mut f = File::open(p).unwrap(); + let mut buf = String::new(); + f.read_to_string(&mut buf).unwrap(); + let docs = YamlLoader::load_from_str(&buf).unwrap(); + + let doc = &docs[0]; + + println!("{:?}", doc); + + let mut sections: HashMap<String, SectionConfig> = HashMap::new(); + for (name, entry) in doc["sections"].as_hash().unwrap() { + sections.insert(name.as_str().unwrap().into(), + SectionConfig { + pattern: entry["pattern"].as_str().unwrap().into(), + groups: entry["groups"] + .as_vec() + .unwrap() + .into_iter() + .map(|v| v.as_str().unwrap().into()) + .collect(), + }); + } + + Config { sections: sections } + } +} diff --git a/src/main.rs b/src/main.rs index 97b3094..7d06fe7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ extern crate regex; +extern crate yaml_rust; extern crate walkdir; use walkdir::WalkDir; @@ -12,13 +13,19 @@ use std::collections::HashMap; mod pre_process; use pre_process::Section; +mod config; +use config::Config; + fn main() { let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); let base_path = Path::new(&raw_files); + let cfg = Config::from_file("config.yml"); + let mut sections: Vec<Section> = vec![]; - sections.push(Section::new("name", r#"(?is)Romaji Name.*?(.*?)\s?.*?Japanese Name.*?(.*?)\s?"#, vec!["romaji".into(), "japanese".into()])); - sections.push(Section::new("image", r#"(?is)

.*.*?

View Full Size Image"#, vec!["thumb".into(), "full".into()])); + for (name, sec) in &cfg.sections { + sections.push(Section::new(&name, &sec.pattern, sec.groups.clone())); + } for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { let mut f = File::open(entry.path()).expect("could not open file"); @@ -32,11 +39,11 @@ fn main() { pre_process::split_sections(&buf, &mut sections); - let mut char: HashMap<String, HashMap<String, String>> = HashMap::new(); - for s in &sections { - char.insert(s.name.clone(), s.data.clone()); - } + let mut char: HashMap<String, HashMap<String, String>> = HashMap::new(); + for s in &sections { + char.insert(s.name.clone(), s.data.clone()); + } - println!("{:?}", char); + println!("{:?}", char); } } diff --git a/src/pre_process.rs b/src/pre_process.rs index 0d8c6be..a4d6c14 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs @@ -24,14 +24,14 @@ pub struct Section { } impl Section { - pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { - Section { - name: name.into(), - re: Regex::new(re).unwrap(), - keys: groups, - data: HashMap::new(), - } - } + pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { + Section { + name: name.into(), + re: Regex::new(re).unwrap(), + keys: groups, + data: HashMap::new(), + } + } } pub fn split_sections(d: &str, s: &mut Vec<Section>

) { @@ -39,11 +39,11 @@ pub fn split_sections(d: &str, s: &mut Vec<Section>) { for m in section.re.captures_iter(d) { assert!(m.len() >= section.keys.len() + 1); - let mut idx = 0; - for key in &section.keys { - section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); - idx += 1; - } + let mut idx = 0; + for key in &section.keys { + section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); + idx += 1; + } } } } -- cgit v0.10.1