diff options
| author | jan <jan@ruken.pw> | 2016-09-29 11:10:20 (UTC) |
|---|---|---|
| committer | jan <jan@ruken.pw> | 2016-09-29 11:10:20 (UTC) |
| commit | 9e59cd7e6a21751420ccbb853ac883154c6e578e (patch) | |
| tree | f8497b7f12634985fb3409b9ab3343a5c0285f9f /src | |
| parent | 23942b01ada4ef30a4bf183d90650ade56255ecc (diff) | |
dynamischere sektionssuche, jetzt auch mit thumb images. koennten wir eigentlich in irgendeine config-datei auslagern.
Diffstat (limited to 'src')
| -rw-r--r-- | src/main.rs | 10 | ||||
| -rw-r--r-- | src/pre_process.rs | 27 |
2 files changed, 26 insertions, 11 deletions
diff --git a/src/main.rs b/src/main.rs index b733e6f..2123c6b 100644 --- a/src/main.rs +++ b/src/main.rs | |||
| @@ -13,15 +13,11 @@ mod pre_process; | |||
| 13 | use pre_process::Section; | 13 | use pre_process::Section; |
| 14 | 14 | ||
| 15 | fn main() { | 15 | fn main() { |
| 16 | let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into()); | 16 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); |
| 17 | let base_path = Path::new(&raw_files); | 17 | let base_path = Path::new(&raw_files); |
| 18 | 18 | ||
| 19 | let mut sections: Vec<Section> = vec![]; | 19 | let mut sections: Vec<Section> = vec![]; |
| 20 | sections.push(Section { | 20 | sections.push(Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb".into(), "full".into()])); |
| 21 | name: "image".into(), | ||
| 22 | re: Regex::new(r#"(?is)<H3 id="section99">.*<p><a href="(.*?)">View Full Size Image"#).unwrap(), | ||
| 23 | content: String::new(), | ||
| 24 | }); | ||
| 25 | 21 | ||
| 26 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 22 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { |
| 27 | let mut f = File::open(entry.path()).expect("could not open file"); | 23 | let mut f = File::open(entry.path()).expect("could not open file"); |
| @@ -35,6 +31,6 @@ fn main() { | |||
| 35 | 31 | ||
| 36 | pre_process::split_sections(&buf, &mut sections); | 32 | pre_process::split_sections(&buf, &mut sections); |
| 37 | 33 | ||
| 38 | println!("{}", sections[0].content); | 34 | println!("{:?}", sections[0].data); |
| 39 | } | 35 | } |
| 40 | } | 36 | } |
diff --git a/src/pre_process.rs b/src/pre_process.rs index d69cfce..0d8c6be 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs | |||
| @@ -1,5 +1,7 @@ | |||
| 1 | use super::regex::Regex; | 1 | use super::regex::Regex; |
| 2 | 2 | ||
| 3 | use std::collections::HashMap; | ||
| 4 | |||
| 3 | pub fn strip_irrelevant_content(s: &str) -> String { | 5 | pub fn strip_irrelevant_content(s: &str) -> String { |
| 4 | let mut retn = ""; | 6 | let mut retn = ""; |
| 5 | match s.find(r#"<div class=profile id=profile>"#) { | 7 | match s.find(r#"<div class=profile id=profile>"#) { |
| @@ -17,14 +19,31 @@ pub fn strip_irrelevant_content(s: &str) -> String { | |||
| 17 | pub struct Section { | 19 | pub struct Section { |
| 18 | pub name: String, | 20 | pub name: String, |
| 19 | pub re: Regex, | 21 | pub re: Regex, |
| 20 | pub content: String, | 22 | pub keys: Vec<String>, |
| 23 | pub data: HashMap<String, String>, | ||
| 24 | } | ||
| 25 | |||
| 26 | impl Section { | ||
| 27 | pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { | ||
| 28 | Section { | ||
| 29 | name: name.into(), | ||
| 30 | re: Regex::new(re).unwrap(), | ||
| 31 | keys: groups, | ||
| 32 | data: HashMap::new(), | ||
| 33 | } | ||
| 34 | } | ||
| 21 | } | 35 | } |
| 22 | 36 | ||
| 23 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { | 37 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { |
| 24 | for section in s { | 38 | for section in s { |
| 25 | for m in section.re.captures_iter(d) { | 39 | for m in section.re.captures_iter(d) { |
| 26 | assert!(m.len() > 1); | 40 | assert!(m.len() >= section.keys.len() + 1); |
| 27 | section.content = format!("{}", m.at(1).unwrap()); | 41 | |
| 42 | let mut idx = 0; | ||
| 43 | for key in &section.keys { | ||
| 44 | section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); | ||
| 45 | idx += 1; | ||
| 46 | } | ||
| 28 | } | 47 | } |
| 29 | } | 48 | } |
| 30 | } \ No newline at end of file | 49 | } |
