diff options
author | jan <jan@ruken.pw> | 2016-09-29 11:10:20 (UTC) |
---|---|---|
committer | jan <jan@ruken.pw> | 2016-09-29 11:10:20 (UTC) |
commit | 9e59cd7e6a21751420ccbb853ac883154c6e578e (patch) | |
tree | f8497b7f12634985fb3409b9ab3343a5c0285f9f | |
parent | 23942b01ada4ef30a4bf183d90650ade56255ecc (diff) |
dynamischere sektionssuche, jetzt auch mit thumb images. koennten wir eigentlich in irgendeine config-datei auslagern.
-rw-r--r-- | src/main.rs | 10 | ||||
-rw-r--r-- | src/pre_process.rs | 27 |
2 files changed, 26 insertions, 11 deletions
diff --git a/src/main.rs b/src/main.rs index b733e6f..2123c6b 100644 --- a/src/main.rs +++ b/src/main.rs | |||
@@ -13,15 +13,11 @@ mod pre_process; | |||
13 | use pre_process::Section; | 13 | use pre_process::Section; |
14 | 14 | ||
15 | fn main() { | 15 | fn main() { |
16 | let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into()); | 16 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); |
17 | let base_path = Path::new(&raw_files); | 17 | let base_path = Path::new(&raw_files); |
18 | 18 | ||
19 | let mut sections: Vec<Section> = vec![]; | 19 | let mut sections: Vec<Section> = vec![]; |
20 | sections.push(Section { | 20 | sections.push(Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb".into(), "full".into()])); |
21 | name: "image".into(), | ||
22 | re: Regex::new(r#"(?is)<H3 id="section99">.*<p><a href="(.*?)">View Full Size Image"#).unwrap(), | ||
23 | content: String::new(), | ||
24 | }); | ||
25 | 21 | ||
26 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 22 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { |
27 | let mut f = File::open(entry.path()).expect("could not open file"); | 23 | let mut f = File::open(entry.path()).expect("could not open file"); |
@@ -35,6 +31,6 @@ fn main() { | |||
35 | 31 | ||
36 | pre_process::split_sections(&buf, &mut sections); | 32 | pre_process::split_sections(&buf, &mut sections); |
37 | 33 | ||
38 | println!("{}", sections[0].content); | 34 | println!("{:?}", sections[0].data); |
39 | } | 35 | } |
40 | } | 36 | } |
diff --git a/src/pre_process.rs b/src/pre_process.rs index d69cfce..0d8c6be 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs | |||
@@ -1,5 +1,7 @@ | |||
1 | use super::regex::Regex; | 1 | use super::regex::Regex; |
2 | 2 | ||
3 | use std::collections::HashMap; | ||
4 | |||
3 | pub fn strip_irrelevant_content(s: &str) -> String { | 5 | pub fn strip_irrelevant_content(s: &str) -> String { |
4 | let mut retn = ""; | 6 | let mut retn = ""; |
5 | match s.find(r#"<div class=profile id=profile>"#) { | 7 | match s.find(r#"<div class=profile id=profile>"#) { |
@@ -17,14 +19,31 @@ pub fn strip_irrelevant_content(s: &str) -> String { | |||
17 | pub struct Section { | 19 | pub struct Section { |
18 | pub name: String, | 20 | pub name: String, |
19 | pub re: Regex, | 21 | pub re: Regex, |
20 | pub content: String, | 22 | pub keys: Vec<String>, |
23 | pub data: HashMap<String, String>, | ||
24 | } | ||
25 | |||
26 | impl Section { | ||
27 | pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { | ||
28 | Section { | ||
29 | name: name.into(), | ||
30 | re: Regex::new(re).unwrap(), | ||
31 | keys: groups, | ||
32 | data: HashMap::new(), | ||
33 | } | ||
34 | } | ||
21 | } | 35 | } |
22 | 36 | ||
23 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { | 37 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { |
24 | for section in s { | 38 | for section in s { |
25 | for m in section.re.captures_iter(d) { | 39 | for m in section.re.captures_iter(d) { |
26 | assert!(m.len() > 1); | 40 | assert!(m.len() >= section.keys.len() + 1); |
27 | section.content = format!("{}", m.at(1).unwrap()); | 41 | |
42 | let mut idx = 0; | ||
43 | for key in &section.keys { | |
44 | section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); | ||
45 | idx += 1; | ||
46 | } | ||
28 | } | 47 | } |
29 | } | 48 | } |
30 | } \ No newline at end of file | 49 | } |