diff options
author | jan <jan@ruken.pw> | 2016-09-26 10:22:38 (UTC) |
---|---|---|
committer | jan <jan@ruken.pw> | 2016-09-26 10:22:38 (UTC) |
commit | 23942b01ada4ef30a4bf183d90650ade56255ecc (patch) | |
tree | f223ff9e594665b5974e9ccf587baff3d407eeb8 | |
parent | 75ac0507ce8142ab4105e4067debf79b63ac7e62 (diff) |
sektions-parsing, character bilder werden derzeit ausgegeben
-rw-r--r-- | src/main.rs | 12 | ||||
-rw-r--r-- | src/pre_process.rs | 17 |
2 files changed, 29 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs index 066a07a..b733e6f 100644 --- a/src/main.rs +++ b/src/main.rs | |||
@@ -10,11 +10,19 @@ use std::env; | |||
10 | use std::path::{Path, PathBuf}; | 10 | use std::path::{Path, PathBuf}; |
11 | 11 | ||
12 | mod pre_process; | 12 | mod pre_process; |
13 | use pre_process::Section; | ||
13 | 14 | ||
14 | fn main() { | 15 | fn main() { |
15 | let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into()); | 16 | let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into()); |
16 | let base_path = Path::new(&raw_files); | 17 | let base_path = Path::new(&raw_files); |
17 | 18 | ||
19 | let mut sections: Vec<Section> = vec![]; | ||
20 | sections.push(Section { | ||
21 | name: "image".into(), | ||
22 | re: Regex::new(r#"(?is)<H3 id="section99">.*<p><a href="(.*?)">View Full Size Image"#).unwrap(), | ||
23 | content: String::new(), | ||
24 | }); | ||
25 | |||
18 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 26 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { |
19 | let mut f = File::open(entry.path()).expect("could not open file"); | 27 | let mut f = File::open(entry.path()).expect("could not open file"); |
20 | let mut buf = String::new(); | 28 | let mut buf = String::new(); |
@@ -24,5 +32,9 @@ fn main() { | |||
24 | } | 32 | } |
25 | 33 | ||
26 | let buf = pre_process::strip_irrelevant_content(&buf); | 34 | let buf = pre_process::strip_irrelevant_content(&buf); |
35 | |||
36 | pre_process::split_sections(&buf, &mut sections); | ||
37 | |||
38 | println!("{}", sections[0].content); | ||
27 | } | 39 | } |
28 | } | 40 | } |
diff --git a/src/pre_process.rs b/src/pre_process.rs index aa55bd8..d69cfce 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs | |||
@@ -1,3 +1,5 @@ | |||
1 | use super::regex::Regex; | ||
2 | |||
1 | pub fn strip_irrelevant_content(s: &str) -> String { | 3 | pub fn strip_irrelevant_content(s: &str) -> String { |
2 | let mut retn = ""; | 4 | let mut retn = ""; |
3 | match s.find(r#"<div class=profile id=profile>"#) { | 5 | match s.find(r#"<div class=profile id=profile>"#) { |
@@ -10,4 +12,19 @@ pub fn strip_irrelevant_content(s: &str) -> String { | |||
10 | None => (), | 12 | None => (), |
11 | }; | 13 | }; |
12 | return retn.into(); | 14 | return retn.into(); |
15 | } | ||
16 | |||
17 | pub struct Section { | ||
18 | pub name: String, | ||
19 | pub re: Regex, | ||
20 | pub content: String, | ||
21 | } | ||
22 | |||
23 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { | ||
24 | for section in s { | ||
25 | for m in section.re.captures_iter(d) { | ||
26 | assert!(m.len() > 1); | ||
27 | section.content = format!("{}", m.at(1).unwrap()); | ||
28 | } | ||
29 | } | ||
13 | } \ No newline at end of file | 30 | } \ No newline at end of file |