diff options
| author | jan <jan@ruken.pw> | 2016-09-26 10:22:38 (UTC) |
|---|---|---|
| committer | jan <jan@ruken.pw> | 2016-09-26 10:22:38 (UTC) |
| commit | 23942b01ada4ef30a4bf183d90650ade56255ecc (patch) | |
| tree | f223ff9e594665b5974e9ccf587baff3d407eeb8 /src | |
| parent | 75ac0507ce8142ab4105e4067debf79b63ac7e62 (diff) | |
sektions-parsing, character bilder werden derzeit ausgegeben
Diffstat (limited to 'src')
| -rw-r--r-- | src/main.rs | 12 | ||||
| -rw-r--r-- | src/pre_process.rs | 17 |
2 files changed, 29 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs index 066a07a..b733e6f 100644 --- a/src/main.rs +++ b/src/main.rs | |||
| @@ -10,11 +10,19 @@ use std::env; | |||
| 10 | use std::path::{Path, PathBuf}; | 10 | use std::path::{Path, PathBuf}; |
| 11 | 11 | ||
| 12 | mod pre_process; | 12 | mod pre_process; |
| 13 | use pre_process::Section; | ||
| 13 | 14 | ||
| 14 | fn main() { | 15 | fn main() { |
| 15 | let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into()); | 16 | let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into()); |
| 16 | let base_path = Path::new(&raw_files); | 17 | let base_path = Path::new(&raw_files); |
| 17 | 18 | ||
| 19 | let mut sections: Vec<Section> = vec![]; | ||
| 20 | sections.push(Section { | ||
| 21 | name: "image".into(), | ||
| 22 | re: Regex::new(r#"(?is)<H3 id="section99">.*<p><a href="(.*?)">View Full Size Image"#).unwrap(), | ||
| 23 | content: String::new(), | ||
| 24 | }); | ||
| 25 | |||
| 18 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 26 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { |
| 19 | let mut f = File::open(entry.path()).expect("could not open file"); | 27 | let mut f = File::open(entry.path()).expect("could not open file"); |
| 20 | let mut buf = String::new(); | 28 | let mut buf = String::new(); |
| @@ -24,5 +32,9 @@ fn main() { | |||
| 24 | } | 32 | } |
| 25 | 33 | ||
| 26 | let buf = pre_process::strip_irrelevant_content(&buf); | 34 | let buf = pre_process::strip_irrelevant_content(&buf); |
| 35 | |||
| 36 | pre_process::split_sections(&buf, &mut sections); | ||
| 37 | |||
| 38 | println!("{}", sections[0].content); | ||
| 27 | } | 39 | } |
| 28 | } | 40 | } |
diff --git a/src/pre_process.rs b/src/pre_process.rs index aa55bd8..d69cfce 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | use super::regex::Regex; | ||
| 2 | |||
| 1 | pub fn strip_irrelevant_content(s: &str) -> String { | 3 | pub fn strip_irrelevant_content(s: &str) -> String { |
| 2 | let mut retn = ""; | 4 | let mut retn = ""; |
| 3 | match s.find(r#"<div class=profile id=profile>"#) { | 5 | match s.find(r#"<div class=profile id=profile>"#) { |
| @@ -10,4 +12,19 @@ pub fn strip_irrelevant_content(s: &str) -> String { | |||
| 10 | None => (), | 12 | None => (), |
| 11 | }; | 13 | }; |
| 12 | return retn.into(); | 14 | return retn.into(); |
| 15 | } | ||
| 16 | |||
| 17 | pub struct Section { | ||
| 18 | pub name: String, | ||
| 19 | pub re: Regex, | ||
| 20 | pub content: String, | ||
| 21 | } | ||
| 22 | |||
| 23 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { | ||
| 24 | for section in s { | ||
| 25 | for m in section.re.captures_iter(d) { | ||
| 26 | assert!(m.len() > 1); | ||
| 27 | section.content = format!("{}", m.at(1).unwrap()); | ||
| 28 | } | ||
| 29 | } | ||
| 13 | } \ No newline at end of file | 30 | } \ No newline at end of file |
