diff options
| -rw-r--r-- | src/main.rs | 12 | ||||
| -rw-r--r-- | src/pre_process.rs | 17 | 
2 files changed, 29 insertions, 0 deletions
| diff --git a/src/main.rs b/src/main.rs index 066a07a..b733e6f 100644 --- a/src/main.rs +++ b/src/main.rs | |||
| @@ -10,11 +10,19 @@ use std::env; | |||
| 10 | use std::path::{Path, PathBuf}; | 10 | use std::path::{Path, PathBuf}; | 
| 11 | 11 | ||
| 12 | mod pre_process; | 12 | mod pre_process; | 
| 13 | use pre_process::Section; | ||
| 13 | 14 | ||
| 14 | fn main() { | 15 | fn main() { | 
| 15 | let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into()); | 16 | let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into()); | 
| 16 | let base_path = Path::new(&raw_files); | 17 | let base_path = Path::new(&raw_files); | 
| 17 | 18 | ||
| 19 | let mut sections: Vec<Section> = vec![]; | ||
| 20 | sections.push(Section { | ||
| 21 | name: "image".into(), | ||
| 22 | re: Regex::new(r#"(?is)<H3 id="section99">.*<p><a href="(.*?)">View Full Size Image"#).unwrap(), | ||
| 23 | content: String::new(), | ||
| 24 | }); | ||
| 25 | |||
| 18 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 26 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 
| 19 | let mut f = File::open(entry.path()).expect("could not open file"); | 27 | let mut f = File::open(entry.path()).expect("could not open file"); | 
| 20 | let mut buf = String::new(); | 28 | let mut buf = String::new(); | 
| @@ -24,5 +32,9 @@ fn main() { | |||
| 24 | } | 32 | } | 
| 25 | 33 | ||
| 26 | let buf = pre_process::strip_irrelevant_content(&buf); | 34 | let buf = pre_process::strip_irrelevant_content(&buf); | 
| 35 | |||
| 36 | pre_process::split_sections(&buf, &mut sections); | ||
| 37 | |||
| 38 | println!("{}", sections[0].content); | ||
| 27 | } | 39 | } | 
| 28 | } | 40 | } | 
| diff --git a/src/pre_process.rs b/src/pre_process.rs index aa55bd8..d69cfce 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs | |||
| @@ -1,3 +1,5 @@ | |||
| 1 | use super::regex::Regex; | ||
| 2 | |||
| 1 | pub fn strip_irrelevant_content(s: &str) -> String { | 3 | pub fn strip_irrelevant_content(s: &str) -> String { | 
| 2 | let mut retn = ""; | 4 | let mut retn = ""; | 
| 3 | match s.find(r#"<div class=profile id=profile>"#) { | 5 | match s.find(r#"<div class=profile id=profile>"#) { | 
| @@ -10,4 +12,19 @@ pub fn strip_irrelevant_content(s: &str) -> String { | |||
| 10 | None => (), | 12 | None => (), | 
| 11 | }; | 13 | }; | 
| 12 | return retn.into(); | 14 | return retn.into(); | 
| 15 | } | ||
| 16 | |||
| 17 | pub struct Section { | ||
| 18 | pub name: String, | ||
| 19 | pub re: Regex, | ||
| 20 | pub content: String, | ||
| 21 | } | ||
| 22 | |||
| 23 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { | ||
| 24 | for section in s { | ||
| 25 | for m in section.re.captures_iter(d) { | ||
| 26 | assert!(m.len() > 1); | ||
| 27 | section.content = format!("{}", m.at(1).unwrap()); | ||
| 28 | } | ||
| 29 | } | ||
| 13 | } \ No newline at end of file | 30 | } \ No newline at end of file | 
