about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: jan <jan@ruken.pw>, 2016-09-26 10:22:38 (UTC)
committer: jan <jan@ruken.pw>, 2016-09-26 10:22:38 (UTC)
commit: 23942b01ada4ef30a4bf183d90650ade56255ecc (patch)
tree: f223ff9e594665b5974e9ccf587baff3d407eeb8
parent: 75ac0507ce8142ab4105e4067debf79b63ac7e62 (diff)
sektions-parsing, character bilder werden derzeit ausgegeben
-rw-r--r-- src/main.rs 12
-rw-r--r-- src/pre_process.rs 17
2 files changed, 29 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs
index 066a07a..b733e6f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -10,11 +10,19 @@ use std::env;
10use std::path::{Path, PathBuf}; 10use std::path::{Path, PathBuf};
11 11
12mod pre_process; 12mod pre_process;
13use pre_process::Section;
13 14
14fn main() { 15fn main() {
15 let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into()); 16 let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into());
16 let base_path = Path::new(&raw_files); 17 let base_path = Path::new(&raw_files);
17 18
19 let mut sections: Vec<Section> = vec![];
20 sections.push(Section {
21 name: "image".into(),
22 re: Regex::new(r#"(?is)<H3 id="section99">.*<p><a href="(.*?)">View Full Size Image"#).unwrap(),
23 content: String::new(),
24 });
25
18 for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { 26 for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) {
19 let mut f = File::open(entry.path()).expect("could not open file"); 27 let mut f = File::open(entry.path()).expect("could not open file");
20 let mut buf = String::new(); 28 let mut buf = String::new();
@@ -24,5 +32,9 @@ fn main() {
24 } 32 }
25 33
26 let buf = pre_process::strip_irrelevant_content(&buf); 34 let buf = pre_process::strip_irrelevant_content(&buf);
35
36 pre_process::split_sections(&buf, &mut sections);
37
38 println!("{}", sections[0].content);
27 } 39 }
28} 40}
diff --git a/src/pre_process.rs b/src/pre_process.rs
index aa55bd8..d69cfce 100644
--- a/src/pre_process.rs
+++ b/src/pre_process.rs
@@ -1,3 +1,5 @@
1use super::regex::Regex;
2
1pub fn strip_irrelevant_content(s: &str) -> String { 3pub fn strip_irrelevant_content(s: &str) -> String {
2 let mut retn = ""; 4 let mut retn = "";
3 match s.find(r#"<div class=profile id=profile>"#) { 5 match s.find(r#"<div class=profile id=profile>"#) {
@@ -10,4 +12,19 @@ pub fn strip_irrelevant_content(s: &str) -> String {
10 None => (), 12 None => (),
11 }; 13 };
12 return retn.into(); 14 return retn.into();
15}
16
17pub struct Section {
18 pub name: String,
19 pub re: Regex,
20 pub content: String,
21}
22
23pub fn split_sections(d: &str, s: &mut Vec<Section>) {
24 for section in s {
25 for m in section.re.captures_iter(d) {
26 assert!(m.len() > 1);
27 section.content = format!("{}", m.at(1).unwrap());
28 }
29 }
13} \ No newline at end of file 30} \ No newline at end of file