2 files changed, 26 insertions, 11 deletions
diff --git a/src/main.rs b/src/main.rs
index b733e6f..2123c6b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -13,15 +13,11 @@ mod pre_process;
 use pre_process::Section;
 fn main() {
-    let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into());
+    let raw_files = env::var("RAW_FILES").unwrap_or("characters".into());
    let base_path = Path::new(&raw_files);
    let mut sections: Vec<Section> = vec![];
-    sections.push(Section {
+    sections.push(Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb".into(), "full".into()]));
-        name: "image".into(),
-        re: Regex::new(r#"(?is)<H3 id="section99">.*<p><a href="(.*?)">View Full Size Image"#).unwrap(),
-        content: String::new(),
-    });
    for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) {
        let mut f = File::open(entry.path()).expect("could not open file");
@@ -35,6 +31,6 @@ fn main() {
        pre_process::split_sections(&buf, &mut sections);
-        println!("{}", sections[0].content);
+        println!("{:?}", sections[0].data);
    }
 }
diff --git a/src/pre_process.rs b/src/pre_process.rs
index d69cfce..0d8c6be 100644
--- a/src/pre_process.rs
+++ b/src/pre_process.rs
@@ -1,5 +1,7 @@
 use super::regex::Regex;
+use std::collections::HashMap;
 pub fn strip_irrelevant_content(s: &str) -> String {
    let mut retn = "";
    match s.find(r#"<div class=profile id=profile>"#) {
@@ -17,14 +19,31 @@ pub fn strip_irrelevant_content(s: &str) -> String {
 pub struct Section {
    pub name: String,
    pub re: Regex,
-    pub content: String,
+    pub keys: Vec<String>,
+    pub data: HashMap<String, String>,
+}
+impl Section {
+        pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self {
+                Section {
+                        name: name.into(),
+                        re: Regex::new(re).unwrap(),
+                        keys: groups,
+                        data: HashMap::new(),
+                }
+        }
 }
 pub fn split_sections(d: &str, s: &mut Vec<Section>) {
    for section in s {
        for m in section.re.captures_iter(d) {
-            assert!(m.len() > 1);
+            assert!(m.len() >= section.keys.len() + 1);
-            section.content = format!("{}", m.at(1).unwrap());
+                        let mut idx = 0;
+                        for key in &section.keys {
+                                section.data.insert(key.clone(), m.at(idx + 1).unwrap().into());
+                                idx += 1;
+                        }
        }
    }
-}
-\ No newline at end of file
+}

diff --git a/src/main.rs b/src/main.rs index b733e6f..2123c6b 100644 --- a/src/main.rs +++ b/src/main.rs
@@ -13,15 +13,11 @@ mod pre_process;
13	use pre_process::Section;	13	use pre_process::Section;
14		14
15	fn main() {	15	fn main() {
16	let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into());	16	let raw_files = env::var("RAW_FILES").unwrap_or("characters".into());
17	let base_path = Path::new(&raw_files);	17	let base_path = Path::new(&raw_files);
18		18
19	let mut sections: Vec<Section> = vec![];	19	let mut sections: Vec<Section> = vec![];
20	sections.push(Section {	20	sections.push(Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb".into(), "full".into()]));
21	name: "image".into(),
22	re: Regex::new(r#"(?is)<H3 id="section99">.*<p><a href="(.*?)">View Full Size Image"#).unwrap(),
23	content: String::new(),
24	});
25		21
26	for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) {	22	for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) {
27	let mut f = File::open(entry.path()).expect("could not open file");	23	let mut f = File::open(entry.path()).expect("could not open file");
@@ -35,6 +31,6 @@ fn main() {
35		31
36	pre_process::split_sections(&buf, &mut sections);	32	pre_process::split_sections(&buf, &mut sections);
37		33
38	println!("{}", sections[0].content);	34	println!("{:?}", sections[0].data);
39	}	35	}
40	}	36	}


diff --git a/src/pre_process.rs b/src/pre_process.rs index d69cfce..0d8c6be 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs
@@ -1,5 +1,7 @@
1	use super::regex::Regex;	1	use super::regex::Regex;
2		2
		3	use std::collections::HashMap;
		4
3	pub fn strip_irrelevant_content(s: &str) -> String {	5	pub fn strip_irrelevant_content(s: &str) -> String {
4	let mut retn = "";	6	let mut retn = "";
5	match s.find(r#"<div class=profile id=profile>"#) {	7	match s.find(r#"<div class=profile id=profile>"#) {
@@ -17,14 +19,31 @@ pub fn strip_irrelevant_content(s: &str) -> String {
17	pub struct Section {	19	pub struct Section {
18	pub name: String,	20	pub name: String,
19	pub re: Regex,	21	pub re: Regex,
20	pub content: String,	22	pub keys: Vec<String>,
		23	pub data: HashMap<String, String>,
		24	}
		25
		26	impl Section {
		27	pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self {
		28	Section {
		29	name: name.into(),
		30	re: Regex::new(re).unwrap(),
		31	keys: groups,
		32	data: HashMap::new(),
		33	}
		34	}
21	}	35	}
22		36
23	pub fn split_sections(d: &str, s: &mut Vec<Section>) {	37	pub fn split_sections(d: &str, s: &mut Vec<Section>) {
24	for section in s {	38	for section in s {
25	for m in section.re.captures_iter(d) {	39	for m in section.re.captures_iter(d) {
26	assert!(m.len() > 1);	40	assert!(m.len() >= section.keys.len() + 1);
27	section.content = format!("{}", m.at(1).unwrap());	41
		42	let mut idx = 0;
		43	for key in &section.keys {
		44	section.data.insert(key.clone(), m.at(idx + 1).unwrap().into());
		45	idx += 1;
		46	}
28	}	47	}
29	}	48	}
30	} \ No newline at end of file	49	}