about | summary | refs | log | tree | commit | diff
path: root/src/pre_process.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/pre_process.rs')
-rw-r--r-- src/pre_process.rs | 27
1 file changed, 23 insertions, 4 deletions
diff --git a/src/pre_process.rs b/src/pre_process.rs
index d69cfce..0d8c6be 100644
--- a/src/pre_process.rs
+++ b/src/pre_process.rs
@@ -1,5 +1,7 @@
1use super::regex::Regex; 1use super::regex::Regex;
2 2
3use std::collections::HashMap;
4
3pub fn strip_irrelevant_content(s: &str) -> String { 5pub fn strip_irrelevant_content(s: &str) -> String {
4 let mut retn = ""; 6 let mut retn = "";
5 match s.find(r#"<div class=profile id=profile>"#) { 7 match s.find(r#"<div class=profile id=profile>"#) {
@@ -17,14 +19,31 @@ pub fn strip_irrelevant_content(s: &str) -> String {
17pub struct Section { 19pub struct Section {
18 pub name: String, 20 pub name: String,
19 pub re: Regex, 21 pub re: Regex,
20 pub content: String, 22 pub keys: Vec<String>,
23 pub data: HashMap<String, String>,
24}
25
26impl Section {
27 pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self {
28 Section {
29 name: name.into(),
30 re: Regex::new(re).unwrap(),
31 keys: groups,
32 data: HashMap::new(),
33 }
34 }
21} 35}
22 36
23pub fn split_sections(d: &str, s: &mut Vec<Section>) { 37pub fn split_sections(d: &str, s: &mut Vec<Section>) {
24 for section in s { 38 for section in s {
25 for m in section.re.captures_iter(d) { 39 for m in section.re.captures_iter(d) {
26 assert!(m.len() > 1); 40 assert!(m.len() >= section.keys.len() + 1);
27 section.content = format!("{}", m.at(1).unwrap()); 41
42 let mut idx = 0;
43 for key in &section.keys {
44 section.data.insert(key.clone(), m.at(idx + 1).unwrap().into());
45 idx += 1;
46 }
28 } 47 }
29 } 48 }
30} \ No newline at end of file 49}