diff options
Diffstat (limited to 'src/pre_process.rs')
-rw-r--r-- | src/pre_process.rs | 27 |
1 files changed, 23 insertions, 4 deletions
diff --git a/src/pre_process.rs b/src/pre_process.rs index d69cfce..0d8c6be 100644 --- a/src/pre_process.rs +++ b/src/pre_process.rs | |||
@@ -1,5 +1,7 @@ | |||
1 | use super::regex::Regex; | 1 | use super::regex::Regex; |
2 | 2 | ||
3 | use std::collections::HashMap; | ||
4 | |||
3 | pub fn strip_irrelevant_content(s: &str) -> String { | 5 | pub fn strip_irrelevant_content(s: &str) -> String { |
4 | let mut retn = ""; | 6 | let mut retn = ""; |
5 | match s.find(r#"<div class=profile id=profile>"#) { | 7 | match s.find(r#"<div class=profile id=profile>"#) { |
@@ -17,14 +19,31 @@ pub fn strip_irrelevant_content(s: &str) -> String { | |||
17 | pub struct Section { | 19 | pub struct Section { |
18 | pub name: String, | 20 | pub name: String, |
19 | pub re: Regex, | 21 | pub re: Regex, |
20 | pub content: String, | 22 | pub keys: Vec<String>, |
23 | pub data: HashMap<String, String>, | ||
24 | } | ||
25 | |||
26 | impl Section { | ||
27 | pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self { | ||
28 | Section { | ||
29 | name: name.into(), | ||
30 | re: Regex::new(re).unwrap(), | ||
31 | keys: groups, | ||
32 | data: HashMap::new(), | ||
33 | } | ||
34 | } | ||
21 | } | 35 | } |
22 | 36 | ||
23 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { | 37 | pub fn split_sections(d: &str, s: &mut Vec<Section>) { |
24 | for section in s { | 38 | for section in s { |
25 | for m in section.re.captures_iter(d) { | 39 | for m in section.re.captures_iter(d) { |
26 | assert!(m.len() > 1); | 40 | assert!(m.len() >= section.keys.len() + 1); |
27 | section.content = format!("{}", m.at(1).unwrap()); | 41 | |
42 | let mut idx = 0; | ||
43 | for key in &section.keys { | ||
44 | section.data.insert(key.clone(), m.at(idx + 1).unwrap().into()); | ||
45 | idx += 1; | ||
46 | } | ||
28 | } | 47 | } |
29 | } | 48 | } |
30 | } \ No newline at end of file | 49 | } |