diff options
-rw-r--r-- | src/character.rs | 86 | ||||
-rw-r--r-- | src/main.rs | 51 | ||||
-rw-r--r-- | src/tiles.rs | 9 |
3 files changed, 98 insertions, 48 deletions
diff --git a/src/character.rs b/src/character.rs index b82ac18..b0322dd 100644 --- a/src/character.rs +++ b/src/character.rs | |||
@@ -1,6 +1,11 @@ | |||
1 | use super::tags; | ||
1 | use super::tags::Tag; | 2 | use super::tags::Tag; |
3 | use super::dl_list; | ||
2 | use super::dl_list::DLListItem; | 4 | use super::dl_list::DLListItem; |
5 | use super::section; | ||
3 | use super::section::Section; | 6 | use super::section::Section; |
7 | use super::regex::Regex; | ||
8 | use super::tiles; | ||
4 | 9 | ||
5 | use std::collections::HashMap; | 10 | use std::collections::HashMap; |
6 | 11 | ||
@@ -29,12 +34,86 @@ pub struct Character { | |||
29 | pub image: Images, | 34 | pub image: Images, |
30 | pub tags: Vec<Tag>, | 35 | pub tags: Vec<Tag>, |
31 | pub traits: Traits, | 36 | pub traits: Traits, |
37 | pub assignments: Vec<u32>, | ||
38 | pub chars_similar_traits: Vec<u32>, | ||
32 | pub extra: Vec<DLListItem>, | 39 | pub extra: Vec<DLListItem>, |
33 | 40 | ||
34 | pub role: Option<String> | 41 | pub role: Option<String>, |
35 | } | 42 | } |
36 | 43 | ||
37 | pub fn get_sections() -> HashMap<String, Section> { | 44 | impl Names { |
45 | pub fn new() -> Self { | ||
46 | Names { romaji: String::new(), japanese: String::new(), aliases: vec![] } | ||
47 | } | ||
48 | } | ||
49 | |||
50 | impl Images { | ||
51 | pub fn new() -> Self { | ||
52 | Images { thumb: String::new(), full: String::new() } | ||
53 | } | ||
54 | } | ||
55 | |||
56 | impl Traits { | ||
57 | pub fn new() -> Self { | ||
58 | Traits { official: vec![], indexed: vec![] } | ||
59 | } | ||
60 | } | ||
61 | |||
62 | impl Character { | ||
63 | pub fn new() -> Self { | ||
64 | Character { | ||
65 | name: Names::new(), | ||
66 | image: Images::new(), | ||
67 | tags: vec![], | ||
68 | traits: Traits::new(), | ||
69 | assignments: vec![], | ||
70 | chars_similar_traits: vec![], | ||
71 | extra: vec![], | ||
72 | role: None | ||
73 | } | ||
74 | } | ||
75 | |||
76 | pub fn parse(&mut self, buf: &str) { | ||
77 | let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+</H3>.*?<dl>(.*?)</dl>"#).unwrap(); | ||
78 | let mut sections = get_sections(); | ||
79 | section::process(&buf, &mut sections); | ||
80 | |||
81 | let caps = re_extras.captures(&buf); | ||
82 | if caps.is_some() { | ||
83 | self.extra = dl_list::parse(caps.unwrap().at(1).unwrap()); | ||
84 | } | ||
85 | |||
86 | { | ||
87 | let name: &Section = &sections["name".into()]; | |
88 | let image: &Section = &sections["image".into()]; | |
89 | let misc: &Section = &sections["misc".into()]; | |
90 | |||
91 | self.name.romaji = name.data["romaji".into()].clone(); | ||
92 | self.name.japanese = name.data["japanese".into()].clone(); | ||
93 | |||
94 | if name.data["aliases".into()].len() > 0 { | ||
95 | self.name.aliases = name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(); | ||
96 | } | ||
97 | |||
98 | self.image.thumb = image.data["thumb".into()].clone(); | ||
99 | self.image.full = image.data["full".into()].clone(); | ||
100 | |||
101 | self.tags = tags::parse(&(&sections["tags".into()] as &Section).data["tags_raw".into()]); | |
102 | |||
103 | self.traits.official = dl_list::parse(&(&sections["traits"] as &Section).data["official_raw".into()]); | |
104 | self.traits.indexed = dl_list::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]); | |
105 | |||
106 | self.assignments = tiles::parse_tile_link_ids(&(&sections["assignments"] as &Section).data["raw".into()], "series"); | |
107 | self.chars_similar_traits = tiles::parse_tile_link_ids(&(&sections["chars_similar_traits"] as &Section).data["raw".into()], "character"); | |
108 | |||
109 | if misc.data["role".into()].len() > 0 { | ||
110 | self.role = Some(misc.data["role".into()].clone()); | ||
111 | } | ||
112 | } | ||
113 | } | ||
114 | } | ||
115 | |||
116 | fn get_sections() -> HashMap<String, Section> { | ||
38 | let mut s: HashMap<String, Section> = HashMap::new(); | 117 | let mut s: HashMap<String, Section> = HashMap::new(); |
39 | 118 | ||
40 | s.insert("name".into(), Section::new("name", r#"(?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>.*?Aliases.*?<TD>(.*?)\s?</TD>"#, vec!["romaji", "japanese", "aliases"])); | 119 | s.insert("name".into(), Section::new("name", r#"(?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>.*?Aliases.*?<TD>(.*?)\s?</TD>"#, vec!["romaji", "japanese", "aliases"])); |
@@ -42,5 +121,8 @@ pub fn get_sections() -> HashMap<String, Section> { | |||
42 | s.insert("image".into(), Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb", "full"])); | 121 | s.insert("image".into(), Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb", "full"])); |
43 | s.insert("tags".into(), Section::new("tags", r#"(?is)tagged as</P>.*?<TH>(.*?)</TH>"#, vec!["tags_raw"])); | 122 | s.insert("tags".into(), Section::new("tags", r#"(?is)tagged as</P>.*?<TH>(.*?)</TH>"#, vec!["tags_raw"])); |
44 | s.insert("traits".into(), Section::new("traits", r#"(?is)indexed traits</P>.*?<dl>(.*?)</dl>.*?official traits\s?</P>.*?<dl>(.*?)</dl>"#, vec!["indexed_raw", "official_raw"])); | 123 | s.insert("traits".into(), Section::new("traits", r#"(?is)indexed traits</P>.*?<dl>(.*?)</dl>.*?official traits\s?</P>.*?<dl>(.*?)</dl>"#, vec!["indexed_raw", "official_raw"])); |
124 | s.insert("assignments".into(), Section::new("assignments", r#"(?is)appears in the following</P>(.*?)</UL>"#, vec!["raw"])); | ||
125 | s.insert("chars_similar_traits".into(), Section::new("assignments", r#"(?is)with Similar Traits</H3>(.*?)</UL>"#, vec!["raw"])); | ||
126 | |||
45 | s | 127 | s |
46 | } | 128 | } |
diff --git a/src/main.rs b/src/main.rs index e76da66..ea89b02 100644 --- a/src/main.rs +++ b/src/main.rs | |||
@@ -1,5 +1,4 @@ | |||
1 | extern crate regex; | 1 | extern crate regex; |
2 | use regex::Regex; | ||
3 | extern crate yaml_rust; | 2 | extern crate yaml_rust; |
4 | 3 | ||
5 | extern crate walkdir; | 4 | extern crate walkdir; |
@@ -9,26 +8,21 @@ use std::io::prelude::*; | |||
9 | use std::fs::File; | 8 | use std::fs::File; |
10 | use std::env; | 9 | use std::env; |
11 | use std::path::Path; | 10 | use std::path::Path; |
12 | use std::collections::HashMap; | ||
13 | 11 | ||
14 | mod pre_process; | 12 | mod pre_process; |
15 | mod section; | 13 | mod section; |
16 | use section::Section; | ||
17 | 14 | ||
18 | mod character; | 15 | mod character; |
19 | use character::{Images, Names, Traits, Character}; | 16 | use character::Character; |
20 | 17 | ||
21 | mod tags; | 18 | mod tags; |
22 | mod dl_list; | 19 | mod dl_list; |
23 | use dl_list::DLListItem; | 20 | mod tiles; |
24 | 21 | ||
25 | fn main() { | 22 | fn main() { |
26 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); | 23 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); |
27 | let base_path = Path::new(&raw_files); | 24 | let base_path = Path::new(&raw_files); |
28 | 25 | ||
29 | let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+</H3>.*?<dl>(.*?)</dl>"#).unwrap(); | ||
30 | |||
31 | let mut sections: HashMap<String, Section> = character::get_sections(); | ||
32 | 26 | ||
33 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 27 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { |
34 | let mut f = File::open(entry.path()).expect("could not open file"); | 28 | let mut f = File::open(entry.path()).expect("could not open file"); |
@@ -40,43 +34,8 @@ fn main() { | |||
40 | 34 | ||
41 | let buf = pre_process::strip_irrelevant_content(&buf); | 35 | let buf = pre_process::strip_irrelevant_content(&buf); |
42 | 36 | ||
43 | section::process(&buf, &mut sections); | 37 | let mut char = Character::new(); |
44 | 38 | char.parse(&buf); | |
45 | // find optional extra details | 39 | println!("{:?}", char); |
46 | let mut extra_details: Vec<DLListItem> = vec![]; | ||
47 | let caps = re_extras.captures(&buf); | ||
48 | if caps.is_some() { | ||
49 | extra_details = dl_list::parse(caps.unwrap().at(1).unwrap()); | ||
50 | } | ||
51 | |||
52 | { | ||
53 | let name: &Section = &sections["name".into()]; | |
54 | let image: &Section = &sections["image".into()]; | |
55 | let misc: &Section = &sections["misc".into()]; | |
56 | println!("{:?}", Character { | ||
57 | name: Names { | ||
58 | romaji: name.data["romaji".into()].clone(), | ||
59 | japanese: name.data["japanese".into()].clone(), | ||
60 | aliases: match name.data["aliases".into()].len() > 0 { | ||
61 | true => name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(), | ||
62 | false => vec![] | ||
63 | } | ||
64 | }, | ||
65 | image: Images { | ||
66 | thumb: image.data["thumb".into()].clone(), | ||
67 | full: image.data["full".into()].clone(), | ||
68 | }, | ||
69 | tags: tags::parse(&(&sections["tags".into()] as &Section).data["tags_raw".into()]), | |
70 | traits: Traits { | ||
71 | official: dl_list::parse(&(&sections["traits"] as &Section).data["official_raw".into()]), | |
72 | indexed: dl_list::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]), | |
73 | }, | ||
74 | extra: extra_details, | ||
75 | role: match misc.data["role".into()].len() > 0 { | ||
76 | true => Some(misc.data["role".into()].clone()), | ||
77 | false => None | ||
78 | } | ||
79 | }); | ||
80 | } | ||
81 | } | 40 | } |
82 | } | 41 | } |
diff --git a/src/tiles.rs b/src/tiles.rs new file mode 100644 index 0000000..1ed7320 --- /dev/null +++ b/src/tiles.rs | |||
@@ -0,0 +1,9 @@ | |||
1 | use super::regex::Regex; | ||
2 | |||
3 | use std::str::FromStr; | ||
4 | |||
5 | pub fn parse_tile_link_ids(s: &str, php_file: &str) -> Vec<u32> { | ||
6 | let re = Regex::new(&format!(r#"(?is)<A href="{}\.php\?id=([0-9]+)"><IMG"#, php_file)).unwrap(); | ||
7 | |||
8 | re.captures_iter(s).map(|cap| u32::from_str(cap.at(1).unwrap()).unwrap()).collect() | ||
9 | } | ||