diff options
| -rw-r--r-- | src/character.rs | 86 | ||||
| -rw-r--r-- | src/main.rs | 51 | ||||
| -rw-r--r-- | src/tiles.rs | 9 |
3 files changed, 98 insertions, 48 deletions
diff --git a/src/character.rs b/src/character.rs index b82ac18..b0322dd 100644 --- a/src/character.rs +++ b/src/character.rs | |||
| @@ -1,6 +1,11 @@ | |||
| 1 | use super::tags; | ||
| 1 | use super::tags::Tag; | 2 | use super::tags::Tag; |
| 3 | use super::dl_list; | ||
| 2 | use super::dl_list::DLListItem; | 4 | use super::dl_list::DLListItem; |
| 5 | use super::section; | ||
| 3 | use super::section::Section; | 6 | use super::section::Section; |
| 7 | use super::regex::Regex; | ||
| 8 | use super::tiles; | ||
| 4 | 9 | ||
| 5 | use std::collections::HashMap; | 10 | use std::collections::HashMap; |
| 6 | 11 | ||
| @@ -29,12 +34,86 @@ pub struct Character { | |||
| 29 | pub image: Images, | 34 | pub image: Images, |
| 30 | pub tags: Vec<Tag>, | 35 | pub tags: Vec<Tag>, |
| 31 | pub traits: Traits, | 36 | pub traits: Traits, |
| 37 | pub assignments: Vec<u32>, | ||
| 38 | pub chars_similar_traits: Vec<u32>, | ||
| 32 | pub extra: Vec<DLListItem>, | 39 | pub extra: Vec<DLListItem>, |
| 33 | 40 | ||
| 34 | pub role: Option<String> | 41 | pub role: Option<String>, |
| 35 | } | 42 | } |
| 36 | 43 | ||
| 37 | pub fn get_sections() -> HashMap<String, Section> { | 44 | impl Names { |
| 45 | pub fn new() -> Self { | ||
| 46 | Names { romaji: String::new(), japanese: String::new(), aliases: vec![] } | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | impl Images { | ||
| 51 | pub fn new() -> Self { | ||
| 52 | Images { thumb: String::new(), full: String::new() } | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | impl Traits { | ||
| 57 | pub fn new() -> Self { | ||
| 58 | Traits { official: vec![], indexed: vec![] } | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | impl Character { | ||
| 63 | pub fn new() -> Self { | ||
| 64 | Character { | ||
| 65 | name: Names::new(), | ||
| 66 | image: Images::new(), | ||
| 67 | tags: vec![], | ||
| 68 | traits: Traits::new(), | ||
| 69 | assignments: vec![], | ||
| 70 | chars_similar_traits: vec![], | ||
| 71 | extra: vec![], | ||
| 72 | role: None | ||
| 73 | } | ||
| 74 | } | ||
| 75 | |||
| 76 | pub fn parse(&mut self, buf: &str) { | ||
| 77 | let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+</H3>.*?<dl>(.*?)</dl>"#).unwrap(); | ||
| 78 | let mut sections = get_sections(); | ||
| 79 | section::process(&buf, &mut sections); | ||
| 80 | |||
| 81 | let caps = re_extras.captures(&buf); | ||
| 82 | if caps.is_some() { | ||
| 83 | self.extra = dl_list::parse(caps.unwrap().at(1).unwrap()); | ||
| 84 | } | ||
| 85 | |||
| 86 | { | ||
| 87 | let name: &Section = &sections["name".into()]; | ||
| 88 | let image: &Section = &sections["image".into()]; | ||
| 89 | let misc: &Section = &sections["misc".into()]; | ||
| 90 | |||
| 91 | self.name.romaji = name.data["romaji".into()].clone(); | ||
| 92 | self.name.japanese = name.data["japanese".into()].clone(); | ||
| 93 | |||
| 94 | if name.data["aliases".into()].len() > 0 { | ||
| 95 | self.name.aliases = name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(); | ||
| 96 | } | ||
| 97 | |||
| 98 | self.image.thumb = image.data["thumb".into()].clone(); | ||
| 99 | self.image.full = image.data["full".into()].clone(); | ||
| 100 | |||
| 101 | self.tags = tags::parse(&(&sections["tags".into()] as &Section).data["tags_raw".into()]); | ||
| 102 | |||
| 103 | self.traits.official = dl_list::parse(&(&sections["traits"] as &Section).data["official_raw".into()]); | ||
| 104 | self.traits.indexed = dl_list::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]); | ||
| 105 | |||
| 106 | self.assignments = tiles::parse_tile_link_ids(&(&sections["assignments"] as &Section).data["raw".into()], "series"); | ||
| 107 | self.chars_similar_traits = tiles::parse_tile_link_ids(&(&sections["chars_similar_traits"] as &Section).data["raw".into()], "character"); | ||
| 108 | |||
| 109 | if misc.data["role".into()].len() > 0 { | ||
| 110 | self.role = Some(misc.data["role".into()].clone()); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | } | ||
| 114 | } | ||
| 115 | |||
| 116 | fn get_sections() -> HashMap<String, Section> { | ||
| 38 | let mut s: HashMap<String, Section> = HashMap::new(); | 117 | let mut s: HashMap<String, Section> = HashMap::new(); |
| 39 | 118 | ||
| 40 | s.insert("name".into(), Section::new("name", r#"(?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>.*?Aliases.*?<TD>(.*?)\s?</TD>"#, vec!["romaji", "japanese", "aliases"])); | 119 | s.insert("name".into(), Section::new("name", r#"(?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>.*?Aliases.*?<TD>(.*?)\s?</TD>"#, vec!["romaji", "japanese", "aliases"])); |
| @@ -42,5 +121,8 @@ pub fn get_sections() -> HashMap<String, Section> { | |||
| 42 | s.insert("image".into(), Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb", "full"])); | 121 | s.insert("image".into(), Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb", "full"])); |
| 43 | s.insert("tags".into(), Section::new("tags", r#"(?is)tagged as</P>.*?<TH>(.*?)</TH>"#, vec!["tags_raw"])); | 122 | s.insert("tags".into(), Section::new("tags", r#"(?is)tagged as</P>.*?<TH>(.*?)</TH>"#, vec!["tags_raw"])); |
| 44 | s.insert("traits".into(), Section::new("traits", r#"(?is)indexed traits</P>.*?<dl>(.*?)</dl>.*?official traits\s?</P>.*?<dl>(.*?)</dl>"#, vec!["indexed_raw", "official_raw"])); | 123 | s.insert("traits".into(), Section::new("traits", r#"(?is)indexed traits</P>.*?<dl>(.*?)</dl>.*?official traits\s?</P>.*?<dl>(.*?)</dl>"#, vec!["indexed_raw", "official_raw"])); |
| 124 | s.insert("assignments".into(), Section::new("assignments", r#"(?is)appears in the following</P>(.*?)</UL>"#, vec!["raw"])); | ||
| 125 | s.insert("chars_similar_traits".into(), Section::new("assignments", r#"(?is)with Similar Traits</H3>(.*?)</UL>"#, vec!["raw"])); | ||
| 126 | |||
| 45 | s | 127 | s |
| 46 | } | 128 | } |
diff --git a/src/main.rs b/src/main.rs index e76da66..ea89b02 100644 --- a/src/main.rs +++ b/src/main.rs | |||
| @@ -1,5 +1,4 @@ | |||
| 1 | extern crate regex; | 1 | extern crate regex; |
| 2 | use regex::Regex; | ||
| 3 | extern crate yaml_rust; | 2 | extern crate yaml_rust; |
| 4 | 3 | ||
| 5 | extern crate walkdir; | 4 | extern crate walkdir; |
| @@ -9,26 +8,21 @@ use std::io::prelude::*; | |||
| 9 | use std::fs::File; | 8 | use std::fs::File; |
| 10 | use std::env; | 9 | use std::env; |
| 11 | use std::path::Path; | 10 | use std::path::Path; |
| 12 | use std::collections::HashMap; | ||
| 13 | 11 | ||
| 14 | mod pre_process; | 12 | mod pre_process; |
| 15 | mod section; | 13 | mod section; |
| 16 | use section::Section; | ||
| 17 | 14 | ||
| 18 | mod character; | 15 | mod character; |
| 19 | use character::{Images, Names, Traits, Character}; | 16 | use character::Character; |
| 20 | 17 | ||
| 21 | mod tags; | 18 | mod tags; |
| 22 | mod dl_list; | 19 | mod dl_list; |
| 23 | use dl_list::DLListItem; | 20 | mod tiles; |
| 24 | 21 | ||
| 25 | fn main() { | 22 | fn main() { |
| 26 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); | 23 | let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); |
| 27 | let base_path = Path::new(&raw_files); | 24 | let base_path = Path::new(&raw_files); |
| 28 | 25 | ||
| 29 | let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+</H3>.*?<dl>(.*?)</dl>"#).unwrap(); | ||
| 30 | |||
| 31 | let mut sections: HashMap<String, Section> = character::get_sections(); | ||
| 32 | 26 | ||
| 33 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | 27 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { |
| 34 | let mut f = File::open(entry.path()).expect("could not open file"); | 28 | let mut f = File::open(entry.path()).expect("could not open file"); |
| @@ -40,43 +34,8 @@ fn main() { | |||
| 40 | 34 | ||
| 41 | let buf = pre_process::strip_irrelevant_content(&buf); | 35 | let buf = pre_process::strip_irrelevant_content(&buf); |
| 42 | 36 | ||
| 43 | section::process(&buf, &mut sections); | 37 | let mut char = Character::new(); |
| 44 | 38 | char.parse(&buf); | |
| 45 | // find optional extra details | 39 | println!("{:?}", char); |
| 46 | let mut extra_details: Vec<DLListItem> = vec![]; | ||
| 47 | let caps = re_extras.captures(&buf); | ||
| 48 | if caps.is_some() { | ||
| 49 | extra_details = dl_list::parse(caps.unwrap().at(1).unwrap()); | ||
| 50 | } | ||
| 51 | |||
| 52 | { | ||
| 53 | let name: &Section = &sections["name".into()]; | ||
| 54 | let image: &Section = &sections["image".into()]; | ||
| 55 | let misc: &Section = &sections["misc".into()]; | ||
| 56 | println!("{:?}", Character { | ||
| 57 | name: Names { | ||
| 58 | romaji: name.data["romaji".into()].clone(), | ||
| 59 | japanese: name.data["japanese".into()].clone(), | ||
| 60 | aliases: match name.data["aliases".into()].len() > 0 { | ||
| 61 | true => name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(), | ||
| 62 | false => vec![] | ||
| 63 | } | ||
| 64 | }, | ||
| 65 | image: Images { | ||
| 66 | thumb: image.data["thumb".into()].clone(), | ||
| 67 | full: image.data["full".into()].clone(), | ||
| 68 | }, | ||
| 69 | tags: tags::parse(&(&sections["tags".into()] as &Section).data["tags_raw".into()]), | ||
| 70 | traits: Traits { | ||
| 71 | official: dl_list::parse(&(&sections["traits"] as &Section).data["official_raw".into()]), | ||
| 72 | indexed: dl_list::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]), | ||
| 73 | }, | ||
| 74 | extra: extra_details, | ||
| 75 | role: match misc.data["role".into()].len() > 0 { | ||
| 76 | true => Some(misc.data["role".into()].clone()), | ||
| 77 | false => None | ||
| 78 | } | ||
| 79 | }); | ||
| 80 | } | ||
| 81 | } | 40 | } |
| 82 | } | 41 | } |
diff --git a/src/tiles.rs b/src/tiles.rs new file mode 100644 index 0000000..1ed7320 --- /dev/null +++ b/src/tiles.rs | |||
| @@ -0,0 +1,9 @@ | |||
| 1 | use super::regex::Regex; | ||
| 2 | |||
| 3 | use std::str::FromStr; | ||
| 4 | |||
| 5 | pub fn parse_tile_link_ids(s: &str, php_file: &str) -> Vec<u32> { | ||
| 6 | let re = Regex::new(&format!(r#"(?is)<A href="{}\.php\?id=([0-9]+)"><IMG"#, php_file)).unwrap(); | ||
| 7 | |||
| 8 | re.captures_iter(s).map(|cap| u32::from_str(cap.at(1).unwrap()).unwrap()).collect() | ||
| 9 | } | ||
