about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author jan <jan@ruken.pw> 2016-10-01 17:07:35 (UTC)
committer jan <jan@ruken.pw> 2016-10-01 17:07:35 (UTC)
commit 9138d3e359fcf7283b78f48c8d2d58a492814773 (patch)
tree 3997199c187d64c662f69206e2b95da85536fe96
parent ad63b49b94bf4b4596e6420e37d265a57b77d731 (diff)
assignments & chars mit aehnlichen traits
-rw-r--r-- src/character.rs 86
-rw-r--r-- src/main.rs 51
-rw-r--r-- src/tiles.rs 9
3 files changed, 98 insertions, 48 deletions
diff --git a/src/character.rs b/src/character.rs
index b82ac18..b0322dd 100644
--- a/src/character.rs
+++ b/src/character.rs
@@ -1,6 +1,11 @@
1use super::tags;
1use super::tags::Tag; 2use super::tags::Tag;
3use super::dl_list;
2use super::dl_list::DLListItem; 4use super::dl_list::DLListItem;
5use super::section;
3use super::section::Section; 6use super::section::Section;
7use super::regex::Regex;
8use super::tiles;
4 9
5use std::collections::HashMap; 10use std::collections::HashMap;
6 11
@@ -29,12 +34,86 @@ pub struct Character {
29 pub image: Images, 34 pub image: Images,
30 pub tags: Vec<Tag>, 35 pub tags: Vec<Tag>,
31 pub traits: Traits, 36 pub traits: Traits,
37 pub assignments: Vec<u32>,
38 pub chars_similar_traits: Vec<u32>,
32 pub extra: Vec<DLListItem>, 39 pub extra: Vec<DLListItem>,
33 40
34 pub role: Option<String> 41 pub role: Option<String>,
35} 42}
36 43
37pub fn get_sections() -> HashMap<String, Section> { 44impl Names {
45 pub fn new() -> Self {
46 Names { romaji: String::new(), japanese: String::new(), aliases: vec![] }
47 }
48}
49
50impl Images {
51 pub fn new() -> Self {
52 Images { thumb: String::new(), full: String::new() }
53 }
54}
55
56impl Traits {
57 pub fn new() -> Self {
58 Traits { official: vec![], indexed: vec![] }
59 }
60}
61
62impl Character {
63 pub fn new() -> Self {
64 Character {
65 name: Names::new(),
66 image: Images::new(),
67 tags: vec![],
68 traits: Traits::new(),
69 assignments: vec![],
70 chars_similar_traits: vec![],
71 extra: vec![],
72 role: None
73 }
74 }
75
76 pub fn parse(&mut self, buf: &str) {
77 let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+</H3>.*?<dl>(.*?)</dl>"#).unwrap();
78 let mut sections = get_sections();
79 section::process(&buf, &mut sections);
80
81 let caps = re_extras.captures(&buf);
82 if caps.is_some() {
83 self.extra = dl_list::parse(caps.unwrap().at(1).unwrap());
84 }
85
86 {
87 let name: &Section = &sections["name".into()];
88 let image: &Section = &sections["image".into()];
89 let misc: &Section = &sections["misc".into()];
90
91 self.name.romaji = name.data["romaji".into()].clone();
92 self.name.japanese = name.data["japanese".into()].clone();
93
94 if name.data["aliases".into()].len() > 0 {
95 self.name.aliases = name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect();
96 }
97
98 self.image.thumb = image.data["thumb".into()].clone();
99 self.image.full = image.data["full".into()].clone();
100
101 self.tags = tags::parse(&(&sections["tags".into()] as &Section).data["tags_raw".into()]);
102
103 self.traits.official = dl_list::parse(&(&sections["traits"] as &Section).data["official_raw".into()]);
104 self.traits.indexed = dl_list::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]);
105
106 self.assignments = tiles::parse_tile_link_ids(&(&sections["assignments"] as &Section).data["raw".into()], "series");
107 self.chars_similar_traits = tiles::parse_tile_link_ids(&(&sections["chars_similar_traits"] as &Section).data["raw".into()], "character");
108
109 if misc.data["role".into()].len() > 0 {
110 self.role = Some(misc.data["role".into()].clone());
111 }
112 }
113 }
114}
115
116fn get_sections() -> HashMap<String, Section> {
38 let mut s: HashMap<String, Section> = HashMap::new(); 117 let mut s: HashMap<String, Section> = HashMap::new();
39 118
40 s.insert("name".into(), Section::new("name", r#"(?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>.*?Aliases.*?<TD>(.*?)\s?</TD>"#, vec!["romaji", "japanese", "aliases"])); 119 s.insert("name".into(), Section::new("name", r#"(?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>.*?Aliases.*?<TD>(.*?)\s?</TD>"#, vec!["romaji", "japanese", "aliases"]));
@@ -42,5 +121,8 @@ pub fn get_sections() -> HashMap<String, Section> {
42 s.insert("image".into(), Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb", "full"])); 121 s.insert("image".into(), Section::new("image", r#"(?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image"#, vec!["thumb", "full"]));
43 s.insert("tags".into(), Section::new("tags", r#"(?is)tagged as</P>.*?<TH>(.*?)</TH>"#, vec!["tags_raw"])); 122 s.insert("tags".into(), Section::new("tags", r#"(?is)tagged as</P>.*?<TH>(.*?)</TH>"#, vec!["tags_raw"]));
44 s.insert("traits".into(), Section::new("traits", r#"(?is)indexed traits</P>.*?<dl>(.*?)</dl>.*?official traits\s?</P>.*?<dl>(.*?)</dl>"#, vec!["indexed_raw", "official_raw"])); 123 s.insert("traits".into(), Section::new("traits", r#"(?is)indexed traits</P>.*?<dl>(.*?)</dl>.*?official traits\s?</P>.*?<dl>(.*?)</dl>"#, vec!["indexed_raw", "official_raw"]));
124 s.insert("assignments".into(), Section::new("assignments", r#"(?is)appears in the following</P>(.*?)</UL>"#, vec!["raw"]));
125 s.insert("chars_similar_traits".into(), Section::new("assignments", r#"(?is)with Similar Traits</H3>(.*?)</UL>"#, vec!["raw"]));
126
45 s 127 s
46} 128}
diff --git a/src/main.rs b/src/main.rs
index e76da66..ea89b02 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,4 @@
1extern crate regex; 1extern crate regex;
2use regex::Regex;
3extern crate yaml_rust; 2extern crate yaml_rust;
4 3
5extern crate walkdir; 4extern crate walkdir;
@@ -9,26 +8,21 @@ use std::io::prelude::*;
9use std::fs::File; 8use std::fs::File;
10use std::env; 9use std::env;
11use std::path::Path; 10use std::path::Path;
12use std::collections::HashMap;
13 11
14mod pre_process; 12mod pre_process;
15mod section; 13mod section;
16use section::Section;
17 14
18mod character; 15mod character;
19use character::{Images, Names, Traits, Character}; 16use character::Character;
20 17
21mod tags; 18mod tags;
22mod dl_list; 19mod dl_list;
23use dl_list::DLListItem; 20mod tiles;
24 21
25fn main() { 22fn main() {
26 let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); 23 let raw_files = env::var("RAW_FILES").unwrap_or("characters".into());
27 let base_path = Path::new(&raw_files); 24 let base_path = Path::new(&raw_files);
28 25
29 let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+</H3>.*?<dl>(.*?)</dl>"#).unwrap();
30
31 let mut sections: HashMap<String, Section> = character::get_sections();
32 26
33 for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { 27 for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) {
34 let mut f = File::open(entry.path()).expect("could not open file"); 28 let mut f = File::open(entry.path()).expect("could not open file");
@@ -40,43 +34,8 @@ fn main() {
40 34
41 let buf = pre_process::strip_irrelevant_content(&buf); 35 let buf = pre_process::strip_irrelevant_content(&buf);
42 36
43 section::process(&buf, &mut sections); 37 let mut char = Character::new();
44 38 char.parse(&buf);
45 // find optional extra details 39 println!("{:?}", char);
46 let mut extra_details: Vec<DLListItem> = vec![];
47 let caps = re_extras.captures(&buf);
48 if caps.is_some() {
49 extra_details = dl_list::parse(caps.unwrap().at(1).unwrap());
50 }
51
52 {
53 let name: &Section = &sections["name".into()];
54 let image: &Section = &sections["image".into()];
55 let misc: &Section = &sections["misc".into()];
56 println!("{:?}", Character {
57 name: Names {
58 romaji: name.data["romaji".into()].clone(),
59 japanese: name.data["japanese".into()].clone(),
60 aliases: match name.data["aliases".into()].len() > 0 {
61 true => name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(),
62 false => vec![]
63 }
64 },
65 image: Images {
66 thumb: image.data["thumb".into()].clone(),
67 full: image.data["full".into()].clone(),
68 },
69 tags: tags::parse(&(&sections["tags".into()] as &Section).data["tags_raw".into()]),
70 traits: Traits {
71 official: dl_list::parse(&(&sections["traits"] as &Section).data["official_raw".into()]),
72 indexed: dl_list::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]),
73 },
74 extra: extra_details,
75 role: match misc.data["role".into()].len() > 0 {
76 true => Some(misc.data["role".into()].clone()),
77 false => None
78 }
79 });
80 }
81 } 40 }
82} 41}
diff --git a/src/tiles.rs b/src/tiles.rs
new file mode 100644
index 0000000..1ed7320
--- /dev/null
+++ b/src/tiles.rs
@@ -0,0 +1,9 @@
1use super::regex::Regex;
2
3use std::str::FromStr;
4
5pub fn parse_tile_link_ids(s: &str, php_file: &str) -> Vec<u32> {
6 let re = Regex::new(&format!(r#"(?is)<A href="{}\.php\?id=([0-9]+)"><IMG"#, php_file)).unwrap();
7
8 re.captures_iter(s).map(|cap| u32::from_str(cap.at(1).unwrap()).unwrap()).collect()
9}