From 9138d3e359fcf7283b78f48c8d2d58a492814773 Mon Sep 17 00:00:00 2001 From: jan Date: Sat, 1 Oct 2016 19:07:35 +0200 Subject: assignments & chars mit aehnlichen traits diff --git a/src/character.rs b/src/character.rs index b82ac18..b0322dd 100644 --- a/src/character.rs +++ b/src/character.rs @@ -1,6 +1,11 @@ +use super::tags; use super::tags::Tag; +use super::dl_list; use super::dl_list::DLListItem; +use super::section; use super::section::Section; +use super::regex::Regex; +use super::tiles; use std::collections::HashMap; @@ -29,12 +34,86 @@ pub struct Character { pub image: Images, pub tags: Vec, pub traits: Traits, + pub assignments: Vec, + pub chars_similar_traits: Vec, pub extra: Vec, - pub role: Option + pub role: Option, } -pub fn get_sections() -> HashMap { +impl Names { + pub fn new() -> Self { + Names { romaji: String::new(), japanese: String::new(), aliases: vec![] } + } +} + +impl Images { + pub fn new() -> Self { + Images { thumb: String::new(), full: String::new() } + } +} + +impl Traits { + pub fn new() -> Self { + Traits { official: vec![], indexed: vec![] } + } +} + +impl Character { + pub fn new() -> Self { + Character { + name: Names::new(), + image: Images::new(), + tags: vec![], + traits: Traits::new(), + assignments: vec![], + chars_similar_traits: vec![], + extra: vec![], + role: None + } + } + + pub fn parse(&mut self, buf: &str) { + let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+.*?
(.*?)
"#).unwrap(); + let mut sections = get_sections(); + section::process(&buf, &mut sections); + + let caps = re_extras.captures(&buf); + if caps.is_some() { + self.extra = dl_list::parse(caps.unwrap().at(1).unwrap()); + } + + { + let name: &Section = §ions["name".into()]; + let image: &Section = §ions["image".into()]; + let misc: &Section = §ions["misc".into()]; + + self.name.romaji = name.data["romaji".into()].clone(); + self.name.japanese = name.data["japanese".into()].clone(); + + if name.data["aliases".into()].len() > 0 { + self.name.aliases = name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(); + } + + self.image.thumb = image.data["thumb".into()].clone(); + self.image.full = image.data["full".into()].clone(); + + self.tags = tags::parse(&(§ions["tags".into()] as &Section).data["tags_raw".into()]); + + self.traits.official = dl_list::parse(&(§ions["traits"] as &Section).data["official_raw".into()]); + self.traits.indexed = dl_list::parse(&(§ions["traits"] as &Section).data["indexed_raw".into()]); + + self.assignments = tiles::parse_tile_link_ids(&(§ions["assignments"] as &Section).data["raw".into()], "series"); + self.chars_similar_traits = tiles::parse_tile_link_ids(&(§ions["chars_similar_traits"] as &Section).data["raw".into()], "character"); + + if misc.data["role".into()].len() > 0 { + self.role = Some(misc.data["role".into()].clone()); + } + } + } +} + +fn get_sections() -> HashMap { let mut s: HashMap = HashMap::new(); s.insert("name".into(), Section::new("name", r#"(?is)Romaji Name.*?(.*?)\s?.*?Japanese Name.*?(.*?)\s?.*?Aliases.*?(.*?)\s?"#, vec!["romaji", "japanese", "aliases"])); @@ -42,5 +121,8 @@ pub fn get_sections() -> HashMap { s.insert("image".into(), Section::new("image", r#"(?is)

.*.*?

View Full Size Image"#, vec!["thumb", "full"])); s.insert("tags".into(), Section::new("tags", r#"(?is)tagged as

.*?(.*?)"#, vec!["tags_raw"])); s.insert("traits".into(), Section::new("traits", r#"(?is)indexed traits

.*?
(.*?)
.*?official traits\s?

.*?
(.*?)
"#, vec!["indexed_raw", "official_raw"])); + s.insert("assignments".into(), Section::new("assignments", r#"(?is)appears in the following

(.*?)"#, vec!["raw"])); + s.insert("chars_similar_traits".into(), Section::new("assignments", r#"(?is)with Similar Traits

(.*?)"#, vec!["raw"])); + s } diff --git a/src/main.rs b/src/main.rs index e76da66..ea89b02 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,4 @@ extern crate regex; -use regex::Regex; extern crate yaml_rust; extern crate walkdir; @@ -9,26 +8,21 @@ use std::io::prelude::*; use std::fs::File; use std::env; use std::path::Path; -use std::collections::HashMap; mod pre_process; mod section; -use section::Section; mod character; -use character::{Images, Names, Traits, Character}; +use character::Character; mod tags; mod dl_list; -use dl_list::DLListItem; +mod tiles; fn main() { let raw_files = env::var("RAW_FILES").unwrap_or("characters".into()); let base_path = Path::new(&raw_files); - let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+.*?
(.*?)
"#).unwrap(); - - let mut sections: HashMap = character::get_sections(); for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { let mut f = File::open(entry.path()).expect("could not open file"); @@ -40,43 +34,8 @@ fn main() { let buf = pre_process::strip_irrelevant_content(&buf); - section::process(&buf, &mut sections); - - // find optional extra details - let mut extra_details: Vec = vec![]; - let caps = re_extras.captures(&buf); - if caps.is_some() { - extra_details = dl_list::parse(caps.unwrap().at(1).unwrap()); - } - - { - let name: &Section = §ions["name".into()]; - let image: &Section = §ions["image".into()]; - let misc: &Section = §ions["misc".into()]; - println!("{:?}", Character { - name: Names { - romaji: name.data["romaji".into()].clone(), - japanese: name.data["japanese".into()].clone(), - aliases: match name.data["aliases".into()].len() > 0 { - true => name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(), - false => vec![] - } - }, - image: Images { - thumb: image.data["thumb".into()].clone(), - full: image.data["full".into()].clone(), - }, - tags: tags::parse(&(§ions["tags".into()] as &Section).data["tags_raw".into()]), - traits: Traits { - official: dl_list::parse(&(§ions["traits"] as &Section).data["official_raw".into()]), - indexed: dl_list::parse(&(§ions["traits"] as &Section).data["indexed_raw".into()]), - }, - extra: extra_details, - role: match misc.data["role".into()].len() > 0 { - true => Some(misc.data["role".into()].clone()), - false => None - } - }); - } + let mut char = Character::new(); + char.parse(&buf); + println!("{:?}", char); } } diff --git a/src/tiles.rs b/src/tiles.rs new file mode 100644 index 0000000..1ed7320 --- /dev/null +++ b/src/tiles.rs @@ -0,0 +1,9 @@ +use super::regex::Regex; + +use std::str::FromStr; + +pub fn parse_tile_link_ids(s: &str, php_file: &str) -> Vec { + let re = Regex::new(&format!(r#"(?is)