From a6b37fa5e1bd505adfae4888896be2a3aa49ec3a Mon Sep 17 00:00:00 2001
From: jan
Date: Sat, 1 Oct 2016 01:16:19 +0200
Subject: parsen von traits und tags, wir koennen den configkram eigentlich
wieder komplett entfernen. klappt so nicht wirklich.
diff --git a/config.yml b/config.yml
index 07b5d03..313d8e2 100644
--- a/config.yml
+++ b/config.yml
@@ -1,11 +1,25 @@
sections:
name:
- pattern: (?is)Romaji Name.*?(.*?)\s? | .*?Japanese Name.*?(.*?)\s? |
+ pattern: (?is)Romaji Name.*?(.*?)\s? | .*?Japanese Name.*?(.*?)\s? | .*?Aliases.*?(.*?)\s? |
groups:
- romaji
- japanese
+ - aliases
+ misc:
+ pattern: (?is)Role.*?(.*?)\s? |
+ groups:
+ - role
image:
pattern: (?is).*
View Full Size Image
groups:
- thumb
- full
+ tags:
+ pattern: (?is)tagged as
.*?(.*?) |
+ groups:
+ - tags_raw
+ traits:
+ pattern: (?is)indexed traits
.*?(.*?)
.*?official traits\s?.*?(.*?)
+ groups:
+ - indexed_raw
+ - official_raw
diff --git a/src/character.rs b/src/character.rs
index 9c548c1..98c4850 100644
--- a/src/character.rs
+++ b/src/character.rs
@@ -1,3 +1,31 @@
+use super::tags::Tag;
+use super::traits::Trait;
+
+#[derive(Debug)]
+pub struct Names {
+ pub romaji: String,
+ pub japanese: String,
+ pub aliases: Vec<String>,
+}
+
+#[derive(Debug)]
+pub struct Images {
+ pub thumb: String,
+ pub full: String,
+}
+
+#[derive(Debug)]
+pub struct Traits {
+ pub official: Vec<Trait>,
+ pub indexed: Vec<Trait>,
+}
+
+#[derive(Debug)]
pub struct Character {
-
-}
\ No newline at end of file
+ pub name: Names,
+ pub image: Images,
+ pub tags: Vec<Tag>,
+ pub traits: Traits,
+
+ pub role: Option<String>
+}
diff --git a/src/config.rs b/src/config.rs
index fc8ee03..f491852 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -15,7 +15,7 @@ pub struct Config {
}
impl Config {
- pub fn from_file(p: &str) -> Self {
+ pub fn from_file(p: &str, expected: Vec<&'static str>) -> Self {
let mut f = File::open(p).unwrap();
let mut buf = String::new();
f.read_to_string(&mut buf).unwrap();
@@ -23,8 +23,6 @@ impl Config {
let doc = &docs[0];
- println!("{:?}", doc);
-
let mut sections: HashMap = HashMap::new();
for (name, entry) in doc["sections"].as_hash().unwrap() {
sections.insert(name.as_str().unwrap().into(),
@@ -39,6 +37,27 @@ impl Config {
});
}
+
+ for ex in &expected {
+ if !sections.contains_key(&ex.to_string()) {
+ panic!("config: section '{}' not found", ex);
+ }
+ }
+
+ {
+ let traits = &sections["traits"];
+ if !traits.groups.contains(&"indexed_raw".to_string()) {
+ panic!("config: no group 'indexed_raw' found in section 'traits'");
+ }
+ if !traits.groups.contains(&"official_raw".to_string()) {
+ panic!("config: no group 'official_raw' found in section 'traits'");
+ }
+ let tags = &sections["tags"];
+ if !tags.groups.contains(&"tags_raw".to_string()) {
+ panic!("config: no group 'tags_raw' found in section 'tags'");
+ }
+ }
+
Config { sections: sections }
}
}
diff --git a/src/main.rs b/src/main.rs
index 7d06fe7..ee8c3eb 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -16,15 +16,21 @@ use pre_process::Section;
mod config;
use config::Config;
+mod character;
+use character::{Images, Names, Traits, Character};
+
+mod tags;
+mod traits;
+
fn main() {
let raw_files = env::var("RAW_FILES").unwrap_or("characters".into());
let base_path = Path::new(&raw_files);
- let cfg = Config::from_file("config.yml");
+ let cfg = Config::from_file("config.yml", vec!["name", "image", "misc", "tags", "traits"]);
- let mut sections: Vec<Section> = vec![];
+ let mut sections: HashMap<String, Section> = HashMap::new();
for (name, sec) in &cfg.sections {
- sections.push(Section::new(&name, &sec.pattern, sec.groups.clone()));
+ sections.insert(name.clone(), Section::new(&name, &sec.pattern, sec.groups.clone()));
}
for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) {
@@ -39,11 +45,30 @@ fn main() {
pre_process::split_sections(&buf, &mut sections);
- let mut char: HashMap<String, HashMap<String, String>> = HashMap::new();
- for s in &sections {
- char.insert(s.name.clone(), s.data.clone());
+ {
+ let name: &Section = &sections["name".into()];
+ let image: &Section = &sections["image".into()];
+ let misc: &Section = &sections["misc".into()];
+ println!("{:?}", Character {
+ name: Names {
+ romaji: name.data["romaji".into()].clone(),
+ japanese: name.data["japanese".into()].clone(),
+ aliases: name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(),
+ },
+ image: Images {
+ thumb: image.data["thumb".into()].clone(),
+ full: image.data["full".into()].clone(),
+ },
+ tags: tags::parse(&(&sections["tags".into()] as &Section).data["tags_raw".into()]),
+ traits: Traits {
+ official: traits::parse(&(&sections["traits"] as &Section).data["official_raw".into()]),
+ indexed: traits::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]),
+ },
+ role: match misc.data["role".into()].len() > 0 {
+ true => Some(misc.data["role".into()].clone()),
+ false => None
+ }
+ });
}
-
- println!("{:?}", char);
}
}
diff --git a/src/pre_process.rs b/src/pre_process.rs
index a4d6c14..273562d 100644
--- a/src/pre_process.rs
+++ b/src/pre_process.rs
@@ -34,8 +34,8 @@ impl Section {
}
}
-pub fn split_sections(d: &str, s: &mut Vec<Section>) {
- for section in s {
+pub fn split_sections(d: &str, s: &mut HashMap<String, Section>) {
+ for (_, section) in s {
for m in section.re.captures_iter(d) {
assert!(m.len() >= section.keys.len() + 1);
diff --git a/src/tags.rs b/src/tags.rs
new file mode 100644
index 0000000..0fdf815
--- /dev/null
+++ b/src/tags.rs
@@ -0,0 +1,15 @@
+use super::regex::Regex;
+
+use std::str::FromStr;
+
+#[derive(Debug)]
+pub struct Tag {
+ pub id: u32,
+ pub name: String,
+}
+
+pub fn parse(s: &str) -> Vec<Tag> {
+ let reg_tag = Regex::new(r#"(?is)(.*?)"#).unwrap();
+
+ reg_tag.captures_iter(s).map(|c| Tag { id: u32::from_str(c.at(1).unwrap()).unwrap(), name: c.at(2).unwrap().into() }).collect()
+}
diff --git a/src/traits.rs b/src/traits.rs
new file mode 100644
index 0000000..db1b2eb
--- /dev/null
+++ b/src/traits.rs
@@ -0,0 +1,13 @@
+use super::regex::Regex;
+
+#[derive(Debug)]
+pub struct Trait {
+ name: String,
+ value: String,
+}
+
+pub fn parse(s: &str) -> Vec<Trait> {
+ let reg_trait = Regex::new(r#"(?is)(.*?).*?(.*?)"#).unwrap();
+
+ reg_trait.captures_iter(s).map(|c| Trait { name: c.at(1).unwrap().into(), value: c.at(2).unwrap().into() }).collect()
+}
--
cgit v0.10.1