From ad63b49b94bf4b4596e6420e37d265a57b77d731 Mon Sep 17 00:00:00 2001
From: jan
Date: Sat, 1 Oct 2016 12:45:55 +0200
Subject: config entfernt, extra details parsen
diff --git a/src/character.rs b/src/character.rs
index 98c4850..b82ac18 100644
--- a/src/character.rs
+++ b/src/character.rs
@@ -1,5 +1,8 @@
use super::tags::Tag;
-use super::traits::Trait;
+use super::dl_list::DLListItem;
+use super::section::Section;
+
+use std::collections::HashMap;
#[derive(Debug)]
pub struct Names {
@@ -16,8 +19,8 @@ pub struct Images {
#[derive(Debug)]
pub struct Traits {
- pub official: Vec<Trait>,
- pub indexed: Vec<Trait>,
+ pub official: Vec<DLListItem>,
+ pub indexed: Vec<DLListItem>,
}
#[derive(Debug)]
@@ -26,6 +29,18 @@ pub struct Character {
pub image: Images,
 pub tags: Vec<Tag>,
 pub traits: Traits,
+ pub extra: Vec<DLListItem>,
 pub role: Option<String>
}
+
+pub fn get_sections() -> HashMap<String, Section> {
+ let mut s: HashMap<String, Section> = HashMap::new();
+
+ s.insert("name".into(), Section::new("name", r#"(?is)Romaji Name.*?(.*?)\s? | .*?Japanese Name.*?(.*?)\s? | .*?Aliases.*?(.*?)\s? | "#, vec!["romaji", "japanese", "aliases"]));
+ s.insert("misc".into(), Section::new("misc", r#"(?is)Role.*?(.*?)\s? | "#, vec!["role"]));
+ s.insert("image".into(), Section::new("image", r#"(?is).*
View Full Size Image"#, vec!["thumb", "full"]));
+ s.insert("tags".into(), Section::new("tags", r#"(?is)tagged as
.*?(.*?) | "#, vec!["tags_raw"]));
+ s.insert("traits".into(), Section::new("traits", r#"(?is)indexed traits
.*?(.*?)
.*?official traits\s?.*?(.*?)
"#, vec!["indexed_raw", "official_raw"]));
+ s
+}
diff --git a/src/config.rs b/src/config.rs
deleted file mode 100644
index f491852..0000000
--- a/src/config.rs
+++ /dev/null
@@ -1,63 +0,0 @@
-extern crate yaml_rust;
-use self::yaml_rust::YamlLoader;
-
-use std::collections::HashMap;
-use std::fs::File;
-use std::io::prelude::*;
-
-pub struct SectionConfig {
- pub pattern: String,
- pub groups: Vec<String>,
-}
-
-pub struct Config {
- pub sections: HashMap<String, SectionConfig>,
-}
-
-impl Config {
- pub fn from_file(p: &str, expected: Vec<&'static str>) -> Self {
- let mut f = File::open(p).unwrap();
- let mut buf = String::new();
- f.read_to_string(&mut buf).unwrap();
- let docs = YamlLoader::load_from_str(&buf).unwrap();
-
- let doc = &docs[0];
-
- let mut sections: HashMap<String, SectionConfig> = HashMap::new();
- for (name, entry) in doc["sections"].as_hash().unwrap() {
- sections.insert(name.as_str().unwrap().into(),
- SectionConfig {
- pattern: entry["pattern"].as_str().unwrap().into(),
- groups: entry["groups"]
- .as_vec()
- .unwrap()
- .into_iter()
- .map(|v| v.as_str().unwrap().into())
- .collect(),
- });
- }
-
-
- for ex in &expected {
- if !sections.contains_key(&ex.to_string()) {
- panic!("config: section '{}' not found", ex);
- }
- }
-
- {
- let traits = &sections["traits"];
- if !traits.groups.contains(&"indexed_raw".to_string()) {
- panic!("config: no group 'indexed_raw' found in section 'traits'");
- }
- if !traits.groups.contains(&"official_raw".to_string()) {
- panic!("config: no group 'official_raw' found in section 'traits'");
- }
- let tags = &sections["tags"];
- if !tags.groups.contains(&"tags_raw".to_string()) {
- panic!("config: no group 'tags_raw' found in section 'tags'");
- }
- }
-
- Config { sections: sections }
- }
-}
diff --git a/src/dl_list.rs b/src/dl_list.rs
new file mode 100644
index 0000000..979c332
--- /dev/null
+++ b/src/dl_list.rs
@@ -0,0 +1,13 @@
+use super::regex::Regex;
+
+#[derive(Debug)]
+pub struct DLListItem {
+ name: String,
+ value: String,
+}
+
+pub fn parse(s: &str) -> Vec<DLListItem> {
+ let reg_list_item = Regex::new(r#"(?is)(.*?).*?(.*?)"#).unwrap();
+
+ reg_list_item.captures_iter(s).map(|c| DLListItem { name: c.at(1).unwrap().into(), value: c.at(2).unwrap().into() }).collect()
+}
diff --git a/src/main.rs b/src/main.rs
index ee8c3eb..e76da66 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,5 @@
extern crate regex;
+use regex::Regex;
extern crate yaml_rust;
extern crate walkdir;
@@ -11,27 +12,23 @@ use std::path::Path;
use std::collections::HashMap;
mod pre_process;
-use pre_process::Section;
-
-mod config;
-use config::Config;
+mod section;
+use section::Section;
mod character;
use character::{Images, Names, Traits, Character};
mod tags;
-mod traits;
+mod dl_list;
+use dl_list::DLListItem;
fn main() {
let raw_files = env::var("RAW_FILES").unwrap_or("characters".into());
let base_path = Path::new(&raw_files);
- let cfg = Config::from_file("config.yml", vec!["name", "image", "misc", "tags", "traits"]);
+ let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+.*?(.*?)
"#).unwrap();
- let mut sections: HashMap<String, Section> = HashMap::new();
- for (name, sec) in &cfg.sections {
- sections.insert(name.clone(), Section::new(&name, &sec.pattern, sec.groups.clone()));
- }
+ let mut sections: HashMap<String, Section> = character::get_sections();
for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) {
let mut f = File::open(entry.path()).expect("could not open file");
@@ -43,7 +40,14 @@ fn main() {
let buf = pre_process::strip_irrelevant_content(&buf);
- pre_process::split_sections(&buf, &mut sections);
+ section::process(&buf, &mut sections);
+
+ // find optional extra details
+ let mut extra_details: Vec<DLListItem> = vec![];
+ let caps = re_extras.captures(&buf);
+ if caps.is_some() {
+ extra_details = dl_list::parse(caps.unwrap().at(1).unwrap());
+ }
{
 let name: &Section = &sections["name".into()];
@@ -53,7 +57,10 @@ fn main() {
name: Names {
romaji: name.data["romaji".into()].clone(),
japanese: name.data["japanese".into()].clone(),
- aliases: name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(),
+ aliases: match name.data["aliases".into()].len() > 0 {
+ true => name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect(),
+ false => vec![]
+ }
},
image: Images {
thumb: image.data["thumb".into()].clone(),
@@ -61,9 +68,10 @@ fn main() {
},
 tags: tags::parse(&(&sections["tags".into()] as &Section).data["tags_raw".into()]),
 traits: Traits {
- official: traits::parse(&(&sections["traits"] as &Section).data["official_raw".into()]),
- indexed: traits::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]),
+ official: dl_list::parse(&(&sections["traits"] as &Section).data["official_raw".into()]),
+ indexed: dl_list::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]),
},
+ extra: extra_details,
role: match misc.data["role".into()].len() > 0 {
true => Some(misc.data["role".into()].clone()),
false => None
diff --git a/src/pre_process.rs b/src/pre_process.rs
index 273562d..877ddc3 100644
--- a/src/pre_process.rs
+++ b/src/pre_process.rs
@@ -1,7 +1,3 @@
-use super::regex::Regex;
-
-use std::collections::HashMap;
-
pub fn strip_irrelevant_content(s: &str) -> String {
let mut retn = "";
match s.find(r#""#) {
@@ -16,34 +12,4 @@ pub fn strip_irrelevant_content(s: &str) -> String {
return retn.into();
}
-pub struct Section {
- pub name: String,
- pub re: Regex,
- pub keys: Vec<String>,
- pub data: HashMap<String, String>,
-}
-impl Section {
- pub fn new(name: &str, re: &str, groups: Vec<String>) -> Self {
- Section {
- name: name.into(),
- re: Regex::new(re).unwrap(),
- keys: groups,
- data: HashMap::new(),
- }
- }
-}
-
-pub fn split_sections(d: &str, s: &mut HashMap<String, Section>) {
- for (_, section) in s {
- for m in section.re.captures_iter(d) {
- assert!(m.len() >= section.keys.len() + 1);
-
- let mut idx = 0;
- for key in &section.keys {
- section.data.insert(key.clone(), m.at(idx + 1).unwrap().into());
- idx += 1;
- }
- }
- }
-}
diff --git a/src/section.rs b/src/section.rs
new file mode 100644
index 0000000..7e492b1
--- /dev/null
+++ b/src/section.rs
@@ -0,0 +1,34 @@
+use super::regex::Regex;
+use std::collections::HashMap;
+
+pub struct Section {
+ pub name: String,
+ pub re: Regex,
+ pub keys: Vec<String>,
+ pub data: HashMap<String, String>,
+}
+
+impl Section {
+ pub fn new(name: &str, re: &str, groups: Vec<&'static str>) -> Self {
+ Section {
+ name: name.into(),
+ re: Regex::new(re).unwrap(),
+ keys: groups.into_iter().map(|s| s.into()).collect(),
+ data: HashMap::new(),
+ }
+ }
+}
+
+pub fn process(d: &str, s: &mut HashMap<String, Section>) {
+ for (_, section) in s {
+ for m in section.re.captures_iter(d) {
+ assert!(m.len() >= section.keys.len() + 1);
+
+ let mut idx = 0;
+ for key in &section.keys {
+ section.data.insert(key.clone(), m.at(idx + 1).unwrap().into());
+ idx += 1;
+ }
+ }
+ }
+}
diff --git a/src/traits.rs b/src/traits.rs
deleted file mode 100644
index db1b2eb..0000000
--- a/src/traits.rs
+++ /dev/null
@@ -1,13 +0,0 @@
-use super::regex::Regex;
-
-#[derive(Debug)]
-pub struct Trait {
- name: String,
- value: String,
-}
-
-pub fn parse(s: &str) -> Vec<Trait> {
- let reg_trait = Regex::new(r#"(?is)(.*?).*?(.*?)"#).unwrap();
-
- reg_trait.captures_iter(s).map(|c| Trait { name: c.at(1).unwrap().into(), value: c.at(2).unwrap().into() }).collect()
-}
--
cgit v0.10.1