From 6d7a31be9c6fd413d407334c40d02c008b7334ec Mon Sep 17 00:00:00 2001
From: jan
Date: Sun, 9 Oct 2016 11:28:08 +0200
Subject: fixierung
diff --git a/Cargo.lock b/Cargo.lock
index 3efbb7e..c1f35dc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -173,3 +173,27 @@ name = "winapi-build"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
+[metadata]
+"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
+"checksum dtoa 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0dd841b58510c9618291ffa448da2e4e0f699d984d436122372f446dae62263d"
+"checksum itoa 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ae3088ea4baeceb0284ee9eea42f591226e6beaecf65373e41b38d95a1b8e7a1"
+"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
+"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
+"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
+"checksum num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "a16a42856a256b39c6d3484f097f6713e14feacd9bfb02290917904fae46c81c"
+"checksum quote 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5a5071e94480b788e482dd13592c7221b75da33717fd0fd74aee76a01c40b35b"
+"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
+"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
+"checksum serde 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)" = "f6b2f52afebb4708a21b9d721dbac942daa010f8478c6486daa48d62d5c70508"
+"checksum serde_codegen 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b64ecfe57712501e861b303982b549cfd56aed0ebf58823b36093d1807d69b"
+"checksum serde_codegen_internals 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "318f7e77aa5187391d74aaf4553d2189f56b0ce25e963414c951b97877ffdcec"
+"checksum serde_derive 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)" = "2c7c2b01e85ca1330ba408325f6e85b8b4bf980320b0bd3bc366510e457c443f"
+"checksum serde_json 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e5b3bb42fa42265df8a1822b3db2090bc8f9e17e8142599c76a5b854bc4e7b5b"
+"checksum syn 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "96fed4e825d615b0ffd74dabb1dc4c5a078ab44e2c8004798f01510edf6cf515"
+"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
+"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
+"checksum unicode-xid 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "36dff09cafb4ec7c8cf0023eb0b686cb6ce65499116a12201c9e11840ca01beb"
+"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
+"checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780"
+"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
+"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
diff --git a/Cargo.toml b/Cargo.toml
index 34e4f6e..88c86b4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,7 +6,6 @@ authors = ["jan "]
[dependencies]
regex = "0.1"
walkdir = "0.1"
-
serde = "0.8"
serde_derive = "0.8"
serde_json = "0.8"
diff --git a/src/character.rs b/src/character.rs
index 0630a45..f4686cf 100644
--- a/src/character.rs
+++ b/src/character.rs
@@ -42,19 +42,29 @@ pub struct Character {
impl Names {
pub fn new() -> Self {
- Names { romaji: String::new(), japanese: String::new(), aliases: vec![] }
+ Names {
+ romaji: String::new(),
+ japanese: String::new(),
+ aliases: vec![],
+ }
}
}
impl Images {
pub fn new() -> Self {
- Images { thumb: String::new(), full: String::new() }
+ Images {
+ thumb: String::new(),
+ full: String::new(),
+ }
}
}
impl Traits {
pub fn new() -> Self {
- Traits { official: vec![], indexed: vec![] }
+ Traits {
+ official: vec![],
+ indexed: vec![],
+ }
}
}
@@ -67,13 +77,14 @@ impl Character {
traits: Traits::new(),
assignments: vec![],
extra: vec![],
- role: None
+ role: None,
}
}
pub fn parse(&mut self, buf: &str) {
- let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+.*?(.*?)
"#).unwrap();
- let mut sections = get_sections();
+ let re_extras = Regex::new(r#"(?is)Extra Details \| [0-9]+.*?(.*?)
"#)
+ .unwrap();
+ let mut sections = get_sections();
section::process(&buf, &mut sections);
let caps = re_extras.captures(&buf);
@@ -89,40 +100,57 @@ impl Character {
self.name.romaji = name.data["romaji".into()].clone();
self.name.japanese = name.data["japanese".into()].clone();
-
+
if name.data["aliases".into()].len() > 0 {
- self.name.aliases = name.data["aliases".into()].split(", ").map(|s| s.to_string()).collect();
+ self.name.aliases = name.data["aliases".into()]
+ .split(", ")
+ .map(|s| s.to_string())
+ .collect();
}
-
+
self.image.thumb = image.data["thumb".into()].clone();
self.image.full = image.data["full".into()].clone();
-
+
if tags.data.contains_key("tags_raw".into()) {
self.tags = tags::parse(&tags.data["tags_raw".into()]);
}
- self.traits.official = dl_list::parse(&(&sections["traits"] as &Section).data["official_raw".into()]);
- self.traits.indexed = dl_list::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]);
+ self.traits.official =
+ dl_list::parse(&(&sections["traits"] as &Section).data["official_raw".into()]);
+ self.traits.indexed =
+ dl_list::parse(&(&sections["traits"] as &Section).data["indexed_raw".into()]);
self.assignments = tiles::parse_tile_link_ids(&(&sections["assignments"] as &Section).data["raw".into()], "series");
-
+
if misc.data["role".into()].len() > 0 {
self.role = Some(misc.data["role".into()].clone());
}
}
- }
+ }
}
fn get_sections() -> HashMap {
let mut s: HashMap = HashMap::new();
s.insert("name".into(), Section::new("name", r#"(?is)Romaji Name.*?(.*?)\s? | .*?Japanese Name.*?(.*?)\s? | .*?Aliases.*?(.*?)\s? | "#, vec!["romaji", "japanese", "aliases"]));
- s.insert("misc".into(), Section::new("misc", r#"(?is)Role.*?(.*?)\s? | "#, vec!["role"]));
+ s.insert("misc".into(),
+ Section::new("misc",
+ r#"(?is)Role.*?(.*?)\s? | "#,
+ vec!["role"]));
s.insert("image".into(), Section::new("image", r#"(?is).*
View Full Size Image"#, vec!["thumb", "full"]));
- s.insert("tags".into(), Section::new("tags", r#"(?is)tagged as
.*?(.*?) | "#, vec!["tags_raw"]));
+ s.insert("tags".into(),
+ Section::new("tags",
+ r#"(?is)tagged as
.*?(.*?) | "#,
+ vec!["tags_raw"]));
s.insert("traits".into(), Section::new("traits", r#"(?is)indexed traits.*?(.*?)
.*?official traits\s?.*?(.*?)
"#, vec!["indexed_raw", "official_raw"]));
- s.insert("assignments".into(), Section::new("assignments", r#"(?is)appears in the following(.*?)"#, vec!["raw"]));
- s.insert("chars_similar_traits".into(), Section::new("assignments", r#"(?is)with Similar Traits(.*?)"#, vec!["raw"]));
+ s.insert("assignments".into(),
+ Section::new("assignments",
+ r#"(?is)appears in the following(.*?)"#,
+ vec!["raw"]));
+ s.insert("chars_similar_traits".into(),
+ Section::new("assignments",
+ r#"(?is)with Similar Traits(.*?)"#,
+ vec!["raw"]));
s
}
diff --git a/src/dl_list.rs b/src/dl_list.rs
index 3327388..17d49fc 100644
--- a/src/dl_list.rs
+++ b/src/dl_list.rs
@@ -9,5 +9,12 @@ pub struct DLListItem {
pub fn parse(s: &str) -> Vec<DLListItem> {
let reg_list_item = Regex::new(r#"(?is)(.*?).*?(.*?)"#).unwrap();
- reg_list_item.captures_iter(s).map(|c| DLListItem { name: c.at(1).unwrap().into(), value: c.at(2).unwrap().into() }).collect()
+ reg_list_item.captures_iter(s)
+ .map(|c| {
+ DLListItem {
+ name: c.at(1).unwrap().into(),
+ value: c.at(2).unwrap().into(),
+ }
+ })
+ .collect()
}
diff --git a/src/main.rs b/src/main.rs
index 7481376..f15a3dd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,9 +1,9 @@
#![feature(proc_macro)]
-#[macro_use] extern crate serde_derive;
+#[macro_use]
+extern crate serde_derive;
extern crate serde_json;
extern crate regex;
-extern crate yaml_rust;
extern crate walkdir;
use walkdir::{WalkDir, DirEntry};
@@ -38,8 +38,12 @@ fn main() {
let active_threads = Arc::new(AtomicUsize::new(0));
-
- let files: Arc>> = Arc::new(Mutex::new(WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()).collect()));
+
+ let files: Arc>> = Arc::new(Mutex::new(WalkDir::new(base_path)
+ .min_depth(1)
+ .into_iter()
+ .filter_map(|e| e.ok())
+ .collect()));
for i in 0..MAX_THREADS {
let files = files.clone();
@@ -58,7 +62,7 @@ fn main() {
}
}
let entry = entry.unwrap();
-
+
let mut f = File::open(entry.path()).expect("could not open file");
let mut buf = String::new();
if let Err(_) = f.read_to_string(&mut buf) {
@@ -68,12 +72,16 @@ fn main() {
let buf = pre_process::strip_irrelevant_content(&buf);
+ println!("{:?}", entry.path());
let mut char = Series::new();
char.parse(&buf);
let json = serde_json::to_string(&char).unwrap();
- let out_file = out_path.join(entry.file_name().to_str().unwrap().replace("html", "json"));
+ let out_file = out_path.join(entry.file_name()
+ .to_str()
+ .unwrap()
+ .replace("html", "json"));
let mut o = File::create(&out_file).unwrap();
o.write_all(json.as_bytes()).unwrap();
println!("{:?}", out_file);
diff --git a/src/pre_process.rs b/src/pre_process.rs
index 877ddc3..c68528f 100644
--- a/src/pre_process.rs
+++ b/src/pre_process.rs
@@ -1,15 +1,13 @@
pub fn strip_irrelevant_content(s: &str) -> String {
let mut retn = "";
- match s.find(r#""#) {
+ match s.find(r#"
"#) {
Some(pos) => retn = &s[pos..],
None => (),
};
- match s.find(r#""#) {
+ match s.find(r#"Characters"#) {
Some(pos) => retn = &s[..pos],
None => (),
};
return retn.into();
}
-
-
diff --git a/src/section.rs b/src/section.rs
index 7e492b1..48f237d 100644
--- a/src/section.rs
+++ b/src/section.rs
@@ -1,6 +1,7 @@
use super::regex::Regex;
use std::collections::HashMap;
+#[derive(Debug)]
pub struct Section {
pub name: String,
pub re: Regex,
diff --git a/src/series.rs b/src/series.rs
index d111fb8..3037e6a 100644
--- a/src/series.rs
+++ b/src/series.rs
@@ -21,12 +21,17 @@ pub struct Names {
#[derive(Debug, Serialize)]
pub struct Series {
pub name: Names,
+ pub tags: Vec<String>,
}
impl Names {
pub fn new() -> Self {
Names {
- String::new(), String::new(), String::new(), String::new(), String::new()
+ english: String::new(),
+ aliases: String::new(),
+ romaji: String::new(),
+ furigana: String::new(),
+ japanese: String::new(),
}
}
}
@@ -34,30 +39,40 @@ impl Names {
impl Series {
pub fn new() -> Self {
Series {
- name: Names::new()
+ name: Names::new(),
+ tags: vec![],
}
}
pub fn parse(&mut self, buf: &str) {
- let mut sections = get_sections();
+ let mut sections = get_sections();
section::process(&buf, &mut sections);
+ let re_genre_tags = Regex::new(r#"(?is)Genre Tags.*?>(.*?)"#).unwrap();
+ let re_genre_tag = Regex::new(r#"[0-9]">(.*?)
"#).unwrap();
+
+ for cap in re_genre_tags.captures_iter(&buf) {
+ self.tags = re_genre_tag.captures_iter(cap.at(1).unwrap())
+ .map(|v| v.at(1).unwrap().to_string())
+ .collect();
+ }
+
{
let name: &Section = &sections["name".into()];
- self.name.english = name["english".into()];
- self.name.aliases = name["aliases".into()];
- self.name.romaji = name["romaji".into()];
- self.name.furigana = name["furigana".into()];
- self.name.japanese = name["japanese".into()];
+ self.name.english = name.data["english".into()].to_string();
+ self.name.aliases = name.data["aliases".into()].to_string();
+ self.name.romaji = name.data["romaji".into()].to_string();
+ self.name.furigana = name.data["furigana".into()].to_string();
+ self.name.japanese = name.data["japanese".into()].to_string();
}
- }
+ }
}
fn get_sections() -> HashMap
{
let mut s: HashMap = HashMap::new();
- s.insert("name".into(), Section::new("name", r#"(?is)English Title.*?(.*?) | .*?Aliases.*?(.*?)? | .*?Romaji Title.*?(.*?).*?Furigana Title.*?(.*?).*?Japanese Title.*?(.*?)"#, vec!["english", "aliases", "romaji", "furigana", "japanese"]));
+ s.insert("name".into(), Section::new("name", r#"(?is)English Title.*?(.*?)<.*?Aliases.*? | (.*?)<.*?Romaji Title.*? | (.*?)(.*?)(.*?)"#, vec!["english", "aliases", "romaji", "furigana", "japanese"]));
s
}
diff --git a/src/tags.rs b/src/tags.rs
index 561c54d..63db847 100644
--- a/src/tags.rs
+++ b/src/tags.rs
@@ -11,5 +11,12 @@ pub struct Tag {
pub fn parse(s: &str) -> Vec<Tag> {
let reg_tag = Regex::new(r#"(?is)(.*?)"#).unwrap();
- reg_tag.captures_iter(s).map(|c| Tag { id: u32::from_str(c.at(1).unwrap()).unwrap(), name: c.at(2).unwrap().into() }).collect()
+ reg_tag.captures_iter(s)
+ .map(|c| {
+ Tag {
+ id: u32::from_str(c.at(1).unwrap()).unwrap(),
+ name: c.at(2).unwrap().into(),
+ }
+ })
+ .collect()
}
--
cgit v0.10.1