diff options
| -rw-r--r-- | Cargo.lock | 61 | ||||
| -rw-r--r-- | Cargo.toml | 1 | ||||
| -rw-r--r-- | config.yml | 25 | ||||
| -rw-r--r-- | src/main.rs | 6 | ||||
| -rw-r--r-- | src/series.rs | 63 |
5 files changed, 82 insertions, 74 deletions
| @@ -3,11 +3,10 @@ name = "acd_character_parser" | |||
| 3 | version = "0.1.0" | 3 | version = "0.1.0" |
| 4 | dependencies = [ | 4 | dependencies = [ |
| 5 | "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", | 5 | "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", |
| 6 | "serde 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", | 6 | "serde 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)", |
| 7 | "serde_derive 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", | 7 | "serde_derive 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)", |
| 8 | "serde_json 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", | 8 | "serde_json 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", |
| 9 | "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", | 9 | "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", |
| 10 | "yaml-rust 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 11 | ] | 10 | ] |
| 12 | 11 | ||
| 13 | [[package]] | 12 | [[package]] |
| @@ -57,7 +56,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
| 57 | 56 | ||
| 58 | [[package]] | 57 | [[package]] |
| 59 | name = "quote" | 58 | name = "quote" |
| 60 | version = "0.2.0" | 59 | version = "0.3.0" |
| 61 | source = "registry+https://github.com/rust-lang/crates.io-index" | 60 | source = "registry+https://github.com/rust-lang/crates.io-index" |
| 62 | 61 | ||
| 63 | [[package]] | 62 | [[package]] |
| @@ -79,33 +78,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
| 79 | 78 | ||
| 80 | [[package]] | 79 | [[package]] |
| 81 | name = "serde" | 80 | name = "serde" |
| 82 | version = "0.8.10" | 81 | version = "0.8.12" |
| 83 | source = "registry+https://github.com/rust-lang/crates.io-index" | 82 | source = "registry+https://github.com/rust-lang/crates.io-index" |
| 84 | 83 | ||
| 85 | [[package]] | 84 | [[package]] |
| 86 | name = "serde_codegen" | 85 | name = "serde_codegen" |
| 87 | version = "0.8.10" | 86 | version = "0.8.12" |
| 88 | source = "registry+https://github.com/rust-lang/crates.io-index" | 87 | source = "registry+https://github.com/rust-lang/crates.io-index" |
| 89 | dependencies = [ | 88 | dependencies = [ |
| 90 | "quote 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", | 89 | "quote 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", |
| 91 | "serde_codegen_internals 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", | 90 | "serde_codegen_internals 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", |
| 92 | "syn 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", | 91 | "syn 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", |
| 93 | ] | 92 | ] |
| 94 | 93 | ||
| 95 | [[package]] | 94 | [[package]] |
| 96 | name = "serde_codegen_internals" | 95 | name = "serde_codegen_internals" |
| 97 | version = "0.9.0" | 96 | version = "0.10.0" |
| 98 | source = "registry+https://github.com/rust-lang/crates.io-index" | 97 | source = "registry+https://github.com/rust-lang/crates.io-index" |
| 99 | dependencies = [ | 98 | dependencies = [ |
| 100 | "syn 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", | 99 | "syn 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", |
| 101 | ] | 100 | ] |
| 102 | 101 | ||
| 103 | [[package]] | 102 | [[package]] |
| 104 | name = "serde_derive" | 103 | name = "serde_derive" |
| 105 | version = "0.8.10" | 104 | version = "0.8.12" |
| 106 | source = "registry+https://github.com/rust-lang/crates.io-index" | 105 | source = "registry+https://github.com/rust-lang/crates.io-index" |
| 107 | dependencies = [ | 106 | dependencies = [ |
| 108 | "serde_codegen 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", | 107 | "serde_codegen 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)", |
| 109 | ] | 108 | ] |
| 110 | 109 | ||
| 111 | [[package]] | 110 | [[package]] |
| @@ -116,15 +115,15 @@ dependencies = [ | |||
| 116 | "dtoa 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", | 115 | "dtoa 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", |
| 117 | "itoa 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", | 116 | "itoa 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", |
| 118 | "num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", | 117 | "num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", |
| 119 | "serde 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", | 118 | "serde 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)", |
| 120 | ] | 119 | ] |
| 121 | 120 | ||
| 122 | [[package]] | 121 | [[package]] |
| 123 | name = "syn" | 122 | name = "syn" |
| 124 | version = "0.8.2" | 123 | version = "0.9.0" |
| 125 | source = "registry+https://github.com/rust-lang/crates.io-index" | 124 | source = "registry+https://github.com/rust-lang/crates.io-index" |
| 126 | dependencies = [ | 125 | dependencies = [ |
| 127 | "quote 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", | 126 | "quote 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", |
| 128 | "unicode-xid 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", | 127 | "unicode-xid 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", |
| 129 | ] | 128 | ] |
| 130 | 129 | ||
| @@ -174,33 +173,3 @@ name = "winapi-build" | |||
| 174 | version = "0.1.1" | 173 | version = "0.1.1" |
| 175 | source = "registry+https://github.com/rust-lang/crates.io-index" | 174 | source = "registry+https://github.com/rust-lang/crates.io-index" |
| 176 | 175 | ||
| 177 | [[package]] | ||
| 178 | name = "yaml-rust" | ||
| 179 | version = "0.3.3" | ||
| 180 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 181 | |||
| 182 | [metadata] | ||
| 183 | "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" | ||
| 184 | "checksum dtoa 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0dd841b58510c9618291ffa448da2e4e0f699d984d436122372f446dae62263d" | ||
| 185 | "checksum itoa 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ae3088ea4baeceb0284ee9eea42f591226e6beaecf65373e41b38d95a1b8e7a1" | ||
| 186 | "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" | ||
| 187 | "checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d" | ||
| 188 | "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" | ||
| 189 | "checksum num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "a16a42856a256b39c6d3484f097f6713e14feacd9bfb02290917904fae46c81c" | ||
| 190 | "checksum quote 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "502983ae4337dd7e30130ea1405d4bfc90424d8a38133baa68340b86f340dbfb" | ||
| 191 | "checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" | ||
| 192 | "checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd" | ||
| 193 | "checksum serde 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)" = "7a1687e7258b35aa6117cb52d65aa8f289d544fdad0c7a31822973b4465e4066" | ||
| 194 | "checksum serde_codegen 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)" = "44d2c5df2af5b1e603d911d3c4f30febc2f0f25426ef9f468ba256c965f5afe8" | ||
| 195 | "checksum serde_codegen_internals 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b7fb3bda24f9eee7446793d209f7edac5400f85f002bff1c3d59ec678c728c9e" | ||
| 196 | "checksum serde_derive 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)" = "658f0eea61a92a8574de65f05b84e210e08d5de5943fdc9a3d87ac17553803b9" | ||
| 197 | "checksum serde_json 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e5b3bb42fa42265df8a1822b3db2090bc8f9e17e8142599c76a5b854bc4e7b5b" | ||
| 198 | "checksum syn 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "666432ba8bf7a7bd46ec84b7c71739cc4836b5357aa3b76e0ea6da15f01f6220" | ||
| 199 | "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" | ||
| 200 | "checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" | ||
| 201 | "checksum unicode-xid 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "36dff09cafb4ec7c8cf0023eb0b686cb6ce65499116a12201c9e11840ca01beb" | ||
| 202 | "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" | ||
| 203 | "checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780" | ||
| 204 | "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" | ||
| 205 | "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" | ||
| 206 | "checksum yaml-rust 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ebfe12f475ad59be6178ebf004d51e682022496535994f8d23fd7ed31084598c" | ||
| @@ -6,7 +6,6 @@ authors = ["jan <jan@ruken.pw>"] | |||
| 6 | [dependencies] | 6 | [dependencies] |
| 7 | regex = "0.1" | 7 | regex = "0.1" |
| 8 | walkdir = "0.1" | 8 | walkdir = "0.1" |
| 9 | yaml-rust = "0.3" | ||
| 10 | 9 | ||
| 11 | serde = "0.8" | 10 | serde = "0.8" |
| 12 | serde_derive = "0.8" | 11 | serde_derive = "0.8" |
diff --git a/config.yml b/config.yml deleted file mode 100644 index 313d8e2..0000000 --- a/config.yml +++ /dev/null | |||
| @@ -1,25 +0,0 @@ | |||
| 1 | sections: | ||
| 2 | name: | ||
| 3 | pattern: (?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>.*?Aliases.*?<TD>(.*?)\s?</TD> | ||
| 4 | groups: | ||
| 5 | - romaji | ||
| 6 | - japanese | ||
| 7 | - aliases | ||
| 8 | misc: | ||
| 9 | pattern: (?is)Role</TH>.*?<TD>(.*?)\s?</TD> | ||
| 10 | groups: | ||
| 11 | - role | ||
| 12 | image: | ||
| 13 | pattern: (?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image | ||
| 14 | groups: | ||
| 15 | - thumb | ||
| 16 | - full | ||
| 17 | tags: | ||
| 18 | pattern: (?is)tagged as</P>.*?<TH>(.*?)</TH> | ||
| 19 | groups: | ||
| 20 | - tags_raw | ||
| 21 | traits: | ||
| 22 | pattern: (?is)indexed traits</P>.*?<dl>(.*?)</dl>.*?official traits\s?</P>.*?<dl>(.*?)</dl> | ||
| 23 | groups: | ||
| 24 | - indexed_raw | ||
| 25 | - official_raw | ||
diff --git a/src/main.rs b/src/main.rs index 2b96024..7481376 100644 --- a/src/main.rs +++ b/src/main.rs | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #![feature(rustc_macro)] | 1 | #![feature(proc_macro)] |
| 2 | #[macro_use] extern crate serde_derive; | 2 | #[macro_use] extern crate serde_derive; |
| 3 | extern crate serde_json; | 3 | extern crate serde_json; |
| 4 | 4 | ||
| @@ -22,6 +22,8 @@ mod section; | |||
| 22 | 22 | ||
| 23 | mod character; | 23 | mod character; |
| 24 | use character::Character; | 24 | use character::Character; |
| 25 | mod series; | ||
| 26 | use series::Series; | ||
| 25 | 27 | ||
| 26 | mod tags; | 28 | mod tags; |
| 27 | mod dl_list; | 29 | mod dl_list; |
| @@ -66,7 +68,7 @@ fn main() { | |||
| 66 | 68 | ||
| 67 | let buf = pre_process::strip_irrelevant_content(&buf); | 69 | let buf = pre_process::strip_irrelevant_content(&buf); |
| 68 | 70 | ||
| 69 | let mut char = Character::new(); | 71 | let mut char = Series::new(); |
| 70 | char.parse(&buf); | 72 | char.parse(&buf); |
| 71 | 73 | ||
| 72 | let json = serde_json::to_string(&char).unwrap(); | 74 | let json = serde_json::to_string(&char).unwrap(); |
diff --git a/src/series.rs b/src/series.rs new file mode 100644 index 0000000..d111fb8 --- /dev/null +++ b/src/series.rs | |||
| @@ -0,0 +1,63 @@ | |||
| 1 | use super::tags; | ||
| 2 | use super::tags::Tag; | ||
| 3 | use super::dl_list; | ||
| 4 | use super::dl_list::DLListItem; | ||
| 5 | use super::section; | ||
| 6 | use super::section::Section; | ||
| 7 | use super::regex::Regex; | ||
| 8 | use super::tiles; | ||
| 9 | |||
| 10 | use std::collections::HashMap; | ||
| 11 | |||
| 12 | #[derive(Debug, Serialize)] | ||
| 13 | pub struct Names { | ||
| 14 | pub english: String, | ||
| 15 | pub aliases: String, | ||
| 16 | pub romaji: String, | ||
| 17 | pub furigana: String, | ||
| 18 | pub japanese: String, | ||
| 19 | } | ||
| 20 | |||
| 21 | #[derive(Debug, Serialize)] | ||
| 22 | pub struct Series { | ||
| 23 | pub name: Names, | ||
| 24 | } | ||
| 25 | |||
| 26 | impl Names { | ||
| 27 | pub fn new() -> Self { | ||
| 28 | Names { | ||
| 29 | String::new(), String::new(), String::new(), String::new(), String::new() | ||
| 30 | } | ||
| 31 | } | ||
| 32 | } | ||
| 33 | |||
| 34 | impl Series { | ||
| 35 | pub fn new() -> Self { | ||
| 36 | Series { | ||
| 37 | name: Names::new() | ||
| 38 | } | ||
| 39 | } | ||
| 40 | |||
| 41 | pub fn parse(&mut self, buf: &str) { | ||
| 42 | let mut sections = get_sections(); | ||
| 43 | section::process(&buf, &mut sections); | ||
| 44 | |||
| 45 | { | ||
| 46 | let name: &Section = &sections["name".into()]; | ||
| 47 | |||
| 48 | self.name.english = name["english".into()]; | ||
| 49 | self.name.aliases = name["aliases".into()]; | ||
| 50 | self.name.romaji = name["romaji".into()]; | ||
| 51 | self.name.furigana = name["furigana".into()]; | ||
| 52 | self.name.japanese = name["japanese".into()]; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | } | ||
| 56 | |||
| 57 | fn get_sections() -> HashMap<String, Section> { | ||
| 58 | let mut s: HashMap<String, Section> = HashMap::new(); | ||
| 59 | |||
| 60 | s.insert("name".into(), Section::new("name", r#"(?is)English Title.*?<TD>(.*?)</TD>.*?Aliases.*?<TD>(.*?)?</TD>.*?Romaji Title.*?<TD.*?>(.*?)</TD>.*?Furigana Title.*?<TD.*?>(.*?)</TD>.*?Japanese Title.*?<TD.*?>(.*?)</TD>"#, vec!["english", "aliases", "romaji", "furigana", "japanese"])); | ||
| 61 | |||
| 62 | s | ||
| 63 | } | ||
