diff options
-rw-r--r-- | Cargo.lock | 61 | ||||
-rw-r--r-- | Cargo.toml | 1 | ||||
-rw-r--r-- | config.yml | 25 | ||||
-rw-r--r-- | src/main.rs | 6 | ||||
-rw-r--r-- | src/series.rs | 63 |
5 files changed, 82 insertions, 74 deletions
@@ -3,11 +3,10 @@ name = "acd_character_parser" | |||
3 | version = "0.1.0" | 3 | version = "0.1.0" |
4 | dependencies = [ | 4 | dependencies = [ |
5 | "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", | 5 | "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", |
6 | "serde 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", | 6 | "serde 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)", |
7 | "serde_derive 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", | 7 | "serde_derive 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)", |
8 | "serde_json 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", | 8 | "serde_json 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", |
9 | "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", | 9 | "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", |
10 | "yaml-rust 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", | ||
11 | ] | 10 | ] |
12 | 11 | ||
13 | [[package]] | 12 | [[package]] |
@@ -57,7 +56,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
57 | 56 | ||
58 | [[package]] | 57 | [[package]] |
59 | name = "quote" | 58 | name = "quote" |
60 | version = "0.2.0" | 59 | version = "0.3.0" |
61 | source = "registry+https://github.com/rust-lang/crates.io-index" | 60 | source = "registry+https://github.com/rust-lang/crates.io-index" |
62 | 61 | ||
63 | [[package]] | 62 | [[package]] |
@@ -79,33 +78,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" | |||
79 | 78 | ||
80 | [[package]] | 79 | [[package]] |
81 | name = "serde" | 80 | name = "serde" |
82 | version = "0.8.10" | 81 | version = "0.8.12" |
83 | source = "registry+https://github.com/rust-lang/crates.io-index" | 82 | source = "registry+https://github.com/rust-lang/crates.io-index" |
84 | 83 | ||
85 | [[package]] | 84 | [[package]] |
86 | name = "serde_codegen" | 85 | name = "serde_codegen" |
87 | version = "0.8.10" | 86 | version = "0.8.12" |
88 | source = "registry+https://github.com/rust-lang/crates.io-index" | 87 | source = "registry+https://github.com/rust-lang/crates.io-index" |
89 | dependencies = [ | 88 | dependencies = [ |
90 | "quote 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", | 89 | "quote 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", |
91 | "serde_codegen_internals 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", | 90 | "serde_codegen_internals 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", |
92 | "syn 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", | 91 | "syn 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", |
93 | ] | 92 | ] |
94 | 93 | ||
95 | [[package]] | 94 | [[package]] |
96 | name = "serde_codegen_internals" | 95 | name = "serde_codegen_internals" |
97 | version = "0.9.0" | 96 | version = "0.10.0" |
98 | source = "registry+https://github.com/rust-lang/crates.io-index" | 97 | source = "registry+https://github.com/rust-lang/crates.io-index" |
99 | dependencies = [ | 98 | dependencies = [ |
100 | "syn 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", | 99 | "syn 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", |
101 | ] | 100 | ] |
102 | 101 | ||
103 | [[package]] | 102 | [[package]] |
104 | name = "serde_derive" | 103 | name = "serde_derive" |
105 | version = "0.8.10" | 104 | version = "0.8.12" |
106 | source = "registry+https://github.com/rust-lang/crates.io-index" | 105 | source = "registry+https://github.com/rust-lang/crates.io-index" |
107 | dependencies = [ | 106 | dependencies = [ |
108 | "serde_codegen 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", | 107 | "serde_codegen 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)", |
109 | ] | 108 | ] |
110 | 109 | ||
111 | [[package]] | 110 | [[package]] |
@@ -116,15 +115,15 @@ dependencies = [ | |||
116 | "dtoa 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", | 115 | "dtoa 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", |
117 | "itoa 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", | 116 | "itoa 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", |
118 | "num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", | 117 | "num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", |
119 | "serde 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)", | 118 | "serde 0.8.12 (registry+https://github.com/rust-lang/crates.io-index)", |
120 | ] | 119 | ] |
121 | 120 | ||
122 | [[package]] | 121 | [[package]] |
123 | name = "syn" | 122 | name = "syn" |
124 | version = "0.8.2" | 123 | version = "0.9.0" |
125 | source = "registry+https://github.com/rust-lang/crates.io-index" | 124 | source = "registry+https://github.com/rust-lang/crates.io-index" |
126 | dependencies = [ | 125 | dependencies = [ |
127 | "quote 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", | 126 | "quote 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", |
128 | "unicode-xid 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", | 127 | "unicode-xid 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)", |
129 | ] | 128 | ] |
130 | 129 | ||
@@ -174,33 +173,3 @@ name = "winapi-build" | |||
174 | version = "0.1.1" | 173 | version = "0.1.1" |
175 | source = "registry+https://github.com/rust-lang/crates.io-index" | 174 | source = "registry+https://github.com/rust-lang/crates.io-index" |
176 | 175 | ||
177 | [[package]] | ||
178 | name = "yaml-rust" | ||
179 | version = "0.3.3" | ||
180 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
181 | |||
182 | [metadata] | ||
183 | "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" | ||
184 | "checksum dtoa 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0dd841b58510c9618291ffa448da2e4e0f699d984d436122372f446dae62263d" | ||
185 | "checksum itoa 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ae3088ea4baeceb0284ee9eea42f591226e6beaecf65373e41b38d95a1b8e7a1" | ||
186 | "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" | ||
187 | "checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d" | ||
188 | "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" | ||
189 | "checksum num-traits 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "a16a42856a256b39c6d3484f097f6713e14feacd9bfb02290917904fae46c81c" | ||
190 | "checksum quote 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "502983ae4337dd7e30130ea1405d4bfc90424d8a38133baa68340b86f340dbfb" | ||
191 | "checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" | ||
192 | "checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd" | ||
193 | "checksum serde 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)" = "7a1687e7258b35aa6117cb52d65aa8f289d544fdad0c7a31822973b4465e4066" | ||
194 | "checksum serde_codegen 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)" = "44d2c5df2af5b1e603d911d3c4f30febc2f0f25426ef9f468ba256c965f5afe8" | ||
195 | "checksum serde_codegen_internals 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b7fb3bda24f9eee7446793d209f7edac5400f85f002bff1c3d59ec678c728c9e" | ||
196 | "checksum serde_derive 0.8.10 (registry+https://github.com/rust-lang/crates.io-index)" = "658f0eea61a92a8574de65f05b84e210e08d5de5943fdc9a3d87ac17553803b9" | ||
197 | "checksum serde_json 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e5b3bb42fa42265df8a1822b3db2090bc8f9e17e8142599c76a5b854bc4e7b5b" | ||
198 | "checksum syn 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "666432ba8bf7a7bd46ec84b7c71739cc4836b5357aa3b76e0ea6da15f01f6220" | ||
199 | "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" | ||
200 | "checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" | ||
201 | "checksum unicode-xid 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "36dff09cafb4ec7c8cf0023eb0b686cb6ce65499116a12201c9e11840ca01beb" | ||
202 | "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" | ||
203 | "checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780" | ||
204 | "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" | ||
205 | "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" | ||
206 | "checksum yaml-rust 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ebfe12f475ad59be6178ebf004d51e682022496535994f8d23fd7ed31084598c" | ||
@@ -6,7 +6,6 @@ authors = ["jan <jan@ruken.pw>"] | |||
6 | [dependencies] | 6 | [dependencies] |
7 | regex = "0.1" | 7 | regex = "0.1" |
8 | walkdir = "0.1" | 8 | walkdir = "0.1" |
9 | yaml-rust = "0.3" | ||
10 | 9 | ||
11 | serde = "0.8" | 10 | serde = "0.8" |
12 | serde_derive = "0.8" | 11 | serde_derive = "0.8" |
diff --git a/config.yml b/config.yml deleted file mode 100644 index 313d8e2..0000000 --- a/config.yml +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
1 | sections: | ||
2 | name: | ||
3 | pattern: (?is)Romaji Name.*?<TD>(.*?)\s?</TD>.*?Japanese Name.*?<TD>(.*?)\s?</TD>.*?Aliases.*?<TD>(.*?)\s?</TD> | ||
4 | groups: | ||
5 | - romaji | ||
6 | - japanese | ||
7 | - aliases | ||
8 | misc: | ||
9 | pattern: (?is)Role</TH>.*?<TD>(.*?)\s?</TD> | ||
10 | groups: | ||
11 | - role | ||
12 | image: | ||
13 | pattern: (?is)<H3 id="section99">.*<img src="(.*?)" alt=.*?></a><p><a href="(.*?)">View Full Size Image | ||
14 | groups: | ||
15 | - thumb | ||
16 | - full | ||
17 | tags: | ||
18 | pattern: (?is)tagged as</P>.*?<TH>(.*?)</TH> | ||
19 | groups: | ||
20 | - tags_raw | ||
21 | traits: | ||
22 | pattern: (?is)indexed traits</P>.*?<dl>(.*?)</dl>.*?official traits\s?</P>.*?<dl>(.*?)</dl> | ||
23 | groups: | ||
24 | - indexed_raw | ||
25 | - official_raw | ||
diff --git a/src/main.rs b/src/main.rs index 2b96024..7481376 100644 --- a/src/main.rs +++ b/src/main.rs | |||
@@ -1,4 +1,4 @@ | |||
1 | #![feature(rustc_macro)] | 1 | #![feature(proc_macro)] |
2 | #[macro_use] extern crate serde_derive; | 2 | #[macro_use] extern crate serde_derive; |
3 | extern crate serde_json; | 3 | extern crate serde_json; |
4 | 4 | ||
@@ -22,6 +22,8 @@ mod section; | |||
22 | 22 | ||
23 | mod character; | 23 | mod character; |
24 | use character::Character; | 24 | use character::Character; |
25 | mod series; | ||
26 | use series::Series; | ||
25 | 27 | ||
26 | mod tags; | 28 | mod tags; |
27 | mod dl_list; | 29 | mod dl_list; |
@@ -66,7 +68,7 @@ fn main() { | |||
66 | 68 | ||
67 | let buf = pre_process::strip_irrelevant_content(&buf); | 69 | let buf = pre_process::strip_irrelevant_content(&buf); |
68 | 70 | ||
69 | let mut char = Character::new(); | 71 | let mut char = Series::new(); |
70 | char.parse(&buf); | 72 | char.parse(&buf); |
71 | 73 | ||
72 | let json = serde_json::to_string(&char).unwrap(); | 74 | let json = serde_json::to_string(&char).unwrap(); |
diff --git a/src/series.rs b/src/series.rs new file mode 100644 index 0000000..d111fb8 --- /dev/null +++ b/src/series.rs | |||
@@ -0,0 +1,63 @@ | |||
1 | use super::tags; | ||
2 | use super::tags::Tag; | ||
3 | use super::dl_list; | ||
4 | use super::dl_list::DLListItem; | ||
5 | use super::section; | ||
6 | use super::section::Section; | ||
7 | use super::regex::Regex; | ||
8 | use super::tiles; | ||
9 | |||
10 | use std::collections::HashMap; | ||
11 | |||
12 | #[derive(Debug, Serialize)] | ||
13 | pub struct Names { | ||
14 | pub english: String, | ||
15 | pub aliases: String, | ||
16 | pub romaji: String, | ||
17 | pub furigana: String, | ||
18 | pub japanese: String, | ||
19 | } | ||
20 | |||
21 | #[derive(Debug, Serialize)] | ||
22 | pub struct Series { | ||
23 | pub name: Names, | ||
24 | } | ||
25 | |||
26 | impl Names { | ||
27 | pub fn new() -> Self { | ||
28 | Names { | ||
29 | String::new(), String::new(), String::new(), String::new(), String::new() | ||
30 | } | ||
31 | } | ||
32 | } | ||
33 | |||
34 | impl Series { | ||
35 | pub fn new() -> Self { | ||
36 | Series { | ||
37 | name: Names::new() | ||
38 | } | ||
39 | } | ||
40 | |||
41 | pub fn parse(&mut self, buf: &str) { | ||
42 | let mut sections = get_sections(); | ||
43 | section::process(&buf, &mut sections); | ||
44 | |||
45 | { | ||
46 | let name: &Section = &sections["name".into()]; | |
47 | |||
48 | self.name.english = name["english".into()]; | ||
49 | self.name.aliases = name["aliases".into()]; | ||
50 | self.name.romaji = name["romaji".into()]; | ||
51 | self.name.furigana = name["furigana".into()]; | ||
52 | self.name.japanese = name["japanese".into()]; | ||
53 | } | ||
54 | } | ||
55 | } | ||
56 | |||
57 | fn get_sections() -> HashMap<String, Section> { | ||
58 | let mut s: HashMap<String, Section> = HashMap::new(); | ||
59 | |||
60 | s.insert("name".into(), Section::new("name", r#"(?is)English Title.*?<TD>(.*?)</TD>.*?Aliases.*?<TD>(.*?)?</TD>.*?Romaji Title.*?<TD.*?>(.*?)</TD>.*?Furigana Title.*?<TD.*?>(.*?)</TD>.*?Japanese Title.*?<TD.*?>(.*?)</TD>"#, vec!["english", "aliases", "romaji", "furigana", "japanese"])); | ||
61 | |||
62 | s | ||
63 | } | ||