diff options
author | jan <jan@ruken.pw> | 2016-09-25 20:28:51 (UTC) |
---|---|---|
committer | jan <jan@ruken.pw> | 2016-09-25 20:28:51 (UTC) |
commit | 265ea9e217f15ef387a5580d9b21a1914308389c (patch) | |
tree | a53a6c98e5e302e7712b55c03d5bdd11bf497471 |
initial commit
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | Cargo.lock | 109 | ||||
-rw-r--r-- | Cargo.toml | 8 | ||||
-rw-r--r-- | src/character.rs | 3 | ||||
-rw-r--r-- | src/main.rs | 26 | ||||
-rw-r--r-- | src/pre_process.rs | 13 |
6 files changed, 161 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7ab9f5b --- /dev/null +++ b/.gitignore | |||
@@ -0,0 +1,2 @@ | |||
1 | target | ||
2 | characters \ No newline at end of file | ||
diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..335938c --- /dev/null +++ b/Cargo.lock | |||
@@ -0,0 +1,109 @@ | |||
1 | [root] | ||
2 | name = "acd_character_parser" | ||
3 | version = "0.1.0" | ||
4 | dependencies = [ | ||
5 | "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", | ||
6 | "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", | ||
7 | ] | ||
8 | |||
9 | [[package]] | ||
10 | name = "aho-corasick" | ||
11 | version = "0.5.3" | ||
12 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
13 | dependencies = [ | ||
14 | "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", | ||
15 | ] | ||
16 | |||
17 | [[package]] | ||
18 | name = "kernel32-sys" | ||
19 | version = "0.2.2" | ||
20 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
21 | dependencies = [ | ||
22 | "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", | ||
23 | "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", | ||
24 | ] | ||
25 | |||
26 | [[package]] | ||
27 | name = "libc" | ||
28 | version = "0.2.16" | ||
29 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
30 | |||
31 | [[package]] | ||
32 | name = "memchr" | ||
33 | version = "0.1.11" | ||
34 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
35 | dependencies = [ | ||
36 | "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", | ||
37 | ] | ||
38 | |||
39 | [[package]] | ||
40 | name = "regex" | ||
41 | version = "0.1.77" | ||
42 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
43 | dependencies = [ | ||
44 | "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", | ||
45 | "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", | ||
46 | "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", | ||
47 | "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", | ||
48 | "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", | ||
49 | ] | ||
50 | |||
51 | [[package]] | ||
52 | name = "regex-syntax" | ||
53 | version = "0.3.5" | ||
54 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
55 | |||
56 | [[package]] | ||
57 | name = "thread-id" | ||
58 | version = "2.0.0" | ||
59 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
60 | dependencies = [ | ||
61 | "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", | ||
62 | "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", | ||
63 | ] | ||
64 | |||
65 | [[package]] | ||
66 | name = "thread_local" | ||
67 | version = "0.2.7" | ||
68 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
69 | dependencies = [ | ||
70 | "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", | ||
71 | ] | ||
72 | |||
73 | [[package]] | ||
74 | name = "utf8-ranges" | ||
75 | version = "0.1.3" | ||
76 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
77 | |||
78 | [[package]] | ||
79 | name = "walkdir" | ||
80 | version = "0.1.8" | ||
81 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
82 | dependencies = [ | ||
83 | "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", | ||
84 | "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", | ||
85 | ] | ||
86 | |||
87 | [[package]] | ||
88 | name = "winapi" | ||
89 | version = "0.2.8" | ||
90 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
91 | |||
92 | [[package]] | ||
93 | name = "winapi-build" | ||
94 | version = "0.1.1" | ||
95 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
96 | |||
97 | [metadata] | ||
98 | "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" | ||
99 | "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" | ||
100 | "checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d" | ||
101 | "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" | ||
102 | "checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" | ||
103 | "checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd" | ||
104 | "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" | ||
105 | "checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" | ||
106 | "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" | ||
107 | "checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780" | ||
108 | "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" | ||
109 | "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" | ||
diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..540d3a1 --- /dev/null +++ b/Cargo.toml | |||
@@ -0,0 +1,8 @@ | |||
1 | [package] | ||
2 | name = "acd_character_parser" | ||
3 | version = "0.1.0" | ||
4 | authors = ["jan <jan@ruken.pw>"] | ||
5 | |||
6 | [dependencies] | ||
7 | regex = "0.1" | ||
8 | walkdir = "0.1" \ No newline at end of file | ||
diff --git a/src/character.rs b/src/character.rs new file mode 100644 index 0000000..9c548c1 --- /dev/null +++ b/src/character.rs | |||
@@ -0,0 +1,3 @@ | |||
/// A character record.
///
/// The type currently declares no fields; it only reserves the name.
pub struct Character {}
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..ca9b1c5 --- /dev/null +++ b/src/main.rs | |||
@@ -0,0 +1,26 @@ | |||
1 | extern crate regex; | ||
2 | use regex::Regex; | ||
3 | |||
4 | extern crate walkdir; | ||
5 | use walkdir::WalkDir; | ||
6 | |||
7 | use std::io::prelude::*; | ||
8 | use std::fs::File; | ||
9 | use std::env; | ||
10 | use std::path::{Path, PathBuf}; | ||
11 | |||
12 | mod pre_process; | ||
13 | |||
14 | fn main() { | ||
15 | let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into()); | ||
16 | let base_path = Path::new(&raw_files); | ||
17 | |||
18 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | ||
19 | println!("{:?}", entry.path()); | ||
20 | let mut f = File::open(entry.path()).expect("could not open file"); | ||
21 | let mut buf = String::new(); | ||
22 | f.read_to_string(&mut buf).unwrap(); | ||
23 | |||
24 | let buf = pre_process::strip_irrelevant_content(&buf); | ||
25 | } | ||
26 | } | ||
diff --git a/src/pre_process.rs b/src/pre_process.rs new file mode 100644 index 0000000..aa55bd8 --- /dev/null +++ b/src/pre_process.rs | |||
@@ -0,0 +1,13 @@ | |||
/// Cuts a raw page down to the section between the profile container and the vote button.
///
/// * Both markers present: returns the text from the opening profile `<div>` (inclusive)
///   up to, but not including, the "Cast Votes" submit button that follows it.
/// * Only the profile marker present: returns everything from that marker to the end.
/// * Only the vote button present: returns everything before it.
/// * Neither marker present: returns an empty string.
///
/// The vote button is searched for only after the profile marker, so a button that
/// appears solely before the profile section is not treated as the end of the section.
pub fn strip_irrelevant_content(s: &str) -> String {
    const PROFILE_START: &'static str = r#"<div class=profile id=profile>"#;
    const VOTE_BUTTON: &'static str =
        r#"<INPUT style="font-size: 2em;" TYPE=SUBMIT NAME="votes" VALUE="Cast Votes">"#;

    let start = s.find(PROFILE_START);
    // Everything from the profile marker onwards, or the whole input when it is missing.
    let tail = &s[start.unwrap_or(0)..];

    match (start, tail.find(VOTE_BUTTON)) {
        // Slice the already-trimmed tail so the leading cut is kept as well.
        (_, Some(end)) => tail[..end].to_owned(),
        (Some(_), None) => tail.to_owned(),
        (None, None) => String::new(),
    }
}