diff options
| author | jan <jan@ruken.pw> | 2016-09-25 20:28:51 (UTC) |
|---|---|---|
| committer | jan <jan@ruken.pw> | 2016-09-25 20:28:51 (UTC) |
| commit | 265ea9e217f15ef387a5580d9b21a1914308389c (patch) | |
| tree | a53a6c98e5e302e7712b55c03d5bdd11bf497471 | |
initial commit
| -rw-r--r-- | .gitignore | 2 | ||||
| -rw-r--r-- | Cargo.lock | 109 | ||||
| -rw-r--r-- | Cargo.toml | 8 | ||||
| -rw-r--r-- | src/character.rs | 3 | ||||
| -rw-r--r-- | src/main.rs | 26 | ||||
| -rw-r--r-- | src/pre_process.rs | 13 |
6 files changed, 161 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7ab9f5b --- /dev/null +++ b/.gitignore | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | target | ||
| 2 | characters \ No newline at end of file | ||
diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..335938c --- /dev/null +++ b/Cargo.lock | |||
| @@ -0,0 +1,109 @@ | |||
| 1 | [root] | ||
| 2 | name = "acd_character_parser" | ||
| 3 | version = "0.1.0" | ||
| 4 | dependencies = [ | ||
| 5 | "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 6 | "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 7 | ] | ||
| 8 | |||
| 9 | [[package]] | ||
| 10 | name = "aho-corasick" | ||
| 11 | version = "0.5.3" | ||
| 12 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 13 | dependencies = [ | ||
| 14 | "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 15 | ] | ||
| 16 | |||
| 17 | [[package]] | ||
| 18 | name = "kernel32-sys" | ||
| 19 | version = "0.2.2" | ||
| 20 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 21 | dependencies = [ | ||
| 22 | "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 23 | "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 24 | ] | ||
| 25 | |||
| 26 | [[package]] | ||
| 27 | name = "libc" | ||
| 28 | version = "0.2.16" | ||
| 29 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 30 | |||
| 31 | [[package]] | ||
| 32 | name = "memchr" | ||
| 33 | version = "0.1.11" | ||
| 34 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 35 | dependencies = [ | ||
| 36 | "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 37 | ] | ||
| 38 | |||
| 39 | [[package]] | ||
| 40 | name = "regex" | ||
| 41 | version = "0.1.77" | ||
| 42 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 43 | dependencies = [ | ||
| 44 | "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 45 | "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 46 | "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 47 | "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 48 | "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 49 | ] | ||
| 50 | |||
| 51 | [[package]] | ||
| 52 | name = "regex-syntax" | ||
| 53 | version = "0.3.5" | ||
| 54 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 55 | |||
| 56 | [[package]] | ||
| 57 | name = "thread-id" | ||
| 58 | version = "2.0.0" | ||
| 59 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 60 | dependencies = [ | ||
| 61 | "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 62 | "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 63 | ] | ||
| 64 | |||
| 65 | [[package]] | ||
| 66 | name = "thread_local" | ||
| 67 | version = "0.2.7" | ||
| 68 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 69 | dependencies = [ | ||
| 70 | "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 71 | ] | ||
| 72 | |||
| 73 | [[package]] | ||
| 74 | name = "utf8-ranges" | ||
| 75 | version = "0.1.3" | ||
| 76 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 77 | |||
| 78 | [[package]] | ||
| 79 | name = "walkdir" | ||
| 80 | version = "0.1.8" | ||
| 81 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 82 | dependencies = [ | ||
| 83 | "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 84 | "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", | ||
| 85 | ] | ||
| 86 | |||
| 87 | [[package]] | ||
| 88 | name = "winapi" | ||
| 89 | version = "0.2.8" | ||
| 90 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 91 | |||
| 92 | [[package]] | ||
| 93 | name = "winapi-build" | ||
| 94 | version = "0.1.1" | ||
| 95 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| 96 | |||
| 97 | [metadata] | ||
| 98 | "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" | ||
| 99 | "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" | ||
| 100 | "checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d" | ||
| 101 | "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" | ||
| 102 | "checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" | ||
| 103 | "checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd" | ||
| 104 | "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" | ||
| 105 | "checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" | ||
| 106 | "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" | ||
| 107 | "checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780" | ||
| 108 | "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" | ||
| 109 | "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" | ||
diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..540d3a1 --- /dev/null +++ b/Cargo.toml | |||
| @@ -0,0 +1,8 @@ | |||
| 1 | [package] | ||
| 2 | name = "acd_character_parser" | ||
| 3 | version = "0.1.0" | ||
| 4 | authors = ["jan <jan@ruken.pw>"] | ||
| 5 | |||
| 6 | [dependencies] | ||
| 7 | regex = "0.1" | ||
| 8 | walkdir = "0.1" \ No newline at end of file | ||
diff --git a/src/character.rs b/src/character.rs new file mode 100644 index 0000000..9c548c1 --- /dev/null +++ b/src/character.rs | |||
| @@ -0,0 +1,3 @@ | |||
/// A single character record produced by the parser.
///
/// The struct carries no fields yet; it only reserves the type name.
pub struct Character {}
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..ca9b1c5 --- /dev/null +++ b/src/main.rs | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | extern crate regex; | ||
| 2 | use regex::Regex; | ||
| 3 | |||
| 4 | extern crate walkdir; | ||
| 5 | use walkdir::WalkDir; | ||
| 6 | |||
| 7 | use std::io::prelude::*; | ||
| 8 | use std::fs::File; | ||
| 9 | use std::env; | ||
| 10 | use std::path::{Path, PathBuf}; | ||
| 11 | |||
| 12 | mod pre_process; | ||
| 13 | |||
| 14 | fn main() { | ||
| 15 | let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into()); | ||
| 16 | let base_path = Path::new(&raw_files); | ||
| 17 | |||
| 18 | for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) { | ||
| 19 | println!("{:?}", entry.path()); | ||
| 20 | let mut f = File::open(entry.path()).expect("could not open file"); | ||
| 21 | let mut buf = String::new(); | ||
| 22 | f.read_to_string(&mut buf).unwrap(); | ||
| 23 | |||
| 24 | let buf = pre_process::strip_irrelevant_content(&buf); | ||
| 25 | } | ||
| 26 | } | ||
diff --git a/src/pre_process.rs b/src/pre_process.rs new file mode 100644 index 0000000..aa55bd8 --- /dev/null +++ b/src/pre_process.rs | |||
| @@ -0,0 +1,13 @@ | |||
/// Cuts a raw character page down to its profile section.
///
/// The returned text starts at the `<div class=profile id=profile>` marker and
/// stops right before the "Cast Votes" submit button that follows it.
///
/// Behaviour when a marker is missing:
/// * only the profile marker is present: everything from the marker to the end
///   of `s` is returned;
/// * only the submit button is present: everything before the button is
///   returned;
/// * neither is present: an empty string is returned.
pub fn strip_irrelevant_content(s: &str) -> String {
    let profile_start = r#"<div class=profile id=profile>"#;
    let vote_button =
        r#"<INPUT style="font-size: 2em;" TYPE=SUBMIT NAME="votes" VALUE="Cast Votes">"#;

    match s.find(profile_start) {
        Some(start) => {
            // Look for the button only after the profile marker so the start
            // offset is kept instead of being overwritten by a prefix slice.
            let tail = &s[start..];
            let end = tail.find(vote_button).unwrap_or(tail.len());
            tail[..end].to_owned()
        }
        None => match s.find(vote_button) {
            Some(end) => s[..end].to_owned(),
            None => String::new(),
        },
    }
}
