aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: jan <jan@ruken.pw> 2016-09-25 20:28:51 (UTC)
committer: jan <jan@ruken.pw> 2016-09-25 20:28:51 (UTC)
commit: 265ea9e217f15ef387a5580d9b21a1914308389c (patch)
tree: a53a6c98e5e302e7712b55c03d5bdd11bf497471
initial commit
-rw-r--r-- .gitignore 2
-rw-r--r-- Cargo.lock 109
-rw-r--r-- Cargo.toml 8
-rw-r--r-- src/character.rs 3
-rw-r--r-- src/main.rs 26
-rw-r--r-- src/pre_process.rs 13
6 files changed, 161 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7ab9f5b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
1target
2characters \ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..335938c
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,109 @@
1[root]
2name = "acd_character_parser"
3version = "0.1.0"
4dependencies = [
5 "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
6 "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
7]
8
9[[package]]
10name = "aho-corasick"
11version = "0.5.3"
12source = "registry+https://github.com/rust-lang/crates.io-index"
13dependencies = [
14 "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
15]
16
17[[package]]
18name = "kernel32-sys"
19version = "0.2.2"
20source = "registry+https://github.com/rust-lang/crates.io-index"
21dependencies = [
22 "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
23 "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
24]
25
26[[package]]
27name = "libc"
28version = "0.2.16"
29source = "registry+https://github.com/rust-lang/crates.io-index"
30
31[[package]]
32name = "memchr"
33version = "0.1.11"
34source = "registry+https://github.com/rust-lang/crates.io-index"
35dependencies = [
36 "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
37]
38
39[[package]]
40name = "regex"
41version = "0.1.77"
42source = "registry+https://github.com/rust-lang/crates.io-index"
43dependencies = [
44 "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
45 "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
46 "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
47 "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
48 "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
49]
50
51[[package]]
52name = "regex-syntax"
53version = "0.3.5"
54source = "registry+https://github.com/rust-lang/crates.io-index"
55
56[[package]]
57name = "thread-id"
58version = "2.0.0"
59source = "registry+https://github.com/rust-lang/crates.io-index"
60dependencies = [
61 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
62 "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
63]
64
65[[package]]
66name = "thread_local"
67version = "0.2.7"
68source = "registry+https://github.com/rust-lang/crates.io-index"
69dependencies = [
70 "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
71]
72
73[[package]]
74name = "utf8-ranges"
75version = "0.1.3"
76source = "registry+https://github.com/rust-lang/crates.io-index"
77
78[[package]]
79name = "walkdir"
80version = "0.1.8"
81source = "registry+https://github.com/rust-lang/crates.io-index"
82dependencies = [
83 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
84 "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
85]
86
87[[package]]
88name = "winapi"
89version = "0.2.8"
90source = "registry+https://github.com/rust-lang/crates.io-index"
91
92[[package]]
93name = "winapi-build"
94version = "0.1.1"
95source = "registry+https://github.com/rust-lang/crates.io-index"
96
97[metadata]
98"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
99"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
100"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
101"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
102"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
103"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
104"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
105"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
106"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
107"checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780"
108"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
109"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..540d3a1
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
1[package]
2name = "acd_character_parser"
3version = "0.1.0"
4authors = ["jan <jan@ruken.pw>"]
5
6[dependencies]
7regex = "0.1"
8walkdir = "0.1" \ No newline at end of file
diff --git a/src/character.rs b/src/character.rs
new file mode 100644
index 0000000..9c548c1
--- /dev/null
+++ b/src/character.rs
@@ -0,0 +1,3 @@
/// A single character record.
///
/// Currently a field-less placeholder; nothing is stored in it yet.
pub struct Character {}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..ca9b1c5
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,26 @@
1extern crate regex;
2use regex::Regex;
3
4extern crate walkdir;
5use walkdir::WalkDir;
6
7use std::io::prelude::*;
8use std::fs::File;
9use std::env;
10use std::path::{Path, PathBuf};
11
12mod pre_process;
13
14fn main() {
15 let raw_files = env::var("RAW_FILES").unwrap_or("S:\\grilist\\acd\\acd_character_parser\\characters\\".into());
16 let base_path = Path::new(&raw_files);
17
18 for entry in WalkDir::new(base_path).min_depth(1).into_iter().filter_map(|e| e.ok()) {
19 println!("{:?}", entry.path());
20 let mut f = File::open(entry.path()).expect("could not open file");
21 let mut buf = String::new();
22 f.read_to_string(&mut buf).unwrap();
23
24 let buf = pre_process::strip_irrelevant_content(&buf);
25 }
26}
diff --git a/src/pre_process.rs b/src/pre_process.rs
new file mode 100644
index 0000000..aa55bd8
--- /dev/null
+++ b/src/pre_process.rs
@@ -0,0 +1,13 @@
/// Cuts a page down to the section between the profile `<div>` and the "Cast Votes" button.
///
/// * Both markers present: returns the text from the start of the profile marker up to
///   (not including) the first vote button that follows it.
/// * Only the profile marker present (or the button only appears before it): returns
///   everything from the profile marker to the end of the input.
/// * Only the vote button present: returns everything before the button.
/// * Neither marker present: returns an empty string.
pub fn strip_irrelevant_content(s: &str) -> String {
    let profile_marker = r#"<div class=profile id=profile>"#;
    let vote_marker =
        r#"<INPUT style="font-size: 2em;" TYPE=SUBMIT NAME="votes" VALUE="Cast Votes">"#;

    match s.find(profile_marker) {
        Some(start) => {
            // Look for the end marker inside the already-trimmed tail, so the leading
            // part of the page stays cut off when both markers are present.
            let tail = &s[start..];
            let end = tail.find(vote_marker).unwrap_or(tail.len());
            tail[..end].to_string()
        }
        None => {
            // Without a profile marker, keep what precedes the vote button, or nothing
            // at all when that is missing too.
            let end = s.find(vote_marker).unwrap_or(0);
            s[..end].to_string()
        }
    }
}