diff options
author | jan <jan@ruken.pw> | 2016-09-18 14:44:22 (UTC) |
---|---|---|
committer | jan <jan@ruken.pw> | 2016-09-18 14:44:22 (UTC) |
commit | 4ed4b0951982475bfacead6de53c505a2bc45f08 (patch) | |
tree | 0c63b22bb0732e8c6219a51e8738c71c8fa23a59 |
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Cargo.lock | 116 | ||||
-rw-r--r-- | Cargo.toml | 9 | ||||
-rw-r--r-- | src/main.rs | 94 |
4 files changed, 222 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..42df15e --- /dev/null +++ b/.gitignore | |||
@@ -0,0 +1,3 @@ | |||
1 | target | ||
2 | characters | ||
3 | series.txt \ No newline at end of file | ||
diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..4868267 --- /dev/null +++ b/Cargo.lock | |||
@@ -0,0 +1,116 @@ | |||
1 | [root] | ||
2 | name = "acd_anime_export" | ||
3 | version = "0.1.0" | ||
4 | dependencies = [ | ||
5 | "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", | ||
6 | "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", | ||
7 | "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", | ||
8 | ] | ||
9 | |||
10 | [[package]] | ||
11 | name = "aho-corasick" | ||
12 | version = "0.5.3" | ||
13 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
14 | dependencies = [ | ||
15 | "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", | ||
16 | ] | ||
17 | |||
18 | [[package]] | ||
19 | name = "env_logger" | ||
20 | version = "0.3.5" | ||
21 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
22 | dependencies = [ | ||
23 | "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", | ||
24 | "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", | ||
25 | ] | ||
26 | |||
27 | [[package]] | ||
28 | name = "kernel32-sys" | ||
29 | version = "0.2.2" | ||
30 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
31 | dependencies = [ | ||
32 | "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", | ||
33 | "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", | ||
34 | ] | ||
35 | |||
36 | [[package]] | ||
37 | name = "libc" | ||
38 | version = "0.2.16" | ||
39 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
40 | |||
41 | [[package]] | ||
42 | name = "log" | ||
43 | version = "0.3.6" | ||
44 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
45 | |||
46 | [[package]] | ||
47 | name = "memchr" | ||
48 | version = "0.1.11" | ||
49 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
50 | dependencies = [ | ||
51 | "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", | ||
52 | ] | ||
53 | |||
54 | [[package]] | ||
55 | name = "regex" | ||
56 | version = "0.1.77" | ||
57 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
58 | dependencies = [ | ||
59 | "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", | ||
60 | "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", | ||
61 | "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", | ||
62 | "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", | ||
63 | "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", | ||
64 | ] | ||
65 | |||
66 | [[package]] | ||
67 | name = "regex-syntax" | ||
68 | version = "0.3.5" | ||
69 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
70 | |||
71 | [[package]] | ||
72 | name = "thread-id" | ||
73 | version = "2.0.0" | ||
74 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
75 | dependencies = [ | ||
76 | "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", | ||
77 | "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", | ||
78 | ] | ||
79 | |||
80 | [[package]] | ||
81 | name = "thread_local" | ||
82 | version = "0.2.7" | ||
83 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
84 | dependencies = [ | ||
85 | "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", | ||
86 | ] | ||
87 | |||
88 | [[package]] | ||
89 | name = "utf8-ranges" | ||
90 | version = "0.1.3" | ||
91 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
92 | |||
93 | [[package]] | ||
94 | name = "winapi" | ||
95 | version = "0.2.8" | ||
96 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
97 | |||
98 | [[package]] | ||
99 | name = "winapi-build" | ||
100 | version = "0.1.1" | ||
101 | source = "registry+https://github.com/rust-lang/crates.io-index" | ||
102 | |||
103 | [metadata] | ||
104 | "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" | ||
105 | "checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" | ||
106 | "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" | ||
107 | "checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d" | ||
108 | "checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054" | ||
109 | "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" | ||
110 | "checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" | ||
111 | "checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd" | ||
112 | "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" | ||
113 | "checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" | ||
114 | "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" | ||
115 | "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" | ||
116 | "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" | ||
diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..bd253c6 --- /dev/null +++ b/Cargo.toml | |||
@@ -0,0 +1,9 @@ | |||
1 | [package] | ||
2 | name = "acd_series_finder" | ||
3 | version = "0.1.0" | ||
4 | authors = ["rknshia"] | ||
5 | |||
6 | [dependencies] | ||
7 | log = "0.3" | ||
8 | env_logger = "0.3" | ||
9 | regex = "0.1" \ No newline at end of file | ||
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..7cf9df9 --- /dev/null +++ b/src/main.rs | |||
@@ -0,0 +1,94 @@ | |||
1 | #[macro_use] extern crate log; | ||
2 | extern crate env_logger; | ||
3 | |||
4 | extern crate regex; | ||
5 | use regex::Regex; | ||
6 | |||
7 | use std::path::{Path, PathBuf}; | ||
8 | use std::fs::File; | ||
9 | use std::io::prelude::*; | ||
10 | use std::collections::BTreeMap; | ||
11 | use std::env; | ||
12 | use std::str::FromStr; | ||
13 | use std::sync::{Arc, Mutex}; | ||
14 | use std::sync::atomic::{AtomicUsize, Ordering}; | ||
15 | use std::thread; | ||
16 | |||
17 | static MAX_THREADS: u32 = 8; | ||
18 | |||
19 | fn main() { | ||
20 | env_logger::init().unwrap(); | ||
21 | |||
22 | let bp = env::var("HTML_FILES").expect("HTML_FILES not set"); | ||
23 | let p = Path::new(&bp); | ||
24 | if !p.exists() { | ||
25 | panic!("no HTML_FILES directory"); | ||
26 | } | ||
27 | |||
28 | let re_assignment_section = Regex::new(r#"(?mi)appears in the following</P>\s*(.*?)\s*</UL>\s*?</DIV>"#).unwrap(); | ||
29 | let re_anime_link = Regex::new(r#"<div class=tile3top><A href="series\.php\?id=([0-9]+)">(.*?)</A>\s*?</div>"#).unwrap(); | ||
30 | let ws = Regex::new(r"\s").unwrap(); | ||
31 | let mut animes: Arc<Mutex<BTreeMap<u32, String>>> = Arc::new(Mutex::new(BTreeMap::new())); | ||
32 | let mut files: Arc<Mutex<Vec<PathBuf>>> = Arc::new(Mutex::new(vec![])); | ||
33 | let mut threads_active = Arc::new(AtomicUsize::new(0)); | ||
34 | |||
35 | let dir = p.read_dir().expect("could not read html directory"); | ||
36 | { | ||
37 | let mut files = files.lock().unwrap(); | ||
38 | for f in dir { | ||
39 | files.push(f.unwrap().path().to_path_buf()); | ||
40 | } | ||
41 | } | ||
42 | |||
43 | for i in 1..MAX_THREADS { | ||
44 | let mut animes = animes.clone(); | ||
45 | let mut files = files.clone(); | ||
46 | let re_assignment_section = re_assignment_section.clone(); | ||
47 | let re_anime_link = re_anime_link.clone(); | ||
48 | let ws = ws.clone(); | ||
49 | let threads_active = threads_active.clone(); | ||
50 | threads_active.fetch_add(1, Ordering::SeqCst); | ||
51 | |||
52 | thread::spawn(move || { | ||
53 | loop { | ||
54 | let mut f: PathBuf; | ||
55 | { | ||
56 | f = match files.lock().unwrap().pop() { | ||
57 | None => { | ||
58 | debug!("thread finished"); | ||
59 | threads_active.fetch_sub(1, Ordering::SeqCst); | ||
60 | return; | ||
61 | }, | ||
62 | Some(s) => s | ||
63 | }; | ||
64 | } | ||
65 | |||
66 | let mut fh = File::open(&f).expect("could not open file"); | ||
67 | let mut buf = String::new(); | ||
68 | if let Err(_) = fh.read_to_string(&mut buf) { | ||
69 | error!("INVALID {:?}", f); | ||
70 | continue; | ||
71 | }; | ||
72 | buf = ws.replace_all(&buf, " "); | ||
73 | |||
74 | let section = re_assignment_section.captures(&buf).expect("no assignments for character?").at(1).unwrap(); | ||
75 | |||
76 | let mut animes = animes.lock().unwrap(); | ||
77 | for anime in re_anime_link.captures_iter(§ion) { | ||
78 | debug!("{}: {}", anime.at(1).unwrap(), anime.at(2).unwrap()); | ||
79 | animes.insert(u32::from_str(anime.at(1).unwrap()).unwrap(), anime.at(2).unwrap().into()); | ||
80 | } | ||
81 | } | ||
82 | }); | ||
83 | } | ||
84 | |||
85 | while threads_active.load(Ordering::SeqCst) != 0 {} | ||
86 | |||
87 | let mut animes = animes.lock().unwrap(); | ||
88 | println!("found {} series. writing series.txt", animes.len()); | ||
89 | |||
90 | let mut f = File::create("series.txt").expect("could not create series.txt"); | ||
91 | for (k, v) in animes.iter() { | ||
92 | f.write_all(format!("{:?};;{}\n", k, v).as_bytes()).expect("write failed"); | ||
93 | } | ||
94 | } | ||