aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjan <jan@ruken.pw>2016-09-18 14:44:22 (UTC)
committerjan <jan@ruken.pw>2016-09-18 14:44:22 (UTC)
commit4ed4b0951982475bfacead6de53c505a2bc45f08 (patch)
tree0c63b22bb0732e8c6219a51e8738c71c8fa23a59
initial commitHEADmaster
-rw-r--r--.gitignore3
-rw-r--r--Cargo.lock116
-rw-r--r--Cargo.toml9
-rw-r--r--src/main.rs94
4 files changed, 222 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..42df15e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
1target
2characters
3series.txt \ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..4868267
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,116 @@
1[root]
2name = "acd_anime_export"
3version = "0.1.0"
4dependencies = [
5 "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
6 "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
7 "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
8]
9
10[[package]]
11name = "aho-corasick"
12version = "0.5.3"
13source = "registry+https://github.com/rust-lang/crates.io-index"
14dependencies = [
15 "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
16]
17
18[[package]]
19name = "env_logger"
20version = "0.3.5"
21source = "registry+https://github.com/rust-lang/crates.io-index"
22dependencies = [
23 "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
24 "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
25]
26
27[[package]]
28name = "kernel32-sys"
29version = "0.2.2"
30source = "registry+https://github.com/rust-lang/crates.io-index"
31dependencies = [
32 "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
33 "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
34]
35
36[[package]]
37name = "libc"
38version = "0.2.16"
39source = "registry+https://github.com/rust-lang/crates.io-index"
40
41[[package]]
42name = "log"
43version = "0.3.6"
44source = "registry+https://github.com/rust-lang/crates.io-index"
45
46[[package]]
47name = "memchr"
48version = "0.1.11"
49source = "registry+https://github.com/rust-lang/crates.io-index"
50dependencies = [
51 "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
52]
53
54[[package]]
55name = "regex"
56version = "0.1.77"
57source = "registry+https://github.com/rust-lang/crates.io-index"
58dependencies = [
59 "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
60 "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
61 "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
62 "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
63 "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
64]
65
66[[package]]
67name = "regex-syntax"
68version = "0.3.5"
69source = "registry+https://github.com/rust-lang/crates.io-index"
70
71[[package]]
72name = "thread-id"
73version = "2.0.0"
74source = "registry+https://github.com/rust-lang/crates.io-index"
75dependencies = [
76 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
77 "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
78]
79
80[[package]]
81name = "thread_local"
82version = "0.2.7"
83source = "registry+https://github.com/rust-lang/crates.io-index"
84dependencies = [
85 "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
86]
87
88[[package]]
89name = "utf8-ranges"
90version = "0.1.3"
91source = "registry+https://github.com/rust-lang/crates.io-index"
92
93[[package]]
94name = "winapi"
95version = "0.2.8"
96source = "registry+https://github.com/rust-lang/crates.io-index"
97
98[[package]]
99name = "winapi-build"
100version = "0.1.1"
101source = "registry+https://github.com/rust-lang/crates.io-index"
102
103[metadata]
104"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
105"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
106"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
107"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
108"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
109"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
110"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
111"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
112"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
113"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
114"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
115"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
116"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..bd253c6
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,9 @@
1[package]
2name = "acd_series_finder"
3version = "0.1.0"
4authors = ["rknshia"]
5
6[dependencies]
7log = "0.3"
8env_logger = "0.3"
9regex = "0.1" \ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..7cf9df9
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,94 @@
1#[macro_use] extern crate log;
2extern crate env_logger;
3
4extern crate regex;
5use regex::Regex;
6
7use std::path::{Path, PathBuf};
8use std::fs::File;
9use std::io::prelude::*;
10use std::collections::BTreeMap;
11use std::env;
12use std::str::FromStr;
13use std::sync::{Arc, Mutex};
14use std::sync::atomic::{AtomicUsize, Ordering};
15use std::thread;
16
17static MAX_THREADS: u32 = 8;
18
19fn main() {
20 env_logger::init().unwrap();
21
22 let bp = env::var("HTML_FILES").expect("HTML_FILES not set");
23 let p = Path::new(&bp);
24 if !p.exists() {
25 panic!("no HTML_FILES directory");
26 }
27
28 let re_assignment_section = Regex::new(r#"(?mi)appears in the following</P>\s*(.*?)\s*</UL>\s*?</DIV>"#).unwrap();
29 let re_anime_link = Regex::new(r#"<div class=tile3top><A href="series\.php\?id=([0-9]+)">(.*?)</A>\s*?</div>"#).unwrap();
30 let ws = Regex::new(r"\s").unwrap();
31 let mut animes: Arc<Mutex<BTreeMap<u32, String>>> = Arc::new(Mutex::new(BTreeMap::new()));
32 let mut files: Arc<Mutex<Vec<PathBuf>>> = Arc::new(Mutex::new(vec![]));
33 let mut threads_active = Arc::new(AtomicUsize::new(0));
34
35 let dir = p.read_dir().expect("could not read html directory");
36 {
37 let mut files = files.lock().unwrap();
38 for f in dir {
39 files.push(f.unwrap().path().to_path_buf());
40 }
41 }
42
43 for i in 1..MAX_THREADS {
44 let mut animes = animes.clone();
45 let mut files = files.clone();
46 let re_assignment_section = re_assignment_section.clone();
47 let re_anime_link = re_anime_link.clone();
48 let ws = ws.clone();
49 let threads_active = threads_active.clone();
50 threads_active.fetch_add(1, Ordering::SeqCst);
51
52 thread::spawn(move || {
53 loop {
54 let mut f: PathBuf;
55 {
56 f = match files.lock().unwrap().pop() {
57 None => {
58 debug!("thread finished");
59 threads_active.fetch_sub(1, Ordering::SeqCst);
60 return;
61 },
62 Some(s) => s
63 };
64 }
65
66 let mut fh = File::open(&f).expect("could not open file");
67 let mut buf = String::new();
68 if let Err(_) = fh.read_to_string(&mut buf) {
69 error!("INVALID {:?}", f);
70 continue;
71 };
72 buf = ws.replace_all(&buf, " ");
73
74 let section = re_assignment_section.captures(&buf).expect("no assignments for character?").at(1).unwrap();
75
76 let mut animes = animes.lock().unwrap();
77 for anime in re_anime_link.captures_iter(&section) {
78 debug!("{}: {}", anime.at(1).unwrap(), anime.at(2).unwrap());
79 animes.insert(u32::from_str(anime.at(1).unwrap()).unwrap(), anime.at(2).unwrap().into());
80 }
81 }
82 });
83 }
84
85 while threads_active.load(Ordering::SeqCst) != 0 {}
86
87 let mut animes = animes.lock().unwrap();
88 println!("found {} series. writing series.txt", animes.len());
89
90 let mut f = File::create("series.txt").expect("could not create series.txt");
91 for (k, v) in animes.iter() {
92 f.write_all(format!("{:?};;{}\n", k, v).as_bytes()).expect("write failed");
93 }
94}