From 4ed4b0951982475bfacead6de53c505a2bc45f08 Mon Sep 17 00:00:00 2001
From: jan 
Date: Sun, 18 Sep 2016 16:44:22 +0200
Subject: initial commit
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..42df15e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+target
+characters
+series.txt
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..4868267
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,116 @@
+[root]
+name = "acd_anime_export"
+version = "0.1.0"
+dependencies = [
+ "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "env_logger"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "kernel32-sys"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "log"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "memchr"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex"
+version = "0.1.77"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
+ "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
+ "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "thread-id"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "thread_local"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "utf8-ranges"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "winapi"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "winapi-build"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[metadata]
+"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
+"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
+"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
+"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
+"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
+"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
+"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
+"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
+"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
+"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
+"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
+"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
+"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..bd253c6
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "acd_series_finder"
+version = "0.1.0"
+authors = ["rknshia"]
+
+[dependencies]
+log = "0.3"
+env_logger = "0.3"
+regex = "0.1"
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..7cf9df9
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,94 @@
+#[macro_use] extern crate log;
+extern crate env_logger;
+
+extern crate regex;
+use regex::Regex;
+
+use std::path::{Path, PathBuf};
+use std::fs::File;
+use std::io::prelude::*;
+use std::collections::BTreeMap;
+use std::env;
+use std::str::FromStr;
+use std::sync::{Arc, Mutex};
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::thread;
+
+static MAX_THREADS: u32 = 8; // upper bound of the range `main` iterates to spawn its worker threads
+
+/// Scans every file in the `HTML_FILES` directory and writes the series ids and names found there to `series.txt`.
+///
+/// # Panics
+/// Panics if `HTML_FILES` is unset, missing or unreadable, if a worker thread panics on a page, or if `series.txt` cannot be written.
+fn main() {
+    env_logger::init().unwrap();
+
+    let bp = env::var("HTML_FILES").expect("HTML_FILES not set");
+    let p = Path::new(&bp);
+    if !p.exists() {
+        panic!("no HTML_FILES directory");
+    }
+
+    // NOTE(review): the next two patterns look like they lost their HTML tags (and gained a line break) when this patch was rendered; restore them from the repository before building.
+    let re_assignment_section = Regex::new(r#"(?mi)appears in the following
\s*(.*?)\s*\s*?"#).unwrap();
+    let re_anime_link = Regex::new(r#""#).unwrap();
+    let ws = Regex::new(r"\s").unwrap();
+    let animes: Arc<Mutex<BTreeMap<u32, String>>> = Arc::new(Mutex::new(BTreeMap::new()));
+
+    let dir = p.read_dir().expect("could not read html directory");
+    let paths: Vec<PathBuf> = dir.map(|entry| entry.expect("could not read directory entry").path()).collect();
+    let files = Arc::new(Mutex::new(paths));
+
+    // Keep the join handles so main can block on the workers instead of spinning on a shared counter.
+    let mut workers = Vec::new();
+    for _ in 0..MAX_THREADS {
+        let animes = animes.clone();
+        let files = files.clone();
+        let re_assignment_section = re_assignment_section.clone();
+        let re_anime_link = re_anime_link.clone();
+        let ws = ws.clone();
+
+        workers.push(thread::spawn(move || {
+            loop {
+                // The guard from `lock()` is a temporary of this statement, so the queue is unlocked again before the file is parsed.
+                let f = match files.lock().unwrap().pop() {
+                    None => {
+                        debug!("thread finished");
+                        return;
+                    },
+                    Some(s) => s
+                };
+
+                let mut fh = File::open(&f).expect("could not open file");
+                let mut buf = String::new();
+                if let Err(_) = fh.read_to_string(&mut buf) {
+                    error!("INVALID {:?}", f);
+                    continue;
+                };
+                // `.` does not match newlines here (no `s` flag), so flatten all whitespace to spaces before matching.
+                buf = ws.replace_all(&buf, " ");
+
+                let section = re_assignment_section.captures(&buf).expect("no assignments for character?").at(1).unwrap();
+
+                let mut animes = animes.lock().unwrap();
+                for anime in re_anime_link.captures_iter(&section) {
+                    debug!("{}: {}", anime.at(1).unwrap(), anime.at(2).unwrap());
+                    animes.insert(u32::from_str(anime.at(1).unwrap()).unwrap(), anime.at(2).unwrap().into());
+                }
+            }
+        }));
+    }
+
+    // Joining surfaces a worker panic here; a counter that a panicking worker never decrements would leave main waiting forever.
+    for w in workers {
+        w.join().expect("worker thread panicked");
+    }
+
+    let animes = animes.lock().unwrap();
+    println!("found {} series. writing series.txt", animes.len());
+
+    let mut f = File::create("series.txt").expect("could not create series.txt");
+    for (k, v) in animes.iter() {
+        write!(f, "{:?};;{}\n", k, v).expect("write failed");
+    }
+}
-- 
cgit v0.10.1