From 4ed4b0951982475bfacead6de53c505a2bc45f08 Mon Sep 17 00:00:00 2001
From: jan
Date: Sun, 18 Sep 2016 16:44:22 +0200
Subject: initial commit
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..42df15e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+target
+characters
+series.txt
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..4868267
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,116 @@
+[root]
+name = "acd_anime_export"
+version = "0.1.0"
+dependencies = [
+ "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "env_logger"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "kernel32-sys"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "log"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "memchr"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex"
+version = "0.1.77"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
+ "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
+ "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.3.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "thread-id"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "thread_local"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "utf8-ranges"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "winapi"
+version = "0.2.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "winapi-build"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[metadata]
+"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
+"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
+"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
+"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
+"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
+"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
+"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
+"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
+"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
+"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
+"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
+"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
+"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..bd253c6
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "acd_series_finder"
+version = "0.1.0"
+authors = ["rknshia"]
+
+[dependencies]
+log = "0.3"
+env_logger = "0.3"
+regex = "0.1"
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..7cf9df9
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,94 @@
+#[macro_use] extern crate log;
+extern crate env_logger;
+
+extern crate regex;
+use regex::Regex;
+
+use std::path::{Path, PathBuf};
+use std::fs::File;
+use std::io::prelude::*;
+use std::collections::BTreeMap;
+use std::env;
+use std::str::FromStr;
+use std::sync::{Arc, Mutex};
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::thread;
+
+/// Upper bound of the range `main` loops over when spawning its worker threads.
+static MAX_THREADS: u32 = 8;
+
+fn main() {
+ env_logger::init().unwrap();
+
+ let bp = env::var("HTML_FILES").expect("HTML_FILES not set");
+ let p = Path::new(&bp);
+ if !p.exists() {
+ panic!("no HTML_FILES directory");
+ }
+
+ let re_assignment_section = Regex::new(r#"(?mi)appears in the following
\s*(.*?)\s*\s*?"#).unwrap();
+ let re_anime_link = Regex::new(r#""#).unwrap();
+ let ws = Regex::new(r"\s").unwrap();
+ let mut animes: Arc>> = Arc::new(Mutex::new(BTreeMap::new()));
+ let mut files: Arc>> = Arc::new(Mutex::new(vec![]));
+ let mut threads_active = Arc::new(AtomicUsize::new(0));
+
+ let dir = p.read_dir().expect("could not read html directory");
+ {
+ let mut files = files.lock().unwrap();
+ for f in dir {
+ files.push(f.unwrap().path().to_path_buf());
+ }
+ }
+
+ for i in 1..MAX_THREADS {
+ let mut animes = animes.clone();
+ let mut files = files.clone();
+ let re_assignment_section = re_assignment_section.clone();
+ let re_anime_link = re_anime_link.clone();
+ let ws = ws.clone();
+ let threads_active = threads_active.clone();
+ threads_active.fetch_add(1, Ordering::SeqCst);
+
+ thread::spawn(move || {
+ loop {
+ let mut f: PathBuf;
+ {
+ f = match files.lock().unwrap().pop() {
+ None => {
+ debug!("thread finished");
+ threads_active.fetch_sub(1, Ordering::SeqCst);
+ return;
+ },
+ Some(s) => s
+ };
+ }
+
+ let mut fh = File::open(&f).expect("could not open file");
+ let mut buf = String::new();
+ if let Err(_) = fh.read_to_string(&mut buf) {
+ error!("INVALID {:?}", f);
+ continue;
+ };
+ buf = ws.replace_all(&buf, " ");
+
+ let section = re_assignment_section.captures(&buf).expect("no assignments for character?").at(1).unwrap();
+
+ let mut animes = animes.lock().unwrap();
+ for anime in re_anime_link.captures_iter(§ion) {
+ debug!("{}: {}", anime.at(1).unwrap(), anime.at(2).unwrap());
+ animes.insert(u32::from_str(anime.at(1).unwrap()).unwrap(), anime.at(2).unwrap().into());
+ }
+ }
+ });
+ }
+
+ while threads_active.load(Ordering::SeqCst) != 0 {}
+
+ let mut animes = animes.lock().unwrap();
+ println!("found {} series. writing series.txt", animes.len());
+
+ let mut f = File::create("series.txt").expect("could not create series.txt");
+ for (k, v) in animes.iter() {
+ f.write_all(format!("{:?};;{}\n", k, v).as_bytes()).expect("write failed");
+ }
+}
--
cgit v0.10.1