From 4ed4b0951982475bfacead6de53c505a2bc45f08 Mon Sep 17 00:00:00 2001 From: jan Date: Sun, 18 Sep 2016 16:44:22 +0200 Subject: initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..42df15e --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +target +characters +series.txt \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..4868267 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,116 @@ +[root] +name = "acd_anime_export" +version = "0.1.0" +dependencies = [ + "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "aho-corasick" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "env_logger" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "log" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.16 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "0.1.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "thread-id" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread_local" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" +"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" +"checksum kernel32-sys 0.2.2 
(registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d" +"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054" +"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" +"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" +"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd" +"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" +"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" +"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" +"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" +"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..bd253c6 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "acd_series_finder" +version = "0.1.0" +authors = ["rknshia"] + +[dependencies] +log = "0.3" +env_logger = "0.3" +regex = "0.1" \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file 
mode 100644 index 0000000..7cf9df9 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,94 @@ +#[macro_use] extern crate log; +extern crate env_logger; + +extern crate regex; +use regex::Regex; + +use std::path::{Path, PathBuf}; +use std::fs::File; +use std::io::prelude::*; +use std::collections::BTreeMap; +use std::env; +use std::str::FromStr; +use std::sync::{Arc, Mutex}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::thread; + +static MAX_THREADS: u32 = 8; + +fn main() { + env_logger::init().unwrap(); + + let bp = env::var("HTML_FILES").expect("HTML_FILES not set"); + let p = Path::new(&bp); + if !p.exists() { + panic!("no HTML_FILES directory"); + } + + let re_assignment_section = Regex::new(r#"(?mi)appears in the following

\s*(.*?)\s*\s*?"#).unwrap(); + let re_anime_link = Regex::new(r#"
(.*?)\s*?
"#).unwrap(); + let ws = Regex::new(r"\s").unwrap(); + let mut animes: Arc<Mutex<BTreeMap<u32, String>>> = Arc::new(Mutex::new(BTreeMap::new())); + let mut files: Arc<Mutex<Vec<PathBuf>>> = Arc::new(Mutex::new(vec![])); + let mut threads_active = Arc::new(AtomicUsize::new(0)); + + let dir = p.read_dir().expect("could not read html directory"); + { + let mut files = files.lock().unwrap(); + for f in dir { + files.push(f.unwrap().path().to_path_buf()); + } + } + + for i in 1..MAX_THREADS { + let mut animes = animes.clone(); + let mut files = files.clone(); + let re_assignment_section = re_assignment_section.clone(); + let re_anime_link = re_anime_link.clone(); + let ws = ws.clone(); + let threads_active = threads_active.clone(); + threads_active.fetch_add(1, Ordering::SeqCst); + + thread::spawn(move || { + loop { + let mut f: PathBuf; + { + f = match files.lock().unwrap().pop() { + None => { + debug!("thread finished"); + threads_active.fetch_sub(1, Ordering::SeqCst); + return; + }, + Some(s) => s + }; + } + + let mut fh = File::open(&f).expect("could not open file"); + let mut buf = String::new(); + if let Err(_) = fh.read_to_string(&mut buf) { + error!("INVALID {:?}", f); + continue; + }; + buf = ws.replace_all(&buf, " "); + + let section = re_assignment_section.captures(&buf).expect("no assignments for character?").at(1).unwrap(); + + let mut animes = animes.lock().unwrap(); + for anime in re_anime_link.captures_iter(&section) { + debug!("{}: {}", anime.at(1).unwrap(), anime.at(2).unwrap()); + animes.insert(u32::from_str(anime.at(1).unwrap()).unwrap(), anime.at(2).unwrap().into()); + } + } + }); + } + + while threads_active.load(Ordering::SeqCst) != 0 {} + + let mut animes = animes.lock().unwrap(); + println!("found {} series. writing series.txt", animes.len()); + + let mut f = File::create("series.txt").expect("could not create series.txt"); + for (k, v) in animes.iter() { + f.write_all(format!("{:?};;{}\n", k, v).as_bytes()).expect("write failed"); + } +} -- cgit v0.10.1