#[macro_use] extern crate log; extern crate env_logger; extern crate regex; use regex::Regex; use std::path::{Path, PathBuf}; use std::fs::File; use std::io::prelude::*; use std::collections::BTreeMap; use std::env; use std::str::FromStr; use std::sync::{Arc, Mutex}; use std::sync::atomic::{AtomicUsize, Ordering}; use std::thread; static MAX_THREADS: u32 = 8; fn main() { env_logger::init().unwrap(); let bp = env::var("HTML_FILES").expect("HTML_FILES not set"); let p = Path::new(&bp); if !p.exists() { panic!("no HTML_FILES directory"); } let re_assignment_section = Regex::new(r#"(?mi)appears in the following

\s*(.*?)\s*\s*?"#).unwrap(); let re_anime_link = Regex::new(r#"
(.*?)\s*?
"#).unwrap(); let ws = Regex::new(r"\s").unwrap(); let mut animes: Arc>> = Arc::new(Mutex::new(BTreeMap::new())); let mut files: Arc>> = Arc::new(Mutex::new(vec![])); let mut threads_active = Arc::new(AtomicUsize::new(0)); let dir = p.read_dir().expect("could not read html directory"); { let mut files = files.lock().unwrap(); for f in dir { files.push(f.unwrap().path().to_path_buf()); } } for i in 1..MAX_THREADS { let mut animes = animes.clone(); let mut files = files.clone(); let re_assignment_section = re_assignment_section.clone(); let re_anime_link = re_anime_link.clone(); let ws = ws.clone(); let threads_active = threads_active.clone(); threads_active.fetch_add(1, Ordering::SeqCst); thread::spawn(move || { loop { let mut f: PathBuf; { f = match files.lock().unwrap().pop() { None => { debug!("thread finished"); threads_active.fetch_sub(1, Ordering::SeqCst); return; }, Some(s) => s }; } let mut fh = File::open(&f).expect("could not open file"); let mut buf = String::new(); if let Err(_) = fh.read_to_string(&mut buf) { error!("INVALID {:?}", f); continue; }; buf = ws.replace_all(&buf, " "); let section = re_assignment_section.captures(&buf).expect("no assignments for character?").at(1).unwrap(); let mut animes = animes.lock().unwrap(); for anime in re_anime_link.captures_iter(§ion) { debug!("{}: {}", anime.at(1).unwrap(), anime.at(2).unwrap()); animes.insert(u32::from_str(anime.at(1).unwrap()).unwrap(), anime.at(2).unwrap().into()); } } }); } while threads_active.load(Ordering::SeqCst) != 0 {} let mut animes = animes.lock().unwrap(); println!("found {} series. writing series.txt", animes.len()); let mut f = File::create("series.txt").expect("could not create series.txt"); for (k, v) in animes.iter() { f.write_all(format!("{:?};;{}\n", k, v).as_bytes()).expect("write failed"); } }