diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/main.rs | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..7cf9df9 --- /dev/null +++ b/src/main.rs | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | #[macro_use] extern crate log; | ||
| 2 | extern crate env_logger; | ||
| 3 | |||
| 4 | extern crate regex; | ||
| 5 | use regex::Regex; | ||
| 6 | |||
| 7 | use std::path::{Path, PathBuf}; | ||
| 8 | use std::fs::File; | ||
| 9 | use std::io::prelude::*; | ||
| 10 | use std::collections::BTreeMap; | ||
| 11 | use std::env; | ||
| 12 | use std::str::FromStr; | ||
| 13 | use std::sync::{Arc, Mutex}; | ||
| 14 | use std::sync::atomic::{AtomicUsize, Ordering}; | ||
| 15 | use std::thread; | ||
| 16 | |||
| 17 | static MAX_THREADS: u32 = 8; | ||
| 18 | |||
| 19 | fn main() { | ||
| 20 | env_logger::init().unwrap(); | ||
| 21 | |||
| 22 | let bp = env::var("HTML_FILES").expect("HTML_FILES not set"); | ||
| 23 | let p = Path::new(&bp); | ||
| 24 | if !p.exists() { | ||
| 25 | panic!("no HTML_FILES directory"); | ||
| 26 | } | ||
| 27 | |||
| 28 | let re_assignment_section = Regex::new(r#"(?mi)appears in the following</P>\s*(.*?)\s*</UL>\s*?</DIV>"#).unwrap(); | ||
| 29 | let re_anime_link = Regex::new(r#"<div class=tile3top><A href="series\.php\?id=([0-9]+)">(.*?)</A>\s*?</div>"#).unwrap(); | ||
| 30 | let ws = Regex::new(r"\s").unwrap(); | ||
| 31 | let mut animes: Arc<Mutex<BTreeMap<u32, String>>> = Arc::new(Mutex::new(BTreeMap::new())); | ||
| 32 | let mut files: Arc<Mutex<Vec<PathBuf>>> = Arc::new(Mutex::new(vec![])); | ||
| 33 | let mut threads_active = Arc::new(AtomicUsize::new(0)); | ||
| 34 | |||
| 35 | let dir = p.read_dir().expect("could not read html directory"); | ||
| 36 | { | ||
| 37 | let mut files = files.lock().unwrap(); | ||
| 38 | for f in dir { | ||
| 39 | files.push(f.unwrap().path().to_path_buf()); | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | for i in 1..MAX_THREADS { | ||
| 44 | let mut animes = animes.clone(); | ||
| 45 | let mut files = files.clone(); | ||
| 46 | let re_assignment_section = re_assignment_section.clone(); | ||
| 47 | let re_anime_link = re_anime_link.clone(); | ||
| 48 | let ws = ws.clone(); | ||
| 49 | let threads_active = threads_active.clone(); | ||
| 50 | threads_active.fetch_add(1, Ordering::SeqCst); | ||
| 51 | |||
| 52 | thread::spawn(move || { | ||
| 53 | loop { | ||
| 54 | let mut f: PathBuf; | ||
| 55 | { | ||
| 56 | f = match files.lock().unwrap().pop() { | ||
| 57 | None => { | ||
| 58 | debug!("thread finished"); | ||
| 59 | threads_active.fetch_sub(1, Ordering::SeqCst); | ||
| 60 | return; | ||
| 61 | }, | ||
| 62 | Some(s) => s | ||
| 63 | }; | ||
| 64 | } | ||
| 65 | |||
| 66 | let mut fh = File::open(&f).expect("could not open file"); | ||
| 67 | let mut buf = String::new(); | ||
| 68 | if let Err(_) = fh.read_to_string(&mut buf) { | ||
| 69 | error!("INVALID {:?}", f); | ||
| 70 | continue; | ||
| 71 | }; | ||
| 72 | buf = ws.replace_all(&buf, " "); | ||
| 73 | |||
| 74 | let section = re_assignment_section.captures(&buf).expect("no assignments for character?").at(1).unwrap(); | ||
| 75 | |||
| 76 | let mut animes = animes.lock().unwrap(); | ||
| 77 | for anime in re_anime_link.captures_iter(§ion) { | ||
| 78 | debug!("{}: {}", anime.at(1).unwrap(), anime.at(2).unwrap()); | ||
| 79 | animes.insert(u32::from_str(anime.at(1).unwrap()).unwrap(), anime.at(2).unwrap().into()); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | }); | ||
| 83 | } | ||
| 84 | |||
| 85 | while threads_active.load(Ordering::SeqCst) != 0 {} | ||
| 86 | |||
| 87 | let mut animes = animes.lock().unwrap(); | ||
| 88 | println!("found {} series. writing series.txt", animes.len()); | ||
| 89 | |||
| 90 | let mut f = File::create("series.txt").expect("could not create series.txt"); | ||
| 91 | for (k, v) in animes.iter() { | ||
| 92 | f.write_all(format!("{:?};;{}\n", k, v).as_bytes()).expect("write failed"); | ||
| 93 | } | ||
| 94 | } | ||
