// path: root/src/main.rs
// blob: 7cf9df90a255ce382dc6c83ad22488f98ccf12d9
#[macro_use] extern crate log;
extern crate env_logger;

extern crate regex;
use regex::Regex;

use std::path::{Path, PathBuf};
use std::fs::File;
use std::io::prelude::*;
use std::collections::BTreeMap;
use std::env;
use std::str::FromStr;
use std::sync::{Arc, Mutex};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

/// Upper bound used by `main` when spawning the worker threads that parse
/// the HTML files.
const MAX_THREADS: u32 = 8;

fn main() {
    env_logger::init().unwrap();
    
    let bp = env::var("HTML_FILES").expect("HTML_FILES not set");
    let p = Path::new(&bp);
    if !p.exists() {
        panic!("no HTML_FILES directory");
    }
    
    let re_assignment_section = Regex::new(r#"(?mi)appears in the following</P>\s*(.*?)\s*</UL>\s*?</DIV>"#).unwrap();
    let re_anime_link = Regex::new(r#"<div class=tile3top><A href="series\.php\?id=([0-9]+)">(.*?)</A>\s*?</div>"#).unwrap();
    let ws = Regex::new(r"\s").unwrap();
    let mut animes: Arc<Mutex<BTreeMap<u32, String>>> = Arc::new(Mutex::new(BTreeMap::new()));
    let mut files: Arc<Mutex<Vec<PathBuf>>> = Arc::new(Mutex::new(vec![]));
    let mut threads_active = Arc::new(AtomicUsize::new(0));
    
    let dir = p.read_dir().expect("could not read html directory");
    {
        let mut files = files.lock().unwrap();
        for f in dir {
            files.push(f.unwrap().path().to_path_buf());
        }
    }

    for i in 1..MAX_THREADS {
        let mut animes = animes.clone();
        let mut files = files.clone();
        let re_assignment_section = re_assignment_section.clone();
        let re_anime_link = re_anime_link.clone();
        let ws = ws.clone();
        let threads_active = threads_active.clone();
        threads_active.fetch_add(1, Ordering::SeqCst);

        thread::spawn(move || {    
            loop {
                let mut f: PathBuf;
                {
                    f = match files.lock().unwrap().pop() {
                        None => {
                            debug!("thread finished");
                            threads_active.fetch_sub(1, Ordering::SeqCst);
                            return;
                        },
                        Some(s) => s
                    };
                }

                let mut fh = File::open(&f).expect("could not open file");
                let mut buf = String::new();
                if let Err(_) = fh.read_to_string(&mut buf) {
                    error!("INVALID {:?}", f);
                    continue;
                };
                buf = ws.replace_all(&buf, " ");

                let section = re_assignment_section.captures(&buf).expect("no assignments for character?").at(1).unwrap();

                let mut animes = animes.lock().unwrap();
                for anime in re_anime_link.captures_iter(&section) {
                    debug!("{}: {}", anime.at(1).unwrap(), anime.at(2).unwrap());
                    animes.insert(u32::from_str(anime.at(1).unwrap()).unwrap(), anime.at(2).unwrap().into());
                }
            }
        });
    }
    
    while threads_active.load(Ordering::SeqCst) != 0 {}
    
    let mut animes = animes.lock().unwrap();
    println!("found {} series. writing series.txt", animes.len());
    
    let mut f = File::create("series.txt").expect("could not create series.txt");
    for (k, v) in animes.iter() {
        f.write_all(format!("{:?};;{}\n", k, v).as_bytes()).expect("write failed");
    }
}