1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
#[macro_use] extern crate log;
extern crate env_logger;
extern crate regex;
use regex::Regex;
use std::path::{Path, PathBuf};
use std::fs::File;
use std::io::prelude::*;
use std::collections::BTreeMap;
use std::env;
use std::str::FromStr;
use std::sync::{Arc, Mutex};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;
/// Upper bound on the number of worker threads `main` spawns to process the input files.
static MAX_THREADS: u32 = 8;
/// Scans every file in the directory named by the `HTML_FILES` environment
/// variable, extracts the series links (numeric id and title) from each file's
/// "appears in the following" section, and writes the de-duplicated result to
/// `series.txt` as `id;;title` lines ordered by id.
///
/// Files are processed by `MAX_THREADS` worker threads that pull paths from a
/// shared queue and merge their findings into a shared map.
///
/// # Panics
///
/// Panics if `HTML_FILES` is unset or does not exist, if the directory cannot
/// be read, if any worker thread panicked (a file could not be opened, had no
/// assignment section, or contained an id that does not fit in `u32`), or if
/// `series.txt` cannot be created or written.
fn main() {
    env_logger::init().unwrap();

    let bp = env::var("HTML_FILES").expect("HTML_FILES not set");
    let p = Path::new(&bp);
    if !p.exists() {
        panic!("no HTML_FILES directory");
    }

    let re_assignment_section = Regex::new(r#"(?mi)appears in the following</P>\s*(.*?)\s*</UL>\s*?</DIV>"#).unwrap();
    let re_anime_link = Regex::new(r#"<div class=tile3top><A href="series\.php\?id=([0-9]+)">(.*?)</A>\s*?</div>"#).unwrap();
    let ws = Regex::new(r"\s").unwrap();

    // Series id -> title, shared by all workers. A BTreeMap keeps the output sorted by id.
    let animes: Arc<Mutex<BTreeMap<u32, String>>> = Arc::new(Mutex::new(BTreeMap::new()));

    // Work queue: every entry of the input directory.
    let dir = p.read_dir().expect("could not read html directory");
    let queue: Vec<PathBuf> = dir
        .map(|entry| entry.expect("could not read directory entry").path())
        .collect();
    let files = Arc::new(Mutex::new(queue));

    let mut workers = Vec::new();
    for _ in 0..MAX_THREADS {
        let animes = animes.clone();
        let files = files.clone();
        let re_assignment_section = re_assignment_section.clone();
        let re_anime_link = re_anime_link.clone();
        let ws = ws.clone();

        workers.push(thread::spawn(move || {
            loop {
                // Pop in its own statement so the queue lock is released
                // before any file I/O or parsing happens.
                let next = files.lock().unwrap().pop();
                let f = match next {
                    Some(path) => path,
                    None => {
                        debug!("thread finished");
                        return;
                    }
                };

                let mut fh = File::open(&f).expect("could not open file");
                let mut contents = String::new();
                if fh.read_to_string(&mut contents).is_err() {
                    error!("INVALID {:?}", f);
                    continue;
                }

                // Replace every whitespace character with a plain space;
                // presumably so that `.` in the patterns can match across
                // what were line breaks in the original markup.
                let text = ws.replace_all(&contents, " ");
                let section = re_assignment_section
                    .captures(&text)
                    .expect("no assignments for character?")
                    .at(1)
                    .unwrap();

                // Do all matching and parsing before taking the map lock, so
                // workers do not serialize on it and a parse panic cannot
                // poison the shared map.
                let found: Vec<(u32, String)> = re_anime_link
                    .captures_iter(section)
                    .map(|anime| {
                        let id = anime.at(1).unwrap();
                        let title = anime.at(2).unwrap();
                        debug!("{}: {}", id, title);
                        (
                            u32::from_str(id).expect("series id does not fit in u32"),
                            String::from(title),
                        )
                    })
                    .collect();

                animes.lock().unwrap().extend(found);
            }
        }));
    }

    // Block until every worker is done. Joining (rather than polling a
    // counter) also tells us when a worker died, instead of waiting forever.
    let mut failed = 0;
    for worker in workers {
        if worker.join().is_err() {
            failed += 1;
        }
    }
    if failed > 0 {
        panic!("{} worker thread(s) panicked", failed);
    }

    let animes = animes.lock().unwrap();
    println!("found {} series. writing series.txt", animes.len());

    let file = File::create("series.txt").expect("could not create series.txt");
    let mut out = ::std::io::BufWriter::new(file);
    for (k, v) in animes.iter() {
        write!(out, "{:?};;{}\n", k, v).expect("write failed");
    }
    // Flush explicitly: errors during the implicit flush on drop are ignored.
    out.flush().expect("write failed");
}
|