aboutsummaryrefslogtreecommitdiff
path: root/src/main.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/main.rs')
-rw-r--r--src/main.rs94
1 files changed, 94 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..7cf9df9
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,94 @@
1#[macro_use] extern crate log;
2extern crate env_logger;
3
4extern crate regex;
5use regex::Regex;
6
7use std::path::{Path, PathBuf};
8use std::fs::File;
9use std::io::prelude::*;
10use std::collections::BTreeMap;
11use std::env;
12use std::str::FromStr;
13use std::sync::{Arc, Mutex};
14use std::sync::atomic::{AtomicUsize, Ordering};
15use std::thread;
16
17static MAX_THREADS: u32 = 8;
18
19fn main() {
20 env_logger::init().unwrap();
21
22 let bp = env::var("HTML_FILES").expect("HTML_FILES not set");
23 let p = Path::new(&bp);
24 if !p.exists() {
25 panic!("no HTML_FILES directory");
26 }
27
28 let re_assignment_section = Regex::new(r#"(?mi)appears in the following</P>\s*(.*?)\s*</UL>\s*?</DIV>"#).unwrap();
29 let re_anime_link = Regex::new(r#"<div class=tile3top><A href="series\.php\?id=([0-9]+)">(.*?)</A>\s*?</div>"#).unwrap();
30 let ws = Regex::new(r"\s").unwrap();
31 let mut animes: Arc<Mutex<BTreeMap<u32, String>>> = Arc::new(Mutex::new(BTreeMap::new()));
32 let mut files: Arc<Mutex<Vec<PathBuf>>> = Arc::new(Mutex::new(vec![]));
33 let mut threads_active = Arc::new(AtomicUsize::new(0));
34
35 let dir = p.read_dir().expect("could not read html directory");
36 {
37 let mut files = files.lock().unwrap();
38 for f in dir {
39 files.push(f.unwrap().path().to_path_buf());
40 }
41 }
42
43 for i in 1..MAX_THREADS {
44 let mut animes = animes.clone();
45 let mut files = files.clone();
46 let re_assignment_section = re_assignment_section.clone();
47 let re_anime_link = re_anime_link.clone();
48 let ws = ws.clone();
49 let threads_active = threads_active.clone();
50 threads_active.fetch_add(1, Ordering::SeqCst);
51
52 thread::spawn(move || {
53 loop {
54 let mut f: PathBuf;
55 {
56 f = match files.lock().unwrap().pop() {
57 None => {
58 debug!("thread finished");
59 threads_active.fetch_sub(1, Ordering::SeqCst);
60 return;
61 },
62 Some(s) => s
63 };
64 }
65
66 let mut fh = File::open(&f).expect("could not open file");
67 let mut buf = String::new();
68 if let Err(_) = fh.read_to_string(&mut buf) {
69 error!("INVALID {:?}", f);
70 continue;
71 };
72 buf = ws.replace_all(&buf, " ");
73
74 let section = re_assignment_section.captures(&buf).expect("no assignments for character?").at(1).unwrap();
75
76 let mut animes = animes.lock().unwrap();
77 for anime in re_anime_link.captures_iter(&section) {
78 debug!("{}: {}", anime.at(1).unwrap(), anime.at(2).unwrap());
79 animes.insert(u32::from_str(anime.at(1).unwrap()).unwrap(), anime.at(2).unwrap().into());
80 }
81 }
82 });
83 }
84
85 while threads_active.load(Ordering::SeqCst) != 0 {}
86
87 let mut animes = animes.lock().unwrap();
88 println!("found {} series. writing series.txt", animes.len());
89
90 let mut f = File::create("series.txt").expect("could not create series.txt");
91 for (k, v) in animes.iter() {
92 f.write_all(format!("{:?};;{}\n", k, v).as_bytes()).expect("write failed");
93 }
94}