diff options
author | jan <jan@ruken.pw> | 2016-09-18 14:44:22 (UTC) |
---|---|---|
committer | jan <jan@ruken.pw> | 2016-09-18 14:44:22 (UTC) |
commit | 4ed4b0951982475bfacead6de53c505a2bc45f08 (patch) | |
tree | 0c63b22bb0732e8c6219a51e8738c71c8fa23a59 /src |
Diffstat (limited to 'src')
-rw-r--r-- | src/main.rs | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..7cf9df9 --- /dev/null +++ b/src/main.rs | |||
@@ -0,0 +1,94 @@ | |||
1 | #[macro_use] extern crate log; | ||
2 | extern crate env_logger; | ||
3 | |||
4 | extern crate regex; | ||
5 | use regex::Regex; | ||
6 | |||
7 | use std::path::{Path, PathBuf}; | ||
8 | use std::fs::File; | ||
9 | use std::io::prelude::*; | ||
10 | use std::collections::BTreeMap; | ||
11 | use std::env; | ||
12 | use std::str::FromStr; | ||
13 | use std::sync::{Arc, Mutex}; | ||
14 | use std::sync::atomic::{AtomicUsize, Ordering}; | ||
15 | use std::thread; | ||
16 | |||
17 | static MAX_THREADS: u32 = 8; | ||
18 | |||
19 | fn main() { | ||
20 | env_logger::init().unwrap(); | ||
21 | |||
22 | let bp = env::var("HTML_FILES").expect("HTML_FILES not set"); | ||
23 | let p = Path::new(&bp); | ||
24 | if !p.exists() { | ||
25 | panic!("no HTML_FILES directory"); | ||
26 | } | ||
27 | |||
28 | let re_assignment_section = Regex::new(r#"(?mi)appears in the following</P>\s*(.*?)\s*</UL>\s*?</DIV>"#).unwrap(); | ||
29 | let re_anime_link = Regex::new(r#"<div class=tile3top><A href="series\.php\?id=([0-9]+)">(.*?)</A>\s*?</div>"#).unwrap(); | ||
30 | let ws = Regex::new(r"\s").unwrap(); | ||
31 | let mut animes: Arc<Mutex<BTreeMap<u32, String>>> = Arc::new(Mutex::new(BTreeMap::new())); | ||
32 | let mut files: Arc<Mutex<Vec<PathBuf>>> = Arc::new(Mutex::new(vec![])); | ||
33 | let mut threads_active = Arc::new(AtomicUsize::new(0)); | ||
34 | |||
35 | let dir = p.read_dir().expect("could not read html directory"); | ||
36 | { | ||
37 | let mut files = files.lock().unwrap(); | ||
38 | for f in dir { | ||
39 | files.push(f.unwrap().path().to_path_buf()); | ||
40 | } | ||
41 | } | ||
42 | |||
43 | for i in 1..MAX_THREADS { | ||
44 | let mut animes = animes.clone(); | ||
45 | let mut files = files.clone(); | ||
46 | let re_assignment_section = re_assignment_section.clone(); | ||
47 | let re_anime_link = re_anime_link.clone(); | ||
48 | let ws = ws.clone(); | ||
49 | let threads_active = threads_active.clone(); | ||
50 | threads_active.fetch_add(1, Ordering::SeqCst); | ||
51 | |||
52 | thread::spawn(move || { | ||
53 | loop { | ||
54 | let mut f: PathBuf; | ||
55 | { | ||
56 | f = match files.lock().unwrap().pop() { | ||
57 | None => { | ||
58 | debug!("thread finished"); | ||
59 | threads_active.fetch_sub(1, Ordering::SeqCst); | ||
60 | return; | ||
61 | }, | ||
62 | Some(s) => s | ||
63 | }; | ||
64 | } | ||
65 | |||
66 | let mut fh = File::open(&f).expect("could not open file"); | ||
67 | let mut buf = String::new(); | ||
68 | if let Err(_) = fh.read_to_string(&mut buf) { | ||
69 | error!("INVALID {:?}", f); | ||
70 | continue; | ||
71 | }; | ||
72 | buf = ws.replace_all(&buf, " "); | ||
73 | |||
74 | let section = re_assignment_section.captures(&buf).expect("no assignments for character?").at(1).unwrap(); | ||
75 | |||
76 | let mut animes = animes.lock().unwrap(); | ||
77 | for anime in re_anime_link.captures_iter(§ion) { | ||
78 | debug!("{}: {}", anime.at(1).unwrap(), anime.at(2).unwrap()); | ||
79 | animes.insert(u32::from_str(anime.at(1).unwrap()).unwrap(), anime.at(2).unwrap().into()); | ||
80 | } | ||
81 | } | ||
82 | }); | ||
83 | } | ||
84 | |||
85 | while threads_active.load(Ordering::SeqCst) != 0 {} | ||
86 | |||
87 | let mut animes = animes.lock().unwrap(); | ||
88 | println!("found {} series. writing series.txt", animes.len()); | ||
89 | |||
90 | let mut f = File::create("series.txt").expect("could not create series.txt"); | ||
91 | for (k, v) in animes.iter() { | ||
92 | f.write_all(format!("{:?};;{}\n", k, v).as_bytes()).expect("write failed"); | ||
93 | } | ||
94 | } | ||