blob: f15a3dd5ae2f4cb5fa25295beb41586c46772939 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
#![feature(proc_macro)]
#[macro_use]
extern crate serde_derive;
extern crate serde_json;
extern crate regex;
extern crate walkdir;
use walkdir::{WalkDir, DirEntry};
use std::io::prelude::*;
use std::fs::File;
use std::env;
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;
use std::time;
mod pre_process;
mod section;
mod character;
use character::Character;
mod series;
use series::Series;
mod tags;
mod dl_list;
mod tiles;
/// Number of worker threads `main` spawns to process the collected entries.
static MAX_THREADS: u32 = 12;
/// Converts one walked entry into a JSON file inside `out_path`.
///
/// The entry is read as text, passed through
/// `pre_process::strip_irrelevant_content`, parsed into a `Series`, serialized
/// with `serde_json`, and written to `out_path` under the entry's file name
/// with every occurrence of `"html"` replaced by `"json"`.
///
/// # Errors
///
/// Returns a ready-to-print message when the entry cannot be opened or read,
/// when serialization fails, or when the output file cannot be created or
/// written. Nothing in this function panics on I/O failure, so one bad file
/// does not take its worker thread down.
fn convert_entry(entry: &DirEntry, out_path: &Path) -> Result<(), String> {
    let in_path = entry.path();

    let mut input = File::open(in_path)
        .map_err(|e| format!("could not open file: {}: {}", in_path.display(), e))?;
    let mut buf = String::new();
    // Directories and non-UTF-8 content end up here; report them and move on.
    if input.read_to_string(&mut buf).is_err() {
        return Err(format!("invalid file: {}", in_path.display()));
    }

    let buf = pre_process::strip_irrelevant_content(&buf);
    println!("{:?}", in_path);

    let mut series = Series::new();
    series.parse(&buf);
    let json = serde_json::to_string(&series)
        .map_err(|e| format!("could not serialize {}: {}", in_path.display(), e))?;

    // NOTE: `replace` substitutes every "html" in the name, not only the
    // extension; kept as-is so existing output file names do not change.
    // `to_string_lossy` avoids panicking on file names that are not UTF-8.
    let out_name = entry.file_name().to_string_lossy().replace("html", "json");
    let out_file = out_path.join(out_name);

    let mut output = File::create(&out_file)
        .map_err(|e| format!("could not create {}: {}", out_file.display(), e))?;
    output
        .write_all(json.as_bytes())
        .map_err(|e| format!("could not write {}: {}", out_file.display(), e))?;
    println!("{:?}", out_file);
    Ok(())
}

/// Entry point: converts every entry found under the input directory to JSON
/// using a fixed pool of `MAX_THREADS` worker threads.
///
/// The input directory is taken from the `RAW_FILES` environment variable
/// (default `"characters"`) and the output directory from `OUT` (default
/// `"json"`). Entries are collected up front into a shared, mutex-protected
/// list that the workers drain.
///
/// Workers are joined through their `JoinHandle`s, so `main` returns even if
/// a worker panics. The process exits with status 1 when any worker panicked.
fn main() {
    let raw_files = env::var("RAW_FILES").unwrap_or_else(|_| "characters".into());
    let out_files = env::var("OUT").unwrap_or_else(|_| "json".into());
    let base_path = Path::new(&raw_files);

    // Entries that fail to be walked are skipped (best effort).
    let files: Arc<Mutex<Vec<DirEntry>>> = Arc::new(Mutex::new(
        WalkDir::new(base_path)
            .min_depth(1)
            .into_iter()
            .filter_map(|e| e.ok())
            .collect(),
    ));

    let workers: Vec<thread::JoinHandle<()>> = (0..MAX_THREADS)
        .map(|i| {
            let files = files.clone();
            let out_files = out_files.clone();
            thread::spawn(move || {
                let out_path = Path::new(&out_files);
                loop {
                    // Pop in its own statement so the mutex guard is released
                    // before the (slow) conversion starts. A `while let` on
                    // this expression would hold the lock for the whole body.
                    let next = files.lock().unwrap().pop();
                    let entry = match next {
                        Some(entry) => entry,
                        None => break,
                    };
                    if let Err(message) = convert_entry(&entry, out_path) {
                        println!("{}", message);
                    }
                }
                println!("thread {} finished", i);
            })
        })
        .collect();

    // `join` returns `Err` when the worker panicked; count those instead of
    // waiting forever for a completion signal that would never arrive.
    let mut panicked = 0;
    for worker in workers {
        if worker.join().is_err() {
            panicked += 1;
        }
    }

    if panicked > 0 {
        println!("{} worker thread(s) panicked", panicked);
    }
    println!("all threads finished.");
    if panicked > 0 {
        std::process::exit(1);
    }
}
|