aboutsummaryrefslogtreecommitdiff
path: root/src/main.rs
blob: f15a3dd5ae2f4cb5fa25295beb41586c46772939 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#![feature(proc_macro)]
#[macro_use]
extern crate serde_derive;
extern crate serde_json;

extern crate regex;

extern crate walkdir;
use walkdir::{WalkDir, DirEntry};

use std::io::prelude::*;
use std::fs::File;
use std::env;
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;
use std::time;

mod pre_process;
mod section;

mod character;
use character::Character;
mod series;
use series::Series;

mod tags;
mod dl_list;
mod tiles;

/// Number of worker threads `main` spawns to process the input files.
static MAX_THREADS: u32 = 12;

/// Converts every file found under the input directory into a JSON file.
///
/// The input directory is taken from the `RAW_FILES` environment variable
/// (default `characters`) and the output directory from `OUT` (default
/// `json`). `MAX_THREADS` workers pop entries from a shared list; each file is
/// read, passed through `pre_process::strip_irrelevant_content`, parsed into a
/// `Series`, serialized with `serde_json` and written to the output directory.
///
/// Files that cannot be opened or read are reported and skipped.
///
/// # Panics
///
/// A worker panics when a file name is not valid UTF-8, when serialization
/// fails, or when the output file cannot be created or written. `main` joins
/// every worker, reports the ones that panicked and exits with status 1
/// instead of waiting forever for them.
fn main() {
    let raw_files = env::var("RAW_FILES").unwrap_or_else(|_| "characters".into());
    let out_files = env::var("OUT").unwrap_or_else(|_| "json".into());
    let base_path = Path::new(&raw_files);

    // Collect the work list up front. Directories are skipped here because
    // they can never be parsed; every other entry is handed to the workers.
    let files: Arc<Mutex<Vec<DirEntry>>> = Arc::new(Mutex::new(
        WalkDir::new(base_path)
            .min_depth(1)
            .into_iter()
            .filter_map(|e| e.ok())
            .filter(|e| !e.file_type().is_dir())
            .collect(),
    ));

    let mut workers = Vec::new();
    for i in 0..MAX_THREADS {
        let files = files.clone();
        let out_files = out_files.clone();
        workers.push(thread::spawn(move || {
            let out_path = Path::new(&out_files);
            loop {
                // The guard is a temporary of this statement, so the lock is
                // released before the (slow) file processing starts.
                let next = files.lock().expect("work list mutex poisoned").pop();
                let entry = match next {
                    Some(entry) => entry,
                    None => break,
                };

                let mut f = match File::open(entry.path()) {
                    Ok(f) => f,
                    Err(_) => {
                        println!("could not open file: {}", entry.path().display());
                        continue;
                    }
                };
                let mut buf = String::new();
                if f.read_to_string(&mut buf).is_err() {
                    println!("invalid file: {}", entry.path().display());
                    continue;
                }

                let buf = pre_process::strip_irrelevant_content(&buf);

                println!("{:?}", entry.path());
                let mut series = Series::new();
                series.parse(&buf);

                let json = serde_json::to_string(&series).expect("could not serialize series");

                // NOTE(review): this replaces every occurrence of "html" in
                // the file name, not only the extension.
                let out_name = entry.file_name()
                    .to_str()
                    .expect("file name is not valid UTF-8")
                    .replace("html", "json");
                let out_file = out_path.join(out_name);
                let mut o = File::create(&out_file).expect("could not create output file");
                o.write_all(json.as_bytes()).expect("could not write output file");
                println!("{:?}", out_file);
            }
            println!("thread {} finished", i);
        }));
    }

    // Joining the handles returns even when a worker panicked, unlike a
    // manually maintained counter that a panicking worker never decrements.
    let mut failed = 0;
    for (i, worker) in workers.into_iter().enumerate() {
        if worker.join().is_err() {
            println!("thread {} panicked", i);
            failed += 1;
        }
    }
    println!("all threads finished.");

    if failed > 0 {
        std::process::exit(1);
    }
}