extern crate hyper; use hyper::client::Client; use hyper::header::UserAgent; use hyper::status::StatusCode; #[macro_use] extern crate log; extern crate env_logger; extern crate rand; use rand::{ thread_rng, Rng}; use std::fs; use std::fs::File; use std::sync::Arc; use std::ops::Deref; use std::io::prelude::*; use std::env; use std::thread; use std::time::Duration; use std::str::FromStr; use std::path::Path; static MAX_CHARS: u32 = 77784; static BASE_URL: &'static str = "http://www.animecharactersdatabase.com/character.php?id="; fn get_next(base_path: &str) -> u32 { let base = Path::new(base_path); let mut num: u32 = thread_rng().gen_range(1, MAX_CHARS); while base.join(format!("{}.html", num)).exists() { num = thread_rng().gen_range(1, MAX_CHARS); } return num; } fn download(client: &Client, base_path: &str, char: u32) { debug!("downloading character {}", char); let mut res = client.get(&format!("{}{}", BASE_URL, char)) .header(UserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14915".into())) .send() .expect("could not send request"); if res.status != StatusCode::Ok { panic!("invalid status code {}", res.status); } debug!("got response"); let mut buf = String::new(); res.read_to_string(&mut buf).expect("could not read response"); // v I don't know whether that works anymore if buf.find("IP ban").is_some() { panic!("WE ARE BANNED"); } let mut f = File::create(Path::new(base_path).join(format!("{}.html", char))).expect("could not create file"); f.write_all(buf.as_bytes()).expect("could not write response to file"); } fn main() { let timeout = u64::from_str(&(env::var("TIMEOUT").expect("TIMEOUT env var not set"))).expect("invalid TIMEOUT env var"); let base_path = env::var("OUT").expect("OUT not set"); env_logger::init().unwrap(); fs::create_dir_all("html").ok(); let client = Arc::new(Client::new()); loop { info!("getting next character"); download(client.clone().deref(), &base_path, get_next(&base_path)); thread::sleep(Duration::from_secs(timeout)); } }