LPVC545KD6R5KNOTDPFDOD5FVA3KFBZARKVOLLXJXW3YK3RMNZKQC YCWYAX6K2DJKT7FO4IAYL6HJOIJLYFKAPGLFJ5XMYSYAS42LP3FQC C376NCOVRBWVVPEIM5GQ6Z3LI4PCXPIQQTLI3GBQWAABXCKDOWQAC HMOBTVJ4FEPZWMUV2JDPZYH3EFCW6ED5M6KKNKQJQZVKTDAKTVFQC PQ4BG3ZJU5SY6XQDJ3SURLAOWGXGXLONIRZNUJXZVMHLYXWOORXQC RNW6D77774CYWWM7JIFXI5TGKBOU6ADJIEZB5N2FXGDCTLUXEQZQC KUANIPWFDXDFKJ2LH4FQ6APYOPLNYJ5LVGLSBSC75WUUZPFHILCAC for initial in 'A'..='Z' {let mut page = 0;loop {let results =fetch_name_page(&client, initial, page).await.unwrap();for (name, gender) in &results.names {if filter.allow(gender) {tx.send(name.clone()).await.unwrap();}
let client = reqwest::Client::builder().cookie_store(true).user_agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:95.0) Gecko/20100101 Firefox/95.0").build().unwrap();let mut fetch = Fetch::start(&client);loop {dbg!(fetch.current_letter, &fetch.form);let (delay, page) = match fetch.next().await {Err(err) => {dbg!(err);(120, Vec::with_capacity(0))
async fn fetch_name_page(client: &reqwest::Client,letter: char,page: usize,) -> Result<ResultPage, Box<dyn std::error::Error>> {let page_header = format!("+Baby+Names+starting+with+{}", letter);let starts = String::from(letter);let mut form = vec![("advanced", "1"),("starts", starts.borrow()),("end", ""),("meaning", ""),("origin", ""),("nat", ""),("startswith", ""),("endswith", ""),("gender", ""),("cat", ""),("syl", ""),("page_header", page_header.borrow()),];let offset;match page {0 => {form.extend([("offset", "66"), ("offset", "66"), ("Next", "Previous")].iter(),);
impl<'a> Fetch<'a> {fn start(client: &'a reqwest::Client) -> Self {Self {client,current_letter: 'A',url: String::from("https://babynames.com/names/A"),form: None,
_ => {offset = format!("{}", (page - 1) * 66);form.extend([("offset", offset.borrow()),("offset", offset.borrow()),("Next", "Next"),].iter(),);
let response = match &self.form {None => self.client.get(&self.url),Some(fields) => self.client.post(&self.url).form(fields),
StatusError::ensure_success(response.status())?;let text = response.text().await?;Ok(tokio::task::spawn_blocking(move || {let doc = scraper::Html::parse_document(text.borrow());let next_button = scraper::Selector::parse("input.next-btn").unwrap();let mut next_button = doc.select(&next_button);let has_next = match next_button.next() {None => false,Some(button) => match button.value().attr("type").map(str::trim) {Some("hidden") => true,_ => false,},};let name_selector =scraper::Selector::parse("ul.searchresults a").unwrap();let names = doc.select(&name_selector).map(|item| {let gender = match item.value().attr("class") {Some("M") => Gender::Masculine,Some("F") => Gender::Feminine,_ => Gender::Neutral,
StatusError::ensure_success(response.status())?;let text = response.text().await?;let (names, next_url, form) = tokio::task::spawn_blocking(move || {let doc = scraper::Html::parse_document(text.borrow());let next_button =scraper::Selector::parse("input.next-btn").unwrap();let mut next_button = doc.select(&next_button);let has_next = match next_button.next() {None => false,Some(_) => true,};let name_selector =scraper::Selector::parse("ul.searchresults a").unwrap();let names = doc.select(&name_selector).map(|item| {let gender = match item.value().attr("class") {Some("M") => Gender::Masculine,Some("F") => Gender::Feminine,_ => Gender::Neutral,};let name: String = item.text().map(|s| s.trim()).collect();(name, gender)}).collect();let stepper =scraper::Selector::parse("div.next-previous form").unwrap();match doc.select(&stepper).next() {Some(stepper) => {let action = stepper.value().attr("action");let form = scraper::Selector::parse("input").unwrap();let form = stepper.select(&form).filter_map(|input| {let input = input.value();if input.attr("type") == Some("submit")&& input.attr("class") != Some("next-btn"){None} else {Some((input.attr("name").map(String::from)?,input.attr("value").map(String::from)?,))}}).collect();if has_next {(names, action.map(|url| {if url.starts_with("http") {String::from(url)} else {format!("https://babynames.com{}", url)}}), Some(form))} else {(names, None, None)}}None => (names, None, None),}}).await?;match form {None => {self.current_letter = unsafe {char::from_u32_unchecked(self.current_letter as u32 + 1)
let name: String = item.text().map(|s| s.trim()).collect();(name, gender)}).collect();ResultPage { names, has_next }}).await?)
self.form = None;}Some(form) => {self.form = Some(form);}}self.url = match next_url {None => {format!("https://babynames.com/names/{}", self.current_letter)}Some(url) => url,};Ok(Some(names))}
name = "cookie"version = "0.15.1"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "d5f1c7727e460397e56abc4bddc1d49e07a1ad78fc98eb2e1c8f032a58a2f80d"dependencies = ["percent-encoding","time","version_check",][[package]]name = "cookie_store"version = "0.15.1"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "b3f7034c0932dc36f5bd8ec37368d971346809435824f277cb3b8299fc56167c"dependencies = ["cookie","idna","log","publicsuffix","serde","serde_json","time","url",][[package]]
name = "psl-types"version = "2.0.10"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "e8eda7c62d9ecaafdf8b62374c006de0adf61666ae96a96ba74a37134aa4e470"[[package]]name = "publicsuffix"version = "2.1.1"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "292972edad6bbecc137ab84c5e36421a4a6c979ea31d3cc73540dd04315b33e1"dependencies = ["byteorder","hashbrown","idna","psl-types",][[package]]
name = "standback"version = "0.2.17"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "e113fb6f3de07a243d434a56ec6f186dfd51cb08448239fe7bcae73f87ff28ff"dependencies = ["version_check",][[package]]name = "stdweb"version = "0.4.20"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "d022496b16281348b52d0e30ae99e01a73d737b2f45d38fed4edf79f9325a1d5"dependencies = ["discard","rustc_version 0.2.3","stdweb-derive","stdweb-internal-macros","stdweb-internal-runtime","wasm-bindgen",][[package]]name = "stdweb-derive"version = "0.5.3"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "c87a60a40fccc84bef0652345bbbbbe20a605bf5d0ce81719fc476f5c03b50ef"dependencies = ["proc-macro2","quote","serde","serde_derive","syn",][[package]]name = "stdweb-internal-macros"version = "0.2.9"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "58fa5ff6ad0d98d1ffa8cb115892b6e69d67799f6763e162a1c9db421dc22e11"dependencies = ["base-x","proc-macro2","quote","serde","serde_derive","serde_json","sha1","syn",][[package]]name = "stdweb-internal-runtime"version = "0.1.5"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0"[[package]]
[[package]]name = "time"version = "0.2.27"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "4752a97f8eebd6854ff91f1c1824cd6160626ac4bd44287f7f4ea2035a02a242"dependencies = ["const_fn","libc","standback","stdweb","time-macros","version_check","winapi",][[package]]name = "time-macros"version = "0.1.1"source = "registry+https://github.com/rust-lang/crates.io-index"checksum = "957e9c6e26f12cb6d0dd7fc776bb67a706312e7299aed74c8dd5b17ebb27e2f1"dependencies = ["proc-macro-hack","time-macros-impl",]