use std::io::BufRead; use anyhow::{anyhow, Result}; use kinode_process_lib::{ http::{client::send_request_await_response, Method}, println, }; use mime::Mime; use serde::{Deserialize, Serialize}; use url::Url; use crate::UIReq; #[derive(Deserialize, Serialize, Debug)] pub enum ScrapeRes { Image(String), HTML(String), } pub fn scrape(url: &str) -> Result { let url = Url::parse(url)?; let mut headers = std::collections::HashMap::new(); headers.insert( "User-Agent".to_string(), "facebookexternalhit/1.1".to_string(), // "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(), ); let res = send_request_await_response(Method::GET, url.clone(), Some(headers), 5000, vec![])?; let h = res.headers().get("content-type"); match h { None => { let b = res.body().to_vec(); let text = String::from_utf8(b)?; Ok(ScrapeRes::HTML(text)) } Some(val) => { let str = val.to_str()?; if str.starts_with("image") { Ok(ScrapeRes::Image(url.to_string())) } else { let b = res.body().to_vec(); let text = String::from_utf8(b)?; Ok(ScrapeRes::HTML(text)) } } } // let body = get_blob().ok_or(anyhow::anyhow!("no blob"))?; } pub fn proxy(url: &str) -> Result> { let url = Url::parse(url)?; let mut headers = std::collections::HashMap::new(); headers.insert( "User-Agent".to_string(), "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(), ); let res = send_request_await_response(Method::GET, url.clone(), Some(headers), 5000, vec![])?; let b = res.body().to_vec(); Ok(b) // let body = get_blob().ok_or(anyhow::anyhow!("no blob"))?; } pub fn run(req: UIReq) -> Result<(Mime, Vec)> { let url = Url::parse(&req.url)?; let body: Vec = match req.body { None => vec![], Some(s) => s.as_bytes().to_vec(), }; let mut headers = req.headers.clone(); headers.insert( "User-Agent".to_string(), "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(), ); // println!("running req {:?} {:?}", url, headers); let res = send_request_await_response(req.method, url.clone(), Some(req.headers), 5000, body)?; let h = res.headers(); println!("res headers {:?}", h); let content_type: Mime = res .headers() .get("content-type") .and_then(|ct| ct.to_str().ok()) .and_then(|ct| ct.parse::().ok()) .ok_or_else(|| anyhow!("invalid content type"))?; println!( "fucking mime {:?} {:?}", content_type.type_(), content_type.subtype() ); let b = res.body().to_vec(); Ok((content_type, b)) // let body = get_blob().ok_or(anyhow::anyhow!("no blob"))?; }