93 lines
3.0 KiB
Rust
93 lines
3.0 KiB
Rust
|
use std::io::BufRead;
|
||
|
|
||
|
use anyhow::{anyhow, Result};
|
||
|
use kinode_process_lib::{
|
||
|
http::{client::send_request_await_response, Method},
|
||
|
println,
|
||
|
};
|
||
|
use mime::Mime;
|
||
|
use serde::{Deserialize, Serialize};
|
||
|
use url::Url;
|
||
|
|
||
|
use crate::UIReq;
|
||
|
|
||
|
#[derive(Deserialize, Serialize, Debug)]
|
||
|
pub enum ScrapeRes {
|
||
|
Image(String),
|
||
|
HTML(String),
|
||
|
}
|
||
|
pub fn scrape(url: &str) -> Result<ScrapeRes> {
|
||
|
let url = Url::parse(url)?;
|
||
|
let mut headers = std::collections::HashMap::new();
|
||
|
headers.insert(
|
||
|
"User-Agent".to_string(),
|
||
|
"facebookexternalhit/1.1".to_string(),
|
||
|
// "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(),
|
||
|
);
|
||
|
|
||
|
let res = send_request_await_response(Method::GET, url.clone(), Some(headers), 5000, vec![])?;
|
||
|
let h = res.headers().get("content-type");
|
||
|
match h {
|
||
|
None => {
|
||
|
let b = res.body().to_vec();
|
||
|
let text = String::from_utf8(b)?;
|
||
|
Ok(ScrapeRes::HTML(text))
|
||
|
}
|
||
|
Some(val) => {
|
||
|
let str = val.to_str()?;
|
||
|
if str.starts_with("image") {
|
||
|
Ok(ScrapeRes::Image(url.to_string()))
|
||
|
} else {
|
||
|
let b = res.body().to_vec();
|
||
|
let text = String::from_utf8(b)?;
|
||
|
Ok(ScrapeRes::HTML(text))
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// let body = get_blob().ok_or(anyhow::anyhow!("no blob"))?;
|
||
|
}
|
||
|
pub fn proxy(url: &str) -> Result<Vec<u8>> {
|
||
|
let url = Url::parse(url)?;
|
||
|
let mut headers = std::collections::HashMap::new();
|
||
|
headers.insert(
|
||
|
"User-Agent".to_string(),
|
||
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(),
|
||
|
);
|
||
|
|
||
|
let res = send_request_await_response(Method::GET, url.clone(), Some(headers), 5000, vec![])?;
|
||
|
let b = res.body().to_vec();
|
||
|
Ok(b)
|
||
|
// let body = get_blob().ok_or(anyhow::anyhow!("no blob"))?;
|
||
|
}
|
||
|
pub fn run(req: UIReq) -> Result<(Mime, Vec<u8>)> {
|
||
|
let url = Url::parse(&req.url)?;
|
||
|
let body: Vec<u8> = match req.body {
|
||
|
None => vec![],
|
||
|
Some(s) => s.as_bytes().to_vec(),
|
||
|
};
|
||
|
let mut headers = req.headers.clone();
|
||
|
headers.insert(
|
||
|
"User-Agent".to_string(),
|
||
|
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(),
|
||
|
);
|
||
|
// println!("running req {:?} {:?}", url, headers);
|
||
|
|
||
|
let res = send_request_await_response(req.method, url.clone(), Some(req.headers), 5000, body)?;
|
||
|
let h = res.headers();
|
||
|
println!("res headers {:?}", h);
|
||
|
let content_type: Mime = res
|
||
|
.headers()
|
||
|
.get("content-type")
|
||
|
.and_then(|ct| ct.to_str().ok())
|
||
|
.and_then(|ct| ct.parse::<Mime>().ok())
|
||
|
.ok_or_else(|| anyhow!("invalid content type"))?;
|
||
|
println!(
|
||
|
"fucking mime {:?} {:?}",
|
||
|
content_type.type_(),
|
||
|
content_type.subtype()
|
||
|
);
|
||
|
let b = res.body().to_vec();
|
||
|
Ok((content_type, b))
|
||
|
// let body = get_blob().ok_or(anyhow::anyhow!("no blob"))?;
|
||
|
}
|