twittok/proxy/src/proxy.rs

93 lines
3.0 KiB
Rust
Raw Normal View History

2024-10-22 13:27:48 +00:00
use std::io::BufRead;
use anyhow::{anyhow, Result};
use kinode_process_lib::{
http::{client::send_request_await_response, Method},
println,
};
use mime::Mime;
use serde::{Deserialize, Serialize};
use url::Url;
use crate::UIReq;
/// Result of [`scrape`]: classifies what a fetched URL turned out to be.
#[derive(Deserialize, Serialize, Debug)]
pub enum ScrapeRes {
/// The response's `content-type` header started with `image`.
/// Holds the parsed URL rendered back to a string, not the image bytes.
Image(String),
/// Any other response, including one with no `content-type` header.
/// Holds the response body decoded as UTF-8.
HTML(String),
}
/// Fetches `url` with a GET request and classifies the response.
///
/// Returns [`ScrapeRes::Image`] carrying the URL itself when the response's
/// `content-type` header starts with `image`; otherwise returns
/// [`ScrapeRes::HTML`] carrying the response body decoded as UTF-8.
///
/// # Errors
///
/// Fails if `url` does not parse, if the HTTP request fails, if the
/// `content-type` header value cannot be read as a string, or if a non-image
/// body is not valid UTF-8.
pub fn scrape(url: &str) -> Result<ScrapeRes> {
    let target = Url::parse(url)?;

    // The request identifies itself with the `facebookexternalhit` crawler
    // User-Agent rather than a desktop-browser one.
    let request_headers = std::collections::HashMap::from([(
        "User-Agent".to_string(),
        "facebookexternalhit/1.1".to_string(),
    )]);

    let response = send_request_await_response(
        Method::GET,
        target.clone(),
        Some(request_headers),
        5000,
        vec![],
    )?;

    // Image responses are reported by URL only; the body is not inspected.
    if let Some(content_type) = response.headers().get("content-type") {
        if content_type.to_str()?.starts_with("image") {
            return Ok(ScrapeRes::Image(target.to_string()));
        }
    }

    // Everything else (including a missing content-type) is treated as text.
    let html = String::from_utf8(response.body().to_vec())?;
    Ok(ScrapeRes::HTML(html))
}
/// Fetches `url` with a GET request, sending a desktop Chrome `User-Agent`,
/// and returns the raw response body bytes.
///
/// # Errors
///
/// Fails if `url` does not parse or if the HTTP request fails.
pub fn proxy(url: &str) -> Result<Vec<u8>> {
    let target = Url::parse(url)?;

    let request_headers = std::collections::HashMap::from([(
        "User-Agent".to_string(),
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(),
    )]);

    let response =
        send_request_await_response(Method::GET, target, Some(request_headers), 5000, vec![])?;

    Ok(response.body().to_vec())
}
/// Performs the HTTP request described by `req` and returns the response's
/// MIME type together with its raw body bytes.
///
/// The caller-supplied headers are forwarded, except that `User-Agent` is
/// always replaced with a desktop Chrome string. A missing request body is
/// sent as an empty body.
///
/// # Errors
///
/// Fails if `req.url` does not parse, if the HTTP request fails, or if the
/// response has no `content-type` header that parses as a [`Mime`].
pub fn run(req: UIReq) -> Result<(Mime, Vec<u8>)> {
    let url = Url::parse(&req.url)?;
    let body: Vec<u8> = req
        .body
        .map(|s| s.as_bytes().to_vec())
        .unwrap_or_default();

    // Override the User-Agent on the headers that are actually sent.
    // Previously the override was applied to a clone that was then discarded,
    // so the request went out with the caller's original headers.
    let mut headers = req.headers;
    // Header names are case-insensitive: drop any caller-supplied variant
    // (e.g. "user-agent") so the request does not carry two conflicting values.
    headers.retain(|name, _| !name.eq_ignore_ascii_case("User-Agent"));
    headers.insert(
        "User-Agent".to_string(),
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36".to_string(),
    );

    let res = send_request_await_response(req.method, url, Some(headers), 5000, body)?;
    println!("res headers {:?}", res.headers());

    let content_type: Mime = res
        .headers()
        .get("content-type")
        .and_then(|ct| ct.to_str().ok())
        .and_then(|ct| ct.parse::<Mime>().ok())
        .ok_or_else(|| anyhow!("invalid content type"))?;
    println!(
        "fucking mime {:?} {:?}",
        content_type.type_(),
        content_type.subtype()
    );

    Ok((content_type, res.body().to_vec()))
}