235 lines
7.5 KiB
Rust

use std::collections::HashMap;
use hyperware_process_lib::http::server::IncomingHttpRequest;
use hyperware_process_lib::kiprintln;
use lol_html::html_content::ContentType;
use lol_html::{element, text, HtmlRewriter, Settings};
use regex::Regex;
use url::Url;
use hyperware_process_lib::{
get_blob, http::client::send_request_await_response, http::server::send_response,
};
/// Builds the upstream URL for a proxied request.
///
/// Parses `new_domain` as the base URL and carries over the path and, when
/// present, the query string of `original_url`. Previously only the path was
/// copied, so every proxied request lost its query parameters.
///
/// # Errors
/// Returns an error when `new_domain` is not a valid absolute URL.
fn replace_domain(original_url: &Url, new_domain: &str) -> anyhow::Result<Url> {
    let mut new_url = Url::parse(new_domain)?;
    new_url.set_path(original_url.path());
    // Only overwrite the query when the incoming request has one, so a query
    // that is part of `new_domain` itself is kept otherwise.
    if let Some(query) = original_url.query() {
        new_url.set_query(Some(query));
    }
    Ok(new_url)
}
/// Splits off the first path segment of `url`.
///
/// Returns the first segment (an empty string when there is none) together
/// with a copy of `url` whose path consists of the remaining segments. When no
/// segments remain the new path is `/`.
///
/// The `Result` wrapper is part of the existing signature; the current body
/// never produces an error.
fn split_first_path_segment(url: &Url) -> Result<(String, Url), url::ParseError> {
    // Walk the segments once: take the head, keep whatever follows it.
    let (head, tail) = match url.path_segments() {
        Some(mut parts) => {
            let head = parts.next().map(str::to_owned).unwrap_or_default();
            let tail: Vec<&str> = parts.collect();
            (head, tail)
        }
        None => (String::new(), Vec::new()),
    };

    let remainder = if tail.is_empty() {
        String::from("/")
    } else {
        format!("/{}", tail.join("/"))
    };

    let mut stripped = url.clone();
    stripped.set_path(&remainder);
    Ok((head, stripped))
}
/// Prefixes asset paths found in backslash-escaped string literals.
///
/// Looks for `\"/….<ext>…\"` sequences (a root-relative path with one of the
/// listed file extensions, delimited by escaped double quotes) and inserts
/// `/{output}` in front of the path. Everything else in `input` is returned
/// unchanged.
///
/// # Errors
/// Returns an error if the pattern fails to compile.
fn replace_files(input: &str, output: &str) -> anyhow::Result<String> {
    // TODO single quotes?
    let pattern =
        Regex::new(r#"\\\"\/[^"]*\.(css|js|ttf|woff2|ico|png|svg|jpg|jpeg|webp|html)[^"]*\\\""#)?;
    let rewritten = pattern.replace_all(input, |found: &regex::Captures| {
        // Drop the escaped quotes, then re-wrap the path behind the prefix.
        let bare_path = found[0].replace(r#"\""#, "");
        format!(r#"\"/{}{}\""#, output, bare_path)
    });
    Ok(rewritten.into_owned())
}
/// Prefixes root-relative `url(...)` references in a stylesheet with `/{output}`.
///
/// Unquoted, single-quoted and double-quoted references are all rewritten
/// (previously only the unquoted form was). References that start with `//`
/// are protocol-relative URLs pointing at another host and are left untouched;
/// previously they were mangled into `/{output}//host/...`.
///
/// # Errors
/// Returns an error if the pattern fails to compile.
fn replace_urls_css(input: &str, output: &str) -> anyhow::Result<String> {
    // Group 1: optional opening quote. Group 2: a path starting with exactly
    // one `/`. Group 3: optional closing quote.
    let url_regex = Regex::new(r#"url\(\s*(["']?)(/[^/)"'][^)"']*)(["']?)\s*\)"#)?;
    let replaced = url_regex.replace_all(input, |caps: &regex::Captures| {
        format!("url({}/{}{}{})", &caps[1], output, &caps[2], &caps[3])
    });
    Ok(replaced.into_owned())
}
/// Diagnostic helper: counts `window.location.href…)` / `window.location.replace…)`
/// occurrences in `s` and logs the count when there is more than one.
///
/// The dots in the pattern are escaped so they match a literal `.` rather than
/// any character.
///
/// # Errors
/// Returns an error if the pattern fails to compile.
fn window_shenanigans(s: &str) -> anyhow::Result<()> {
    let rx = Regex::new(r#"window\.location\.(href|replace)[^)]+\)"#)?;
    let count = rx.find_iter(s).count();
    if count > 1 {
        kiprintln!("{} matches", count);
    }
    Ok(())
}
/// Placeholder for URL rewriting inside JavaScript payloads.
///
/// Currently returns `input` unchanged after running the
/// `window_shenanigans` diagnostic over it.
///
/// # Errors
/// Propagates a failure of the diagnostic pass (previously its `Result` was
/// silently discarded).
fn replace_urls_js(input: &str, output: &str) -> anyhow::Result<String> {
    window_shenanigans(input)?;
    // TODO: rewrite absolute `http…` URLs so they are served under `/{output}`.
    // An earlier regex-based attempt was disabled; until it is reinstated the
    // prefix is intentionally unused.
    let _ = output;
    Ok(input.to_owned())
}
/// Rewrites an HTML document so root-relative URLs are served from under `prefix`.
///
/// While streaming `html_bytes` through the `lol_html` rewriter it:
/// * prepends the script produced by `mother_script` to `<head>`;
/// * prepends `/{prefix}` to every URL-bearing attribute whose value is a
///   root-relative path (starts with exactly one `/`);
/// * runs `replace_files` over the text of `<script>` elements.
///
/// Protocol-relative values (`//host/...`) refer to another host and are left
/// untouched.
///
/// # Errors
/// Returns an error if the rewriter or one of the handlers fails.
fn modify_html(html_bytes: &[u8], prefix: &str) -> anyhow::Result<Vec<u8>> {
    // Ensure prefix is clean (no leading/trailing slashes for consistency)
    let prefix = prefix.trim_matches('/');
    // List of attributes that can contain URLs
    let url_attributes = [
        "href",
        "src",
        "action",
        "background",
        "cite",
        "data",
        "icon",
        "longdesc",
        "manifest",
        "poster",
        "profile",
        "usemap",
        "classid",
        "codebase",
        "archive",
        "code",
    ];
    // Build a selector matching elements that carry any of these attributes
    let selector: String = url_attributes
        .iter()
        .map(|attr| format!("[{}]", attr))
        .collect::<Vec<String>>()
        .join(",");
    // Output buffer for the rewritten HTML
    let mut output = Vec::new();
    let mut rewriter = HtmlRewriter::new(
        Settings {
            element_content_handlers: vec![
                // Expose the app path to client-side code before anything else runs
                element!("head", move |el| {
                    el.prepend(&mother_script(prefix), ContentType::Html);
                    Ok(())
                }),
                // Handler for elements with URL attributes
                element!(selector, move |el| {
                    for attr in &url_attributes {
                        if let Some(value) = el.get_attribute(attr) {
                            if let Some(rest) = value.strip_prefix('/') {
                                // A second leading slash means a protocol-relative
                                // URL; rewriting it would break the reference.
                                if !rest.starts_with('/') {
                                    let new_value = format!(r#"/{}/{}"#, prefix, rest);
                                    el.set_attribute(attr, &new_value)?;
                                }
                            }
                        }
                    }
                    Ok(())
                }),
                text!("script", |el| {
                    let replaced = replace_files(el.as_str(), prefix)?;
                    // Script text must be emitted verbatim: `ContentType::Text`
                    // would HTML-escape `<`, `>` and `&` and corrupt the code.
                    el.replace(&replaced, ContentType::Html);
                    Ok(())
                }),
            ],
            ..Settings::default()
        },
        |c: &[u8]| output.extend_from_slice(c),
    );
    // Write the input HTML to the rewriter and finalize
    rewriter.write(html_bytes)?;
    rewriter.end()?;
    Ok(output)
}
/// Proxies `request` to `web2_url` and relays the (possibly rewritten) response.
///
/// The first path segment of the incoming URL is stripped before the upstream
/// request is made and is then used as the prefix when rewriting URLs in HTML,
/// CSS and JavaScript responses. Only the `Cookie` header is sent upstream and
/// only `Content-type` is relayed back.
///
/// A request without a blob is forwarded with an empty body, and an upstream
/// response without a `Content-Type` header is relayed unmodified; both cases
/// previously panicked.
///
/// # Errors
/// Returns an error if the request URL or method cannot be read, the upstream
/// request fails, the `Content-Type` header cannot be read as a string, or a
/// rewrite step fails.
pub fn run_proxy(
    request: &IncomingHttpRequest,
    web2_url: &str,
    cookie: &str,
) -> anyhow::Result<()> {
    let body = get_blob()
        .map(|blob| blob.bytes().to_vec())
        .unwrap_or_default();
    let url = replace_domain(&request.url()?, web2_url)?;
    let (first_path_segment, url) = split_first_path_segment(&url)?;
    let mut headers = HashMap::new();
    headers.insert("Cookie".to_string(), cookie.to_string());
    kiprintln!("fetching\n{}", url);
    // NOTE(review): confirm the unit of the `6000` timeout against the
    // `send_request_await_response` documentation.
    let response = send_request_await_response(request.method()?, url, Some(headers), 6000, body)?;
    // DEVS: choose which headers are necessary for the hyperware client
    // don't put them all, that doesn't work
    let content_type = response
        .headers()
        .get("content-type")
        .map(|value| value.to_str())
        .transpose()?;
    // Media type without parameters (e.g. `; charset=utf-8`), normalised so
    // that casing and stray whitespace do not defeat the match below.
    let mime = content_type
        .and_then(|value| value.split(';').next())
        .map(|essence| essence.trim().to_ascii_lowercase())
        .unwrap_or_default();
    let mut headers = HashMap::new();
    if let Some(value) = content_type {
        headers.insert("Content-type".to_string(), value.to_owned());
    }
    let body = match mime.as_str() {
        "text/html" => modify_html(response.body(), &first_path_segment)?,
        "text/css" => {
            let text = String::from_utf8_lossy(response.body());
            replace_urls_css(&text, &first_path_segment)?.into_bytes()
        }
        "application/javascript" => {
            let text = String::from_utf8_lossy(response.body());
            replace_urls_js(&text, &first_path_segment)?.into_bytes()
        }
        // Anything else (images, fonts, missing content type, ...) passes through.
        _ => response.body().to_vec(),
    };
    send_response(response.status(), Some(headers), body);
    Ok(())
}
/// Builds the `<script>` snippet that exposes the app path to client-side code
/// as the `HYPERWARE_APP_PATH` constant.
///
/// `prefix` is escaped before being placed inside the single-quoted JavaScript
/// literal so that a quote, backslash, line terminator or `<` in it cannot end
/// the string or the enclosing `<script>` element. Prefixes without such
/// characters produce exactly the same output as before.
fn mother_script(prefix: &str) -> String {
    let mut escaped = String::with_capacity(prefix.len());
    for ch in prefix.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '\'' => escaped.push_str("\\'"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            // Prevents `</script>` and `<!--` from being seen by the HTML parser.
            '<' => escaped.push_str("\\x3C"),
            // Line terminators inside string literals on older JS engines.
            '\u{2028}' => escaped.push_str("\\u2028"),
            '\u{2029}' => escaped.push_str("\\u2029"),
            other => escaped.push(other),
        }
    }
    format!(
        "\n<script>\nconst HYPERWARE_APP_PATH = '{}';\n</script>\n",
        escaped
    )
}