import type { AsyncRes, Result } from "@sortug/lib";
import type { Meaning } from "./types";

/**
 * Builds the English Wiktionary `action=parse` API URL for a page.
 *
 * The request asks for JSON output (`formatversion=2`) containing the
 * `templates` and `text` props of the page named `word`.
 *
 * @param word - Page title to look up; it is URL-encoded by `URLSearchParams`.
 * @returns The fully-qualified API URL.
 */
export function buildWiktionaryURL(word: string): string {
  const params = new URLSearchParams({
    action: "parse",
    page: word,
    format: "json",
    prop: "templates|text",
    formatversion: "2",
  });
  return `https://en.wiktionary.org/w/api.php?${params.toString()}`;
}

// export async function fetchWordInWiki(url: string) {
//   const opts = { method: "GET", body: null, headers: {} };
//   try {
//     const res = await proxyCall(url, opts);
//     console.log(res.headers.get("content-type"));
//     const j = await res.json();
//     return { ok: j };
//   } catch (e) {
//     return { error: `${e}` };
//   }
// }

/** Payload produced by the Wiktionary parsers in this module. */
export type WikiRes = {
  url: string;
  meanings: Meaning[];
  ipa: string[];
};

/** Lower-cased heading texts that are treated as a part-of-speech section. */
const poses = [
  "noun",
  "verb",
  "adjective",
  "adverb",
  "conjunction",
  "determiner",
  "preposition",
  "definitions",
];

/** CSS class of the wrapper elements this module treats as section headings. */
const HEADING_CLASS = "mw-heading";

/** True when `el` is a section-heading wrapper (carries the `mw-heading` class). */
function isSectionHeading(el: Element): boolean {
  return el.classList.contains(HEADING_CLASS);
}

/** Returns a fresh, empty accumulator for one part-of-speech entry. */
function emptyMeaning(): Meaning {
  return { pos: "", meaning: [], etymology: "" };
}

/**
 * Parses rendered Wiktionary HTML into meanings and IPA strings.
 *
 * Walks every `.mw-heading` element in document order. An "etymology" heading
 * stores the etymology text on the pending entry; a heading whose text is in
 * `poses` fills the entry's definitions and emits it. The walk stops at the
 * first "references" heading.
 *
 * @param html - HTML markup to parse (fed to `DOMParser` as `text/html`).
 * @param url - Passed through unchanged into the result.
 * @returns `{ ok: { meanings, ipa, url } }` on success, or `{ error }` holding
 *   the stringified exception if anything throws.
 */
export function parseWiktionary(html: string, url: string): Result {
  try {
    const doc = new DOMParser().parseFromString(html, "text/html");
    const ipas = doc.querySelectorAll(".IPA");
    const headings = doc.querySelectorAll(`.${HEADING_CLASS}`);
    const ms: Meaning[] = [];
    let currentRound: Meaning = emptyMeaning();

    for (const h of Array.from(headings)) {
      // A heading wrapper with no children (or whose first child is a text
      // node) has no usable title; skip it rather than aborting the parse.
      const headingType = (h.firstChild as HTMLElement | null)?.innerText;
      if (!headingType) continue;
      const ht = headingType.trim().toLowerCase();

      if (ht.includes("etymology")) {
        currentRound.etymology = fillEtym(h);
      } else if (poses.includes(ht)) {
        currentRound.pos = ht;
        currentRound = fillMeaning(h, currentRound);
      }

      if (currentRound.pos) {
        ms.push({ ...currentRound });
        // NOTE(review): the etymology is cleared here too, so only the first
        // part of speech after an etymology heading carries it. Confirm
        // whether later ones should inherit it.
        currentRound = emptyMeaning();
      }

      // Presumably "References" closes the first language section; stopping
      // here keeps the result to a single language. TODO confirm.
      if (ht === "references") break;
    }

    const ipaStrings = Array.from(ipas).map((el) => (el as HTMLElement).innerText);
    return { ok: { meanings: ms, ipa: ipaStrings, url } };
  } catch (e) {
    return { error: `${e}` };
  }
}

/**
 * Collects the definitions that follow heading `el` into `m.meaning`.
 *
 * Scans the following siblings for an `<ol>` and appends the `innerHTML` of
 * each `<li>` child whose class name does not contain `empty-elt`. Scanning
 * stops as soon as `m.meaning` is non-empty, at the next section heading, or
 * when the siblings run out, so definitions are never taken from a later
 * section.
 *
 * @param el - Element whose following siblings are scanned.
 * @param m - Entry to fill; it is mutated in place and also returned.
 * @returns The same `m` instance.
 */
function fillMeaning(el: Element, m: Meaning) {
  let node = el.nextElementSibling;
  while (node && !isSectionHeading(node)) {
    if (node.tagName.toLowerCase() === "ol") {
      for (const li of Array.from(node.children)) {
        if (li.tagName.toLowerCase() !== "li") continue;
        if (li.className.includes("empty-elt")) continue;
        m.meaning.push(li.innerHTML);
      }
    }
    if (m.meaning.length > 0) break;
    node = node.nextElementSibling;
  }
  return m;
}

/**
 * Returns the etymology text that follows heading `el`.
 *
 * Scans the following siblings for the first `<p>` and returns `acc` plus a
 * newline and that paragraph's `innerHTML`. Scanning stops at the next section
 * heading, so a section without a paragraph yields `acc` unchanged instead of
 * text from a later section.
 *
 * @param el - Element whose following siblings are scanned.
 * @param acc - Text to prepend; when non-empty, at most one sibling is examined.
 * @returns The accumulated etymology markup (may be empty).
 */
function fillEtym(el: Element, acc: string = ""): string {
  let text = acc;
  let node = el.nextElementSibling;
  while (node && !isSectionHeading(node)) {
    if (node.tagName.toLowerCase() === "p") text += `\n${node.innerHTML}`;
    if (text) break;
    node = node.nextElementSibling;
  }
  return text;
}

/**
 * Alternative parser that builds one entry per `<ol>` in the document.
 *
 * For each list, the part of speech is looked up from the nearest preceding
 * heading via `findPos`, and each direct `<li>` child contributes its
 * `innerText`. `etymology` is always left empty.
 *
 * NOTE(review): this looks like an earlier variant of `parseWiktionary`;
 * confirm whether it is still used.
 *
 * @param html - HTML markup to parse (fed to `DOMParser` as `text/html`).
 * @param url - Passed through unchanged into the result.
 * @returns `{ ok: { meanings, ipa, url } }` on success, or `{ error }` holding
 *   the stringified exception if anything throws.
 */
export function parseWiktionaryo(html: string, url: string): Result {
  try {
    const doc = new DOMParser().parseFromString(html, "text/html");
    const ipas = doc.querySelectorAll(".IPA");
    const ols = doc.querySelectorAll("ol");

    const ms = Array.from(ols).map((el) => {
      const posr = findPos(el);
      const pos: string = "ok" in posr ? posr.ok : "";
      const meaning: string[] = Array.from(el.children)
        .filter((li) => li.tagName === "LI")
        .map((li) => (li as HTMLElement).innerText);
      return { pos, meaning, etymology: "" };
    });

    const ipaStrings = Array.from(ipas).map((el) => (el as HTMLElement).innerText);
    return { ok: { meanings: ms, ipa: ipaStrings, url } };
  } catch (e) {
    return { error: `${e}` };
  }
}

/**
 * Finds the title of the nearest preceding section heading of `el`.
 *
 * Walks backwards through previous siblings until it meets an element with
 * the `mw-heading` class that contains an `<h4>` or `<h3>`; the `<h4>` text is
 * preferred when it is non-empty.
 *
 * @param el - Element whose previous siblings are scanned.
 * @returns `{ ok: title }`, or `{ error: "no pichai" }` when no such heading
 *   precedes `el`.
 */
function findPos(el: Element): Result {
  let prev = el.previousElementSibling;
  while (prev) {
    if (isSectionHeading(prev)) {
      const h4 = prev.querySelector("h4");
      const h3 = prev.querySelector("h3");
      if (h4 || h3) {
        const id = (h4?.innerText || h3?.innerText)!;
        return { ok: id };
      }
    }
    prev = prev.previousElementSibling;
  }
  return { error: "no pichai" };
}