diff options
Diffstat (limited to 'packages/prosody-ui/src/logic/wiki.ts')
| -rw-r--r-- | packages/prosody-ui/src/logic/wiki.ts | 138 |
1 files changed, 138 insertions, 0 deletions
// Origin: diff --git a/packages/prosody-ui/src/logic/wiki.ts b/packages/prosody-ui/src/logic/wiki.ts
// (new file mode 100644, index 0000000..1325c0f)
import type { AsyncRes, Result } from "sortug";
import type { Meaning } from "./types";

/**
 * Builds the English Wiktionary `api.php` URL that requests the parsed page
 * for `word` (`action=parse`, JSON output, `templates|text` props,
 * `formatversion=2`).
 *
 * @param word Page title to look up; it is URL-encoded by `URLSearchParams`.
 * @returns The absolute request URL.
 */
export function buildWiktionaryURL(word: string): string {
  const params = new URLSearchParams({
    action: "parse",
    page: word,
    format: "json",
    prop: "templates|text",
    formatversion: "2",
  });
  return `https://en.wiktionary.org/w/api.php?${params.toString()}`;
}

// export async function fetchWordInWiki(url: string) {
//   const opts = { method: "GET", body: null, headers: {} };
//   try {
//     const res = await proxyCall(url, opts);
//     console.log(res.headers.get("content-type"));
//     const j = await res.json();
//     return { ok: j };
//   } catch (e) {
//     return { error: `${e}` };
//   }
// }

/** Result payload of a parsed Wiktionary page. */
export type WikiRes = {
  url: string;
  meanings: Meaning[];
  ipa: string[];
};

/** Lower-cased heading titles that are treated as part-of-speech sections. */
const poses = [
  "noun",
  "verb",
  "adjective",
  "adverb",
  "conjunction",
  "determiner",
  "preposition",
  "definitions",
];

/** Class carried by the wrapper element of every section heading. */
const HEADING_CLASS = "mw-heading";

/** True when `el` is a section-heading wrapper. */
function isHeading(el: Element): boolean {
  return el.classList.contains(HEADING_CLASS);
}

/**
 * Text of a node, or "" when the node is missing.
 * `textContent` is used instead of `innerText`: for documents produced by
 * `DOMParser` (never rendered) both yield the same string, and `textContent`
 * is also available in DOM implementations that lack `innerText`.
 */
function textOf(node: Node | null | undefined): string {
  return node?.textContent ?? "";
}

/** Fresh, empty accumulator for one meaning entry. */
function emptyMeaning(): Meaning {
  return { pos: "", meaning: [], etymology: "" };
}

/**
 * Parses the HTML of a Wiktionary page into meanings and IPA strings.
 *
 * Walks every `.mw-heading` in document order: an "etymology" heading stores
 * the etymology text on the pending entry, a part-of-speech heading (see
 * `poses`) collects its definition list and emits the entry. Parsing stops at
 * the first "references" heading so that only one language section is read.
 *
 * NOTE(review): the pending entry is reset after each emitted part of speech,
 * so an etymology is attached only to the first part of speech that follows
 * it — confirm this is intended.
 *
 * @param html Rendered page HTML.
 * @param url  URL the HTML came from; echoed back in the result.
 * @returns `{ ok }` with the parsed data, or `{ error }` with the stringified
 *          exception if anything throws.
 */
export function parseWiktionary(html: string, url: string): Result<WikiRes> {
  try {
    const doc = new DOMParser().parseFromString(html, "text/html");
    const meanings: Meaning[] = [];
    let current = emptyMeaning();

    for (const heading of Array.from(doc.querySelectorAll(`.${HEADING_CLASS}`))) {
      // A heading wrapper without children used to throw here and abort the
      // whole parse; it is now simply skipped.
      const title = textOf(heading.firstElementChild).trim().toLowerCase();
      if (!title) continue;

      if (title.includes("etymology")) {
        current.etymology = fillEtym(heading);
      } else if (poses.includes(title)) {
        current.pos = title;
        current = fillMeaning(heading, current);
      }

      if (current.pos) {
        meanings.push({ ...current });
        current = emptyMeaning();
      }
      if (title === "references") break; // make sure it's one single language
    }

    const ipa = Array.from(doc.querySelectorAll(".IPA"), (el) => textOf(el));
    return { ok: { meanings, ipa, url } };
  } catch (e) {
    return { error: `${e}` };
  }
}

/**
 * Appends to `m.meaning` the inner HTML of each non-empty `<li>` of the first
 * `<ol>` that follows `el` and yields at least one item.
 *
 * The search stops at the next section heading so an empty section can no
 * longer pick up the definition list of a later section.
 *
 * @returns The same `m` object, mutated in place.
 */
function fillMeaning(el: Element, m: Meaning): Meaning {
  for (
    let node = el.nextElementSibling;
    node && !isHeading(node);
    node = node.nextElementSibling
  ) {
    if (node.tagName.toLowerCase() === "ol") {
      for (const li of Array.from(node.children)) {
        if (li.tagName.toLowerCase() !== "li") continue;
        if (li.className.includes("empty-elt")) continue;
        m.meaning.push(li.innerHTML);
      }
    }
    if (m.meaning.length > 0) break;
  }
  return m;
}

/**
 * Returns `acc` plus a newline and the inner HTML of the first `<p>` that
 * follows `el`, or `acc` unchanged when no paragraph is found.
 *
 * The search stops at the next section heading so an etymology heading with
 * no paragraph of its own no longer picks up a paragraph from a later section.
 */
function fillEtym(el: Element, acc: string = ""): string {
  let text = acc;
  for (
    let node = el.nextElementSibling;
    node && !isHeading(node);
    node = node.nextElementSibling
  ) {
    if (node.tagName.toLowerCase() === "p") text += `\n${node.innerHTML}`;
    if (text) break;
  }
  return text;
}

/**
 * Alternative parser: turns every `<ol>` in the page into one meaning whose
 * part of speech is the text of the closest preceding `h4`/`h3` heading.
 * Etymology is always left empty.
 *
 * NOTE(review): looks like an earlier variant of `parseWiktionary`; confirm
 * whether any caller still needs it.
 *
 * @param html Rendered page HTML.
 * @param url  URL the HTML came from; echoed back in the result.
 * @returns `{ ok }` with the parsed data, or `{ error }` with the stringified
 *          exception if anything throws.
 */
export function parseWiktionaryo(html: string, url: string): Result<WikiRes> {
  try {
    const doc = new DOMParser().parseFromString(html, "text/html");
    const meanings: Meaning[] = Array.from(doc.querySelectorAll("ol"), (list) => {
      const found = findPos(list);
      const pos = "ok" in found ? found.ok : "";
      const meaning = Array.from(list.children)
        .filter((li) => li.tagName === "LI")
        .map((li) => textOf(li));
      return { pos, meaning, etymology: "" };
    });
    const ipa = Array.from(doc.querySelectorAll(".IPA"), (el) => textOf(el));
    return { ok: { meanings, ipa, url } };
  } catch (e) {
    return { error: `${e}` };
  }
}

/**
 * Walks backwards over the preceding siblings of `el` and returns the text of
 * the first heading wrapper that contains an `h4` or `h3` (the `h4` text is
 * preferred when it is non-empty).
 *
 * @returns `{ ok: text }`, or `{ error }` when no such heading precedes `el`.
 */
function findPos(el: Element): Result<string> {
  for (
    let prev = el.previousElementSibling;
    prev;
    prev = prev.previousElementSibling
  ) {
    if (!isHeading(prev)) continue;
    const h4 = prev.querySelector("h4");
    const h3 = prev.querySelector("h3");
    if (!h4 && !h3) continue;
    // Always a string: the previous non-null assertion could yield undefined.
    return { ok: textOf(h4) || textOf(h3) };
  }
  return { error: "no pichai" };
}
