summary refs log tree commit diff
path: root/packages/prosody-ui/src/logic/wiki.ts
diff options
context:
space:
mode:
Diffstat (limited to 'packages/prosody-ui/src/logic/wiki.ts')
-rw-r--r-- packages/prosody-ui/src/logic/wiki.ts 138
1 files changed, 138 insertions, 0 deletions
diff --git a/packages/prosody-ui/src/logic/wiki.ts b/packages/prosody-ui/src/logic/wiki.ts
new file mode 100644
index 0000000..1325c0f
--- /dev/null
+++ b/packages/prosody-ui/src/logic/wiki.ts
@@ -0,0 +1,138 @@
+import type { AsyncRes, Result } from "sortug";
+import type { Meaning } from "./types";
+
+/**
+ * Builds the English Wiktionary `action=parse` API URL for a page title.
+ *
+ * The query requests JSON output (`format=json`, `formatversion=2`) with the
+ * `templates` and `text` props.
+ *
+ * @param word - Page title to look up; it is URL-encoded by `URLSearchParams`.
+ * @returns The absolute request URL.
+ */
+export function buildWiktionaryURL(word: string) {
+  // Pair order is preserved by URLSearchParams, so the query string is stable.
+  const query = new URLSearchParams([
+    ["action", "parse"],
+    ["page", word],
+    ["format", "json"],
+    ["prop", "templates|text"],
+    ["formatversion", "2"],
+  ]).toString();
+  return `https://en.wiktionary.org/w/api.php?${query}`;
+}
+
+// export async function fetchWordInWiki(url: string) {
+// const opts = { method: "GET", body: null, headers: {} };
+// try {
+// const res = await proxyCall(url, opts);
+// console.log(res.headers.get("content-type"));
+// const j = await res.json();
+// return { ok: j };
+// } catch (e) {
+// return { error: `${e}` };
+// }
+// }
+
+/** Success payload returned by the Wiktionary parsers in this file. */
+export type WikiRes = {
+ /** URL passed to the parser; copied into the result unchanged. */
+ url: string;
+ /** Meanings the parser extracted from the page. */
+ meanings: Meaning[];
+ /** Text of every element matching `.IPA` in the parsed document. */
+ ipa: string[];
+};
+// Heading titles that parseWiktionary treats as the start of a definition
+// section. The heading text is lower-cased and must equal one of these exactly.
+// NOTE(review): "definitions" is not a part of speech; presumably some entries
+// use it as the section title instead of one. Confirm against real pages.
+const poses = [
+ "noun",
+ "verb",
+ "adjective",
+ "adverb",
+ "conjunction",
+ "determiner",
+ "preposition",
+ "definitions",
+];
+
+/**
+ * Extracts meanings and IPA transcriptions from rendered Wiktionary HTML.
+ *
+ * Walks every `.mw-heading` element in document order. A heading whose title
+ * contains "etymology" stores its text on the pending entry; a heading whose
+ * lower-cased title is listed in `poses` completes the entry with that part of
+ * speech and its definitions, after which a fresh entry is started. Scanning
+ * stops at the first "references" heading.
+ *
+ * @param html - HTML markup of the page to parse.
+ * @param url - Source URL, copied verbatim into the result.
+ * @returns `{ ok: WikiRes }` on success, or `{ error }` holding the stringified
+ *   exception if anything throws while parsing.
+ */
+export function parseWiktionary(html: string, url: string): Result<WikiRes> {
+  try {
+    const doc = new DOMParser().parseFromString(html, "text/html");
+    const meanings: Meaning[] = [];
+    let pending: Meaning = { pos: "", meaning: [], etymology: "" };
+    for (const heading of Array.from(doc.querySelectorAll(".mw-heading"))) {
+      // Read the title from the first child *element*: leading text nodes are
+      // ignored and an empty wrapper is skipped instead of throwing, which
+      // would otherwise abort the whole parse through the catch below.
+      const titleEl = heading.firstElementChild as HTMLElement | null;
+      const title = titleEl?.innerText;
+      if (!title) continue;
+      const kind = title.toLowerCase();
+      if (kind.includes("etymology")) pending.etymology = fillEtym(heading);
+      else if (poses.includes(kind)) {
+        pending.pos = kind;
+        pending = fillMeaning(heading, pending);
+      }
+      if (pending.pos) {
+        // Copy before resetting so the pushed entry is not shared with the next one.
+        meanings.push({ ...pending });
+        pending = { pos: "", meaning: [], etymology: "" };
+      }
+      if (kind === "references") break; // stop here so only one language section is read
+    }
+    const ipa = Array.from(doc.querySelectorAll(".IPA")).map(
+      (el) => (el as HTMLElement).innerText,
+    );
+    return { ok: { meanings, ipa, url } };
+  } catch (e) {
+    return { error: `${e}` };
+  }
+}
+
+/**
+ * Collects the definitions that follow a part-of-speech heading.
+ *
+ * Scans the siblings after `el` for an `<ol>` and appends the `innerHTML` of
+ * each `<li>` child (skipping ones whose class contains "empty-elt") to
+ * `m.meaning`. The scan ends as soon as `m.meaning` is non-empty, when the next
+ * `.mw-heading` is reached (so a section without a list cannot pick up the
+ * list of a later section), or when the siblings run out.
+ *
+ * @param el - Heading element to start from; only its following siblings are read.
+ * @param m - Entry to fill. It is mutated in place and also returned.
+ * @returns The same `m` object.
+ */
+function fillMeaning(el: Element, m: Meaning) {
+  for (let node = el.nextElementSibling; node; node = node.nextElementSibling) {
+    // A new heading starts another section: nothing past it belongs to `el`.
+    if (node.classList.contains("mw-heading")) break;
+    if (node.tagName.toLowerCase() === "ol") {
+      for (const item of Array.from(node.children)) {
+        if (item.tagName.toLowerCase() !== "li") continue;
+        if (item.className.includes("empty-elt")) continue;
+        m.meaning.push(item.innerHTML);
+      }
+    }
+    if (m.meaning.length > 0) break;
+  }
+  return m;
+}
+
+/**
+ * Returns the first paragraph that follows an etymology heading.
+ *
+ * Scans the siblings after `el`; the first `<p>` found is appended to `acc` as
+ * a newline followed by the paragraph's `innerHTML`. The scan ends as soon as
+ * the accumulated text is non-empty, when the next `.mw-heading` is reached (so
+ * an etymology section without a paragraph cannot capture text from the
+ * following section), or when the siblings run out.
+ *
+ * @param el - Heading element to start from; only its following siblings are read.
+ * @param acc - Text to append to; defaults to the empty string.
+ * @returns The accumulated text, or `acc` unchanged if no paragraph was found.
+ */
+function fillEtym(el: Element, acc: string = ""): string {
+  let text = acc;
+  for (let node = el.nextElementSibling; node; node = node.nextElementSibling) {
+    // A new heading starts another section: nothing past it belongs to `el`.
+    if (node.classList.contains("mw-heading")) break;
+    if (node.tagName.toLowerCase() === "p") text += `\n${node.innerHTML}`;
+    if (text) break;
+  }
+  return text;
+}
+
+/**
+ * List-driven Wiktionary parser: every `<ol>` in the document becomes one
+ * meaning.
+ *
+ * For each list, the part of speech is looked up with `findPos` (left as ""
+ * when that lookup fails or yields an empty title) and the `innerText` of each
+ * direct `<li>` child becomes one definition. `etymology` is always "".
+ *
+ * @param html - HTML markup of the page to parse.
+ * @param url - Source URL, copied verbatim into the result.
+ * @returns `{ ok: WikiRes }` on success, or `{ error }` holding the stringified
+ *   exception if anything throws while parsing.
+ */
+export function parseWiktionaryo(html: string, url: string): Result<WikiRes> {
+  try {
+    const doc = new DOMParser().parseFromString(html, "text/html");
+    const meanings = Array.from(doc.querySelectorAll("ol")).map((list) => {
+      const found = findPos(list);
+      // Guard against an empty/undefined title so `pos` is always a string.
+      const pos: string = "ok" in found && found.ok ? found.ok : "";
+      const meaning: string[] = Array.from(list.children)
+        .filter((child) => child.tagName === "LI")
+        .map((item) => (item as HTMLElement).innerText);
+      return { pos, meaning, etymology: "" };
+    });
+    const ipa = Array.from(doc.querySelectorAll(".IPA")).map(
+      (el) => (el as HTMLElement).innerText,
+    );
+    return { ok: { meanings, ipa, url } };
+  } catch (e) {
+    return { error: `${e}` };
+  }
+}
+
+/**
+ * Finds the title of the nearest heading that precedes an element.
+ *
+ * Walks backwards through the previous siblings of `el` until it meets a
+ * `.mw-heading` element containing an `<h4>` or `<h3>`, and returns that
+ * heading's `innerText` (the `<h4>` text is preferred; the `<h3>` text is used
+ * when the `<h4>` is missing or empty). Heading wrappers without an `<h4>` or
+ * `<h3>` are skipped.
+ *
+ * @param el - Element whose preceding siblings are searched.
+ * @returns `{ ok: title }`, or `{ error }` when no previous sibling qualifies
+ *   or the heading found has no text.
+ */
+function findPos(el: Element): Result<string> {
+  for (let prev = el.previousElementSibling; prev; prev = prev.previousElementSibling) {
+    if (!prev.classList.contains("mw-heading")) continue;
+    const h4 = prev.querySelector("h4");
+    const h3 = prev.querySelector("h3");
+    if (!h4 && !h3) continue;
+    const title = h4?.innerText || h3?.innerText;
+    // Report an empty heading explicitly rather than returning `ok: undefined`.
+    if (!title) return { error: "empty heading" };
+    return { ok: title };
+  }
+  return { error: "no pichai" };
+}