summary | refs | log | tree | commit | diff
path: root/packages/prosody-ui/src/logic
diff options
context:
space:
mode:
Diffstat (limited to 'packages/prosody-ui/src/logic')
-rw-r--r-- packages/prosody-ui/src/logic/iso6393to1.ts 186
-rw-r--r-- packages/prosody-ui/src/logic/stanza.ts 86
-rw-r--r-- packages/prosody-ui/src/logic/types.ts 48
-rw-r--r-- packages/prosody-ui/src/logic/utils.ts 66
-rw-r--r-- packages/prosody-ui/src/logic/wiki.ts 138
5 files changed, 524 insertions, 0 deletions
diff --git a/packages/prosody-ui/src/logic/iso6393to1.ts b/packages/prosody-ui/src/logic/iso6393to1.ts
new file mode 100644
index 0000000..4c4deed
--- /dev/null
+++ b/packages/prosody-ui/src/logic/iso6393to1.ts
@@ -0,0 +1,186 @@
+export const iso6393To1: Record<string, string> = { // lookup table: three-letter language code -> two-letter language code (ISO 639-3 -> ISO 639-1, going by the constant's name); codes missing from the table yield undefined
+ aar: "aa",
+ abk: "ab",
+ afr: "af",
+ aka: "ak",
+ amh: "am",
+ ara: "ar",
+ arg: "an",
+ asm: "as",
+ ava: "av",
+ ave: "ae",
+ aym: "ay",
+ aze: "az",
+ bak: "ba",
+ bam: "bm",
+ bel: "be",
+ ben: "bn",
+ bis: "bi",
+ bod: "bo",
+ bos: "bs",
+ bre: "br",
+ bul: "bg",
+ cat: "ca",
+ ces: "cs",
+ cha: "ch",
+ che: "ce",
+ chu: "cu",
+ chv: "cv",
+ cor: "kw",
+ cos: "co",
+ cre: "cr",
+ cym: "cy",
+ dan: "da",
+ deu: "de",
+ div: "dv",
+ dzo: "dz",
+ ell: "el",
+ eng: "en",
+ epo: "eo",
+ est: "et",
+ eus: "eu",
+ ewe: "ee",
+ fao: "fo",
+ fas: "fa",
+ fij: "fj",
+ fin: "fi",
+ fra: "fr",
+ fry: "fy",
+ ful: "ff",
+ gla: "gd",
+ gle: "ga",
+ glg: "gl",
+ glv: "gv",
+ grn: "gn",
+ guj: "gu",
+ hat: "ht",
+ hau: "ha",
+ hbs: "sh", // NOTE(review): "sh" (Serbo-Croatian) is a deprecated two-letter code — confirm it is still wanted here
+ heb: "he",
+ her: "hz",
+ hin: "hi",
+ hmo: "ho",
+ hrv: "hr",
+ hun: "hu",
+ hye: "hy",
+ ibo: "ig",
+ ido: "io",
+ iii: "ii",
+ iku: "iu",
+ ile: "ie",
+ ina: "ia",
+ ind: "id",
+ ipk: "ik",
+ isl: "is",
+ ita: "it",
+ jav: "jv",
+ jpn: "ja",
+ kal: "kl",
+ kan: "kn",
+ kas: "ks",
+ kat: "ka",
+ kau: "kr",
+ kaz: "kk",
+ khm: "km",
+ kik: "ki",
+ kin: "rw",
+ kir: "ky",
+ kom: "kv",
+ kon: "kg",
+ kor: "ko",
+ kua: "kj",
+ kur: "ku",
+ lao: "lo",
+ lat: "la",
+ lav: "lv",
+ lim: "li",
+ lin: "ln",
+ lit: "lt",
+ ltz: "lb",
+ lub: "lu",
+ lug: "lg",
+ mah: "mh",
+ mal: "ml",
+ mar: "mr",
+ mkd: "mk",
+ mlg: "mg",
+ mlt: "mt",
+ mon: "mn",
+ mri: "mi",
+ msa: "ms",
+ mya: "my",
+ nau: "na",
+ nav: "nv",
+ nbl: "nr",
+ nde: "nd",
+ ndo: "ng",
+ nep: "ne",
+ nld: "nl",
+ nno: "nn",
+ nob: "nb",
+ nor: "no",
+ nya: "ny",
+ oci: "oc",
+ oji: "oj",
+ ori: "or",
+ orm: "om",
+ oss: "os",
+ pan: "pa",
+ pli: "pi",
+ pol: "pl",
+ por: "pt",
+ pus: "ps",
+ que: "qu",
+ roh: "rm",
+ ron: "ro",
+ run: "rn",
+ rus: "ru",
+ sag: "sg",
+ san: "sa",
+ sin: "si",
+ slk: "sk",
+ slv: "sl",
+ sme: "se",
+ smo: "sm",
+ sna: "sn",
+ snd: "sd",
+ som: "so",
+ sot: "st",
+ spa: "es",
+ sqi: "sq",
+ srd: "sc",
+ srp: "sr",
+ ssw: "ss",
+ sun: "su",
+ swa: "sw",
+ swe: "sv",
+ tah: "ty",
+ tam: "ta",
+ tat: "tt",
+ tel: "te",
+ tgk: "tg",
+ tgl: "tl",
+ tha: "th",
+ tir: "ti",
+ ton: "to",
+ tsn: "tn",
+ tso: "ts",
+ tuk: "tk",
+ tur: "tr",
+ twi: "tw",
+ uig: "ug",
+ ukr: "uk",
+ urd: "ur",
+ uzb: "uz",
+ ven: "ve",
+ vie: "vi",
+ vol: "vo",
+ wln: "wa",
+ wol: "wo",
+ xho: "xh",
+ yid: "yi",
+ yor: "yo",
+ zha: "za",
+ zho: "zh",
+ zul: "zu",
+};
diff --git a/packages/prosody-ui/src/logic/stanza.ts b/packages/prosody-ui/src/logic/stanza.ts
new file mode 100644
index 0000000..9e59450
--- /dev/null
+++ b/packages/prosody-ui/src/logic/stanza.ts
@@ -0,0 +1,86 @@
+import type { AsyncRes, Result } from "sortug";
+
+// Base URL of the text-analysis HTTP service used by this module
+// (presumably a locally running Stanza server — confirm against deployment).
+const ENDPOINT = "http://localhost:8102";
+
+/**
+ * POSTs `{ lang, string: text }` as JSON to `${ENDPOINT}/segment` and returns
+ * the decoded JSON response.
+ *
+ * Never throws: every failure is folded into the result object.
+ *
+ * @param text - The text to send for segmentation.
+ * @param lang - Language code forwarded verbatim to the service.
+ * @returns `{ ok: <parsed JSON body> }` on a 2xx response, otherwise
+ *   `{ error: <message> }` (network failure, non-2xx status, or invalid JSON).
+ */
+export async function segmenter(text: string, lang: string) {
+  try {
+    const res = await fetch(ENDPOINT + "/segment", {
+      headers: { "Content-type": "application/json" },
+      method: "POST",
+      body: JSON.stringify({ lang, string: text }),
+    });
+    // fetch() only rejects on network errors; an HTTP error status still
+    // resolves, so it has to be turned into an error result explicitly.
+    if (!res.ok) return { error: `segment request failed: HTTP ${res.status}` };
+    const j = await res.json();
+    return { ok: j };
+  } catch (e) {
+    return { error: `${e}` };
+  }
+}
+/**
+ * POSTs `{ string: text }` as JSON to `${ENDPOINT}/detect-lang` and returns
+ * the decoded JSON response.
+ *
+ * Never throws: every failure is folded into the result object.
+ *
+ * @param text - The text whose language should be identified.
+ * @returns `{ ok: <parsed JSON body> }` on a 2xx response, otherwise
+ *   `{ error: <message> }` (network failure, non-2xx status, or invalid JSON).
+ */
+export async function idLang(text: string) {
+  try {
+    const res = await fetch(ENDPOINT + "/detect-lang", {
+      headers: { "Content-type": "application/json" },
+      method: "POST",
+      body: JSON.stringify({ string: text }),
+    });
+    // fetch() only rejects on network errors; an HTTP error status still
+    // resolves, so it has to be turned into an error result explicitly.
+    if (!res.ok) {
+      return { error: `detect-lang request failed: HTTP ${res.status}` };
+    }
+    const j = await res.json();
+    return { ok: j };
+  } catch (e) {
+    return { error: `${e}` };
+  }
+}
+
+export type Sentence = { // one analysed sentence; NOTE(review): presumably mirrors the JSON returned by the /segment endpoint — confirm
+ text: string;
+ sentiment: number;
+ constituency: string;
+ dependencies: Dependency[]; // NOTE(review): Dependency is already an array of triples, so this is an array of arrays — confirm the extra nesting is intended
+ entities: Entity[];
+ tokens: Token[];
+ words: Word[];
+};
+export type Dependency = Array<[Word, string, Word]>; // list of [Word, string, Word] triples; the string is presumably the relation label (cf. "amod" in the commented sample below) — confirm
+export type Word = { // one word record; the commented sample at the end of this file shows a concrete instance
+ id: number;
+ text: string;
+ lemma: string;
+ upos: string;
+ xpos: string;
+ feats: string; // e.g. "Degree=Pos" in the commented sample
+ head: number; // presumably the id of the governing word — confirm
+ deprel: string; // e.g. "amod" in the commented sample
+ start_char: number; // presumably character offsets into the source text — confirm
+ end_char: number;
+};
+export type Token = { // one token record, carrying the Word entries it contains
+ id: [number, number];
+ text: string;
+ misc: string;
+ words: Word[];
+ start_char: number;
+ end_char: number;
+ ner: string;
+};
+export type Entity = { // one entity record with its text span and a type label
+ text: string;
+ misc: string;
+ start_char: number;
+ end_char: number;
+ type: string;
+};
+// "amod",
+// {
+// "id": 1,
+// "text": "Stony",
+// "lemma": "Stony",
+// "upos": "ADJ",
+// "xpos": "NNP",
+// "feats": "Degree=Pos",
+// "head": 3,
+// "deprel": "amod",
+// "start_char": 0,
+// "end_char": 5
+// }
diff --git a/packages/prosody-ui/src/logic/types.ts b/packages/prosody-ui/src/logic/types.ts
new file mode 100644
index 0000000..ac308cf
--- /dev/null
+++ b/packages/prosody-ui/src/logic/types.ts
@@ -0,0 +1,48 @@
+export type Cookie = { // a single cookie record; NOTE(review): field names resemble a browser cookie export — confirm the source
+ domain: string;
+ path: string;
+ hostOnly: boolean;
+ httpOnly: boolean;
+ secure: boolean;
+ session: boolean;
+ sameSite: SameSite;
+ storeId: null; // typed as the literal null only
+ name: string;
+ value: string;
+};
+export type CookiesMap = Record<string, CookieMap>; // string key -> CookieMap (key is presumably the app name, cf. CookieRes — confirm)
+export type CookieMap = Record<string, Cookie>; // string key -> Cookie (key is presumably the cookie name — confirm)
+export type KeyMap = Record<string, string>; // string -> string; used for `apiKeys` in CookiesRes
+type SameSite = null | "no_restriction"; // TODO: only these two values are listed so far
+
+export type APIRes = { API: { app: string; api_key: string } }; // payload pairing an app with an API key
+export type CookieRes = { Cookie: { app: string; cookie: CookieMap } }; // payload pairing an app with its CookieMap
+export type CookiesRes = { cookies: CookiesMap; apiKeys: KeyMap }; // combined payload: all cookie maps plus the API-key map
+
+// words
+export type Meaning = { // one dictionary sense group for a word
+ pos: string; // part of speech
+ meaning: string[]; // one entry per definition
+ etymology: string;
+ references?: any; // untyped and optional; NOTE(review): shape not visible in this file — confirm before relying on it
+};
+
+export type Prompts = { // named prompt strings; only `translate` is declared
+ translate: string;
+};
+export type AnalyzeRes = { // analysis result for a single word
+ word: string;
+ syllables: string[];
+ ipa: string;
+ pos: POS;
+};
+type PosTuple = [string, POS]; // not exported and not referenced in the visible part of this file
+type POS = string; // alias only; no restriction on the allowed values
+
+export type WordData = { // a word together with its pronunciation and meanings
+ spelling: string;
+ lang: string;
+ ipa: string;
+ meanings: Meaning[];
+ references?: any; // untyped and optional, as in Meaning
+};
diff --git a/packages/prosody-ui/src/logic/utils.ts b/packages/prosody-ui/src/logic/utils.ts
new file mode 100644
index 0000000..737a6ec
--- /dev/null
+++ b/packages/prosody-ui/src/logic/utils.ts
@@ -0,0 +1,66 @@
+import type { Result } from "sortug";
+
+/**
+ * Character classes used by `detectScript`, one per script.
+ *
+ * Insertion order matters: when two scripts match the same number of
+ * characters, the one listed first wins.
+ */
+const SCRIPT_PATTERNS: Record<string, RegExp> = {
+  // Note: the Latin class spans all of ASCII, so whitespace, digits and
+  // punctuation are counted as Latin too.
+  Latin: /[\u0000-\u007F\u00A0-\u00FF\u0100-\u017F\u0180-\u024F]/g,
+  Cyrillic: /[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F]/g,
+  Greek: /[\u0370-\u03FF\u1F00-\u1FFF]/g,
+  Hebrew: /[\u0590-\u05FF]/g,
+  Arabic: /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF]/g,
+  Devanagari: /[\u0900-\u097F]/g, // Hindi, Sanskrit, etc.
+  Bengali: /[\u0980-\u09FF]/g,
+  Thai: /[\u0E00-\u0E7F]/g,
+  // The supplementary-plane ranges need the `u` flag and `\u{…}` escapes.
+  // Without them `\u20000-\u2A6DF` is read as `\u2000`, the range
+  // `0-\u2A6D`, and a literal `F`, which made this class match digits and
+  // most other scripts.
+  Chinese:
+    /[\u4E00-\u9FFF\u3400-\u4DBF\u{20000}-\u{2A6DF}\u{2A700}-\u{2B73F}\u{2B740}-\u{2B81F}]/gu,
+  Japanese: /[\u3040-\u309F\u30A0-\u30FF\uFF00-\uFFEF\u4E00-\u9FAF]/g, // Includes Hiragana, Katakana
+  Korean: /[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F]/g, // Includes Hangul
+  Armenian: /[\u0530-\u058F]/g,
+  Georgian: /[\u10A0-\u10FF]/g,
+  Khmer: /[\u1780-\u17FF]/g, // Cambodian
+  Myanmar: /[\u1000-\u109F]/g, // Burmese
+  Tamil: /[\u0B80-\u0BFF]/g,
+  Telugu: /[\u0C00-\u0C7F]/g,
+  Amharic: /[\u1200-\u137F]/g, // Ethiopian
+};
+
+/**
+ * Guesses the dominant writing system of `text` by counting, for each known
+ * script, how many characters fall into that script's Unicode ranges.
+ *
+ * @param text - The text to inspect.
+ * @returns `{ ok: <script name> }` for the script with the highest count
+ *   (earlier entries of the table win ties), or `{ error: "Not detected" }`
+ *   when no character matches any script (e.g. the empty string).
+ */
+export function detectScript(text: string): Result<string> {
+  let maxCount = 0;
+  let dominantScript = "Unknown";
+
+  for (const [scriptName, regex] of Object.entries(SCRIPT_PATTERNS)) {
+    // String.prototype.match with a global regex returns every match (or
+    // null) and resets lastIndex, so sharing the patterns is safe.
+    const count = text.match(regex)?.length ?? 0;
+    if (count > maxCount) {
+      maxCount = count;
+      dominantScript = scriptName;
+    }
+  }
+
+  if (dominantScript === "Unknown") return { error: "Not detected" };
+  return { ok: dominantScript };
+}
+
+/**
+ * Maps a script name (as produced by `detectScript`) to a language code, for
+ * the scripts that are treated here as belonging to a single language.
+ *
+ * @param script - Script name such as "Thai" or "Korean".
+ * @returns `{ ok: <two-letter code> }` for Thai, Japanese, Chinese and
+ *   Korean; `{ error: "too generic" }` for every other input.
+ */
+export function langFromScript(script: string): Result<string> {
+  switch (script) {
+    case "Thai":
+      return { ok: "th" };
+    case "Japanese":
+      return { ok: "ja" };
+    case "Chinese":
+      return { ok: "zh" };
+    case "Korean":
+      return { ok: "ko" };
+    default:
+      return { error: "too generic" };
+  }
+}
+/**
+ * Returns a script tag for a language code, falling back to script detection
+ * on the text itself for languages without a fixed mapping.
+ *
+ * NOTE(review): the fixed tags are a mixed vocabulary ("Thai", "Engl", "Span",
+ * "Hant", "Japn") and the fallback yields `detectScript` names such as
+ * "Latin" — confirm that callers expect exactly these strings before
+ * normalising them.
+ *
+ * @param lang - Language code; "th", "tha", "en", "es", "cn", "zh" and "ja"
+ *   have fixed results.
+ * @param text - Sample text, only inspected when `lang` has no fixed mapping.
+ * @returns The script tag, or `""` when `lang` is unmapped and no script could
+ *   be detected in `text`.
+ */
+export function scriptFromLang(lang: string, text: string): string {
+  // `switch` compares with strict equality, replacing the loose `==` checks.
+  switch (lang) {
+    case "th":
+    case "tha":
+      return "Thai";
+    case "en":
+      return "Engl";
+    case "es":
+      return "Span";
+    case "cn":
+    case "zh":
+      return "Hant";
+    case "ja":
+      return "Japn";
+    default: {
+      const res = detectScript(text);
+      return "ok" in res ? res.ok : "";
+    }
+  }
+}
diff --git a/packages/prosody-ui/src/logic/wiki.ts b/packages/prosody-ui/src/logic/wiki.ts
new file mode 100644
index 0000000..1325c0f
--- /dev/null
+++ b/packages/prosody-ui/src/logic/wiki.ts
@@ -0,0 +1,138 @@
+import type { AsyncRes, Result } from "sortug";
+import type { Meaning } from "./types";
+
+/**
+ * Builds the English Wiktionary `action=parse` API URL for a page title.
+ *
+ * The query asks for JSON (`formatversion=2`) containing the page's
+ * templates and rendered text.
+ *
+ * @param word - Page title to look up; it is URL-encoded by URLSearchParams.
+ * @returns The full request URL.
+ */
+export function buildWiktionaryURL(word: string) {
+  const query = new URLSearchParams([
+    ["action", "parse"],
+    ["page", word],
+    ["format", "json"],
+    ["prop", "templates|text"],
+    ["formatversion", "2"],
+  ]).toString();
+  return "https://en.wiktionary.org/w/api.php?" + query;
+}
+
+// export async function fetchWordInWiki(url: string) {
+// const opts = { method: "GET", body: null, headers: {} };
+// try {
+// const res = await proxyCall(url, opts);
+// console.log(res.headers.get("content-type"));
+// const j = await res.json();
+// return { ok: j };
+// } catch (e) {
+// return { error: `${e}` };
+// }
+// }
+
+export type WikiRes = { // result of parsing one Wiktionary page (returned by the parse functions in this file)
+ url: string; // the `url` argument passed to the parser, echoed back unchanged
+ meanings: Meaning[]; // one entry per part-of-speech section / definition list found
+ ipa: string[]; // innerText of every element with class "IPA" in the parsed document
+};
+// Lower-cased heading texts that are treated as part-of-speech sections.
+const poses = [
+  "noun",
+  "verb",
+  "adjective",
+  "adverb",
+  "conjunction",
+  "determiner",
+  "preposition",
+  "definitions",
+];
+
+/**
+ * Extracts meanings and IPA strings from the rendered HTML of a Wiktionary
+ * page.
+ *
+ * Walks the `.mw-heading` elements in document order. An "etymology" heading
+ * stores its text on the pending entry; a heading whose text is listed in
+ * `poses` completes the entry with its definitions and pushes it. Scanning
+ * stops at the first "references" heading so that only one language section
+ * is read.
+ *
+ * @param html - Rendered page HTML.
+ * @param url - Source URL, copied into the result unchanged.
+ * @returns `{ ok: WikiRes }`, or `{ error }` if parsing throws (for example
+ *   when `DOMParser` is unavailable).
+ */
+export function parseWiktionary(html: string, url: string): Result<WikiRes> {
+  try {
+    const dp = new DOMParser();
+    const doc = dp.parseFromString(html, "text/html");
+    const ipas = doc.querySelectorAll(".IPA");
+    const headings = doc.querySelectorAll(".mw-heading");
+    const ms: Meaning[] = [];
+    let currentRound: Meaning = { pos: "", meaning: [], etymology: "" };
+    for (const h of Array.from(headings)) {
+      // A heading wrapper without children used to throw here and abort the
+      // whole parse; it is now skipped like any heading without text.
+      const headingType = (h.firstChild as HTMLElement | null)?.innerText;
+      if (!headingType) continue;
+      const ht = headingType.toLowerCase();
+      if (ht.includes("etymology")) currentRound.etymology = fillEtym(h);
+      else if (poses.includes(ht)) {
+        currentRound.pos = ht;
+        currentRound = fillMeaning(h, currentRound);
+      }
+      if (currentRound.pos) {
+        // Entry complete: store a copy and start a fresh one, so a pending
+        // etymology is attached only to the first section that follows it.
+        ms.push({ ...currentRound });
+        currentRound = { pos: "", meaning: [], etymology: "" };
+      }
+      if (ht === "references") break; // stay within a single language section
+    }
+    const ipaStrings = Array.from(ipas).map(
+      (el) => (el as HTMLElement).innerText,
+    );
+    return { ok: { meanings: ms, ipa: ipaStrings, url } };
+  } catch (e) {
+    return { error: `${e}` };
+  }
+}
+
+/**
+ * Scans the siblings that follow `el` for an `<ol>` and appends the inner
+ * HTML of its `<li>` children to `m.meaning`, skipping items whose class
+ * name contains "empty-elt".
+ *
+ * The scan stops after the first sibling once `m.meaning` is non-empty, or
+ * when the siblings run out. `m` is mutated in place and returned.
+ */
+function fillMeaning(el: Element, m: Meaning) {
+  let node = el.nextElementSibling;
+  while (node) {
+    if (node.tagName.toLowerCase() === "ol") {
+      for (const item of Array.from(node.children)) {
+        const isListItem = item.tagName.toLowerCase() === "li";
+        if (isListItem && !item.className.includes("empty-elt")) {
+          m.meaning.push(item.innerHTML);
+        }
+      }
+    }
+    if (m.meaning.length !== 0) break;
+    node = node.nextElementSibling;
+  }
+  return m;
+}
+
+/**
+ * Scans the siblings that follow `el` and returns `acc` extended with a
+ * newline plus the inner HTML of the first `<p>` encountered.
+ *
+ * The scan ends as soon as the accumulated string is non-empty (so a
+ * non-empty `acc` is returned after looking at one sibling only) or when
+ * the siblings run out.
+ */
+function fillEtym(el: Element, acc: string = ""): string {
+  let collected = acc;
+  for (
+    let node = el.nextElementSibling;
+    node;
+    node = node.nextElementSibling
+  ) {
+    if (node.tagName.toLowerCase() === "p") {
+      collected += `\n${node.innerHTML}`;
+    }
+    if (collected) return collected;
+  }
+  return collected;
+}
+
+/**
+ * Alternative Wiktionary parser that treats every `<ol>` in the document as
+ * one meaning group.
+ *
+ * For each list, the part of speech comes from `findPos` (empty string when
+ * it reports an error) and the meanings are the inner text of the list's
+ * direct `<li>` children. The etymology is always left empty. The `.IPA`
+ * elements and the lists are logged to the console.
+ *
+ * @param html - Rendered page HTML.
+ * @param url - Source URL, copied into the result unchanged.
+ * @returns `{ ok: WikiRes }`, or `{ error }` if parsing throws.
+ */
+export function parseWiktionaryo(html: string, url: string): Result<WikiRes> {
+  try {
+    const doc = new DOMParser().parseFromString(html, "text/html");
+    const ipas = doc.querySelectorAll(".IPA");
+    const ols = doc.querySelectorAll("ol");
+
+    const ms: Meaning[] = [];
+    for (const list of Array.from(ols)) {
+      const posr = findPos(list);
+      const pos = "ok" in posr ? posr.ok : "";
+      const meaning: string[] = [];
+      for (const item of Array.from(list.children)) {
+        if (item.tagName === "LI") meaning.push((item as any).innerText);
+      }
+      ms.push({ pos, meaning, etymology: "" });
+    }
+
+    console.log(ipas, "ipa strings");
+    console.log(ols, "lists in wiki");
+
+    const ipaStrings: any[] = [];
+    for (const el of Array.from(ipas)) ipaStrings.push((el as any).innerText);
+    return { ok: { meanings: ms, ipa: ipaStrings, url } };
+  } catch (e) {
+    return { error: `${e}` };
+  }
+}
+
+/**
+ * Walks backwards through the siblings preceding `el` until it reaches an
+ * element with class "mw-heading" that contains an `<h4>` or `<h3>`, and
+ * returns that heading's inner text (the `<h4>` text wins when non-empty).
+ *
+ * Every visited sibling is logged to the console, including the final
+ * `null` when the siblings run out.
+ *
+ * @returns `{ ok: <heading text> }`, or `{ error: "no pichai" }` when no such
+ *   heading precedes `el`.
+ */
+function findPos(el: Element): Result<string> {
+  let prev = el.previousElementSibling;
+  for (;;) {
+    console.log(prev, "previous");
+    if (!prev) return { error: "no pichai" };
+    if (prev.classList.contains("mw-heading")) {
+      const h4 = prev.querySelector("h4");
+      const h3 = prev.querySelector("h3");
+      if (h4 || h3) {
+        const id = (h4?.innerText || h3?.innerText)!;
+        return { ok: id };
+      }
+    }
+    prev = prev.previousElementSibling;
+  }
+}