summaryrefslogtreecommitdiff
path: root/packages/prosody-ui/src/logic/wiki.ts
blob: 1325c0fe1772f88ea8beeebf11df92db6a8bedf4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import type { AsyncRes, Result } from "sortug";
import type { Meaning } from "./types";

/**
 * Builds the English Wiktionary `action=parse` API URL for a word.
 *
 * @param word - Page title to look up; it is URL-encoded by `URLSearchParams`.
 * @returns The full request URL, asking for JSON (`formatversion=2`) with the
 *   `templates` and `text` props.
 */
export function buildWiktionaryURL(word: string): string {
  const query = new URLSearchParams([
    ["action", "parse"],
    ["page", word],
    ["format", "json"],
    ["prop", "templates|text"],
    ["formatversion", "2"],
  ]).toString();
  return `https://en.wiktionary.org/w/api.php?${query}`;
}

// export async function fetchWordInWiki(url: string) {
//   const opts = { method: "GET", body: null, headers: {} };
//   try {
//     const res = await proxyCall(url, opts);
//     console.log(res.headers.get("content-type"));
//     const j = await res.json();
//     return { ok: j };
//   } catch (e) {
//     return { error: `${e}` };
//   }
// }

/** Result payload produced by the Wiktionary HTML parsers in this file. */
export type WikiRes = {
  /** The URL handed to the parser, passed through unchanged. */
  url: string;
  /** One entry per part-of-speech section (or per list, in the older parser). */
  meanings: Meaning[];
  /** Text of every element with the `IPA` class found in the page. */
  ipa: string[];
};
// Heading titles that `parseWiktionary` treats as part-of-speech sections.
// The lowercased heading text must equal one of these entries exactly
// (it is checked with `includes` on this array, not a substring match).
// NOTE(review): "definitions" is not a part of speech; presumably some pages
// use it as a section title — confirm.
const poses = [
  "noun",
  "verb",
  "adjective",
  "adverb",
  "conjunction",
  "determiner",
  "preposition",
  "definitions",
];

/**
 * Extracts meanings and IPA transcriptions from the rendered HTML of a
 * Wiktionary page.
 *
 * Walks every `.mw-heading` element in document order. A heading whose text
 * contains "etymology" stores its etymology on the pending entry; a heading
 * whose lowercased text is listed in `poses` fills in the definitions and
 * completes the entry, which is then pushed and a fresh pending entry is
 * started. Scanning stops after a "references" heading.
 *
 * @param html - HTML markup of the page.
 * @param url - Passed through unchanged into the result.
 * @returns `{ ok }` with the collected meanings, the text of every `.IPA`
 *   element and `url`; `{ error }` with the stringified exception if anything
 *   throws while parsing.
 */
export function parseWiktionary(html: string, url: string): Result<WikiRes> {
  try {
    const doc = new DOMParser().parseFromString(html, "text/html");
    const ipas = doc.querySelectorAll<HTMLElement>(".IPA");
    const headings = doc.querySelectorAll(".mw-heading");
    const ms: Meaning[] = [];
    let currentRound: Meaning = { pos: "", meaning: [], etymology: "" };
    for (const h of Array.from(headings)) {
      // `firstChild` is null for an empty heading wrapper. Reading `innerText`
      // off it used to throw and turn the whole page into an error result, so
      // read it null-safely and skip headings that yield no text.
      const headingType = (h.firstChild as HTMLElement | null)?.innerText;
      if (!headingType) continue;
      const ht = headingType.toLowerCase();
      if (ht.includes("etymology")) currentRound.etymology = fillEtym(h);
      else if (poses.includes(ht)) {
        currentRound.pos = ht;
        currentRound = fillMeaning(h, currentRound);
      }
      // An entry is complete once it has a part of speech; an etymology seen
      // earlier stays attached to the first part of speech that follows it.
      if (currentRound.pos) {
        ms.push({ ...currentRound });
        currentRound = { pos: "", meaning: [], etymology: "" };
      }
      if (ht === "references") break; // make sure it's one single language lol
    }
    const ipaStrings = Array.from(ipas).map((el) => el.innerText);
    return { ok: { meanings: ms, ipa: ipaStrings, url } };
  } catch (e) {
    return { error: `${e}` };
  }
}

/**
 * Collects definition items for a heading into `m.meaning`.
 *
 * Walks forward through the following element siblings of `el`. For each
 * `<ol>` encountered, the `innerHTML` of every direct `<li>` child whose class
 * name does not contain "empty-elt" is appended. The walk ends as soon as
 * `m.meaning` is non-empty or there are no more siblings.
 *
 * @param el - Element whose following siblings are searched.
 * @param m - Entry to fill; it is mutated in place.
 * @returns The same `m` object.
 */
function fillMeaning(el: Element, m: Meaning) {
  let cursor: Element = el;
  for (;;) {
    const next = cursor.nextElementSibling;
    if (!next) return m;
    if (next.tagName.toLowerCase() === "ol") {
      Array.from(next.children)
        .filter(
          (child) =>
            child.tagName.toLowerCase() === "li" &&
            !child.className.includes("empty-elt"),
        )
        .forEach((child) => {
          m.meaning.push(child.innerHTML);
        });
    }
    if (m.meaning.length !== 0) return m;
    cursor = next;
  }
}

/**
 * Finds the etymology text that follows a heading.
 *
 * Walks forward through the following element siblings of `el`. When a `<p>`
 * is reached, a newline plus its `innerHTML` is appended to the accumulator.
 * The walk stops as soon as the accumulator is non-empty or the siblings run
 * out.
 *
 * @param el - Element whose following siblings are searched.
 * @param acc - Initial accumulator text; defaults to the empty string.
 * @returns The accumulated text, or `acc` unchanged if no `<p>` was reached.
 */
function fillEtym(el: Element, acc: string = ""): string {
  let text = acc;
  let cursor: Element = el;
  for (;;) {
    const next = cursor.nextElementSibling;
    if (!next) return text;
    if (next.tagName.toLowerCase() === "p") {
      text = `${text}\n${next.innerHTML}`;
    }
    if (text) return text;
    cursor = next;
  }
}

/**
 * List-driven parser for the rendered HTML of a Wiktionary page.
 *
 * Produces one entry per `<ol>` in the document: the part of speech comes from
 * `findPos` (empty string when it reports an error), the meanings are the
 * `innerText` of the list's direct `<li>` children, and the etymology is
 * always empty. The matched IPA nodes and lists are logged to the console.
 *
 * NOTE(review): this looks like an earlier variant of `parseWiktionary` —
 * confirm whether it still has callers.
 *
 * @param html - HTML markup of the page.
 * @param url - Passed through unchanged into the result.
 * @returns `{ ok }` with the entries, the text of every `.IPA` element and
 *   `url`; `{ error }` with the stringified exception if anything throws.
 */
export function parseWiktionaryo(html: string, url: string): Result<WikiRes> {
  try {
    const doc = new DOMParser().parseFromString(html, "text/html");
    const ipaNodes = doc.querySelectorAll(".IPA");
    const lists = doc.querySelectorAll("ol");
    const meanings = Array.from(lists).map((list) => {
      const found = findPos(list);
      const pos = "ok" in found ? found.ok : "";
      const meaning: string[] = Array.from(list.children)
        .filter((child) => child.tagName === "LI")
        .map((child) => (child as any).innerText);
      return { pos, meaning, etymology: "" };
    });
    console.log(ipaNodes, "ipa strings");
    console.log(lists, "lists in wiki");
    const ipa = Array.from(ipaNodes).map((node: any) => node.innerText);
    return { ok: { meanings, ipa, url } };
  } catch (err) {
    return { error: `${err}` };
  }
}

/**
 * Looks backwards from an element for the nearest section heading title.
 *
 * Steps through the previous element siblings of `el`, logging each one. The
 * first sibling that has the `mw-heading` class and contains an `<h4>` or
 * `<h3>` supplies the result: the `<h4>` text if non-empty, otherwise the
 * `<h3>` text.
 *
 * @param el - Element whose preceding siblings are searched.
 * @returns `{ ok }` with the heading text, or `{ error: "no pichai" }` when
 *   the siblings run out before a matching heading is found.
 */
function findPos(el: Element): Result<string> {
  let cursor: Element = el;
  for (;;) {
    const prev = cursor.previousElementSibling;
    console.log(prev, "previous");
    if (!prev) return { error: "no pichai" };
    if (prev.classList.contains("mw-heading")) {
      const h4 = prev.querySelector("h4");
      const h3 = prev.querySelector("h3");
      if (h4 || h3) {
        const title = (h4?.innerText || h3?.innerText)!;
        return { ok: title };
      }
    }
    // Not a usable heading: keep walking towards the start of the parent.
    cursor = prev;
  }
}