summary refs log tree commit diff
path: root/src/lib/db/thaiseed.ts
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/db/thaiseed.ts')
-rw-r--r-- src/lib/db/thaiseed.ts 87
1 files changed, 64 insertions, 23 deletions
diff --git a/src/lib/db/thaiseed.ts b/src/lib/db/thaiseed.ts
index 5c75345..6c69d9c 100644
--- a/src/lib/db/thaiseed.ts
+++ b/src/lib/db/thaiseed.ts
@@ -12,6 +12,7 @@ import pdb from "./prosodydb";
import { cleanIpa } from "../utils";
import { handleFile } from "./utils";
import { Tone } from "../types/phonetics";
+import { AsyncRes } from "../types";
async function readDump(lang: string) {
await pdb.init();
@@ -30,38 +31,77 @@ async function readDump(lang: string) {
const j = JSON.parse(langrow.data);
const word = j.word.trim();
if (!word) continue;
- if (word.includes("ๆ")) await handleWord(word, j);
- else {
+
+ if (word.includes("ๆ")) {
+ const res = await handleWord(word, j);
+ if ("error" in res) {
+ if (res.error.includes("meh")) continue;
+ if (res.error.includes("wtf")) {
+ console.error(res.error);
+ console.error(j.sounds);
+ }
+ break;
+ }
+ } else {
const split = word.split(" ");
- if (split.length > 1) await handleIdiom(word);
- else await handleWord(word, j);
+ if (split.length > 1) {
+ const res = await handleIdiom(word);
+ if ("error" in res) {
+ console.error(res.error);
+ break;
+ }
+ } else {
+ const res = await handleWord(word, j);
+ if ("error" in res) {
+ if (res.error.includes("meh")) continue;
+ if (res.error.includes("wtf")) {
+ console.error(res.error);
+ console.error(j.sounds);
+ }
+ // break;
+ }
+ }
}
}
}
-async function handleWord(word: string, j: any) {
+async function handleWord(word: string, j: any): AsyncRes<string> {
// TODO add categories but add a tag to see what classifying scheme we're using
//
const sounds = j.sounds || [];
const hasIpa = sounds.find((s: any) => "ipa" in s);
- if (!hasIpa) return;
+ if (!hasIpa) return { error: "meh no ipa" };
const freq = await getThaiFreq(word);
const wordId = pdb.addWord(word, "th", freq, null);
+ if (wordId == 478 || word === "และ") {
+ console.log("wtf man");
+ console.dir(j, { depth: null });
+ return { error: "i said wtf" };
+ }
const analyzed = await analyzeTHWord(word);
- for (let snd of sounds) if ("ipa" in snd) handleIpa(wordId, j, snd, analyzed);
+ for (let snd of sounds)
+ if ("ipa" in snd) {
+ const res = await handleIpa(wordId, j, snd, analyzed);
+ if ("error" in res) return res;
+ }
+ return { ok: "" };
}
async function handleIpa(
wordId: number | bigint,
j: any,
snd: any,
analyzed: ThaiNLPRes,
-) {
+): AsyncRes<string> {
const tags = JSON.stringify(snd.tags) || null;
// console.log("handleipa", analyzed.syllables.length);
// console.log(analyzed);
const wikiIpa = cleanIpa(snd.ipa);
const nlpIpa = cleanIpa(analyzed.ipa);
const ipa = wikiIpa || nlpIpa;
+ if (j.word === "และ") {
+ console.log("wtf!!");
+ return { error: "wtf is this" };
+ }
const wikiIpaSplit = wikiIpa.split(".");
const nlpIpaSplit = nlpIpa.split(".");
if (wikiIpaSplit.length !== nlpIpaSplit.length) {
@@ -73,14 +113,15 @@ async function handleIpa(
// console.log("syllable analysis mismatch", j.word);
// console.log({ syls: analyzed.syllables, ipa: wikiIpaSplit });
// console.dir(j, { depth: null });
- return;
+ return { error: "meh syllable analysis mismatch" };
}
- pdb.addPronunciation(wordId, ipa, analyzed.syllables.length, tags, null);
const writtenSyls = analyzed.syllables;
const pronouncedSyls = analyzed.realSyls;
let badSyls = false;
if (writtenSyls.length !== pronouncedSyls.length) badSyls = true;
+ pdb.addPronunciation(wordId, ipa, pronouncedSyls.length, tags, null);
+
for (let i = 0; i < pronouncedSyls.length; i++) {
const pronounced = pronouncedSyls[i]!.replace(/\u{E3A}/u, "");
const written = writtenSyls[i] || "";
@@ -93,14 +134,10 @@ async function handleIpa(
console.log(pronounced);
console.log(written);
}
- try {
- await handleSyllable(syllable, ipa, wordId, i, notes);
- } catch (e) {
- console.error("syl error", j.word, j.sounds);
- console.error({ analyzed, ipa, wikiIpaSplit });
- console.error(e);
- }
+ const res = await handleSyllable(syllable, ipa, wordId, i, notes);
+ if ("error" in res) return res;
}
+ return { ok: "" };
}
const thaiTones: Record<string, string> = {
"˧": "mid",
@@ -122,7 +159,7 @@ function parseTone(ipa: string, spelling: string): Tone {
const numbers = thaiToneNums[ipa]!;
return { letters: ipa, name, numbers };
} catch (e) {
- console.error("wrong tones!!", { s: spelling, ipa });
+ console.error("meh wrong tones!!", { s: spelling, ipa });
throw new Error("");
}
}
@@ -133,7 +170,7 @@ async function handleSyllable(
wordId: number | bigint,
idx: number,
notes: string | null,
-) {
+): AsyncRes<string> {
const sorsyl = await sorSyl(spelling, "th", ipa);
const weird = [
// "a̯n",
@@ -166,14 +203,16 @@ async function handleSyllable(
// // console.dir(j, { depth: null });
// }
if (sorsyl.syls.length !== 1) throw new Error("wtf sorsyl!");
- const syl = sorsyl.syls[0]!;
+ const syl = sorsyl.syls[0]!.ipa;
const tone = parseTone(syl.tone, spelling);
+ // TODO add actual orthographic data here not just ipa
try {
pdb.addSyllable(
wordId,
idx + 1,
+ null,
"th",
- syl.ipa,
+ syl.all,
syl.long,
spelling,
{ spelling: syl.onset, ipa: syl.onset },
@@ -184,16 +223,18 @@ async function handleSyllable(
tone,
notes,
);
+ return { ok: "" };
} catch (e) {
// console.log("well fuck", syl);
// console.error(e);
- console.log();
+ return { error: `meh ${e}` };
}
}
-async function handleIdiom(idiom: string) {
+async function handleIdiom(idiom: string): AsyncRes<string> {
pdb.addIdiom(idiom, "th");
// TODO later set idiom_words once all words are populated
// console.log();
+ return { ok: "" };
}
// ช้า ๆ
// งก ๆ