diff options
Diffstat (limited to 'src/lib/db/thaiseed.ts')
-rw-r--r-- | src/lib/db/thaiseed.ts | 87 |
1 files changed, 64 insertions, 23 deletions
diff --git a/src/lib/db/thaiseed.ts b/src/lib/db/thaiseed.ts index 5c75345..6c69d9c 100644 --- a/src/lib/db/thaiseed.ts +++ b/src/lib/db/thaiseed.ts @@ -12,6 +12,7 @@ import pdb from "./prosodydb"; import { cleanIpa } from "../utils"; import { handleFile } from "./utils"; import { Tone } from "../types/phonetics"; +import { AsyncRes } from "../types"; async function readDump(lang: string) { await pdb.init(); @@ -30,38 +31,77 @@ async function readDump(lang: string) { const j = JSON.parse(langrow.data); const word = j.word.trim(); if (!word) continue; - if (word.includes("ๆ")) await handleWord(word, j); - else { + + if (word.includes("ๆ")) { + const res = await handleWord(word, j); + if ("error" in res) { + if (res.error.includes("meh")) continue; + if (res.error.includes("wtf")) { + console.error(res.error); + console.error(j.sounds); + } + break; + } + } else { const split = word.split(" "); - if (split.length > 1) await handleIdiom(word); - else await handleWord(word, j); + if (split.length > 1) { + const res = await handleIdiom(word); + if ("error" in res) { + console.error(res.error); + break; + } + } else { + const res = await handleWord(word, j); + if ("error" in res) { + if (res.error.includes("meh")) continue; + if (res.error.includes("wtf")) { + console.error(res.error); + console.error(j.sounds); + } + // break; + } + } } } } -async function handleWord(word: string, j: any) { +async function handleWord(word: string, j: any): AsyncRes<string> { // TODO add categories but add a tag to see what classifying scheme we're using // const sounds = j.sounds || []; const hasIpa = sounds.find((s: any) => "ipa" in s); - if (!hasIpa) return; + if (!hasIpa) return { error: "meh no ipa" }; const freq = await getThaiFreq(word); const wordId = pdb.addWord(word, "th", freq, null); + if (wordId == 478 || word === "และ") { + console.log("wtf man"); + console.dir(j, { depth: null }); + return { error: "i said wtf" }; + } const analyzed = await analyzeTHWord(word); - for (let snd of sounds) if ("ipa" in snd) handleIpa(wordId, j, snd, analyzed); + for (let snd of sounds) + if ("ipa" in snd) { + const res = await handleIpa(wordId, j, snd, analyzed); + if ("error" in res) return res; + } + return { ok: "" }; } async function handleIpa( wordId: number | bigint, j: any, snd: any, analyzed: ThaiNLPRes, -) { +): AsyncRes<string> { const tags = JSON.stringify(snd.tags) || null; // console.log("handleipa", analyzed.syllables.length); // console.log(analyzed); const wikiIpa = cleanIpa(snd.ipa); const nlpIpa = cleanIpa(analyzed.ipa); const ipa = wikiIpa || nlpIpa; + if (j.word === "และ") { + console.log("wtf!!"); + return { error: "wtf is this" }; + } const wikiIpaSplit = wikiIpa.split("."); const nlpIpaSplit = nlpIpa.split("."); if (wikiIpaSplit.length !== nlpIpaSplit.length) { @@ -73,14 +113,15 @@ async function handleIpa( // console.log("syllable analysis mismatch", j.word); // console.log({ syls: analyzed.syllables, ipa: wikiIpaSplit }); // console.dir(j, { depth: null }); - return; + return { error: "meh syllable analysis mismatch" }; } - pdb.addPronunciation(wordId, ipa, analyzed.syllables.length, tags, null); const writtenSyls = analyzed.syllables; const pronouncedSyls = analyzed.realSyls; let badSyls = false; if (writtenSyls.length !== pronouncedSyls.length) badSyls = true; + pdb.addPronunciation(wordId, ipa, pronouncedSyls.length, tags, null); + for (let i = 0; i < pronouncedSyls.length; i++) { const pronounced = pronouncedSyls[i]!.replace(/\u{E3A}/u, ""); const written = writtenSyls[i] || ""; @@ -93,14 +134,10 @@ async function handleIpa( console.log(pronounced); console.log(written); } - try { - await handleSyllable(syllable, ipa, wordId, i, notes); - } catch (e) { - console.error("syl error", j.word, j.sounds); - console.error({ analyzed, ipa, wikiIpaSplit }); - console.error(e); - } + const res = await handleSyllable(syllable, ipa, wordId, i, notes); + if ("error" in res) return res; } + return { ok: "" }; } const thaiTones: Record<string, string> = { "˧": "mid", @@ -122,7 +159,7 @@ function parseTone(ipa: string, spelling: string): Tone { const numbers = thaiToneNums[ipa]!; return { letters: ipa, name, numbers }; } catch (e) { - console.error("wrong tones!!", { s: spelling, ipa }); + console.error("meh wrong tones!!", { s: spelling, ipa }); throw new Error(""); } } @@ -133,7 +170,7 @@ async function handleSyllable( wordId: number | bigint, idx: number, notes: string | null, -) { +): AsyncRes<string> { const sorsyl = await sorSyl(spelling, "th", ipa); const weird = [ // "a̯n", @@ -166,14 +203,16 @@ async function handleSyllable( // // console.dir(j, { depth: null }); // } if (sorsyl.syls.length !== 1) throw new Error("wtf sorsyl!"); - const syl = sorsyl.syls[0]!; + const syl = sorsyl.syls[0]!.ipa; const tone = parseTone(syl.tone, spelling); + // TODO add actual ortographic data here not just ipa try { pdb.addSyllable( wordId, idx + 1, + null, "th", - syl.ipa, + syl.all, syl.long, spelling, { spelling: syl.onset, ipa: syl.onset }, @@ -184,16 +223,18 @@ async function handleSyllable( tone, notes, ); + return { ok: "" }; } catch (e) { // console.log("well fuck", syl); // console.error(e); - console.log(); + return { error: `meh ${e}` }; } } -async function handleIdiom(idiom: string) { +async function handleIdiom(idiom: string): AsyncRes<string> { pdb.addIdiom(idiom, "th"); // TODO later set idiom_words once all words are populated // console.log(); + return { ok: "" }; } // ช้า ๆ // งก ๆ |