diff options
author | polwex <polwex@sortug.com> | 2025-06-03 09:34:29 +0700 |
---|---|---|
committer | polwex <polwex@sortug.com> | 2025-06-03 09:34:29 +0700 |
commit | 2401217a4019938d1c1cc61b6e33ccb233eb6e74 (patch) | |
tree | 06118284965be5cfd6b417dca86d46db5758217b /src/lib/db/enseed.ts | |
parent | 2b80f7950df34f2a160135d7e20220a9b2ec3352 (diff) |
this is golden thanks claude
Diffstat (limited to 'src/lib/db/enseed.ts')
-rw-r--r-- | src/lib/db/enseed.ts | 85 |
1 files changed, 53 insertions, 32 deletions
diff --git a/src/lib/db/enseed.ts b/src/lib/db/enseed.ts index 58f5876..39dec44 100644 --- a/src/lib/db/enseed.ts +++ b/src/lib/db/enseed.ts @@ -7,12 +7,15 @@ import { type ThaiNLPRes, sorSyl, getThaiFreq, + SorBSyl, } from "../calls/nlp"; import pdb from "./prosodydb"; import { cleanIpa } from "../utils"; import { handleFile } from "./utils"; import { Tone } from "../types/phonetics"; +import { AsyncRes } from "../types"; +const errors: string[] = []; async function readDump(lang: string) { await pdb.init(); pdb.addLanguage("th", "thai"); @@ -27,14 +30,21 @@ async function readDump(lang: string) { count++; console.log(count); // if (count <= 10000) continue; - if (count > 30) break; + if (count > 300) break; const j = JSON.parse(langrow.data); const word = j.word.trim(); if (!word) continue; const split = word.split(" "); - if (split.length > 1) await handleIdiom(lang, word); - else await handleWord(lang, word, j, freqMap); + const res = + split.length > 1 + ? await handleIdiom(lang, word) + : await handleWord(lang, word, j, freqMap); + if ("error" in res) { + console.error(res.error); + break; + } } + console.dir(errors); } async function handleWord( @@ -42,7 +52,7 @@ async function handleWord( word: string, j: any, freqMap: Map<string, number>, -) { +): AsyncRes<string> { // TODO add categories but add a tag to see what classifying scheme we're using // const sounds = j.sounds || []; @@ -50,9 +60,9 @@ async function handleWord( const hwikiRhyme = sounds.find((s: any) => "rhymes" in s); const wikiRhyme = hwikiRhyme ? hwikiRhyme.rhymes : null; if (!hasIpa) { - console.error("no ipa!!", word); - console.dir(j, { depth: null }); - return; + // console.error("no ipa!!", word); + // console.dir(j, { depth: null }); + return { error: "meh no ipa" }; } const freq = freqMap.get(word) || null; // const wordId = pdb.addWord(word, lang, freq, null); @@ -60,7 +70,11 @@ async function handleWord( const wordId = 0; // console.log(analyzed); for (let snd of sounds) - if ("ipa" in snd) handleIpa(wordId, word, lang, j, snd, wikiRhyme); + if ("ipa" in snd) { + const res = await handleIpa(wordId, word, lang, j, snd, wikiRhyme); + if ("error" in res) return res; + } + return { ok: "" }; } async function handleIpa( wordId: number | bigint, @@ -73,58 +87,65 @@ async function handleIpa( const tags = JSON.stringify(snd.tags) || null; const ipa = snd.ipa; const syls = await sorSyl(word, lang, ipa); + // console.log(syls, "sorsyl"); console.log(word); console.log(ipa); - // pdb.addPronunciation(wordId, ipa, syls.syls.length, tags, null); + pdb.addPronunciation(wordId, ipa, syls.syls.length, tags, null); // set word rhyme - const wordRhyme = syls.syls.reduce((acc: string, item: SorSyl) => { + const wordRhyme = syls.syls.reduce((acc: string, itemm: SorBSyl) => { + const item = itemm.ipa; if (!item.stressed && !acc) return acc; if (item.stressed && !acc) return `${acc}${item.rhyme}`; - else return `${acc}${item.ipa}`; + else return `${acc}${item.all}`; }, ""); if (wordRhyme) pdb.addWordRhyme(wordId, wordRhyme, j.lang_code, wikiRhyme); - // + for (let i = 0; i < syls.syls.length; i++) { const syl = syls.syls[i]!; - await handleSyllable(word, syl.ipa, wordId, i); + const res = await handleSyllable(syl, wordId, i); + if ("error" in res) return res; } + return { ok: "" }; } async function handleSyllable( - spelling: string, - ipa: string, + syl: SorBSyl, wordId: number | bigint, idx: number, -) { - const sorsyl = await sorSyl(spelling, "th", ipa); - if (sorsyl.syls.length !== 1) throw new Error("wtf sorsyl!"); - const syl = sorsyl.syls[0]!; +): AsyncRes<string> { try { pdb.addSyllable( wordId, idx + 1, + syl.ipa.stressed, "th", - syl.ipa, - syl.long, - spelling, - { spelling: syl.onset, ipa: syl.onset }, - { spelling: syl.medial, ipa: syl.medial }, - { spelling: syl.nucleus, ipa: syl.nucleus }, - { spelling: syl.coda, ipa: syl.coda }, - { spelling: syl.rhyme, ipa: syl.rhyme }, + syl.ipa.all, + syl.ipa.long, + syl.spelling.all, + { spelling: syl.spelling.onset, ipa: syl.ipa.onset }, + { spelling: syl.spelling.medial, ipa: syl.ipa.medial }, + { spelling: syl.spelling.nucleus, ipa: syl.ipa.nucleus }, + { spelling: syl.spelling.coda, ipa: syl.ipa.coda }, + { spelling: syl.spelling.rhyme, ipa: syl.ipa.rhyme }, { letters: "", numbers: 0, name: "" }, null, ); + return { ok: "" }; } catch (e) { // console.log("well fuck", syl); // console.error(e); - console.log(); + return { error: `${e}` }; } } -async function handleIdiom(lang: string, idiom: string) { - pdb.addIdiom(idiom, lang); - // TODO later set idiom_words once all words are populated - // console.log(); +async function handleIdiom(lang: string, idiom: string): AsyncRes<string> { + try { + pdb.addIdiom(idiom, lang); + // TODO later set idiom_words once all words are populated + // console.log(); + return { ok: "" }; + } catch (e) { + return { error: `${e}` }; + } } // ช้า ๆ // งก ๆ |