summary refs log tree commit diff
path: root/src/lib/db/enseed.ts
diff options
context:
space:
mode:
author polwex <polwex@sortug.com> 2025-06-03 09:34:29 +0700
committer polwex <polwex@sortug.com> 2025-06-03 09:34:29 +0700
commit 2401217a4019938d1c1cc61b6e33ccb233eb6e74 (patch)
tree 06118284965be5cfd6b417dca86d46db5758217b /src/lib/db/enseed.ts
parent 2b80f7950df34f2a160135d7e20220a9b2ec3352 (diff)
this is golden thanks claude
Diffstat (limited to 'src/lib/db/enseed.ts')
-rw-r--r-- src/lib/db/enseed.ts 85
1 files changed, 53 insertions, 32 deletions
diff --git a/src/lib/db/enseed.ts b/src/lib/db/enseed.ts
index 58f5876..39dec44 100644
--- a/src/lib/db/enseed.ts
+++ b/src/lib/db/enseed.ts
@@ -7,12 +7,15 @@ import {
type ThaiNLPRes,
sorSyl,
getThaiFreq,
+ SorBSyl,
} from "../calls/nlp";
import pdb from "./prosodydb";
import { cleanIpa } from "../utils";
import { handleFile } from "./utils";
import { Tone } from "../types/phonetics";
+import { AsyncRes } from "../types";
+const errors: string[] = [];
async function readDump(lang: string) {
await pdb.init();
pdb.addLanguage("th", "thai");
@@ -27,14 +30,21 @@ async function readDump(lang: string) {
count++;
console.log(count);
// if (count <= 10000) continue;
- if (count > 30) break;
+ if (count > 300) break;
const j = JSON.parse(langrow.data);
const word = j.word.trim();
if (!word) continue;
const split = word.split(" ");
- if (split.length > 1) await handleIdiom(lang, word);
- else await handleWord(lang, word, j, freqMap);
+ const res =
+ split.length > 1
+ ? await handleIdiom(lang, word)
+ : await handleWord(lang, word, j, freqMap);
+ if ("error" in res) {
+ console.error(res.error);
+ break;
+ }
}
+ console.dir(errors);
}
async function handleWord(
@@ -42,7 +52,7 @@ async function handleWord(
word: string,
j: any,
freqMap: Map<string, number>,
-) {
+): AsyncRes<string> {
// TODO add categories but add a tag to see what classifying scheme we're using
//
const sounds = j.sounds || [];
@@ -50,9 +60,9 @@ async function handleWord(
const hwikiRhyme = sounds.find((s: any) => "rhymes" in s);
const wikiRhyme = hwikiRhyme ? hwikiRhyme.rhymes : null;
if (!hasIpa) {
- console.error("no ipa!!", word);
- console.dir(j, { depth: null });
- return;
+ // console.error("no ipa!!", word);
+ // console.dir(j, { depth: null });
+ return { error: "meh no ipa" };
}
const freq = freqMap.get(word) || null;
// const wordId = pdb.addWord(word, lang, freq, null);
@@ -60,7 +70,11 @@ async function handleWord(
const wordId = 0;
// console.log(analyzed);
for (let snd of sounds)
- if ("ipa" in snd) handleIpa(wordId, word, lang, j, snd, wikiRhyme);
+ if ("ipa" in snd) {
+ const res = await handleIpa(wordId, word, lang, j, snd, wikiRhyme);
+ if ("error" in res) return res;
+ }
+ return { ok: "" };
}
async function handleIpa(
wordId: number | bigint,
@@ -73,58 +87,65 @@ async function handleIpa(
const tags = JSON.stringify(snd.tags) || null;
const ipa = snd.ipa;
const syls = await sorSyl(word, lang, ipa);
+ // console.log(syls, "sorsyl");
console.log(word);
console.log(ipa);
- // pdb.addPronunciation(wordId, ipa, syls.syls.length, tags, null);
+ pdb.addPronunciation(wordId, ipa, syls.syls.length, tags, null);
// set word rhyme
- const wordRhyme = syls.syls.reduce((acc: string, item: SorSyl) => {
+ const wordRhyme = syls.syls.reduce((acc: string, itemm: SorBSyl) => {
+ const item = itemm.ipa;
if (!item.stressed && !acc) return acc;
if (item.stressed && !acc) return `${acc}${item.rhyme}`;
- else return `${acc}${item.ipa}`;
+ else return `${acc}${item.all}`;
}, "");
if (wordRhyme) pdb.addWordRhyme(wordId, wordRhyme, j.lang_code, wikiRhyme);
- //
+
for (let i = 0; i < syls.syls.length; i++) {
const syl = syls.syls[i]!;
- await handleSyllable(word, syl.ipa, wordId, i);
+ const res = await handleSyllable(syl, wordId, i);
+ if ("error" in res) return res;
}
+ return { ok: "" };
}
async function handleSyllable(
- spelling: string,
- ipa: string,
+ syl: SorBSyl,
wordId: number | bigint,
idx: number,
-) {
- const sorsyl = await sorSyl(spelling, "th", ipa);
- if (sorsyl.syls.length !== 1) throw new Error("wtf sorsyl!");
- const syl = sorsyl.syls[0]!;
+): AsyncRes<string> {
try {
pdb.addSyllable(
wordId,
idx + 1,
+ syl.ipa.stressed,
"th",
- syl.ipa,
- syl.long,
- spelling,
- { spelling: syl.onset, ipa: syl.onset },
- { spelling: syl.medial, ipa: syl.medial },
- { spelling: syl.nucleus, ipa: syl.nucleus },
- { spelling: syl.coda, ipa: syl.coda },
- { spelling: syl.rhyme, ipa: syl.rhyme },
+ syl.ipa.all,
+ syl.ipa.long,
+ syl.spelling.all,
+ { spelling: syl.spelling.onset, ipa: syl.ipa.onset },
+ { spelling: syl.spelling.medial, ipa: syl.ipa.medial },
+ { spelling: syl.spelling.nucleus, ipa: syl.ipa.nucleus },
+ { spelling: syl.spelling.coda, ipa: syl.ipa.coda },
+ { spelling: syl.spelling.rhyme, ipa: syl.ipa.rhyme },
{ letters: "", numbers: 0, name: "" },
null,
);
+ return { ok: "" };
} catch (e) {
// console.log("well fuck", syl);
// console.error(e);
- console.log();
+ return { error: `${e}` };
}
}
-async function handleIdiom(lang: string, idiom: string) {
- pdb.addIdiom(idiom, lang);
- // TODO later set idiom_words once all words are populated
- // console.log();
+async function handleIdiom(lang: string, idiom: string): AsyncRes<string> {
+ try {
+ pdb.addIdiom(idiom, lang);
+ // TODO later set idiom_words once all words are populated
+ // console.log();
+ return { ok: "" };
+ } catch (e) {
+ return { error: `${e}` };
+ }
}
// ช้า ๆ
// งก ๆ