summaryrefslogtreecommitdiff
path: root/src/lib/db/prosodydb.ts
diff options
context:
space:
mode:
authorpolwex <polwex@sortug.com>2025-06-03 09:34:29 +0700
committerpolwex <polwex@sortug.com>2025-06-03 09:34:29 +0700
commit2401217a4019938d1c1cc61b6e33ccb233eb6e74 (patch)
tree06118284965be5cfd6b417dca86d46db5758217b /src/lib/db/prosodydb.ts
parent2b80f7950df34f2a160135d7e20220a9b2ec3352 (diff)
this is golden thanks claude
Diffstat (limited to 'src/lib/db/prosodydb.ts')
-rw-r--r--src/lib/db/prosodydb.ts120
1 files changed, 117 insertions, 3 deletions
diff --git a/src/lib/db/prosodydb.ts b/src/lib/db/prosodydb.ts
index 9e76b8d..d6da389 100644
--- a/src/lib/db/prosodydb.ts
+++ b/src/lib/db/prosodydb.ts
@@ -1,12 +1,14 @@
import Database from "bun:sqlite";
import { Phoneme, Tone } from "../types/phonetics";
+import { ProsodyWord, ProsodyWordDB } from "../types/cards";
type Str = string | null;
type ItemType = "word" | "syllable" | "idiom";
class DatabaseHandler {
db: Database;
constructor() {
- const dbPath = "/home/y/code/bun/ssr/waku/bulkdata/phon.db";
+ // const dbPath = "/home/y/code/bun/ssr/waku/bulkdata/phon.db";
+ const dbPath = "/home/y/code/bun/ssr/waku/bulkdata/thaiphon.db";
const db = new Database(dbPath, { create: true });
db.exec("PRAGMA journal_mode = WAL"); // Enable Write-Ahead Logging for better performance
db.exec("PRAGMA foreign_keys = ON");
@@ -18,12 +20,123 @@ class DatabaseHandler {
this.db.exec(sql);
}
// selects
+ fetchFrequent(lang: string) {
+ const query = this.db.query(
+ `SELECT
+ w.id,
+ w.spelling,
+ w.lang,
+ w.frequency,
+ w.lang,
+ wp.ipa,
+ wp.syllables,
+ wp.tag,
+ w.notes,
+ (SELECT
+ json_group_array(json_object(
+ 'ipa', s.ipa,
+ 'spelling', s.text,
+ 'long', s.long,
+ 'notes', s.notes,
+ 'onseto', os.text,
+ 'onset', os.ipa,
+ 'nucleuso', ns.text,
+ 'nucleus', ns.ipa,
+ 'codao', co.text,
+ 'coda', co.ipa,
+ 'rhymeo', rh.text,
+ 'rhyme', rh.ipa,
+ 'tonen', tns.name,
+ 'tonenm', tns.nums,
+ 'tone', tns.ipa
+ )
+ )
+ FROM syllables s
+ JOIN onsets os ON os.id = s.onset
+ JOIN nucleus ns ON ns.id = s.nucleus
+ JOIN codas co ON co.id = s.coda
+ JOIN rhymes rh ON rh.id = s.rhyme
+ JOIN tones tns ON tns.id = s.tone
+ WHERE s.id= sw.syl_id
+ ) as syllables
+ FROM words w
+ JOIN word_phonetics wp ON wp.word_id = w.id
+ JOIN syllables_words sw ON sw.word_id = w.id
+ WHERE w.frequency IS NOT NULL
+ AND w.lang = ?
+ ORDER BY w.frequency ASC
+ LIMIT 300
+ `,
+ );
+ return query.all(lang) as ProsodyWordDB[];
+ }
fetchWords(words: string[]) {
const query = this.db.query(
`SELECT id FROM words where spelling IN (${words.map((w) => `'${w}'`).join(", ")})`,
);
return query.all() as Array<{ id: number }>;
}
+ fetchSyllables(words: string[]) {
+ const query = this.db.query(
+ `SELECT id FROM words where spelling IN (${words.map((w) => `'${w}'`).join(", ")})`,
+ );
+ return query.all() as Array<{ id: number }>;
+ }
+ fetchOnsets(onset: string) {
+ const query = this.db.query(
+ `SELECT
+ w.id,
+ w.spelling,
+ w.frequency,
+ wp.ipa
+ FROM words w
+ JOIN word_phonetics wp ON wp.word_id = w.id
+ JOIN syllables_words sw ON sw.word_id = w.id
+ JOIN syllables s ON s.id = sw.syl_id
+ JOIN onsets os ON os.id = syl.onset
+ `,
+ );
+ return query.all(onset) as any[];
+ }
+ // tones
+ fetchWordsByToneAndSyls(tones: Array<string | null>) {
+ const toneString = tones.reduce((acc: string, item) => {
+ if (!item) return `${acc},%`;
+ else return `${acc},${item}`;
+ }, "");
+ console.log({ toneString });
+ const query = this.db.query(
+ `
+ WITH word_tone_sequences AS (
+ SELECT
+ w.id as word_id,
+ w.spelling,
+ wp.ipa,
+ w.frequency,
+ GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
+ COUNT(sw.syl_id) as syllable_count
+ FROM words w
+ JOIN word_phonetics wp ON w.id = wp.word_id
+ JOIN syllables_words sw ON w.id = sw.word_id
+ JOIN syllables s ON sw.syl_id = s.id
+ JOIN tones t ON s.tone = t.id
+ GROUP BY w.id, w.spelling, w.lang, w.frequency
+ )
+ SELECT
+ word_id,
+ spelling,
+ ipa,
+ frequency,
+ tone_sequence,
+ syllable_count
+ FROM word_tone_sequences
+ WHERE tone_sequence LIKE ?
+ AND syllable_count = ?
+ ORDER BY frequency DESC NULLS LAST;
+ `,
+ );
+ return query.all(toneString.slice(1), tones.length) as any[];
+ }
// inserts
addLanguage(code: string, name: string) {
@@ -109,6 +222,7 @@ class DatabaseHandler {
addSyllable(
wordId: number | bigint,
sylIdx: number,
+ stressed: boolean | null,
lang: string,
ipa: string,
long: boolean,
@@ -197,9 +311,9 @@ class DatabaseHandler {
//
const res1 = this.db
.query(
- `INSERT INTO syllables_words(syl_id, word_id, idx) VALUES(?, ?, ?)`,
+ `INSERT INTO syllables_words(syl_id, word_id, idx, stressed) VALUES(?, ?, ?, ?)`,
)
- .run(sylId, wordId, sylIdx);
+ .run(sylId, wordId, sylIdx, stressed);
//
return sylId;
});