diff options
Diffstat (limited to 'src/lib/db/prosodydb.ts')
-rw-r--r-- | src/lib/db/prosodydb.ts | 120 |
1 files changed, 117 insertions, 3 deletions
diff --git a/src/lib/db/prosodydb.ts b/src/lib/db/prosodydb.ts index 9e76b8d..d6da389 100644 --- a/src/lib/db/prosodydb.ts +++ b/src/lib/db/prosodydb.ts @@ -1,12 +1,14 @@ import Database from "bun:sqlite"; import { Phoneme, Tone } from "../types/phonetics"; +import { ProsodyWord, ProsodyWordDB } from "../types/cards"; type Str = string | null; type ItemType = "word" | "syllable" | "idiom"; class DatabaseHandler { db: Database; constructor() { - const dbPath = "/home/y/code/bun/ssr/waku/bulkdata/phon.db"; + // const dbPath = "/home/y/code/bun/ssr/waku/bulkdata/phon.db"; + const dbPath = "/home/y/code/bun/ssr/waku/bulkdata/thaiphon.db"; const db = new Database(dbPath, { create: true }); db.exec("PRAGMA journal_mode = WAL"); // Enable Write-Ahead Logging for better performance db.exec("PRAGMA foreign_keys = ON"); @@ -18,12 +20,123 @@ class DatabaseHandler { this.db.exec(sql); } // selects + fetchFrequent(lang: string) { + const query = this.db.query( + `SELECT + w.id, + w.spelling, + w.lang, + w.frequency, + w.lang, + wp.ipa, + wp.syllables, + wp.tag, + w.notes, + (SELECT + json_group_array(json_object( + 'ipa', s.ipa, + 'spelling', s.text, + 'long', s.long, + 'notes', s.notes, + 'onseto', os.text, + 'onset', os.ipa, + 'nucleuso', ns.text, + 'nucleus', ns.ipa, + 'codao', co.text, + 'coda', co.ipa, + 'rhymeo', rh.text, + 'rhyme', rh.ipa, + 'tonen', tns.name, + 'tonenm', tns.nums, + 'tone', tns.ipa + ) + ) + FROM syllables s + JOIN onsets os ON os.id = s.onset + JOIN nucleus ns ON ns.id = s.nucleus + JOIN codas co ON co.id = s.coda + JOIN rhymes rh ON rh.id = s.rhyme + JOIN tones tns ON tns.id = s.tone + WHERE s.id= sw.syl_id + ) as syllables + FROM words w + JOIN word_phonetics wp ON wp.word_id = w.id + JOIN syllables_words sw ON sw.word_id = w.id + WHERE w.frequency IS NOT NULL + AND w.lang = ? + ORDER BY w.frequency ASC + LIMIT 300 + `, + ); + return query.all(lang) as ProsodyWordDB[]; + } fetchWords(words: string[]) { const query = this.db.query( `SELECT id FROM words where spelling IN (${words.map((w) => `'${w}'`).join(", ")})`, ); return query.all() as Array<{ id: number }>; } + fetchSyllables(words: string[]) { + const query = this.db.query( + `SELECT id FROM words where spelling IN (${words.map((w) => `'${w}'`).join(", ")})`, + ); + return query.all() as Array<{ id: number }>; + } + fetchOnsets(onset: string) { + const query = this.db.query( + `SELECT + w.id, + w.spelling, + w.frequency, + wp.ipa + FROM words w + JOIN word_phonetics wp ON wp.word_id = w.id + JOIN syllables_words sw ON sw.word_id = w.id + JOIN syllables s ON s.id = sw.syl_id + JOIN onsets os ON os.id = syl.onset + `, + ); + return query.all(onset) as any[]; + } + // tones + fetchWordsByToneAndSyls(tones: Array<string | null>) { + const toneString = tones.reduce((acc: string, item) => { + if (!item) return `${acc},%`; + else return `${acc},${item}`; + }, ""); + console.log({ toneString }); + const query = this.db.query( + ` + WITH word_tone_sequences AS ( + SELECT + w.id as word_id, + w.spelling, + wp.ipa, + w.frequency, + GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, + COUNT(sw.syl_id) as syllable_count + FROM words w + JOIN word_phonetics wp ON w.id = wp.word_id + JOIN syllables_words sw ON w.id = sw.word_id + JOIN syllables s ON sw.syl_id = s.id + JOIN tones t ON s.tone = t.id + GROUP BY w.id, w.spelling, w.lang, w.frequency + ) + SELECT + word_id, + spelling, + ipa, + frequency, + tone_sequence, + syllable_count + FROM word_tone_sequences + WHERE tone_sequence LIKE ? + AND syllable_count = ? + ORDER BY frequency DESC NULLS LAST; + `, + ); + return query.all(toneString.slice(1), tones.length) as any[]; + } // inserts addLanguage(code: string, name: string) { @@ -109,6 +222,7 @@ class DatabaseHandler { addSyllable( wordId: number | bigint, sylIdx: number, + stressed: boolean | null, lang: string, ipa: string, long: boolean, @@ -197,9 +311,9 @@ class DatabaseHandler { // const res1 = this.db .query( - `INSERT INTO syllables_words(syl_id, word_id, idx) VALUES(?, ?, ?)`, + `INSERT INTO syllables_words(syl_id, word_id, idx, stressed) VALUES(?, ?, ?, ?)`, ) - .run(sylId, wordId, sylIdx); + .run(sylId, wordId, sylIdx, stressed); // return sylId; }); |