import Database from "bun:sqlite"; import { Phoneme, Tone } from "../types/phonetics"; import { ProsodyWord, ProsodyWordDB } from "../types/cards"; type Str = string | null; type ItemType = "word" | "syllable" | "idiom"; class DatabaseHandler { db: Database; constructor() { // const dbPath = "/home/y/code/bun/ssr/waku/bulkdata/phon.db"; const dbPath = "/home/y/code/bun/ssr/waku/bulkdata/thaiphon.db"; const db = new Database(dbPath, { create: true }); db.exec("PRAGMA journal_mode = WAL"); // Enable Write-Ahead Logging for better performance db.exec("PRAGMA foreign_keys = ON"); this.db = db; } async init() { const file = Bun.file("./prosodyschema.sql"); const sql = await file.text(); this.db.exec(sql); } // selects fetchFrequent(lang: string) { const query = this.db.query( `SELECT w.id, w.spelling, w.lang, w.frequency, w.lang, wp.ipa, wp.syllables, wp.tag, w.notes, (SELECT json_group_array(json_object( 'ipa', s.ipa, 'spelling', s.text, 'long', s.long, 'notes', s.notes, 'onseto', os.text, 'onset', os.ipa, 'nucleuso', ns.text, 'nucleus', ns.ipa, 'codao', co.text, 'coda', co.ipa, 'rhymeo', rh.text, 'rhyme', rh.ipa, 'tonen', tns.name, 'tonenm', tns.nums, 'tone', tns.ipa ) ) FROM syllables s JOIN onsets os ON os.id = s.onset JOIN nucleus ns ON ns.id = s.nucleus JOIN codas co ON co.id = s.coda JOIN rhymes rh ON rh.id = s.rhyme JOIN tones tns ON tns.id = s.tone WHERE s.id= sw.syl_id ) as syllables FROM words w JOIN word_phonetics wp ON wp.word_id = w.id JOIN syllables_words sw ON sw.word_id = w.id WHERE w.frequency IS NOT NULL AND w.lang = ? ORDER BY w.frequency ASC LIMIT 300 `, ); return query.all(lang) as ProsodyWordDB[]; } fetchWords(words: string[]) { const query = this.db.query( `SELECT id FROM words where spelling IN (${words.map((w) => `'${w}'`).join(", ")})`, ); return query.all() as Array<{ id: number }>; } fetchSyllables(words: string[]) { const query = this.db.query( `SELECT id FROM words where spelling IN (${words.map((w) => `'${w}'`).join(", ")})`, ); return query.all() as Array<{ id: number }>; } fetchOnsets(onset: string) { const query = this.db.query( `SELECT w.id, w.spelling, w.frequency, wp.ipa FROM words w JOIN word_phonetics wp ON wp.word_id = w.id JOIN syllables_words sw ON sw.word_id = w.id JOIN syllables s ON s.id = sw.syl_id JOIN onsets os ON os.id = syl.onset `, ); return query.all(onset) as any[]; } // tones fetchWordsByToneAndSyls(tones: Array) { const toneString = tones.reduce((acc: string, item) => { if (!item) return `${acc},%`; else return `${acc},${item}`; }, ""); console.log({ toneString }); const query = this.db.query( ` WITH word_tone_sequences AS ( SELECT w.id as word_id, w.spelling, wp.ipa, w.frequency, GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, COUNT(sw.syl_id) as syllable_count FROM words w JOIN word_phonetics wp ON w.id = wp.word_id JOIN syllables_words sw ON w.id = sw.word_id JOIN syllables s ON sw.syl_id = s.id JOIN tones t ON s.tone = t.id GROUP BY w.id, w.spelling, w.lang, w.frequency ) SELECT word_id, spelling, ipa, frequency, tone_sequence, syllable_count FROM word_tone_sequences WHERE tone_sequence LIKE ? AND syllable_count = ? ORDER BY frequency DESC NULLS LAST; `, ); return query.all(toneString.slice(1), tones.length) as any[]; } // inserts addLanguage(code: string, name: string) { const query = this.db .query(`INSERT OR IGNORE INTO languages(iso6392, english) VALUES(?, ?)`) .run(code, name); } addPronunciation( wordId: number | bigint, ipa: string, syllables: number, tags: Str, notes: Str, ) { const query = this.db .query( `INSERT OR IGNORE INTO word_phonetics(word_id,ipa, syllables, tag, notes) VALUES(?, ?, ?, ?, ?)`, ) .run(wordId, ipa, syllables, tags, notes); } addWordRhyme(wordId: number | bigint, ipa: string, lang: string, notes: Str) { const query = this.db .query( `INSERT INTO word_rhymes(text, lang, notes) VALUES(?, ?, ?) ON CONFLICT(text,lang) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(ipa, lang, notes) as { id: number }; const query2 = this.db .query( ` INSERT INTO words_wrhymes(word_id, wrhyme_id) VALUES(?, ?) `, ) .run(wordId, query.id); } addIdiom(spelling: string, lang: string) { const query = this.db.query( `INSERT OR IGNORE INTO idioms(spelling, lang) VALUES(?, ?)`, ); const res = query.run(spelling, lang); return res; } findIdiomWords(spelling: string, idId: number | bigint) { const split = spelling.split(" "); const words = this.fetchWords(split); console.log({ words }); const tx = this.db.transaction(() => { for (const w of words) { this.db .query( ` INSERT INTO words_idioms(word_id, idiom_id) VALUES(?, ?) `, ) .run(w.id, idId); } }); tx(); } findIdiomsWords() { const rows: any = this.db.query(`SELECT id, spelling FROM idioms`); for (const row of rows) { this.findIdiomWords(row.spelling, row.id); } } addWord( spelling: string, lang: string, frequency: number | null, notes: Str, ) { const query = this.db.query( `INSERT OR IGNORE INTO words(spelling, lang, frequency, notes) VALUES(?, ?, ?, ?)`, // `INSERT INTO words(spelling, lang) VALUES(?, ?)`, ); const res = query.run(spelling, lang, frequency, notes); const wordId = res.lastInsertRowid; return wordId; } addSyllable( wordId: number | bigint, sylIdx: number, stressed: boolean | null, lang: string, ipa: string, long: boolean, text: string, onset: Phoneme, medial: Phoneme, nucleus: Phoneme, coda: Phoneme, rhyme: Phoneme, tone: Tone, notes: Str, ) { const tx = this.db.transaction(() => { const onsetId = this.db .query( `INSERT INTO onsets(ipa, lang, text) VALUES(?, ?, ?) ON CONFLICT(ipa, lang, text) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(onset.ipa, lang, onset.spelling) as { id: number }; const medialId = this.db .query( `INSERT INTO medials(ipa, lang, text) VALUES(?, ?, ?) ON CONFLICT(ipa, lang, text) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(medial.ipa, lang, medial.spelling) as { id: number }; const nucleusId = this.db .query( `INSERT INTO nucleus(ipa, lang, text) VALUES(?, ?, ?) ON CONFLICT(ipa, lang, text) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(nucleus.ipa, lang, nucleus.spelling) as { id: number }; const codaId = this.db .query( `INSERT INTO codas(ipa, lang, text) VALUES(?, ?, ?) ON CONFLICT(ipa, lang, text) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(coda.ipa, lang, coda.spelling) as { id: number }; const rhymeId = this.db .query( `INSERT INTO rhymes(ipa, lang, text) VALUES(?, ?, ?) ON CONFLICT(ipa, lang, text) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(rhyme.ipa, lang, rhyme.spelling) as { id: number }; const toneId = this.db .query( `INSERT INTO tones(ipa, lang, name, nums) VALUES(?, ?, ?, ?) ON CONFLICT(ipa, lang) DO UPDATE SET ipa = excluded.ipa RETURNING rowid `, ) .get(tone.letters, lang, tone.name, tone.numbers) as { id: number }; const query = this.db.query( `INSERT INTO syllables(lang, ipa, long, text, onset, medial, nucleus, coda, rhyme, tone, notes) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, ); const res = query.run( lang, ipa, long, text, onsetId.id, medialId.id, nucleusId.id, codaId.id, rhymeId.id, toneId.id, notes, ); const sylId = res.lastInsertRowid; // const res1 = this.db .query( `INSERT INTO syllables_words(syl_id, word_id, idx, stressed) VALUES(?, ?, ?, ?)`, ) .run(sylId, wordId, sylIdx, stressed); // return sylId; }); const sylId = tx(); } // reads } const db = new DatabaseHandler(); export default db;