import Database from "bun:sqlite"; import { MutationOrder, Phoneme, PhoneticData, Tone } from "../types/phonetics"; import { ProsodyWord, ProsodyWordDB } from "../types/cards"; type Str = string | null; type ItemType = "word" | "syllable" | "idiom"; class DatabaseHandler { db: Database; constructor() { const dbPath = "/home/y/code/bun/ssr/sorlang/bulkdata/thaiphon.db"; // const dbPath = "/home/y/code/bun/ssr/sorlang/bulkdata/enphon.db"; const db = new Database(dbPath, { create: true }); db.exec("PRAGMA journal_mode = WAL"); // Enable Write-Ahead Logging for better performance db.exec("PRAGMA foreign_keys = ON"); db.exec("PRAGMA cache_size = -8000"); // Increase cache size to 8MB db.exec("PRAGMA temp_store = MEMORY"); // Store temp tables in memory db.exec("PRAGMA synchronous = NORMAL"); // Slightly less safe but faster this.db = db; } async init() { const file = Bun.file("./prosodyschema.sql"); const sql = await file.text(); this.db.exec(sql); } // selects fetchFrequent(lang: string) { const query = this.db.query( `SELECT w.id, w.spelling, w.lang, w.frequency, w.lang, wp.ipa, wp.syllables, wp.tag, w.notes, (SELECT json_group_array(json_object( 'ipa', s.ipa, 'spelling', s.text, 'long', s.long, 'notes', s.notes, 'onseto', os.text, 'onset', os.ipa, 'nucleuso', ns.text, 'nucleus', ns.ipa, 'codao', co.text, 'coda', co.ipa, 'rhymeo', rh.text, 'rhyme', rh.ipa, 'tonen', tns.name, 'tonenm', tns.nums, 'tone', tns.ipa ) ) FROM syllables s JOIN onsets os ON os.id = s.onset JOIN nucleus ns ON ns.id = s.nucleus JOIN codas co ON co.id = s.coda JOIN rhymes rh ON rh.id = s.rhyme JOIN tones tns ON tns.id = s.tone WHERE s.id= sw.syl_id ) as syllables FROM words w JOIN word_phonetics wp ON wp.word_id = w.id JOIN syllables_words sw ON sw.word_id = w.id WHERE w.lang = ? ORDER BY w.frequency ASC NULLS LAST LIMIT 300 `, ); return query.all(lang) as ProsodyWordDB[]; } fetchWords(words: string[]) { const query = this.db.query( `SELECT id FROM words where spelling IN (${words.map((w) => `'${w}'`).join(", ")})`, ); return query.all() as Array<{ id: number }>; } fetchSyllables(words: string[]) { const query = this.db.query( `SELECT id FROM words where spelling IN (${words.map((w) => `'${w}'`).join(", ")})`, ); return query.all() as Array<{ id: number }>; } fetchOnsets(onset: string) { const query = this.db.query( `SELECT w.id, w.spelling, w.frequency, wp.ipa FROM words w JOIN word_phonetics wp ON wp.word_id = w.id JOIN syllables_words sw ON sw.word_id = w.id JOIN syllables s ON s.id = sw.syl_id JOIN onsets os ON os.id = syl.onset `, ); return query.all(onset) as any[]; } // tones fetchWordsByToneAndSyls(tones: Array): PhoneticData[] { const toneString = tones .reduce((acc: string, item) => { if (!item) return `${acc},%`; else return `${acc},${item}`; }, "") .slice(1); console.log({ toneString }); const query = this.db.query( ` WITH word_tone_sequences AS ( SELECT w.id as word_id, w.spelling, wp.ipa, w.frequency, GROUP_CONCAT(s.text ORDER BY sw.idx) as syl_seq, GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, COUNT(sw.syl_id) as syllable_count FROM words w JOIN word_phonetics wp ON w.id = wp.word_id JOIN syllables_words sw ON w.id = sw.word_id JOIN syllables s ON sw.syl_id = s.id JOIN tones t ON s.tone = t.id GROUP BY w.id, w.spelling, w.lang, w.frequency ) SELECT word_id, spelling, ipa, frequency, syl_seq, tone_sequence, syllable_count FROM word_tone_sequences WHERE tone_sequence LIKE ? AND syllable_count = ? ORDER BY frequency ASC NULLS LAST; `, ); return query.all(toneString, tones.length) as any[]; } // fetchWordsByToneAndSyls(tones: Array) { // const toneString = tones.reduce((acc: string, item) => { // if (!item) return `${acc},%`; // else return `${acc},${item}`; // }, ""); // console.log({ toneString }); // const query = this.db.query( // ` // WITH word_tone_sequences AS ( // SELECT // w.id as word_id, // w.spelling, // wp.ipa, // w.frequency, // GROUP_CONCAT(s.text ORDER BY sw.idx) as syl_seq, // GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, // COUNT(sw.syl_id) as syllable_count // FROM words w // JOIN word_phonetics wp ON w.id = wp.word_id // JOIN syllables_words sw ON w.id = sw.word_id // JOIN syllables s ON sw.syl_id = s.id // JOIN tones t ON s.tone = t.id // GROUP BY w.id, w.spelling, w.lang, w.frequency // ) // SELECT // word_id, // spelling, // ipa, // frequency, // syl_seq, // tone_sequence, // syllable_count // FROM word_tone_sequences // WHERE tone_sequence LIKE ? // AND syllable_count = ? // ORDER BY frequency DESC NULLS LAST; // `, // ); // return query.all(toneString.slice(1), tones.length) as any[]; // } fetchWordsByToneSylsWords(order: MutationOrder) { console.log({ order }); type Acc = { tones: string; syls: string }; const strings = order.reduce( (acc: Acc, item, idx) => { const startString = idx === 0 ? "" : ","; if ("change" in item) return { tones: `${acc.tones}${startString}${item.change}`, syls: `${acc.syls}${startString}%`, }; else return { tones: `${acc.tones}${startString}%`, syls: `${acc.syls}${startString}${item.keep}`, }; }, { tones: "", syls: "" }, ); const query = this.db.query(` SELECT w.id as word_id, w.spelling, w.lang, w.frequency, wp.ipa, wp.syllable_sequence, wp.tone_sequence, wp.ipa_sequence, GROUP_CONCAT(s.text ORDER BY sw.idx) as syllable_pattern, GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_pattern FROM words w JOIN syllables_words sw ON w.id = sw.word_id JOIN syllables s ON sw.syl_id = s.id JOIN tones t ON s.tone = t.id JOIN word_phonetics wp ON wp.word_id= w.id WHERE wp.syllable_sequence LIKE ?1 AND tone_sequence LIKE ?2 AND syllable_count = ?3 GROUP BY w.id, w.spelling, w.lang, w.frequency ORDER BY w.frequency ASC NULLS LAST; `); return query.all(strings.syls, strings.tones, order.length) as any[]; } // inserts superAdd(p: { word: string; lang: string; frequency: number | null; wordNotes: Str; phonetics: Array<{ ipa: string; syllable_count: number; syllable_sequence: string; tone_sequence: string; ipa_sequence: string; tags: Str; notes: Str; wordRhyme: Str; syllables: Array<{ idx: number; stressed: boolean | null; spelling: string; ipa: string; long: boolean; onset: Phoneme; medial: Phoneme; nucleus: Phoneme; coda: Phoneme; rhyme: Phoneme; tone: Tone; notes: Str; }>; }>; }) { const tx = this.db.transaction(() => { const wordId = this.addWord(p.word, p.lang, p.frequency, p.wordNotes); for (const ph of p.phonetics) { this.addPronunciation( wordId, ph.ipa, ph.syllable_count, ph.syllable_sequence, ph.tone_sequence, ph.ipa_sequence, ph.tags, ph.notes, ); for (const syl of ph.syllables) { this.addSyllable( wordId, syl.idx, syl.stressed, p.lang, syl.ipa, syl.long, syl.spelling, syl.onset, syl.medial, syl.nucleus, syl.coda, syl.rhyme, syl.tone, syl.notes, ); } } }); tx(); } addLanguage(code: string, name: string) { const query = this.db .query(`INSERT OR IGNORE INTO languages(iso6392, english) VALUES(?, ?)`) .run(code, name); } addPronunciation( wordId: number | bigint, ipa: string, syllable_count: number, syllable_sequence: string, tone_sequence: string, ipa_sequence: string, tags: Str, notes: Str, ) { console.log({ wordId, ipa, syllable_count, syllable_sequence, tone_sequence, ipa_sequence, }); const query = this.db .query( `INSERT OR IGNORE INTO word_phonetics( word_id, ipa, syllable_count, syllable_sequence, tone_sequence, ipa_sequence, tag, notes) VALUES(?, ?, ?, ?, ?, ?, ?, ?)`, ) .run( wordId, ipa, syllable_count, syllable_sequence, tone_sequence, ipa_sequence, tags, notes, ); } addWordRhyme(wordId: number | bigint, ipa: string, lang: string, notes: Str) { const query = this.db .query( `INSERT INTO word_rhymes(text, lang, notes) VALUES(?, ?, ?) ON CONFLICT(text,lang) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(ipa, lang, notes) as { id: number }; const query2 = this.db .query( ` INSERT INTO words_wrhymes(word_id, wrhyme_id) VALUES(?, ?) `, ) .run(wordId, query.id); } addIdiom(spelling: string, lang: string) { const query = this.db.query( `INSERT OR IGNORE INTO idioms(spelling, lang) VALUES(?, ?)`, ); const res = query.run(spelling, lang); return res; } findIdiomWords(spelling: string, idId: number | bigint) { const split = spelling.split(" "); const words = this.fetchWords(split); console.log({ words }); const tx = this.db.transaction(() => { for (const w of words) { this.db .query( ` INSERT INTO words_idioms(word_id, idiom_id) VALUES(?, ?) `, ) .run(w.id, idId); } }); tx(); } findIdiomsWords() { const rows: any = this.db.query(`SELECT id, spelling FROM idioms`); for (const row of rows) { this.findIdiomWords(row.spelling, row.id); } } addWord( spelling: string, lang: string, frequency: number | null, notes: Str, ) { const query = this.db.query( `INSERT INTO words(spelling, lang, frequency, notes) VALUES(?, ?, ?, ?) ON CONFLICT(spelling, lang) DO UPDATE SET lang = excluded.lang RETURNING rowid `, ); const res = query.get(spelling, lang, frequency, notes) as { id: number }; return res.id; } addSyllable( wordId: number | bigint, sylIdx: number, stressed: boolean | null, lang: string, ipa: string, long: boolean, text: string, onset: Phoneme, medial: Phoneme, nucleus: Phoneme, coda: Phoneme, rhyme: Phoneme, tone: Tone, notes: Str, ) { const tx = this.db.transaction(() => { const onsetId = this.db .query( `INSERT INTO onsets(ipa, lang, text) VALUES(?, ?, ?) ON CONFLICT(ipa, lang, text) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(onset.ipa, lang, onset.spelling) as { id: number }; const medialId = this.db .query( `INSERT INTO medials(ipa, lang, text) VALUES(?, ?, ?) ON CONFLICT(ipa, lang, text) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(medial.ipa, lang, medial.spelling) as { id: number }; const nucleusId = this.db .query( `INSERT INTO nucleus(ipa, lang, text) VALUES(?, ?, ?) ON CONFLICT(ipa, lang, text) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(nucleus.ipa, lang, nucleus.spelling) as { id: number }; const codaId = this.db .query( `INSERT INTO codas(ipa, lang, text) VALUES(?, ?, ?) ON CONFLICT(ipa, lang, text) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(coda.ipa, lang, coda.spelling) as { id: number }; const rhymeId = this.db .query( `INSERT INTO rhymes(ipa, lang, text) VALUES(?, ?, ?) ON CONFLICT(ipa, lang, text) DO UPDATE SET text = excluded.text RETURNING rowid `, ) .get(rhyme.ipa, lang, rhyme.spelling) as { id: number }; const toneId = this.db .query( `INSERT INTO tones(ipa, lang, name, nums) VALUES(?, ?, ?, ?) ON CONFLICT(ipa, lang) DO UPDATE SET ipa = excluded.ipa RETURNING rowid `, ) .get(tone.letters, lang, tone.name, tone.numbers) as { id: number }; const query = this.db.query( `INSERT INTO syllables( lang, ipa, long, text, onset, medial, nucleus, coda, rhyme, tone, notes) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(text, ipa, lang) DO UPDATE SET lang = excluded.lang RETURNING rowid `, ); const res = query.get( lang, ipa, long, text, onsetId.id, medialId.id, nucleusId.id, codaId.id, rhymeId.id, toneId.id, notes, ) as { id: number }; const sylId = res.id; // const res1 = this.db .query( `INSERT INTO syllables_words(syl_id, word_id, idx, stressed) VALUES(?, ?, ?, ?)`, ) .run(sylId, wordId, sylIdx, stressed); // return sylId; }); const sylId = tx(); } // reads } const db = new DatabaseHandler(); export default db;