summaryrefslogtreecommitdiff
path: root/packages/db/src
diff options
context:
space:
mode:
Diffstat (limited to 'packages/db/src')
-rw-r--r--packages/db/src/index.ts253
-rw-r--r--packages/db/src/migration.ts430
-rw-r--r--packages/db/src/phonetics.ts523
-rw-r--r--packages/db/src/schema.sql729
-rw-r--r--packages/db/src/semantic.ts554
-rw-r--r--packages/db/src/srs.ts402
-rw-r--r--packages/db/src/test.ts149
-rw-r--r--packages/db/src/types.ts84
-rw-r--r--packages/db/src/users.ts305
9 files changed, 3429 insertions, 0 deletions
diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts
new file mode 100644
index 0000000..6a89ee2
--- /dev/null
+++ b/packages/db/src/index.ts
@@ -0,0 +1,253 @@
+import { Database } from "bun:sqlite";
+import type { FullWordDataDB } from "./types";
+import { SRSQueries } from "./srs";
+
+export class Queries {
+ db: Database;
+ srs: SRSQueries;
+ constructor() {
+ const db = new Database("/home/y/code/bun/sorlang/bulkdata/unified.db");
+ db.exec("PRAGMA journal_mode = WAL"); // Enable Write-Ahead Logging for better performance
+ db.exec("PRAGMA foreign_keys = ON");
+ db.exec("PRAGMA cache_size = -8000"); // Increase cache size to 8MB
+ db.exec("PRAGMA mmap_size = 30000000000");
+ db.exec("PRAGMA temp_store = MEMORY"); // Store temp tables in memory
+ db.exec("PRAGMA synchronous = NORMAL"); // Slightly less safe but faster
+
+ this.db = db;
+ this.srs = new SRSQueries(db);
+ }
+ fetchExpressionById(id: number) {
+ const query = this.db.query(
+ `
+ SELECT * FROM expressions WHERE id = ?
+ `,
+ );
+ return query.get(id) as any;
+ }
+ fetchWordsByToneAndSyls1(tones: Array<string | null>) {
+ const toneString = tones
+ .reduce((acc: string, item) => {
+ if (!item) return `${acc},%`;
+ else return `${acc},${item}`;
+ }, "")
+ .slice(1);
+ const query = this.db.query(
+ `
+ WITH word_tone_sequences AS (
+ SELECT
+ wp.ipa,
+ GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq,
+ GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
+ COUNT(sw.syl_id) as syllable_count
+ FROM word_phonetics wp
+ JOIN syllables_words sw ON wp.id = sw.word_id
+ JOIN syllables sy ON sw.syl_id = sy.id
+ JOIN tones t ON sy.tone = t.id
+ GROUP BY wp.ipa
+ )
+ SELECT *
+ FROM word_tone_sequences
+ WHERE tone_sequence LIKE ?
+ AND syllable_count = ?
+ `,
+ );
+ return query.all(toneString, tones.length) as any[];
+ }
+ fetchWordsByToneAndSylsO(tones: Array<string | null>) {
+ const toneString = tones
+ .reduce((acc: string, item) => {
+ if (!item) return `${acc},%`;
+ else return `${acc},${item}`;
+ }, "")
+ .slice(1);
+ const query = this.db.query(
+ `
+ WITH word_tone_sequences AS (
+ SELECT
+ w.id as word_id,
+ w.spelling,
+ wp.ipa,
+ w.frequency,
+ GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq,
+ GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
+ COUNT(sw.syl_id) as syllable_count
+ FROM expressions w
+ JOIN word_phonetics wp ON w.id = wp.word_id
+ JOIN syllables_words sw ON wp.id = sw.word_id
+ JOIN syllables sy ON sw.syl_id = sy.id
+ JOIN tones t ON sy.tone = t.id
+ GROUP BY w.id, w.spelling, w.lang, w.frequency
+ )
+ SELECT word_id,
+ spelling,
+ ipa,
+ frequency,
+ syl_seq,
+ tone_sequence,
+ syllable_count
+ FROM word_tone_sequences
+ WHERE tone_sequence LIKE ?
+ AND syllable_count = ?
+ ORDER BY frequency ASC NULLS LAST;
+ `,
+ );
+ // TODO combine with this old query to get both phonetic and semantic data
+ // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table
+
+ return query.all(toneString, tones.length) as any[];
+ }
+ fetchWordsByToneAndSyls(tones: Array<string | null>): FullWordDataDB[] {
+ const toneString = tones
+ .reduce((acc: string, item) => {
+ if (!item) return `${acc},%`;
+ else return `${acc},${item}`;
+ }, "")
+ .slice(1);
+ const query = this.db.query(
+ `
+ WITH word_tone_sequences AS (
+ SELECT
+ w.id as word_id,
+ w.spelling,
+ wp.ipa,
+ w.frequency,
+ GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq,
+ GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
+ COUNT(sw.syl_id) as syllable_count,
+ (SELECT
+ json_group_array(json_object(
+ 'id', s.id,
+ 'pos', s.pos,
+ 'etymology', s.etymology,
+ 'confidence', s.confidence,
+ 'glosses', (
+ SELECT json_group_array(ss.gloss)
+ FROM subsenses ss
+ WHERE ss.sid = s.id
+ ),
+ 'examples', (
+ SELECT json_group_array(json_object(
+ 'example', ex.example,
+ 'ref', ex.ref
+ ))
+ FROM examples ex
+ WHERE ex.sid = s.id
+ ),
+ 'derivation', (
+ SELECT json_group_array(json_object(
+ 'type', d.type,
+ 'text', d.text,
+ 'tags', d.tags
+ ))
+ FROM derivation d
+ WHERE d.sid = s.id
+ ),
+ 'categories', (
+ SELECT json_group_array(wc.category)
+ FROM word_categories wc
+ WHERE wc.word_id = s.id
+ )
+ ))
+ FROM senses s
+ WHERE s.parent_id = w.id
+ ) as senses_array
+ FROM expressions w
+ JOIN word_phonetics wp ON w.id = wp.word_id
+ JOIN syllables_words sw ON wp.id = sw.word_id
+ JOIN syllables sy ON sw.syl_id = sy.id
+ JOIN tones t ON sy.tone = t.id
+ GROUP BY w.id, w.spelling, w.lang, w.frequency
+ )
+ SELECT *
+ FROM word_tone_sequences
+ WHERE tone_sequence LIKE ?
+ AND syllable_count = ?
+ ORDER BY frequency ASC NULLS LAST;
+ `,
+ );
+ // TODO combine with this old query to get both phonetic and semantic data
+ // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table
+
+ return query.all(toneString, tones.length) as any[];
+ }
+ // TODO combine with this old query to get both phonetic and semantic data
+ // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table
+
+ fetchSenses(spelling: string, lang: string) {
+ const query = this.db.query(`
+ WITH sense_data AS (
+ SELECT
+ s.*,
+ GROUP_CONCAT(DISTINCT ss.id || ':' || ss.gloss, '|') as subsenses_data,
+ GROUP_CONCAT(DISTINCT ex.id || ':' || ex.example || ':' || COALESCE(ex.ref, ''), '|') as examples_data,
+ GROUP_CONCAT(DISTINCT d.id || ':' || d.type || ':' || d.text, '|') as derivation_data,
+ GROUP_CONCAT(DISTINCT wc.category, '|') as categories_data
+ FROM senses s
+ LEFT JOIN subsenses ss ON ss.sid = s.id
+ LEFT JOIN examples ex ON ex.sid = s.id
+ LEFT JOIN derivation d ON d.sid = s.id
+ LEFT JOIN word_categories wc ON wc.word_id = s.id
+ GROUP BY s.id
+ )
+ SELECT e.*,
+ (SELECT
+ json_group_array(json_object(
+ 'id', sd.id,
+ 'pos', sd.pos,
+ 'etymology', sd.etymology,
+ 'confidence', sd.confidence,
+ 'subsenses_data', sd.subsenses_data,
+ 'examples_data', sd.examples_data,
+ 'derivation_data', sd.derivation_data,
+ 'categories_data', sd.categories_data
+ ))
+ FROM sense_data sd
+ WHERE sd.parent_id = e.id
+ ) as senses_array
+ FROM expressions e
+ WHERE e.spelling = ? AND e.lang = ?
+ ORDER BY e.frequency DESC`);
+ return query.all(spelling, lang);
+ }
+}
+export default Queries;
+// `
+// WITH word_tone_sequences AS (
+// SELECT
+// w.id as word_id,
+// w.spelling,
+// wp.ipa,
+// w.frequency,
+// s.etymology,
+// s.pos,
+// GROUP_CONCAT(s.text ORDER BY sw.idx) as syl_seq,
+// GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
+// // GROUP_CONCAT(t.name ORDER BY s.id) as senses,
+// // GROUP_CONCAT(ex.example ORDER BY s.id) as examples,
+// // GROUP_CONCAT(cat.category) as tags,
+// COUNT(sw.syl_id) as syllable_count
+// FROM expressions w
+// JOIN word_phonetics wp ON w.id = wp.word_id
+// JOIN syllables_words sw ON wp.id = sw.word_id
+// JOIN syllables s ON sw.syl_id = s.id
+// JOIN tones t ON s.tone = t.id
+// JOIN senses s ON s.parent_id = w.id
+// JOIN word_categories cat ON s.id = cat.word_id
+// JOIN subsenses ss ON ss.sid = s.id
+// JOIN examples ex ON ex.sid = s.id
+// GROUP BY w.id, w.spelling, w.lang, w.frequency
+// )
+// SELECT
+// word_id,
+// spelling,
+// ipa,
+// frequency,
+// syl_seq,
+// tone_sequence,
+// syllable_count
+// FROM word_tone_sequences
+// WHERE tone_sequence LIKE ?
+// AND syllable_count = ?
+// ORDER BY frequency ASC NULLS LAST;
+// `
diff --git a/packages/db/src/migration.ts b/packages/db/src/migration.ts
new file mode 100644
index 0000000..c4150f8
--- /dev/null
+++ b/packages/db/src/migration.ts
@@ -0,0 +1,430 @@
+import { Database } from "bun:sqlite";
+
+const PROSODY_DB = "/home/y/code/bun/ssr/claudesorlang/bulkdata/prosody.db";
+const THAIPHON_DB = "/home/y/code/bun/ssr/claudesorlang/bulkdata/thaiphon.db";
+const UNIFIED_DB = "/home/y/code/bun/ssr/claudesorlang/bulkdata/unified.db";
+
+async function migrateSenses() {
+ console.log("Starting database migration to unified.db...\n");
+ const expressionIdMap: Map<number, number> = new Map();
+ const senseIdMap: Map<number, number> = new Map();
+ const wordExpressionMap: Map<number, number> = new Map();
+
+ // Open all databases
+ const prosodyDb = new Database(PROSODY_DB, { readonly: true });
+ const thaiphonDb = new Database(THAIPHON_DB, { readonly: true });
+ const unifiedDb = new Database(UNIFIED_DB);
+
+ // Enable foreign keys and WAL mode for unified database
+ unifiedDb.run("PRAGMA foreign_keys = ON");
+ unifiedDb.run("PRAGMA journal_mode = WAL");
+
+ try {
+ // Start transaction for atomic migration
+ unifiedDb.run("BEGIN TRANSACTION");
+
+ console.log("📦 Migrating from prosody.db...");
+
+ // 1. Migrate languages (combining both sources, preferring prosody.db)
+ console.log(" → Migrating categories...");
+ // categories
+ const prosodyCats = prosodyDb
+ .query("SELECT * FROM categories")
+ .all() as any[];
+ const insertCat = unifiedDb.prepare(
+ `INSERT INTO categories (name) VALUES (?)
+ ON CONFLICT(name) DO NOTHING
+ RETURNING rowid
+ `,
+ );
+ for (const cat of prosodyCats) {
+ const cr = insertCat.run(cat.name.toLowerCase().trim());
+ // console.log({ cr });
+ if (cr.changes !== 1) console.log({ cr });
+ }
+ console.log(` ✓ Migrated ${prosodyCats.length} categories`);
+
+ console.log(" → Migrating languages...");
+ const prosodyLangs = prosodyDb
+ .query("SELECT * FROM languages")
+ .all() as any[];
+
+ // Add thaiphon languages first
+ // Override with prosody languages (they have more complete data)
+ for (const lang of prosodyLangs) {
+ // Insert merged languages
+ const insertLang = unifiedDb.prepare(
+ "INSERT OR IGNORE INTO languages (code, name, native_name, iso6392) VALUES (?, ?, ?, ?)",
+ );
+
+ insertLang.run(lang.code, lang.name, lang.native_name, null);
+ }
+ console.log(` ✓ Migrated ${prosodyLangs.length} languages`);
+
+ // 2. Migrate expressions from prosody.db
+ console.log(" → Migrating expressions...");
+ const expressions = prosodyDb
+ .query("SELECT * FROM expressions")
+ .all() as any[];
+ const insertExpression = unifiedDb.prepare(
+ "INSERT INTO expressions (spelling, lang, frequency, type, notes) VALUES (?, ?, ?, ?, ?)",
+ );
+
+ for (const expr of expressions) {
+ const ei = insertExpression.run(
+ expr.spelling,
+ expr.lang,
+ expr.frequency,
+ expr.type,
+ expr.notes,
+ );
+ if (ei.changes > 0)
+ expressionIdMap.set(expr.id, Number(ei.lastInsertRowid));
+ }
+
+ const insertExample = unifiedDb.prepare(
+ "INSERT INTO examples(sid, example, ref) VALUES (?, ?, ?)",
+ );
+ const insertSubsense = unifiedDb.prepare(
+ "INSERT INTO subsenses (sid, gloss) VALUES (?, ?)",
+ );
+ const insertDerivation = unifiedDb.prepare(
+ "INSERT INTO derivation (sid, type, text) VALUES (?, ?, ?)",
+ );
+ const insertCatw = unifiedDb.prepare(
+ "INSERT OR IGNORE INTO word_categories(category, word_id) VALUES (?, ?)",
+ );
+ const insertWp = unifiedDb.prepare(
+ "INSERT INTO word_phonetics(word_id, sense_id, ipa, ipa_sequence, syllable_count, syllable_sequence, stressed, tone_sequence, tag, notes) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+ );
+ console.log(" → Migrating senses...");
+ const senses = prosodyDb.query("SELECT * FROM senses").all() as any[];
+ const insertSense = unifiedDb.prepare(
+ "INSERT INTO senses (parent_id, spelling, pos, etymology, confidence) VALUES (?, ?, ?, ?, ?)",
+ );
+
+ for (const sense of senses) {
+ const parentId = expressionIdMap.get(sense.parent_id);
+ const senseRow = insertSense.run(
+ parentId,
+ sense.spelling,
+ sense.pos,
+ sense.etymology,
+ sense.confidence,
+ );
+ const senseId = senseRow.lastInsertRowid;
+ senseIdMap.set(sense.id, Number(senseId));
+ const deriv = (
+ name: string,
+ type: Array<{ word: string }> | undefined,
+ ) => {
+ if (type)
+ for (const t of type) insertDerivation.run(senseId, name, t.word);
+ };
+ const subsenses = JSON.parse(sense.senses);
+ for (const ss of subsenses) {
+ const {
+ examples,
+ glosses,
+ tags,
+ synonyms,
+ antonyms,
+ hypernyms,
+ meronyms,
+ hyponyms,
+ holonyms,
+ } = ss;
+ if (examples)
+ for (const ex of examples) {
+ const ref = ex.ref || null;
+
+ if (ex.text) insertExample.run(senseId, ex.text, ref);
+ // else console.log("bad example", ex);
+ }
+ if (glosses)
+ for (const gloss of glosses) insertSubsense.run(senseId, gloss);
+ if (tags)
+ for (const tag of tags) {
+ const lw = tag.toLowerCase().trim();
+ insertCat.run(lw);
+ insertCatw.run(lw, senseId);
+ }
+ if (synonyms) deriv("synonym", synonyms);
+ if (antonyms) deriv("antonym", antonyms);
+ if (hypernyms) deriv("hypernym", hypernyms);
+ if (meronyms) deriv("meronym", meronyms);
+ if (hyponyms) deriv("hyponym", hyponyms);
+ if (holonyms) deriv("holonym", holonyms);
+ }
+ const prosody = JSON.parse(sense.prosody);
+ const stressed = prosody?.stressedSyllable || null;
+ if (!sense.ipa) {
+ console.log("no ipa", sense);
+ continue;
+ }
+ const ipa = JSON.parse(sense.ipa);
+ for (const ip of ipa) {
+ insertWp.run(
+ parentId,
+ senseId,
+ ip.ipa,
+ ip.ipa,
+ 0,
+ "",
+ stressed,
+ "",
+ ip.tag,
+ null,
+ );
+ }
+ }
+ console.log(` ✓ Migrated ${senses.length} senses`);
+
+ console.log("\n📦 Migrating from thaiphon.db...");
+
+ // 6. Migrate phonetic components from thaiphon.db
+ console.log(" → Migrating phonetic components...");
+
+ // Tones
+ const tones = thaiphonDb.query("SELECT * FROM tones").all() as any;
+ const insertTone = unifiedDb.prepare(
+ "INSERT INTO tones (id, ipa, lang, name, nums) VALUES (?, ?, ?, ?, ?)",
+ );
+ for (const tone of tones) {
+ insertTone.run(tone.id, tone.ipa, tone.lang, tone.name, tone.nums);
+ }
+ console.log(` ✓ Migrated ${tones.length} tones`);
+
+ // Onsets
+ const onsets = thaiphonDb.query("SELECT * FROM onsets").all() as any;
+ const insertOnset = unifiedDb.prepare(
+ "INSERT INTO onsets (id, ipa, text, lang) VALUES (?, ?, ?, ?)",
+ );
+ for (const onset of onsets) {
+ insertOnset.run(onset.id, onset.ipa, onset.text, onset.lang);
+ }
+ console.log(` ✓ Migrated ${onsets.length} onsets`);
+
+ // Medials
+ const medials = thaiphonDb.query("SELECT * FROM medials").all() as any;
+ const insertMedial = unifiedDb.prepare(
+ "INSERT INTO medials (id, ipa, text, lang) VALUES (?, ?, ?, ?)",
+ );
+ for (const medial of medials) {
+ insertMedial.run(medial.id, medial.ipa, medial.text, medial.lang) as any;
+ }
+ console.log(` ✓ Migrated ${medials.length} medials`);
+
+ // Nucleus
+ const nucleus = thaiphonDb.query("SELECT * FROM nucleus").all() as any;
+ const insertNucleus = unifiedDb.prepare(
+ "INSERT INTO nucleus (id, ipa, text, lang) VALUES (?, ?, ?, ?)",
+ );
+ for (const nucl of nucleus) {
+ insertNucleus.run(nucl.id, nucl.ipa, nucl.text, nucl.lang);
+ }
+ console.log(` ✓ Migrated ${nucleus.length} nucleus`);
+
+ // Codas
+ const codas = thaiphonDb.query("SELECT * FROM codas").all() as any;
+ const insertCoda = unifiedDb.prepare(
+ "INSERT INTO codas (id, ipa, text, lang) VALUES (?, ?, ?, ?)",
+ );
+ for (const coda of codas) {
+ insertCoda.run(coda.id, coda.ipa, coda.text, coda.lang) as any;
+ }
+ console.log(` ✓ Migrated ${codas.length} codas`);
+
+ // Rhymes
+ const rhymes = thaiphonDb.query("SELECT * FROM rhymes").all() as any;
+ const insertRhyme = unifiedDb.prepare(
+ "INSERT INTO rhymes (id, ipa, text, lang) VALUES (?, ?, ?, ?)",
+ );
+ for (const rhyme of rhymes) {
+ insertRhyme.run(rhyme.id, rhyme.ipa, rhyme.text, rhyme.lang);
+ }
+ console.log(` ✓ Migrated ${rhymes.length} rhymes`);
+
+ // 7. Migrate syllables from thaiphon.db
+ console.log(" → Migrating syllables...");
+ const syllables = thaiphonDb.query("SELECT * FROM syllables").all() as any;
+ const insertSyllable = unifiedDb.prepare(
+ "INSERT INTO syllables (id, lang, ipa, long, text, onset, medial, nucleus, coda, rhyme, tone, notes) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+ );
+ for (const syl of syllables) {
+ insertSyllable.run(
+ syl.id,
+ syl.lang,
+ syl.ipa,
+ syl.long,
+ syl.text,
+ syl.onset,
+ syl.medial,
+ syl.nucleus,
+ syl.coda,
+ syl.rhyme,
+ syl.tone,
+ syl.notes,
+ );
+ }
+ console.log(` ✓ Migrated ${syllables.length} syllables`);
+
+ // 8. Migrate words from thaiphon.db (as expressions if not exists)
+ console.log(" → Migrating words as expressions...");
+ const words = thaiphonDb.query("SELECT * FROM words").all() as any[];
+ const insertWordAsExpr = unifiedDb.query(
+ `INSERT INTO expressions (spelling, lang, frequency, type, notes) VALUES (?, ?, ?, ?, ?)
+ ON CONFLICT(spelling, lang) DO UPDATE SET spelling = excluded.spelling
+ RETURNING rowid`,
+ );
+
+ // Also keep track for syllables_words mapping
+ let prev: any = null;
+ for (const word of words) {
+ const result = insertWordAsExpr.get(
+ word.spelling,
+ word.lang,
+ word.frequency,
+ "word",
+ word.notes,
+ ) as { id: number };
+ // 41085
+ console.log({ result });
+ // if (prev === result.id) {
+ // console.log(word, result);
+ // throw new Error("fucked up again");
+ // }
+ // prev = result.id;
+ wordExpressionMap.set(word.id, result.id);
+ }
+ console.log(` ✓ Processed ${words.length} words`);
+
+ console.log(" → Migrating word phonetics...");
+ const wordPhonetics = thaiphonDb
+ .query("SELECT * FROM word_phonetics")
+ .all() as any[];
+ const insertWordPhonetic = unifiedDb.prepare(
+ "INSERT INTO word_phonetics (word_id, sense_id, ipa, syllable_count, syllable_sequence, tone_sequence, ipa_sequence, tag, notes) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
+ );
+ // TODO find the sense id later I guess
+ const wpMap: Map<number, number> = new Map();
+ for (const wp of wordPhonetics) {
+ const newWordId = wordExpressionMap.get(wp.word_id);
+ if (newWordId) {
+ const wr = insertWordPhonetic.run(
+ newWordId,
+ null,
+ wp.ipa,
+ wp.syllable_count,
+ wp.syllable_sequence,
+ wp.tone_sequence,
+ wp.ipa_sequence,
+ wp.tag,
+ wp.notes,
+ );
+ if (wr.changes !== 0) wpMap.set(wp.word_id, Number(wr.lastInsertRowid));
+ }
+ }
+
+ // 9. Migrate syllables_words junction
+ console.log(" → Migrating syllables_words...");
+ const sylWords = thaiphonDb
+ .query("SELECT * FROM syllables_words")
+ .all() as any;
+ const insertSylWord = unifiedDb.prepare(
+ "INSERT OR IGNORE INTO syllables_words (syl_id, word_id, idx, stressed) VALUES (?, ?, ?, ?)",
+ );
+ for (const sw of sylWords) {
+ const newWordId = wpMap.get(sw.word_id);
+ if (newWordId)
+ insertSylWord.run(sw.syl_id, newWordId, sw.idx, sw.stressed);
+ }
+ console.log(
+ ` ✓ Migrated ${sylWords.length} syllable-word relationships`,
+ );
+
+ unifiedDb.run("COMMIT");
+
+ console.log("\n✅ Migration completed successfully!");
+ console.log(" Unified database is ready at:", UNIFIED_DB);
+ } catch (error) {
+ console.error("\n❌ Migration failed:", error);
+ unifiedDb.run("ROLLBACK");
+ throw error;
+ } finally {
+ // Close all databases
+ prosodyDb.close();
+ // thaiphonDb.close();
+ unifiedDb.close();
+ }
+}
+
+// Run the migration
+migrateSenses();
+// migrateTones();
+// test();
+//
+
+async function test() {
+ console.log("Starting database migration to unified.db...\n");
+ // Open all databases
+ const prosodyDb = new Database(PROSODY_DB, { readonly: true });
+ const thaiphonDb = new Database(THAIPHON_DB, { readonly: true });
+ const unifiedDb = new Database(UNIFIED_DB, { readonly: true });
+
+ // Enable foreign keys and WAL mode for unified database
+ unifiedDb.run("PRAGMA foreign_keys = ON");
+ unifiedDb.run("PRAGMA journal_mode = WAL");
+ const oldE = prosodyDb.query("SELECT COUNT(*) FROM expressions").get();
+ const oldS = prosodyDb.query("SELECT COUNT(*) FROM senses").get();
+ const oldW = thaiphonDb.query("SELECT COUNT(*) FROM words").get();
+ const oldWp = thaiphonDb.query("SELECT COUNT(*) FROM word_phonetics").get();
+ const oldSw = thaiphonDb.query("SELECT COUNT(*) FROM syllables_words").get();
+ const oldSy = thaiphonDb.query("SELECT COUNT(*) FROM syllables").get();
+ const oldTo = thaiphonDb.query("SELECT COUNT(*) FROM tones").get();
+ const oldCod = thaiphonDb.query("SELECT COUNT(*) FROM codas").get();
+ const oldNuc = thaiphonDb.query("SELECT COUNT(*) FROM nucleus").get();
+ const oldOns = thaiphonDb.query("SELECT COUNT(*) FROM onsets").get();
+ const oldRhy = thaiphonDb.query("SELECT COUNT(*) FROM rhymes").get();
+ const oldWrh = thaiphonDb.query("SELECT COUNT(*) FROM word_rhymes").get();
+ const oldWry = thaiphonDb.query("SELECT COUNT(*) FROM words_wrhymes").get();
+ const newSw = unifiedDb.query("SELECT COUNT(*) FROM syllables_words").get();
+ const newSy = unifiedDb.query("SELECT COUNT(*) FROM syllables").get();
+ const newTo = unifiedDb.query("SELECT COUNT(*) FROM tones").get();
+ const newCod = unifiedDb.query("SELECT COUNT(*) FROM codas").get();
+ const newNuc = unifiedDb.query("SELECT COUNT(*) FROM nucleus").get();
+ const newOns = unifiedDb.query("SELECT COUNT(*) FROM onsets").get();
+ const newRhy = unifiedDb.query("SELECT COUNT(*) FROM rhymes").get();
+ const newWrh = unifiedDb.query("SELECT COUNT(*) FROM word_rhymes").get();
+ const newWry = unifiedDb.query("SELECT COUNT(*) FROM words_wrhymes").get();
+ const newE = unifiedDb.query("SELECT COUNT(*) FROM expressions").get();
+ const newS = unifiedDb.query("SELECT COUNT(*) FROM senses").get();
+ const newWp = unifiedDb.query("SELECT COUNT(*) FROM word_phonetics").get();
+ console.log({
+ oldE,
+ oldS,
+ oldW,
+ oldWp,
+ newE,
+ newS,
+ newWp,
+ oldSw,
+ oldSy,
+ oldTo,
+ oldCod,
+ oldNuc,
+ oldOns,
+ oldRhy,
+ oldWrh,
+ oldWry,
+ newSw,
+ newSy,
+ newTo,
+ newCod,
+ newNuc,
+ newOns,
+ newRhy,
+ newWrh,
+ newWry,
+ });
+}
diff --git a/packages/db/src/phonetics.ts b/packages/db/src/phonetics.ts
new file mode 100644
index 0000000..cf62434
--- /dev/null
+++ b/packages/db/src/phonetics.ts
@@ -0,0 +1,523 @@
+import { Database } from 'bun:sqlite';
+
+export interface Syllable {
+ id: number;
+ lang: string;
+ ipa: string;
+ long: number;
+ text: string;
+ onset: number;
+ medial: number;
+ nucleus: number;
+ coda: number;
+ rhyme: number;
+ tone: number;
+ notes?: string;
+}
+
+export interface Tone {
+ id: number;
+ ipa: string;
+ lang: string;
+ name: string;
+ nums: number;
+}
+
+export interface Onset {
+ id: number;
+ ipa: string;
+ text: string;
+ lang: string;
+}
+
+export interface Medial {
+ id: number;
+ ipa: string;
+ text: string;
+ lang: string;
+}
+
+export interface Nucleus {
+ id: number;
+ ipa: string;
+ text: string;
+ lang: string;
+}
+
+export interface Coda {
+ id: number;
+ ipa: string;
+ text: string;
+ lang: string;
+}
+
+export interface Rhyme {
+ id: number;
+ ipa: string;
+ text: string;
+ lang: string;
+}
+
+export interface WordPhonetics {
+ id?: number;
+ word_id: number;
+ ipa: string;
+ syllable_count: number;
+ syllable_sequence: string;
+ tone_sequence: string;
+ ipa_sequence: string;
+ tag?: string;
+ notes?: string;
+}
+
+export interface WordRhyme {
+ id: number;
+ text: string;
+ lang: string;
+ notes?: string;
+}
+
+export interface Idiom {
+ id: number;
+ spelling: string;
+ lang: string;
+ frequency?: number;
+}
+
+export interface SyllableWordMapping {
+ syl_id: number;
+ word_id: number;
+ idx: number;
+ stressed?: number;
+}
+
+export class PhoneticsQueries {
+ constructor(private db: Database) {}
+
+ // Tone operations
+ getTones(lang?: string): Tone[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT id, ipa, lang, name, nums
+ FROM tones
+ WHERE lang = ?
+ ORDER BY nums
+ `);
+ return query.all(lang) as Tone[];
+ } else {
+ const query = this.db.query(`
+ SELECT id, ipa, lang, name, nums
+ FROM tones
+ ORDER BY lang, nums
+ `);
+ return query.all() as Tone[];
+ }
+ }
+
+ getToneById(id: number): Tone | null {
+ const query = this.db.query(`
+ SELECT id, ipa, lang, name, nums
+ FROM tones
+ WHERE id = ?
+ `);
+ return query.get(id) as Tone | null;
+ }
+
+ getToneByName(name: string, lang: string): Tone | null {
+ const query = this.db.query(`
+ SELECT id, ipa, lang, name, nums
+ FROM tones
+ WHERE name = ? AND lang = ?
+ `);
+ return query.get(name, lang) as Tone | null;
+ }
+
+ // Syllable component operations
+ getOnsets(lang?: string): Onset[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT id, ipa, text, lang
+ FROM onsets
+ WHERE lang = ?
+ ORDER BY text
+ `);
+ return query.all(lang) as Onset[];
+ } else {
+ const query = this.db.query(`
+ SELECT id, ipa, text, lang
+ FROM onsets
+ ORDER BY lang, text
+ `);
+ return query.all() as Onset[];
+ }
+ }
+
+ getMedials(lang?: string): Medial[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT id, ipa, text, lang
+ FROM medials
+ WHERE lang = ?
+ ORDER BY text
+ `);
+ return query.all(lang) as Medial[];
+ } else {
+ const query = this.db.query(`
+ SELECT id, ipa, text, lang
+ FROM medials
+ ORDER BY lang, text
+ `);
+ return query.all() as Medial[];
+ }
+ }
+
+ getNucleus(lang?: string): Nucleus[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT id, ipa, text, lang
+ FROM nucleus
+ WHERE lang = ?
+ ORDER BY text
+ `);
+ return query.all(lang) as Nucleus[];
+ } else {
+ const query = this.db.query(`
+ SELECT id, ipa, text, lang
+ FROM nucleus
+ ORDER BY lang, text
+ `);
+ return query.all() as Nucleus[];
+ }
+ }
+
+ getCodas(lang?: string): Coda[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT id, ipa, text, lang
+ FROM codas
+ WHERE lang = ?
+ ORDER BY text
+ `);
+ return query.all(lang) as Coda[];
+ } else {
+ const query = this.db.query(`
+ SELECT id, ipa, text, lang
+ FROM codas
+ ORDER BY lang, text
+ `);
+ return query.all() as Coda[];
+ }
+ }
+
+ getRhymes(lang?: string): Rhyme[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT id, ipa, text, lang
+ FROM rhymes
+ WHERE lang = ?
+ ORDER BY text
+ `);
+ return query.all(lang) as Rhyme[];
+ } else {
+ const query = this.db.query(`
+ SELECT id, ipa, text, lang
+ FROM rhymes
+ ORDER BY lang, text
+ `);
+ return query.all() as Rhyme[];
+ }
+ }
+
+ // Complete syllable operations
+ getSyllables(lang?: string, tone?: number): Syllable[] {
+ if (lang && tone !== undefined) {
+ const query = this.db.query(`
+ SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes,
+ o.text as onset_text, m.text as medial_text, n.text as nucleus_text,
+ c.text as coda_text, r.text as rhyme_text, t.name as tone_name
+ FROM syllables s
+ LEFT JOIN onsets o ON s.onset = o.id
+ LEFT JOIN medials m ON s.medial = m.id
+ LEFT JOIN nucleus n ON s.nucleus = n.id
+ LEFT JOIN codas c ON s.coda = c.id
+ LEFT JOIN rhymes r ON s.rhyme = r.id
+ LEFT JOIN tones t ON s.tone = t.id
+ WHERE s.lang = ? AND s.tone = ?
+ ORDER BY s.text
+ `);
+ return query.all(lang, tone) as Syllable[];
+ } else if (lang) {
+ const query = this.db.query(`
+ SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes,
+ o.text as onset_text, m.text as medial_text, n.text as nucleus_text,
+ c.text as coda_text, r.text as rhyme_text, t.name as tone_name
+ FROM syllables s
+ LEFT JOIN onsets o ON s.onset = o.id
+ LEFT JOIN medials m ON s.medial = m.id
+ LEFT JOIN nucleus n ON s.nucleus = n.id
+ LEFT JOIN codas c ON s.coda = c.id
+ LEFT JOIN rhymes r ON s.rhyme = r.id
+ LEFT JOIN tones t ON s.tone = t.id
+ WHERE s.lang = ?
+ ORDER BY s.text, s.tone
+ `);
+ return query.all(lang) as Syllable[];
+ } else {
+ const query = this.db.query(`
+ SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes,
+ o.text as onset_text, m.text as medial_text, n.text as nucleus_text,
+ c.text as coda_text, r.text as rhyme_text, t.name as tone_name
+ FROM syllables s
+ LEFT JOIN onsets o ON s.onset = o.id
+ LEFT JOIN medials m ON s.medial = m.id
+ LEFT JOIN nucleus n ON s.nucleus = n.id
+ LEFT JOIN codas c ON s.coda = c.id
+ LEFT JOIN rhymes r ON s.rhyme = r.id
+ LEFT JOIN tones t ON s.tone = t.id
+ ORDER BY s.lang, s.text, s.tone
+ `);
+ return query.all() as Syllable[];
+ }
+ }
+
+ getSyllableById(id: number): Syllable | null {
+ const query = this.db.query(`
+ SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes,
+ o.text as onset_text, m.text as medial_text, n.text as nucleus_text,
+ c.text as coda_text, r.text as rhyme_text, t.name as tone_name
+ FROM syllables s
+ LEFT JOIN onsets o ON s.onset = o.id
+ LEFT JOIN medials m ON s.medial = m.id
+ LEFT JOIN nucleus n ON s.nucleus = n.id
+ LEFT JOIN codas c ON s.coda = c.id
+ LEFT JOIN rhymes r ON s.rhyme = r.id
+ LEFT JOIN tones t ON s.tone = t.id
+ WHERE s.id = ?
+ `);
+ return query.get(id) as Syllable | null;
+ }
+
+ // Word phonetics operations
+ getWordPhonetics(wordId: number): WordPhonetics | null {
+ const query = this.db.query(`
+ SELECT id, word_id, ipa, syllable_count, syllable_sequence, tone_sequence, ipa_sequence, tag, notes
+ FROM word_phonetics
+ WHERE word_id = ?
+ `);
+ return query.get(wordId) as WordPhonetics | null;
+ }
+
+ getWordsByTonePattern(toneSequence: string, syllableCount?: number, limit: number = 50): number[] {
+ if (syllableCount !== undefined) {
+ const query = this.db.query(`
+ SELECT word_id
+ FROM word_phonetics
+ WHERE tone_sequence = ? AND syllable_count = ?
+ ORDER BY id
+ LIMIT ?
+ `);
+ const results = query.all(toneSequence, syllableCount, limit) as { word_id: number }[];
+ return results.map(r => r.word_id);
+ } else {
+ const query = this.db.query(`
+ SELECT word_id
+ FROM word_phonetics
+ WHERE tone_sequence = ?
+ ORDER BY syllable_count, id
+ LIMIT ?
+ `);
+ const results = query.all(toneSequence, limit) as { word_id: number }[];
+ return results.map(r => r.word_id);
+ }
+ }
+
+ getWordsBySyllablePattern(syllableSequence: string, limit: number = 50): number[] {
+ const query = this.db.query(`
+ SELECT word_id
+ FROM word_phonetics
+ WHERE syllable_sequence = ?
+ ORDER BY id
+ LIMIT ?
+ `);
+ const results = query.all(syllableSequence, limit) as { word_id: number }[];
+ return results.map(r => r.word_id);
+ }
+
+ searchWordsByPhoneticPattern(pattern: {
+ toneSequence?: string;
+ syllableCount?: number;
+ minSyllableCount?: number;
+ maxSyllableCount?: number;
+ limit?: number;
+ }): number[] {
+ let conditions: string[] = [];
+ let params: any[] = [];
+
+ if (pattern.toneSequence) {
+ conditions.push('tone_sequence LIKE ?');
+ params.push(`%${pattern.toneSequence}%`);
+ }
+
+ if (pattern.syllableCount !== undefined) {
+ conditions.push('syllable_count = ?');
+ params.push(pattern.syllableCount);
+ }
+
+ if (pattern.minSyllableCount !== undefined) {
+ conditions.push('syllable_count >= ?');
+ params.push(pattern.minSyllableCount);
+ }
+
+ if (pattern.maxSyllableCount !== undefined) {
+ conditions.push('syllable_count <= ?');
+ params.push(pattern.maxSyllableCount);
+ }
+
+ const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
+ const limit = pattern.limit || 50;
+ params.push(limit);
+
+ const query = this.db.query(`
+ SELECT word_id
+ FROM word_phonetics
+ ${whereClause}
+ ORDER BY syllable_count, id
+ LIMIT ?
+ `);
+
+ const results = query.all(...params) as { word_id: number }[];
+ return results.map(r => r.word_id);
+ }
+
+ // Word-syllable mapping operations
+ getSyllablesForWord(wordId: number): SyllableWordMapping[] {
+ const query = this.db.query(`
+ SELECT sw.syl_id, sw.word_id, sw.idx, sw.stressed,
+ s.text as syllable_text, s.ipa as syllable_ipa, s.tone, s.long,
+ t.name as tone_name
+ FROM syllables_words sw
+ JOIN syllables s ON sw.syl_id = s.id
+ LEFT JOIN tones t ON s.tone = t.id
+ WHERE sw.word_id = ?
+ ORDER BY sw.idx
+ `);
+ return query.all(wordId) as SyllableWordMapping[];
+ }
+
+ getWordsForSyllable(syllableId: number): number[] {
+ const query = this.db.query(`
+ SELECT DISTINCT word_id
+ FROM syllables_words
+ WHERE syl_id = ?
+ ORDER BY word_id
+ `);
+ const results = query.all(syllableId) as { word_id: number }[];
+ return results.map(r => r.word_id);
+ }
+
+ // Rhyme operations
+ getWordRhymes(wordId: number): WordRhyme[] {
+ const query = this.db.query(`
+ SELECT wr.id, wr.text, wr.lang, wr.notes
+ FROM word_rhymes wr
+ JOIN words_wrhymes wwr ON wr.id = wwr.wrhyme_id
+ WHERE wwr.word_id = ?
+ `);
+ return query.all(wordId) as WordRhyme[];
+ }
+
+ getWordsByRhyme(rhymeText: string, lang: string, limit: number = 50): number[] {
+ const query = this.db.query(`
+ SELECT DISTINCT ww.word_id
+ FROM word_rhymes wr
+ JOIN words_wrhymes ww ON wr.id = ww.wrhyme_id
+ WHERE wr.text = ? AND wr.lang = ?
+ LIMIT ?
+ `);
+ const results = query.all(rhymeText, lang, limit) as { word_id: number }[];
+ return results.map(r => r.word_id);
+ }
+
+ // Idiom operations
+ getIdioms(lang?: string): Idiom[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency
+ FROM idioms
+ WHERE lang = ?
+ ORDER BY frequency DESC, spelling
+ `);
+ return query.all(lang) as Idiom[];
+ } else {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency
+ FROM idioms
+ ORDER BY lang, frequency DESC, spelling
+ `);
+ return query.all() as Idiom[];
+ }
+ }
+
+ getIdiomsForWord(wordId: number): Idiom[] {
+ const query = this.db.query(`
+ SELECT i.id, i.spelling, i.lang, i.frequency
+ FROM idioms i
+ JOIN words_idioms wi ON i.id = wi.idiom_id
+ WHERE wi.word_id = ?
+ ORDER BY i.frequency DESC, i.spelling
+ `);
+ return query.all(wordId) as Idiom[];
+ }
+
+ // Phonetic statistics and analysis
+ getPhoneticStats(lang: string) {
+ const query = this.db.query(`
+ SELECT
+ COUNT(DISTINCT s.id) as total_syllables,
+ COUNT(DISTINCT s.tone) as unique_tones,
+ AVG(s.long) as avg_vowel_length,
+ COUNT(DISTINCT s.onset) as unique_onsets,
+ COUNT(DISTINCT s.medial) as unique_medials,
+ COUNT(DISTINCT s.nucleus) as unique_nucleus,
+ COUNT(DISTINCT s.coda) as unique_codas,
+ COUNT(DISTINCT s.rhyme) as unique_rhymes,
+ COUNT(DISTINCT wp.id) as total_word_phonetics,
+ AVG(wp.syllable_count) as avg_syllables_per_word
+ FROM syllables s
+ LEFT JOIN word_phonetics wp ON 1=1
+ WHERE s.lang = ?
+ `);
+ return query.get(lang);
+ }
+
+ getToneDistribution(lang: string) {
+ const query = this.db.query(`
+ SELECT
+ t.name as tone_name,
+ t.nums as tone_number,
+ COUNT(s.id) as syllable_count,
+ ROUND(COUNT(s.id) * 100.0 / (SELECT COUNT(*) FROM syllables WHERE lang = ?), 2) as percentage
+ FROM syllables s
+ JOIN tones t ON s.tone = t.id
+ WHERE s.lang = ?
+ GROUP BY t.id, t.name, t.nums
+ ORDER BY t.nums
+ `);
+ return query.all(lang, lang);
+ }
+
+ getSyllableComplexityStats(lang: string) {
+ const query = this.db.query(`
+ SELECT
+ syllable_count,
+ COUNT(*) as word_count,
+ ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM word_phonetics), 2) as percentage
+ FROM word_phonetics wp
+ JOIN expressions e ON wp.word_id = e.id
+ WHERE e.lang = ?
+ GROUP BY syllable_count
+ ORDER BY syllable_count
+ `);
+ return query.all(lang);
+ }
+} \ No newline at end of file
diff --git a/packages/db/src/schema.sql b/packages/db/src/schema.sql
new file mode 100644
index 0000000..c4a7c76
--- /dev/null
+++ b/packages/db/src/schema.sql
@@ -0,0 +1,729 @@
+/**
+ * UNIFIED DATABASE SCHEMA FOR THAI LANGUAGE LEARNING APPLICATION
+ *
+ * This file consolidates 3 redundant database schemas:
+ * - schema.sql (main schema with courses and SRS)
+ * - prosodyschema.sql (phonetic analysis with syllable breakdown)
+ * - senseschema.sql (semantic analysis with subsenses and derivations)
+ *
+ * REDUNDANCY ANALYSIS:
+ * ====================
+ *
+ ** MAJOR CONFLICTS RESOLVED:
+ * 1. Languages table:
+ * - schema.sql: uses 'code' as PRIMARY KEY
+ * - prosodyschema.sql: uses 'iso6392' as PRIMARY KEY
+ * - RESOLVED: Unified with both code systems supported
+ *
+ * 2. Senses table:
+ * - schema.sql: includes ipa/prosody JSONB fields
+ * - senseschema.sql: missing phonetic fields
+ * - RESOLVED: Enhanced version with all fields
+ *
+ * 3. Word/Expression entities:
+ * - schema.sql: uses 'expressions' table
+ * - prosodyschema.sql: uses 'words' table
+ * - RESOLVED: Standardized on 'expressions' terminology
+ *
+ * 4. Categories tables:
+ * - EXACT DUPLICATES in schema.sql and senseschema.sql
+ * - RESOLVED: Keep one instance, remove duplicate
+ *
+ * UNIQUE FEATURES PRESERVED:
+ * ========================
+ *
+ * FROM schema.sql (Main Course Learning):
+ * - User management: users, cookies
+ * - Course structure: lessons, cards, cards_lessons, cards_expressions
+ * - SRS tracking: user_progress, attempts
+ * - Bookmarks and user features
+ *
+ * FROM prosodyschema.sql (Phonetic Analysis):
+ * - Detailed syllable breakdown: tones, onsets, medials, nucleus, codas, rhymes
+ * - Phonetic patterns: word_phonetics, syllables_words
+ * - Rhyme analysis: word_rhymes, words_wrhymes
+ *
+ * FROM senseschema.sql (Semantic Analysis):
+ * - Detailed definitions: subsenses
+ * - Etymology tracking: derivation
+ *
+ * FOREIGN KEY RELATIONSHIPS:
+ * =========================
+ * - Updated all references from 'words.id' to 'expressions.id'
+ * - Unified language references to use languages.code
+ * - Maintained proper cascade relationships
+ *
+ * BENEFITS OF UNIFICATION:
+ * =======================
+ * 1. Single source of truth for database structure
+ * 2. Eliminated conflicting table definitions
+ * 3. Preserved all unique functionality
+ * 4. Improved foreign key consistency
+ * 5. Better support for comprehensive Thai language analysis
+ */
+
+-- Enable foreign key support and performance optimizations
+PRAGMA foreign_keys = ON;
+PRAGMA journal_mode = WAL;
+PRAGMA cache_size = -2000;
+PRAGMA mmap_size = 30000000000;
+
+/**
+ * UNIFIED LANGUAGES TABLE
+ *
+ * RESOLVES CONFLICT between:
+ * - schema.sql: code (PRIMARY KEY), name, native_name
+ * - prosodyschema.sql: iso6392 (PRIMARY KEY), english
+ *
+ * NOW SUPPORTS multiple language code systems for maximum compatibility
+ */
+CREATE TABLE IF NOT EXISTS languages (
+ code TEXT PRIMARY KEY, -- Primary language code (ISO 639-1 preferred)
+ name TEXT NOT NULL, -- English name
+ native_name TEXT, -- Native name
+ iso6392 TEXT UNIQUE, -- ISO 639-2 code alternative
+ CONSTRAINT name_unique UNIQUE (name)
+);
+
+/**
+ * CORE CONTENT TABLES
+ * Standardized on 'expressions' terminology from schema.sql
+ * Enhanced with fields from both schemas
+ */
+
+-- Main expressions table (formerly 'words' in prosodyschema.sql)
+-- UNIFIED from schema.sql expressions and prosodyschema.sql words
+CREATE TABLE IF NOT EXISTS expressions (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ spelling TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ frequency INTEGER,
+ type TEXT NOT NULL, -- word | idiom | w/e
+ notes TEXT, -- Additional notes (from prosodyschema.sql)
+ FOREIGN KEY (lang) REFERENCES languages(code),
+ CONSTRAINT spell_unique UNIQUE (spelling, lang)
+);
+
+-- Enhanced senses table with phonetic capabilities
+-- MERGED from schema.sql senses (with ipa/prosody) and senseschema.sql senses
+CREATE TABLE IF NOT EXISTS senses (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ parent_id INTEGER NOT NULL,
+ spelling TEXT NOT NULL,
+ pos TEXT,
+ etymology TEXT,
+ confidence INTEGER NOT NULL DEFAULT 0,
+ FOREIGN KEY (parent_id) REFERENCES expressions(id) ON DELETE CASCADE
+);
+
+CREATE TABLE IF NOT EXISTS categories (
+ name TEXT PRIMARY KEY
+);
+
+-- Word-category relationships
+CREATE TABLE IF NOT EXISTS word_categories (
+ word_id INTEGER NOT NULL,
+ category TEXT NOT NULL,
+ PRIMARY KEY (word_id, category),
+ FOREIGN KEY (word_id) REFERENCES senses(id) ON DELETE CASCADE,
+ FOREIGN KEY (category) REFERENCES categories(name) ON DELETE CASCADE
+);
+
+-- Semantic analysis tables (FROM senseschema.sql)
+CREATE TABLE IF NOT EXISTS examples(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ sid INTEGER NOT NULL,
+ example TEXT NOT NULL, -- Example sentence
+ ref TEXT, -- source of the quote, llm if generated etc.
+ FOREIGN KEY (sid) REFERENCES senses(id) ON DELETE CASCADE
+);
+CREATE TABLE IF NOT EXISTS subsenses (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ sid INTEGER NOT NULL,
+ gloss TEXT NOT NULL, -- Definition/explanation
+ FOREIGN KEY (sid) REFERENCES senses(id) ON DELETE CASCADE
+);
+
+CREATE TABLE IF NOT EXISTS derivation (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ sid INTEGER NOT NULL,
+ type TEXT NOT NULL, -- Type of derivation (prefix, suffix, compound, etc.)
+ text TEXT NOT NULL, -- Derivative text
+ tags JSONB, -- Additional metadata
+ FOREIGN KEY (sid) REFERENCES senses(id) ON DELETE CASCADE
+);
+
+/**
+ * PHONETIC ANALYSIS TABLES (FROM prosodyschema.sql)
+ *
+ * Comprehensive syllable breakdown system for Thai language analysis
+ * These tables provide detailed phonetic analysis beyond basic JSONB fields
+ */
+
+
+-- Syllable component tables
+CREATE TABLE IF NOT EXISTS tones (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ ipa TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ name TEXT NOT NULL,
+ nums INTEGER NOT NULL, -- Tone number (1-5 for Thai)
+ CONSTRAINT tone_unique UNIQUE (ipa, lang),
+ FOREIGN KEY (lang) REFERENCES languages(code)
+);
+
+CREATE TABLE IF NOT EXISTS onsets (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ ipa TEXT NOT NULL,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ CONSTRAINT onset_unique UNIQUE (ipa, text, lang),
+ FOREIGN KEY (lang) REFERENCES languages(code)
+);
+
+CREATE TABLE IF NOT EXISTS medials (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ ipa TEXT NOT NULL,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ CONSTRAINT medial_unique UNIQUE (ipa, text, lang),
+ FOREIGN KEY (lang) REFERENCES languages(code)
+);
+
+CREATE TABLE IF NOT EXISTS nucleus (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ ipa TEXT NOT NULL,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ CONSTRAINT nucleus_unique UNIQUE (ipa, text, lang),
+ FOREIGN KEY (lang) REFERENCES languages(code)
+);
+
+CREATE TABLE IF NOT EXISTS codas (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ ipa TEXT NOT NULL,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ CONSTRAINT coda_unique UNIQUE (ipa, text, lang),
+ FOREIGN KEY (lang) REFERENCES languages(code)
+);
+
+CREATE TABLE IF NOT EXISTS rhymes (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ ipa TEXT NOT NULL,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ CONSTRAINT rhyme_unique UNIQUE (ipa, text, lang),
+ FOREIGN KEY (lang) REFERENCES languages(code)
+);
+
+-- Complete syllable breakdown (UNIQUE to prosodyschema.sql)
+CREATE TABLE IF NOT EXISTS syllables (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ lang TEXT NOT NULL,
+ ipa TEXT NOT NULL,
+ long INTEGER NOT NULL, -- Vowel length (0=short, 1=long)
+ text TEXT NOT NULL,
+ onset INTEGER NOT NULL,
+ medial INTEGER NOT NULL,
+ nucleus INTEGER NOT NULL,
+ coda INTEGER NOT NULL,
+ rhyme INTEGER NOT NULL,
+ tone INTEGER NOT NULL,
+ notes TEXT,
+ FOREIGN KEY (lang) REFERENCES languages(code),
+ FOREIGN KEY (onset) REFERENCES onsets(id),
+ FOREIGN KEY (medial) REFERENCES medials(id),
+ FOREIGN KEY (nucleus) REFERENCES nucleus(id),
+ FOREIGN KEY (coda) REFERENCES codas(id),
+ FOREIGN KEY (rhyme) REFERENCES rhymes(id),
+ FOREIGN KEY (tone) REFERENCES tones(id),
+ CONSTRAINT syllable_unique UNIQUE (text, ipa, lang)
+);
+
+-- Phonetic pattern storage (UNIQUE to prosodyschema.sql)
+CREATE TABLE IF NOT EXISTS word_phonetics (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ word_id INTEGER NOT NULL,
+ sense_id INTEGER,
+ ipa TEXT NOT NULL,
+ ipa_sequence TEXT NOT NULL, -- IPA representation sequence
+ syllable_count INTEGER NOT NULL,
+ syllable_sequence TEXT NOT NULL, -- Comma-separated syllables
+ stressed INTEGER, -- index of stressed syllable
+ tone_sequence TEXT, -- Comma-separated tones
+ tag JSONB, -- Pattern/usage tag
+ notes TEXT,
+ FOREIGN KEY (word_id) REFERENCES expressions(id),
+ FOREIGN KEY (sense_id) REFERENCES senses(id)
+);
+
+-- Rhyme analysis tables (UNIQUE to prosodyschema.sql)
+CREATE TABLE IF NOT EXISTS word_rhymes (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ notes TEXT,
+ CONSTRAINT wrhyme_unique UNIQUE (text, lang),
+ FOREIGN KEY (lang) REFERENCES languages(code)
+);
+
+CREATE TABLE IF NOT EXISTS words_wrhymes (
+ word_id INTEGER NOT NULL,
+ wrhyme_id INTEGER NOT NULL,
+ FOREIGN KEY (word_id) REFERENCES word_phonetics(id),
+ FOREIGN KEY (wrhyme_id) REFERENCES word_rhymes(id),
+ PRIMARY KEY (word_id, wrhyme_id)
+);
+
+-- Junction tables for relationships
+
+-- Expressions to syllables mapping
+CREATE TABLE IF NOT EXISTS syllables_words (
+ syl_id INTEGER NOT NULL,
+ word_id INTEGER NOT NULL,
+ idx INTEGER NOT NULL, -- Position in word
+ stressed INTEGER, -- Stress accent (0=none, 1=stressed)
+ FOREIGN KEY (syl_id) REFERENCES syllables(id),
+ FOREIGN KEY (word_id) REFERENCES word_phonetics(id),
+ PRIMARY KEY (syl_id, word_id, idx)
+);
+
+/**
+ * COURSE AND LEARNING TABLES (FROM schema.sql)
+ *
+ * Complete course structure and lesson management system
+ */
+
+-- User management
+CREATE TABLE IF NOT EXISTS users (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ name TEXT NOT NULL,
+ creds TEXT NOT NULL,
+ CONSTRAINT name_unique UNIQUE (name)
+);
+
+CREATE TABLE sessions (
+ id TEXT PRIMARY KEY, -- Session ID/token (usually UUID or random hash)
+ user_id INTEGER NOT NULL,
+ created_at INTEGER NOT NULL, -- Timestamp when created
+ expires_at INTEGER NOT NULL, -- Timestamp when expires
+ last_activity INTEGER, -- Last activity timestamp
+ ip_address TEXT, -- Optional: track IP
+ user_agent TEXT, -- Optional: track browser/client
+ data JSONB, -- Optional: session-specific data
+ FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE
+);
+
+CREATE INDEX idx_sessions_user ON sessions(user_id);
+CREATE INDEX idx_sessions_expires ON sessions(expires_at);
+
+ -- The separation is cleaner:
+ -- - sessions table - Server-side session management
+ -- - cookies are just the client-side token that references the session ID
+
+ -- Some apps also add:
+ -- - refresh_tokens table for JWT refresh tokens
+ -- - remember_tokens table for "remember me" functionality
+ -- - active_sessions view for currently valid sessions
+
+ -- The cookie itself just stores the session ID, while all the actual session data lives in the database. This is more secure and gives you better control over session
+ -- invalidation.
+-- Course structure
+CREATE TABLE IF NOT EXISTS lessons (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ name TEXT NOT NULL,
+ position INTEGER NOT NULL DEFAULT 0,
+ description TEXT
+);
+
+CREATE TABLE IF NOT EXISTS lang_lessons (
+ lesson_id INTEGER NOT NULL,
+ lang TEXT NOT NULL,
+ PRIMARY KEY (lang, lesson_id),
+ FOREIGN KEY (lang) REFERENCES languages(code),
+ FOREIGN KEY (lesson_id) REFERENCES lessons(id)
+);
+
+CREATE TABLE IF NOT EXISTS cards (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ text TEXT NOT NULL,
+ note TEXT
+);
+
+CREATE TABLE IF NOT EXISTS cards_expressions (
+ expression_id INTEGER NOT NULL,
+ card_id INTEGER NOT NULL,
+ PRIMARY KEY (card_id, expression_id),
+ FOREIGN KEY (card_id) REFERENCES cards(id),
+ FOREIGN KEY (expression_id) REFERENCES expressions(id)
+);
+
+CREATE TABLE IF NOT EXISTS cards_lessons (
+ lesson_id INTEGER,
+ card_id INTEGER NOT NULL,
+ PRIMARY KEY (card_id, lesson_id),
+ FOREIGN KEY (card_id) REFERENCES cards(id),
+ FOREIGN KEY (lesson_id) REFERENCES lessons(id)
+);
+
+
+/**
+ * USER PROGRESS AND SRS TRACKING (FROM schema.sql)
+ *
+ * Complete spaced repetition system with attempt tracking
+ */
+
+CREATE TABLE IF NOT EXISTS user_progress (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ user_id INTEGER NOT NULL,
+ card_id INTEGER NOT NULL,
+ repetition_count INTEGER DEFAULT 0,
+ ease_factor REAL DEFAULT 2.5,
+ interval INTEGER DEFAULT 1,
+ next_review_date INTEGER,
+ last_reviewed INTEGER,
+ is_mastered BOOLEAN DEFAULT FALSE,
+ FOREIGN KEY (user_id) REFERENCES users(id),
+ FOREIGN KEY (card_id) REFERENCES cards(id),
+ CONSTRAINT progress_unique UNIQUE (user_id, card_id)
+);
+
+CREATE TABLE IF NOT EXISTS attempts (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ user_id INTEGER NOT NULL,
+ timestamp INTEGER NOT NULL,
+ card_id INTEGER NOT NULL,
+ good INTEGER NOT NULL, -- 0 or 1 for success/failure
+ FOREIGN KEY (user_id) REFERENCES users(id),
+ FOREIGN KEY (card_id) REFERENCES cards(id)
+);
+
+/**
+ * USER FEATURES (FROM schema.sql)
+ */
+
+CREATE TABLE IF NOT EXISTS bookmarks (
+ word_id INTEGER NOT NULL,
+ user_id INTEGER NOT NULL,
+ notes TEXT,
+ created INTEGER NOT NULL,
+ PRIMARY KEY (word_id, user_id),
+ FOREIGN KEY (word_id) REFERENCES expressions(id),
+ FOREIGN KEY (user_id) REFERENCES users(id)
+);
+
+/**
+ * INDEXES FOR PERFORMANCE OPTIMIZATION
+ *
+ * Comprehensive indexes based on analysis of query patterns
+ * from all three schemas
+ */
+
+-- Language and expression indexes
+CREATE INDEX IF NOT EXISTS idx_expressions_spelling ON expressions(spelling);
+CREATE INDEX IF NOT EXISTS idx_expressions_type ON expressions(type);
+CREATE INDEX IF NOT EXISTS idx_expressions_lang ON expressions(lang);
+CREATE INDEX IF NOT EXISTS idx_expressions_lang_freq ON expressions(lang, frequency DESC);
+
+-- Sense and semantic indexes
+CREATE INDEX IF NOT EXISTS idx_senses_parent ON senses(parent_id);
+CREATE INDEX IF NOT EXISTS idx_senses_pos ON senses(pos);
+CREATE INDEX IF NOT EXISTS idx_subsenses_sid ON subsenses(sid);
+CREATE INDEX IF NOT EXISTS idx_derivation_sid ON derivation(sid);
+
+-- Phonetic analysis indexes (FROM prosodyschema.sql)
+CREATE INDEX IF NOT EXISTS idx_tones_name_lang ON tones(name, lang);
+CREATE INDEX IF NOT EXISTS idx_tones_nums_lang ON tones(nums, lang);
+CREATE INDEX IF NOT EXISTS idx_syllables_text_lang ON syllables(text, lang);
+CREATE INDEX IF NOT EXISTS idx_syllables_tone ON syllables(tone);
+
+CREATE INDEX IF NOT EXISTS idx_word_phonetics_word_id ON word_phonetics(word_id);
+CREATE INDEX IF NOT EXISTS idx_word_phonetics_syllables ON word_phonetics(syllable_sequence);
+CREATE INDEX IF NOT EXISTS idx_word_phonetics_tones ON word_phonetics(tone_sequence);
+CREATE INDEX IF NOT EXISTS idx_word_phonetics_count ON word_phonetics(syllable_count);
+
+-- Junction table indexes
+CREATE INDEX IF NOT EXISTS idx_syllables_words_word_idx ON syllables_words(word_id, idx);
+CREATE INDEX IF NOT EXISTS idx_syllables_words_syl ON syllables_words(syl_id);
+CREATE INDEX IF NOT EXISTS idx_cards_expressions ON cards_expressions(expression_id, card_id);
+
+-- User progress and SRS indexes
+CREATE INDEX IF NOT EXISTS idx_user_progress_user ON user_progress(user_id);
+CREATE INDEX IF NOT EXISTS idx_user_progress_card ON user_progress(card_id);
+CREATE INDEX IF NOT EXISTS idx_user_progress_next_review ON user_progress(next_review_date);
+CREATE INDEX IF NOT EXISTS idx_attempts_user ON attempts(user_id);
+CREATE INDEX IF NOT EXISTS idx_attempts_card ON attempts(card_id);
+CREATE INDEX IF NOT EXISTS idx_attempts_user_resource ON attempts(user_id, card_id);
+
+-- User feature indexes
+CREATE INDEX IF NOT EXISTS idx_bookmarks ON bookmarks(word_id);
+CREATE INDEX IF NOT EXISTS idx_word_categories_category ON word_categories(category);
+
+-- Composite indexes for common query patterns
+CREATE INDEX IF NOT EXISTS idx_syllables_compound ON syllables(lang, text, tone);
+CREATE INDEX IF NOT EXISTS idx_syllables_words_compound ON syllables_words(word_id, idx, syl_id);
+CREATE INDEX IF NOT EXISTS idx_word_patterns_mixed ON word_phonetics(syllable_count, syllable_sequence, tone_sequence);
+
+/**
+ * TONE-SPECIFIC SRS SYSTEM
+ *
+ * Advanced tone pattern learning with detailed progress tracking
+ * and analytics for effective Thai tone acquisition
+ */
+
+-- Tone pattern progress tracking
+CREATE TABLE IF NOT EXISTS tone_pattern_progress (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ user_id INTEGER NOT NULL,
+ word_id INTEGER NOT NULL,
+ tone_sequence TEXT NOT NULL,
+ syllable_sequence TEXT NOT NULL,
+ repetition_count INTEGER DEFAULT 0,
+ ease_factor REAL DEFAULT 2.5,
+ interval INTEGER DEFAULT 1,
+ next_review_date INTEGER NOT NULL,
+ last_reviewed INTEGER NOT NULL,
+ is_mastered BOOLEAN DEFAULT FALSE,
+ difficulty REAL DEFAULT 2.5,
+ tone_accuracy REAL DEFAULT 0.0,
+ created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
+ updated_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
+ FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
+ FOREIGN KEY (word_id) REFERENCES expressions(id) ON DELETE CASCADE,
+ UNIQUE(user_id, word_id)
+);
+
+-- Tone pattern attempt tracking with detailed metrics
+CREATE TABLE IF NOT EXISTS tone_attempts (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ user_id INTEGER NOT NULL,
+ word_id INTEGER NOT NULL,
+ timestamp INTEGER NOT NULL,
+ accuracy REAL NOT NULL CHECK (accuracy >= 0 AND accuracy <= 1),
+ tone_accuracy REAL NOT NULL CHECK (tone_accuracy >= 0 AND tone_accuracy <= 1),
+ pronunciation_score REAL DEFAULT 0.0 CHECK (pronunciation_score >= 0 AND pronunciation_score <= 1),
+ review_time INTEGER NOT NULL,
+ difficulty REAL NOT NULL,
+ mode TEXT DEFAULT 'exploration' CHECK (mode IN ('exploration', 'practice', 'review')),
+ created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
+ FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
+ FOREIGN KEY (word_id) REFERENCES expressions(id) ON DELETE CASCADE
+);
+
+-- User tone learning preferences and settings
+CREATE TABLE IF NOT EXISTS tone_learning_settings (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ user_id INTEGER NOT NULL,
+ preferred_session_length INTEGER DEFAULT 20 CHECK (preferred_session_length > 0),
+ target_daily_reviews INTEGER DEFAULT 50 CHECK (target_daily_reviews > 0),
+ difficulty_preference REAL DEFAULT 2.5 CHECK (difficulty_preference >= 1.0 AND difficulty_preference <= 4.0),
+ audio_enabled BOOLEAN DEFAULT TRUE,
+ tone_visualization_enabled BOOLEAN DEFAULT TRUE,
+ auto_advance BOOLEAN DEFAULT FALSE,
+ created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
+ updated_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
+ FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
+ UNIQUE(user_id)
+);
+
+-- Tone pattern mastery statistics and analytics
+CREATE TABLE IF NOT EXISTS tone_mastery_stats (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ user_id INTEGER NOT NULL,
+ tone_pattern TEXT NOT NULL,
+ syllable_count INTEGER NOT NULL CHECK (syllable_count > 0),
+ total_attempts INTEGER DEFAULT 0,
+ successful_attempts INTEGER DEFAULT 0,
+ avg_accuracy REAL DEFAULT 0.0 CHECK (avg_accuracy >= 0 AND avg_accuracy <= 1),
+ avg_review_time REAL DEFAULT 0.0,
+ best_accuracy REAL DEFAULT 0.0 CHECK (best_accuracy >= 0 AND best_accuracy <= 1),
+ last_attempted INTEGER,
+ mastered_at INTEGER,
+ created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
+ updated_at INTEGER DEFAULT (strftime('%s', 'now') * 1000),
+ FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE,
+ UNIQUE(user_id, tone_pattern, syllable_count)
+);
+
+/**
+ * TONE-SPECIFIC INDEXES
+ * Optimized for tone pattern queries and SRS operations
+ */
+
+CREATE INDEX IF NOT EXISTS idx_tone_progress_user_word ON tone_pattern_progress(user_id, word_id);
+CREATE INDEX IF NOT EXISTS idx_tone_progress_next_review ON tone_pattern_progress(next_review_date);
+CREATE INDEX IF NOT EXISTS idx_tone_progress_user ON tone_pattern_progress(user_id);
+CREATE INDEX IF NOT EXISTS idx_tone_progress_difficulty ON tone_pattern_progress(difficulty);
+CREATE INDEX IF NOT EXISTS idx_tone_progress_mastered ON tone_pattern_progress(is_mastered);
+
+CREATE INDEX IF NOT EXISTS idx_tone_attempts_user_word ON tone_attempts(user_id, word_id);
+CREATE INDEX IF NOT EXISTS idx_tone_attempts_timestamp ON tone_attempts(timestamp);
+CREATE INDEX IF NOT EXISTS idx_tone_attempts_user ON tone_attempts(user_id);
+CREATE INDEX IF NOT EXISTS idx_tone_attempts_mode ON tone_attempts(mode);
+CREATE INDEX IF NOT EXISTS idx_tone_attempts_accuracy ON tone_attempts(accuracy, tone_accuracy);
+
+CREATE INDEX IF NOT EXISTS idx_tone_mastery_pattern ON tone_mastery_stats(tone_pattern, syllable_count);
+CREATE INDEX IF NOT EXISTS idx_tone_mastery_user ON tone_mastery_stats(user_id);
+CREATE INDEX IF NOT EXISTS idx_tone_mastery_accuracy ON tone_mastery_stats(avg_accuracy);
+
+/**
+ * TRIGGERS FOR TONE LEARNING SYSTEM
+ * Automatic timestamp updates and statistics calculation
+ */
+
+-- Update timestamps automatically
+CREATE TRIGGER IF NOT EXISTS update_tone_progress_updated_at
+AFTER UPDATE ON tone_pattern_progress
+FOR EACH ROW
+BEGIN
+ UPDATE tone_pattern_progress SET updated_at = (strftime('%s', 'now') * 1000) WHERE id = NEW.id;
+END;
+
+CREATE TRIGGER IF NOT EXISTS update_tone_learning_settings_updated_at
+AFTER UPDATE ON tone_learning_settings
+FOR EACH ROW
+BEGIN
+ UPDATE tone_learning_settings SET updated_at = (strftime('%s', 'now') * 1000) WHERE id = NEW.id;
+END;
+
+CREATE TRIGGER IF NOT EXISTS update_tone_mastery_updated_at
+AFTER UPDATE ON tone_mastery_stats
+FOR EACH ROW
+BEGIN
+ UPDATE tone_mastery_stats SET updated_at = (strftime('%s', 'now') * 1000) WHERE id = NEW.id;
+END;
+
+-- Update mastery stats when a pattern is mastered
+CREATE TRIGGER IF NOT EXISTS update_tone_mastery_on_progress
+AFTER UPDATE ON tone_pattern_progress
+FOR EACH ROW
+WHEN NEW.is_mastered = 1 AND OLD.is_mastered = 0
+BEGIN
+ INSERT OR REPLACE INTO tone_mastery_stats
+ (user_id, tone_pattern, syllable_count, total_attempts, successful_attempts, avg_accuracy, mastered_at, updated_at)
+ VALUES (
+ NEW.user_id,
+ NEW.tone_sequence,
+ (SELECT COUNT(*) FROM (SELECT value FROM json_each(NEW.syllable_sequence))),
+ NEW.repetition_count,
+ CAST(NEW.repetition_count * NEW.tone_accuracy AS INTEGER),
+ NEW.tone_accuracy,
+ (strftime('%s', 'now') * 1000),
+ (strftime('%s', 'now') * 1000)
+ );
+END;
+
+-- Update tone mastery stats after each attempt
+CREATE TRIGGER IF NOT EXISTS update_tone_mastery_after_attempt
+AFTER INSERT ON tone_attempts
+FOR EACH ROW
+BEGIN
+ -- Get the current tone progress
+ INSERT OR REPLACE INTO tone_mastery_stats
+ (user_id, tone_pattern, syllable_count, total_attempts, successful_attempts, avg_accuracy, avg_review_time, best_accuracy, last_attempted, updated_at)
+ SELECT
+ ta.user_id,
+ tpp.tone_sequence,
+ (SELECT COUNT(*) FROM (SELECT value FROM json_each(tpp.syllable_sequence))) as syllable_count,
+ COALESCE(tms.total_attempts + 1, 1) as total_attempts,
+ COALESCE(tms.successful_attempts + CAST(ta.tone_accuracy >= 0.8 AS INTEGER), CAST(ta.tone_accuracy >= 0.8 AS INTEGER)) as successful_attempts,
+ CASE
+ WHEN tms.total_attempts IS NULL THEN ta.tone_accuracy
+ ELSE (tms.avg_accuracy * tms.total_attempts + ta.tone_accuracy) / (tms.total_attempts + 1)
+ END as avg_accuracy,
+ CASE
+ WHEN tms.total_attempts IS NULL THEN ta.review_time
+ ELSE (tms.avg_review_time * tms.total_attempts + ta.review_time) / (tms.total_attempts + 1)
+ END as avg_review_time,
+ CASE
+ WHEN tms.best_accuracy IS NULL THEN ta.tone_accuracy
+ WHEN ta.tone_accuracy > tms.best_accuracy THEN ta.tone_accuracy
+ ELSE tms.best_accuracy
+ END as best_accuracy,
+ (strftime('%s', 'now') * 1000) as last_attempted,
+ (strftime('%s', 'now') * 1000) as updated_at
+ FROM tone_attempts ta
+ JOIN tone_pattern_progress tpp ON tpp.word_id = ta.word_id AND tpp.user_id = ta.user_id
+ LEFT JOIN tone_mastery_stats tms ON tms.user_id = ta.user_id
+ AND tms.tone_pattern = tpp.tone_sequence
+ AND tms.syllable_count = (SELECT COUNT(*) FROM (SELECT value FROM json_each(tpp.syllable_sequence)))
+ WHERE ta.rowid = NEW.rowid;
+END;
+
+/**
+ * VIEWS FOR TONE LEARNING ANALYTICS
+ * Common queries for dashboard and reporting
+ */
+
+-- User dashboard view combining traditional SRS and tone learning
+CREATE VIEW IF NOT EXISTS user_dashboard AS
+SELECT
+ u.id as user_id,
+ u.name as username,
+ COUNT(DISTINCT tpp.word_id) as total_tone_words,
+ COUNT(DISTINCT CASE WHEN tpp.is_mastered = 1 THEN tpp.word_id END) as mastered_tone_words,
+ COUNT(DISTINCT CASE WHEN tpp.next_review_date <= (strftime('%s', 'now') * 1000) THEN tpp.word_id END) as due_tone_words,
+ COUNT(DISTINCT up.card_id) as total_cards,
+ COUNT(DISTINCT CASE WHEN up.is_mastered = 1 THEN up.card_id END) as mastered_cards,
+ COUNT(DISTINCT CASE WHEN up.next_review_date <= (strftime('%s', 'now') * 1000) THEN up.card_id END) as due_cards,
+ (SELECT COUNT(*) FROM attempts WHERE user_id = u.id AND timestamp > (strftime('%s', 'now') * 1000 - 86400000)) as reviews_today,
+ (SELECT AVG(ta.tone_accuracy) FROM tone_attempts ta WHERE ta.user_id = u.id AND ta.timestamp > (strftime('%s', 'now') * 1000 - 604800000)) as weekly_tone_accuracy
+FROM users u
+LEFT JOIN tone_pattern_progress tpp ON tpp.user_id = u.id
+LEFT JOIN user_progress up ON up.user_id = u.id
+GROUP BY u.id, u.name;
+
+-- Tone learning statistics view
+CREATE VIEW IF NOT EXISTS tone_learning_stats_view AS
+SELECT
+ tms.user_id,
+ tms.tone_pattern,
+ tms.syllable_count,
+ tms.total_attempts,
+ tms.successful_attempts,
+ tms.avg_accuracy,
+ tms.avg_review_time,
+ tms.best_accuracy,
+ tms.last_attempted,
+ tms.mastered_at,
+ COUNT(tpp.word_id) as word_count_with_pattern,
+ AVG(tpp.difficulty) as avg_difficulty,
+ AVG(tpp.tone_accuracy) as avg_current_accuracy
+FROM tone_mastery_stats tms
+LEFT JOIN tone_pattern_progress tpp ON tpp.user_id = tms.user_id
+ AND tpp.tone_sequence = tms.tone_pattern
+GROUP BY tms.user_id, tms.tone_pattern, tms.syllable_count, tms.total_attempts,
+ tms.successful_attempts, tms.avg_accuracy, tms.avg_review_time,
+ tms.best_accuracy, tms.last_attempted, tms.mastered_at;
+
+/**
+ * MIGRATION NOTES:
+ * ===============
+ *
+ * TONE SRS SYSTEM ADDITIONS:
+ * - Added comprehensive tone learning tables to the unified schema
+ * - Maintains consistency with existing phonetic analysis structure
+ * - Supports both traditional SRS and tone-specific learning
+ * - Provides detailed analytics for tone pattern mastery
+ *
+ * BACKWARD COMPATIBILITY:
+ * - All existing tables and relationships preserved
+ * - New tables are additive and don't break existing functionality
+ * - Views provide unified dashboard with both learning systems
+ *
+ * PERFORMANCE:
+ * - Added specialized indexes for tone pattern queries
+ * - Optimized for Thai tone learning analytics
+ * - Supports real-time progress tracking
+ *
+ * This enhanced unified schema now provides comprehensive support for:
+ * - Thai language learning with detailed phonetic analysis
+ * - Course management and lesson structure
+ * - Traditional spaced repetition system
+ * - Advanced tone pattern learning with SRS
+ * - User management and personalization
+ * - Comprehensive learning analytics
+ *
+ * TOTAL: Unified 4 schemas while preserving all unique functionality
+ */
diff --git a/packages/db/src/semantic.ts b/packages/db/src/semantic.ts
new file mode 100644
index 0000000..a3fe44b
--- /dev/null
+++ b/packages/db/src/semantic.ts
@@ -0,0 +1,554 @@
+import { Database } from 'bun:sqlite';
+
+export interface Expression {
+ id: number;
+ spelling: string;
+ lang: string;
+ frequency?: number;
+ type: string;
+ syllables?: number;
+ notes?: string;
+ ipa?: any; // JSONB
+ prosody?: any; // JSONB
+ confidence: number;
+}
+
+export interface Sense {
+ id: number;
+ parent_id: number;
+ spelling: string;
+ pos?: string;
+ etymology?: string;
+ senses?: any; // JSONB
+ forms?: any; // JSONB
+ related?: any; // JSONB
+ ipa?: any; // JSONB
+ prosody?: any; // JSONB
+ confidence: number;
+}
+
+export interface Subsense {
+ id: number;
+ sid: number;
+ gloss: string;
+ examples?: any; // JSONB
+}
+
+export interface Derivation {
+ id: number;
+ sid: number;
+ type: string;
+ text: string;
+ tags?: any; // JSONB
+}
+
+export interface Category {
+ name: string;
+}
+
+export interface WordCategory {
+ word_id: number;
+ category: string;
+}
+
+export interface Bookmark {
+ word_id: number;
+ user_id: number;
+ notes?: string;
+ created: number;
+}
+
+export interface Language {
+ code: string;
+ name: string;
+ native_name?: string;
+ iso6392?: string;
+ english?: string;
+}
+
+export class SemanticQueries {
+ constructor(private db: Database) {}
+
+ // Language operations
+ getLanguages(): Language[] {
+ const query = this.db.query(`
+ SELECT code, name, native_name, iso6392, english
+ FROM languages
+ ORDER BY name
+ `);
+ return query.all() as Language[];
+ }
+
+ getLanguageByCode(code: string): Language | null {
+ const query = this.db.query(`
+ SELECT code, name, native_name, iso6392, english
+ FROM languages
+ WHERE code = ?
+ `);
+ return query.get(code) as Language | null;
+ }
+
+ // Expression operations
+ getExpressionById(id: number): Expression | null {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE id = ?
+ `);
+ const result = query.get(id) as Expression | null;
+ if (result) {
+ // Parse JSON fields
+ result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined;
+ result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined;
+ }
+ return result;
+ }
+
+ getExpressionsByLanguage(lang: string, limit: number = 100, offset: number = 0): Expression[] {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE lang = ?
+ ORDER BY frequency DESC, spelling
+ LIMIT ? OFFSET ?
+ `);
+ const results = query.all(lang, limit, offset) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+
+ searchExpressions(searchTerm: string, lang?: string, limit: number = 50): Expression[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE lang = ? AND spelling LIKE ?
+ ORDER BY
+ CASE
+ WHEN spelling = ? THEN 1
+ WHEN spelling LIKE ? THEN 2
+ ELSE 3
+ END,
+ frequency DESC
+ LIMIT ?
+ `);
+ const results = query.all(lang, `%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE spelling LIKE ?
+ ORDER BY
+ CASE
+ WHEN spelling = ? THEN 1
+ WHEN spelling LIKE ? THEN 2
+ ELSE 3
+ END,
+ frequency DESC
+ LIMIT ?
+ `);
+ const results = query.all(`%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ getExpressionsByType(type: string, lang?: string, limit: number = 100): Expression[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE type = ? AND lang = ?
+ ORDER BY frequency DESC, spelling
+ LIMIT ?
+ `);
+ const results = query.all(type, lang, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE type = ?
+ ORDER BY frequency DESC, spelling
+ LIMIT ?
+ `);
+ const results = query.all(type, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ getExpressionsByFrequency(lang: string, minFrequency: number = 0, maxFrequency: number = 1000, limit: number = 100): Expression[] {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE lang = ? AND frequency BETWEEN ? AND ?
+ ORDER BY frequency DESC, spelling
+ LIMIT ?
+ `);
+ const results = query.all(lang, minFrequency, maxFrequency, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+
+ // Sense operations
+ getSensesForExpression(expressionId: number): Sense[] {
+ const query = this.db.query(`
+ SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence
+ FROM senses
+ WHERE parent_id = ?
+ ORDER BY confidence DESC
+ `);
+ const results = query.all(expressionId) as Sense[];
+ return results.map(result => ({
+ ...result,
+ senses: result.senses ? JSON.parse(result.senses) : undefined,
+ forms: result.forms ? JSON.parse(result.forms) : undefined,
+ related: result.related ? JSON.parse(result.related) : undefined,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+
+ getSenseById(id: number): Sense | null {
+ const query = this.db.query(`
+ SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence
+ FROM senses
+ WHERE id = ?
+ `);
+ const result = query.get(id) as Sense | null;
+ if (result) {
+ result.senses = result.senses ? JSON.parse(result.senses) : undefined;
+ result.forms = result.forms ? JSON.parse(result.forms) : undefined;
+ result.related = result.related ? JSON.parse(result.related) : undefined;
+ result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined;
+ result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined;
+ }
+ return result;
+ }
+
+ searchSenses(searchTerm: string, lang?: string, limit: number = 50): Sense[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence,
+ e.lang
+ FROM senses s
+ JOIN expressions e ON s.parent_id = e.id
+ WHERE e.lang = ? AND (s.spelling LIKE ? OR s.pos LIKE ?)
+ ORDER BY s.confidence DESC
+ LIMIT ?
+ `);
+ const results = query.all(lang, `%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[];
+ return results.map(result => ({
+ ...result,
+ senses: result.senses ? JSON.parse(result.senses) : undefined,
+ forms: result.forms ? JSON.parse(result.forms) : undefined,
+ related: result.related ? JSON.parse(result.related) : undefined,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence,
+ e.lang
+ FROM senses s
+ JOIN expressions e ON s.parent_id = e.id
+ WHERE s.spelling LIKE ? OR s.pos LIKE ?
+ ORDER BY s.confidence DESC
+ LIMIT ?
+ `);
+ const results = query.all(`%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[];
+ return results.map(result => ({
+ ...result,
+ senses: result.senses ? JSON.parse(result.senses) : undefined,
+ forms: result.forms ? JSON.parse(result.forms) : undefined,
+ related: result.related ? JSON.parse(result.related) : undefined,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ // Subsense operations
+ getSubsensesForSense(senseId: number): Subsense[] {
+ const query = this.db.query(`
+ SELECT id, sid, gloss, examples
+ FROM subsenses
+ WHERE sid = ?
+ ORDER BY id
+ `);
+ const results = query.all(senseId) as Subsense[];
+ return results.map(result => ({
+ ...result,
+ examples: result.examples ? JSON.parse(result.examples) : undefined,
+ }));
+ }
+
+ // Derivation operations
+ getDerivationsForSense(senseId: number): Derivation[] {
+ const query = this.db.query(`
+ SELECT id, sid, type, text, tags
+ FROM derivation
+ WHERE sid = ?
+ ORDER BY type, text
+ `);
+ const results = query.all(senseId) as Derivation[];
+ return results.map(result => ({
+ ...result,
+ tags: result.tags ? JSON.parse(result.tags) : undefined,
+ }));
+ }
+
+ // Category operations
+ getCategories(): Category[] {
+ const query = this.db.query(`
+ SELECT name
+ FROM categories
+ ORDER BY name
+ `);
+ return query.all() as Category[];
+ }
+
+ getCategoriesForWord(wordId: number): Category[] {
+ const query = this.db.query(`
+ SELECT c.name
+ FROM categories c
+ JOIN word_categories wc ON c.name = wc.category
+ WHERE wc.word_id = ?
+ ORDER BY c.name
+ `);
+ return query.all(wordId) as Category[];
+ }
+
+ getWordsByCategory(category: string, lang?: string, limit: number = 100): Expression[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence
+ FROM expressions e
+ JOIN word_categories wc ON e.id = wc.word_id
+ WHERE wc.category = ? AND e.lang = ?
+ ORDER BY e.frequency DESC, e.spelling
+ LIMIT ?
+ `);
+ const results = query.all(category, lang, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence
+ FROM expressions e
+ JOIN word_categories wc ON e.id = wc.word_id
+ WHERE wc.category = ?
+ ORDER BY e.frequency DESC, e.spelling
+ LIMIT ?
+ `);
+ const results = query.all(category, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ // Bookmark operations
+ getBookmarksForUser(userId: number): Bookmark[] {
+ const query = this.db.query(`
+ SELECT word_id, user_id, notes, created
+ FROM bookmarks
+ WHERE user_id = ?
+ ORDER BY created DESC
+ `);
+ return query.all(userId) as Bookmark[];
+ }
+
+ createBookmark(userId: number, wordId: number, notes?: string): Bookmark {
+ const query = this.db.query(`
+ INSERT INTO bookmarks (word_id, user_id, notes, created)
+ VALUES (?, ?, ?, ?)
+ RETURNING word_id, user_id, notes, created
+ `);
+ return query.get(wordId, userId, notes, Date.now()) as Bookmark;
+ }
+
+ updateBookmark(userId: number, wordId: number, notes?: string): Bookmark | null {
+ const query = this.db.query(`
+ UPDATE bookmarks
+ SET notes = ?
+ WHERE user_id = ? AND word_id = ?
+ RETURNING word_id, user_id, notes, created
+ `);
+ return query.get(notes, userId, wordId) as Bookmark | null;
+ }
+
+ deleteBookmark(userId: number, wordId: number): boolean {
+ const query = this.db.query(`
+ DELETE FROM bookmarks
+ WHERE user_id = ? AND word_id = ?
+ `);
+ const result = query.run(userId, wordId);
+ return result.changes > 0;
+ }
+
+ isBookmarked(userId: number, wordId: number): boolean {
+ const query = this.db.query(`
+ SELECT 1
+ FROM bookmarks
+ WHERE user_id = ? AND word_id = ?
+ LIMIT 1
+ `);
+ return query.get(userId, wordId) !== undefined;
+ }
+
+ // Advanced search operations
+ searchByPOS(pos: string, lang?: string, limit: number = 50): Expression[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence
+ FROM expressions e
+ JOIN senses s ON e.id = s.parent_id
+ WHERE e.lang = ? AND s.pos = ?
+ ORDER BY e.frequency DESC, e.spelling
+ LIMIT ?
+ `);
+ const results = query.all(lang, pos, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence
+ FROM expressions e
+ JOIN senses s ON e.id = s.parent_id
+ WHERE s.pos = ?
+ ORDER BY e.frequency DESC, e.spelling
+ LIMIT ?
+ `);
+ const results = query.all(pos, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ searchByEtymology(term: string, lang?: string, limit: number = 50): Sense[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence,
+ e.lang
+ FROM senses s
+ JOIN expressions e ON s.parent_id = e.id
+ WHERE e.lang = ? AND s.etymology LIKE ?
+ ORDER BY s.confidence DESC
+ LIMIT ?
+ `);
+ const results = query.all(lang, `%${term}%`, limit) as Sense[];
+ return results.map(result => ({
+ ...result,
+ senses: result.senses ? JSON.parse(result.senses) : undefined,
+ forms: result.forms ? JSON.parse(result.forms) : undefined,
+ related: result.related ? JSON.parse(result.related) : undefined,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence,
+ e.lang
+ FROM senses s
+ JOIN expressions e ON s.parent_id = e.id
+ WHERE s.etymology LIKE ?
+ ORDER BY s.confidence DESC
+ LIMIT ?
+ `);
+ const results = query.all(`%${term}%`, limit) as Sense[];
+ return results.map(result => ({
+ ...result,
+ senses: result.senses ? JSON.parse(result.senses) : undefined,
+ forms: result.forms ? JSON.parse(result.forms) : undefined,
+ related: result.related ? JSON.parse(result.related) : undefined,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ // Statistics operations
+ getSemanticStats(lang: string) {
+ const query = this.db.query(`
+ SELECT
+ COUNT(DISTINCT e.id) as total_expressions,
+ COUNT(DISTINCT s.id) as total_senses,
+ COUNT(DISTINCT s.pos) as unique_pos,
+ COUNT(DISTINCT c.name) as unique_categories,
+ AVG(e.confidence) as avg_confidence,
+ COUNT(DISTINCT e.type) as unique_types
+ FROM expressions e
+ LEFT JOIN senses s ON e.id = s.parent_id
+ LEFT JOIN word_categories wc ON e.id = wc.word_id
+ LEFT JOIN categories c ON wc.category = c.name
+ WHERE e.lang = ?
+ `);
+ return query.get(lang);
+ }
+
+ getPOSTDistribution(lang: string) {
+ const query = this.db.query(`
+ SELECT
+ s.pos,
+ COUNT(DISTINCT s.parent_id) as expression_count,
+ COUNT(s.id) as sense_count,
+ ROUND(COUNT(s.id) * 100.0 / (SELECT COUNT(*) FROM senses WHERE pos IS NOT NULL), 2) as percentage
+ FROM senses s
+ JOIN expressions e ON s.parent_id = e.id
+ WHERE e.lang = ? AND s.pos IS NOT NULL
+ GROUP BY s.pos
+ ORDER BY sense_count DESC
+ `);
+ return query.all(lang);
+ }
+
+ getCategoryDistribution(lang: string) {
+ const query = this.db.query(`
+ SELECT
+ c.name as category,
+ COUNT(DISTINCT wc.word_id) as word_count,
+ ROUND(COUNT(DISTINCT wc.word_id) * 100.0 / (SELECT COUNT(*) FROM word_categories), 2) as percentage
+ FROM categories c
+ JOIN word_categories wc ON c.name = wc.category
+ JOIN expressions e ON wc.word_id = e.id
+ WHERE e.lang = ?
+ GROUP BY c.name
+ ORDER BY word_count DESC
+ `);
+ return query.all(lang);
+ }
+} \ No newline at end of file
diff --git a/packages/db/src/srs.ts b/packages/db/src/srs.ts
new file mode 100644
index 0000000..0b80960
--- /dev/null
+++ b/packages/db/src/srs.ts
@@ -0,0 +1,402 @@
+import { Database } from 'bun:sqlite';
+
+// Traditional SRS Types
+export interface SRSProgress {
+ id?: number;
+ user_id: number;
+ card_id: number;
+ repetition_count: number;
+ ease_factor: number;
+ interval: number;
+ next_review_date: number | null;
+ last_reviewed: number | null;
+ is_mastered: boolean;
+ created_at?: number;
+ updated_at?: number;
+}
+
+export interface SRSAttempt {
+ id?: number;
+ user_id: number;
+ timestamp: number;
+ card_id: number;
+ good: number; // 0 or 1
+ created_at?: number;
+}
+
+// Tone SRS Types
+export interface ToneProgress {
+ id?: number;
+ user_id: number;
+ word_id: number;
+ tone_sequence: string;
+ syllable_sequence: string;
+ repetition_count: number;
+ ease_factor: number;
+ interval: number;
+ next_review_date: number;
+ last_reviewed: number;
+ is_mastered: boolean;
+ difficulty: number;
+ tone_accuracy: number;
+ created_at?: number;
+ updated_at?: number;
+}
+
+export interface ToneAttempt {
+ id?: number;
+ user_id: number;
+ word_id: number;
+ timestamp: number;
+ accuracy: number;
+ tone_accuracy: number;
+ pronunciation_score: number;
+ review_time: number;
+ difficulty: number;
+ mode: 'exploration' | 'practice' | 'review';
+ created_at?: number;
+}
+
+export interface ToneMasteryStats {
+ id?: number;
+ user_id: number;
+ tone_pattern: string;
+ syllable_count: number;
+ total_attempts: number;
+ successful_attempts: number;
+ avg_accuracy: number;
+ avg_review_time: number;
+ best_accuracy: number;
+ last_attempted: number | null;
+ mastered_at: number | null;
+ created_at?: number;
+ updated_at?: number;
+}
+
+export class SRSQueries {
+ constructor(private db: Database) {}
+
+ // Traditional SRS Methods
+
+ /**
+ * Get or create SRS progress for a user and card
+ */
+ getOrCreateSRSProgress(userId: number, cardId: number): SRSProgress {
+ const query = this.db.query(`
+ INSERT INTO user_progress (user_id, card_id)
+ VALUES (?, ?)
+ ON CONFLICT (user_id, card_id) DO UPDATE SET
+ user_id = excluded.user_id,
+ card_id = excluded.card_id
+ RETURNING id, user_id, card_id, repetition_count, ease_factor, interval,
+ next_review_date, last_reviewed, is_mastered, created_at, updated_at
+ `);
+
+ return query.get(userId, cardId) as SRSProgress;
+ }
+
+ /**
+ * Update SRS progress after a review
+ */
+ updateSRSProgress(progress: Omit<SRSProgress, 'id' | 'created_at' | 'updated_at'>): SRSProgress {
+ const query = this.db.query(`
+ UPDATE user_progress
+ SET repetition_count = ?,
+ ease_factor = ?,
+ interval = ?,
+ next_review_date = ?,
+ last_reviewed = ?,
+ is_mastered = ?,
+ updated_at = (strftime('%s', 'now') * 1000)
+ WHERE user_id = ? AND card_id = ?
+ RETURNING id, user_id, card_id, repetition_count, ease_factor, interval,
+ next_review_date, last_reviewed, is_mastered, created_at, updated_at
+ `);
+
+ return query.get(
+ progress.repetition_count,
+ progress.ease_factor,
+ progress.interval,
+ progress.next_review_date,
+ progress.last_reviewed,
+ progress.is_mastered,
+ progress.user_id,
+ progress.card_id
+ ) as SRSProgress;
+ }
+
+ /**
+ * Record a traditional SRS attempt
+ */
+ recordSRSAttempt(attempt: Omit<SRSAttempt, 'id' | 'created_at'>): SRSAttempt {
+ const query = this.db.query(`
+ INSERT INTO attempts (user_id, timestamp, card_id, good)
+ VALUES (?, ?, ?, ?)
+ RETURNING id, user_id, timestamp, card_id, good, created_at
+ `);
+
+ return query.get(
+ attempt.user_id,
+ attempt.timestamp,
+ attempt.card_id,
+ attempt.good
+ ) as SRSAttempt;
+ }
+
+ /**
+ * Get due cards for a user
+ */
+ getDueCards(userId: number, limit: number = 20): number[] {
+ const query = this.db.query(`
+ SELECT card_id
+ FROM user_progress
+ WHERE user_id = ?
+ AND (next_review_date IS NULL OR next_review_date <= ?)
+ AND is_mastered = 0
+ ORDER BY next_review_date ASC
+ LIMIT ?
+ `);
+
+ const results = query.all(userId, Date.now(), limit) as { card_id: number }[];
+ return results.map(r => r.card_id);
+ }
+
+ /**
+ * Get SRS progress statistics for a user
+ */
+ getSRSStats(userId: number) {
+ const query = this.db.query(`
+ SELECT
+ COUNT(*) as total_cards,
+ COUNT(CASE WHEN is_mastered = 1 THEN 1 END) as mastered_cards,
+ COUNT(CASE WHEN next_review_date <= ? THEN 1 END) as due_cards,
+ COUNT(CASE WHEN next_review_date > ? THEN 1 END) as future_cards,
+ AVG(ease_factor) as avg_ease_factor,
+ AVG(interval) as avg_interval
+ FROM user_progress
+ WHERE user_id = ?
+ `);
+
+ return query.get(Date.now(), Date.now(), userId);
+ }
+
+ // Tone SRS Methods
+
+ /**
+ * Get or create tone progress for a user and word
+ */
+ getOrCreateToneProgress(userId: number, wordId: number, toneSequence: string, syllableSequence: string): ToneProgress {
+ const query = this.db.query(`
+ INSERT INTO tone_pattern_progress (user_id, word_id, tone_sequence, syllable_sequence, next_review_date, last_reviewed)
+ VALUES (?, ?, ?, ?, ?, ?)
+ ON CONFLICT (user_id, word_id) DO UPDATE SET
+ tone_sequence = excluded.tone_sequence,
+ syllable_sequence = excluded.syllable_sequence
+ RETURNING id, user_id, word_id, tone_sequence, syllable_sequence, repetition_count,
+ ease_factor, interval, next_review_date, last_reviewed, is_mastered,
+ difficulty, tone_accuracy, created_at, updated_at
+ `);
+
+ return query.get(
+ userId,
+ wordId,
+ toneSequence,
+ syllableSequence,
+ Date.now(), // next_review_date
+ Date.now() // last_reviewed
+ ) as ToneProgress;
+ }
+
+ /**
+ * Update tone progress after a practice session
+ */
+ updateToneProgress(progress: Omit<ToneProgress, 'id' | 'created_at' | 'updated_at'>): ToneProgress {
+ const query = this.db.query(`
+ UPDATE tone_pattern_progress
+ SET repetition_count = ?,
+ ease_factor = ?,
+ interval = ?,
+ next_review_date = ?,
+ last_reviewed = ?,
+ is_mastered = ?,
+ difficulty = ?,
+ tone_accuracy = ?,
+ updated_at = (strftime('%s', 'now') * 1000)
+ WHERE user_id = ? AND word_id = ?
+ RETURNING id, user_id, word_id, tone_sequence, syllable_sequence, repetition_count,
+ ease_factor, interval, next_review_date, last_reviewed, is_mastered,
+ difficulty, tone_accuracy, created_at, updated_at
+ `);
+
+ return query.get(
+ progress.repetition_count,
+ progress.ease_factor,
+ progress.interval,
+ progress.next_review_date,
+ progress.last_reviewed,
+ progress.is_mastered,
+ progress.difficulty,
+ progress.tone_accuracy,
+ progress.user_id,
+ progress.word_id
+ ) as ToneProgress;
+ }
+
+ /**
+ * Record a tone attempt
+ */
+ recordToneAttempt(attempt: Omit<ToneAttempt, 'id' | 'created_at'>): ToneAttempt {
+ const query = this.db.query(`
+ INSERT INTO tone_attempts (user_id, word_id, timestamp, accuracy, tone_accuracy,
+ pronunciation_score, review_time, difficulty, mode)
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+ RETURNING id, user_id, word_id, timestamp, accuracy, tone_accuracy,
+ pronunciation_score, review_time, difficulty, mode, created_at
+ `);
+
+ return query.get(
+ attempt.user_id,
+ attempt.word_id,
+ attempt.timestamp,
+ attempt.accuracy,
+ attempt.tone_accuracy,
+ attempt.pronunciation_score,
+ attempt.review_time,
+ attempt.difficulty,
+ attempt.mode
+ ) as ToneAttempt;
+ }
+
+ /**
+ * Get due tone words for a user
+ */
+ getDueToneWords(userId: number, limit: number = 20): number[] {
+ const query = this.db.query(`
+ SELECT word_id
+ FROM tone_pattern_progress
+ WHERE user_id = ?
+ AND next_review_date <= ?
+ AND is_mastered = 0
+ ORDER BY next_review_date ASC, difficulty ASC
+ LIMIT ?
+ `);
+
+ const results = query.all(userId, Date.now(), limit) as { word_id: number }[];
+ return results.map(r => r.word_id);
+ }
+
+ /**
+ * Get tone progress statistics for a user
+ */
+ getToneStats(userId: number) {
+ const query = this.db.query(`
+ SELECT
+ COUNT(*) as total_tone_words,
+ COUNT(CASE WHEN is_mastered = 1 THEN 1 END) as mastered_tone_words,
+ COUNT(CASE WHEN next_review_date <= ? THEN 1 END) as due_tone_words,
+ AVG(difficulty) as avg_difficulty,
+ AVG(tone_accuracy) as avg_tone_accuracy,
+ AVG(ease_factor) as avg_ease_factor
+ FROM tone_pattern_progress
+ WHERE user_id = ?
+ `);
+
+ return query.get(Date.now(), userId);
+ }
+
+ /**
+ * Get recent tone attempts for a user
+ */
+ getRecentToneAttempts(userId: number, limit: number = 10): ToneAttempt[] {
+ const query = this.db.query(`
+ SELECT id, user_id, word_id, timestamp, accuracy, tone_accuracy,
+ pronunciation_score, review_time, difficulty, mode, created_at
+ FROM tone_attempts
+ WHERE user_id = ?
+ ORDER BY timestamp DESC
+ LIMIT ?
+ `);
+
+ return query.all(userId, limit) as ToneAttempt[];
+ }
+
+ /**
+ * Get tone mastery statistics for a user
+ */
+ getToneMasteryStats(userId: number): ToneMasteryStats[] {
+ const query = this.db.query(`
+ SELECT id, user_id, tone_pattern, syllable_count, total_attempts, successful_attempts,
+ avg_accuracy, avg_review_time, best_accuracy, last_attempted, mastered_at,
+ created_at, updated_at
+ FROM tone_mastery_stats
+ WHERE user_id = ?
+ ORDER BY avg_accuracy DESC, last_attempted DESC
+ `);
+
+ return query.all(userId) as ToneMasteryStats[];
+ }
+
+ /**
+ * Get tone words for practice (prioritizes due words, then random new words)
+ */
+ getToneWordsForPractice(userId: number, limit: number = 20): number[] {
+ const query = this.db.query(`
+ WITH due_words AS (
+ SELECT word_id, 0 as priority
+ FROM tone_pattern_progress
+ WHERE user_id = ? AND next_review_date <= ? AND is_mastered = 0
+ LIMIT ?
+ ),
+ new_words AS (
+ SELECT e.id as word_id, 1 as priority
+ FROM expressions e
+ LEFT JOIN tone_pattern_progress tpp ON e.id = tpp.word_id AND tpp.user_id = ?
+ WHERE tpp.word_id IS NULL
+ AND e.lang = 'th'
+ AND e.type = 'word'
+ ORDER BY e.frequency DESC
+ LIMIT ?
+ )
+ SELECT word_id
+ FROM (
+ SELECT word_id, priority FROM due_words
+ UNION ALL
+ SELECT word_id, priority FROM new_words
+ )
+ ORDER BY priority, RANDOM()
+ LIMIT ?
+ `);
+
+ const results = query.all(userId, Date.now(), limit, userId, limit, limit) as { word_id: number }[];
+ return results.map(r => r.word_id);
+ }
+
+ /**
+ * Get user dashboard data (combines traditional SRS and tone SRS)
+ */
+ getUserDashboard(userId: number) {
+ const query = this.db.query(`
+ SELECT
+ u.id as user_id,
+ u.name as username,
+ COUNT(DISTINCT tpp.word_id) as total_tone_words,
+ COUNT(DISTINCT CASE WHEN tpp.is_mastered = 1 THEN tpp.word_id END) as mastered_tone_words,
+ COUNT(DISTINCT CASE WHEN tpp.next_review_date <= ? THEN tpp.word_id END) as due_tone_words,
+ COUNT(DISTINCT up.card_id) as total_cards,
+ COUNT(DISTINCT CASE WHEN up.is_mastered = 1 THEN up.card_id END) as mastered_cards,
+ COUNT(DISTINCT CASE WHEN up.next_review_date <= ? THEN up.card_id END) as due_cards,
+ (SELECT COUNT(*) FROM attempts WHERE user_id = u.id AND timestamp > ?) as reviews_today,
+ (SELECT AVG(ta.tone_accuracy) FROM tone_attempts ta WHERE ta.user_id = u.id AND ta.timestamp > ?) as weekly_tone_accuracy
+ FROM users u
+ LEFT JOIN tone_pattern_progress tpp ON tpp.user_id = u.id
+ LEFT JOIN user_progress up ON up.user_id = u.id
+ WHERE u.id = ?
+ GROUP BY u.id, u.name
+ `);
+
+ const weekAgo = Date.now() - (7 * 24 * 60 * 60 * 1000);
+ const today = Date.now() - (24 * 60 * 60 * 1000);
+
+ return query.get(Date.now(), Date.now(), today, weekAgo, userId);
+ }
+} \ No newline at end of file
diff --git a/packages/db/src/test.ts b/packages/db/src/test.ts
new file mode 100644
index 0000000..6767c13
--- /dev/null
+++ b/packages/db/src/test.ts
@@ -0,0 +1,149 @@
+import ndb from ".";
+import toneDb from "@/lib/db/prosodydb";
+
+// function test() {
+// const tones = ["low", "mid"];
+// const ndb = new BigQueries();
+// const nq = ndb.fetchWordsByToneAndSyls(tones);
+// const oq = toneDb.fetchWordsByToneAndSyls(tones);
+// // const senses = JSON.parse(res[0].senses_array);
+// console.log({ nq });
+// console.log(nq.length, oq.length);
+// }
+function test() {
+ const tones = ["low", "mid"];
+ console.log("wtf");
+ // const qq = ndb.fetchWordsByToneAndSyls1(tones);
+ // const oq = ndb.fetchWordsByToneAndSylsO(tones);
+ const nq = ndb.fetchWordsByToneAndSyls(tones);
+ // const roq = toneDb.fetchWordsByToneAndSyls(tones);
+ // const senses = JSON.parse(res[0].senses_array);
+ // console.log({ nq });
+ // console.log("old", oq.length);
+ // console.log("old db", roq.length);
+ console.log("new", nq);
+ // console.log("test", qq.length);
+}
+test();
+
+// `
+// `
+// WITH word_tone_sequences AS (
+// SELECT
+// w.id as word_id,
+// w.spelling,
+// wp.ipa,
+// w.frequency,
+// GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq,
+// GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
+// COUNT(sw.syl_id) as syllable_count,
+// (SELECT
+// json_group_array(json_object(
+// 'id', s.id,
+// 'pos', s.pos,
+// 'etymology', s.etymology,
+// 'confidence', s.confidence,
+// 'subsenses', (
+// SELECT json_group_array(json_object(
+// 'id', ss.id,
+// 'gloss', ss.gloss
+// ))
+// FROM subsenses ss
+// WHERE ss.sid = s.id
+// ),
+// 'examples', (
+// SELECT json_group_array(json_object(
+// 'id', ex.id,
+// 'example', ex.example,
+// 'ref', ex.ref
+// ))
+// FROM examples ex
+// WHERE ex.sid = s.id
+// ),
+// 'derivation', (
+// SELECT json_group_array(json_object(
+// 'id', d.id,
+// 'type', d.type,
+// 'text', d.text,
+// 'tags', d.tags
+// ))
+// FROM derivation d
+// WHERE d.sid = s.id
+// ),
+// 'categories', (
+// SELECT json_group_array(wc.category)
+// FROM word_categories wc
+// WHERE wc.word_id = s.id
+// )
+// ))
+// FROM senses s
+// WHERE s.parent_id = w.id
+// ) as senses_array
+// FROM expressions w
+// JOIN word_phonetics wp ON w.id = wp.word_id
+// JOIN syllables_words sw ON wp.id = sw.word_id
+// JOIN syllables sy ON sw.syl_id = sy.id
+// JOIN tones t ON sy.tone = t.id
+// GROUP BY w.id, w.spelling, w.lang, w.frequency
+// )
+// SELECT *
+// FROM word_tone_sequences
+// WHERE tone_sequence LIKE ?
+// AND syllable_count = ?
+// ORDER BY frequency ASC NULLS LAST;
+// `
+//
+//
+//
+// `
+// WITH word_tone_sequences AS (
+// SELECT
+// w.id as word_id,
+// w.spelling,
+// wp.ipa,
+// w.frequency,
+// GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq,
+// GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
+// COUNT(sw.syl_id) as syllable_count
+// FROM expressions w
+// JOIN word_phonetics wp ON w.id = wp.word_id
+// JOIN syllables_words sw ON wp.id = sw.word_id
+// JOIN syllables sy ON sw.syl_id = sy.id
+// JOIN tones t ON sy.tone = t.id
+// GROUP BY w.id, w.spelling, w.lang, w.frequency, wp.ipa
+// ),
+// sense_data AS (
+// SELECT
+// s.*,
+// GROUP_CONCAT(DISTINCT ss.id || ':' || ss.gloss, '|') as subsenses_data,
+// GROUP_CONCAT(DISTINCT ex.id || ':' || ex.example || ':' || COALESCE(ex.ref, ''), '|') as examples_data,
+// GROUP_CONCAT(DISTINCT d.id || ':' || d.type || ':' || d.text, '|') as derivation_data,
+// GROUP_CONCAT(DISTINCT wc.category, '|') as categories_data
+// FROM senses s
+// LEFT JOIN subsenses ss ON ss.sid = s.id
+// LEFT JOIN examples ex ON ex.sid = s.id
+// LEFT JOIN derivation d ON d.sid = s.id
+// LEFT JOIN word_categories wc ON wc.word_id = s.id
+// GROUP BY s.id
+// )
+// SELECT
+// wts.*,
+// (SELECT
+// json_group_array(json_object(
+// 'id', sd.id,
+// 'pos', sd.pos,
+// 'etymology', sd.etymology,
+// 'confidence', sd.confidence,
+// 'subsenses_data', sd.subsenses_data,
+// 'examples_data', sd.examples_data,
+// 'derivation_data', sd.derivation_data,
+// 'categories_data', sd.categories_data
+// ))
+// FROM sense_data sd
+// WHERE sd.parent_id = wts.word_id
+// ) as senses_array
+// FROM word_tone_sequences wts
+// WHERE wts.tone_sequence LIKE ?
+// AND wts.syllable_count = ?
+// ORDER BY wts.frequency ASC NULLS LAST;
+// `
diff --git a/packages/db/src/types.ts b/packages/db/src/types.ts
new file mode 100644
index 0000000..66c2826
--- /dev/null
+++ b/packages/db/src/types.ts
@@ -0,0 +1,84 @@
+export type Tone = {
+ letters: string;
+ numbers: number;
+ name: string;
+};
+
+export type Phoneme = {
+ ipa: string;
+ spelling: string;
+};
+export type Syllable = {
+ stressed: boolean;
+ long: boolean;
+ spelling: string;
+ ipa: string;
+ nucleus: Phoneme;
+ onset: Phoneme;
+ medial: Phoneme;
+ coda: Phoneme;
+ rhyme: Phoneme;
+ tone: Tone;
+};
+
+export type ToneQuery = Array<string | null>;
+export type MutationType = { change: string } | { keep: string };
+export type MutationOrder = MutationType[];
+
+export type PhoneticData = {
+ word_id: number;
+ tone_sequence: string;
+ syllable_count: number;
+ syl_seq: string;
+ spelling: string;
+ ipa: string;
+ frequency: number;
+};
+
+export const thaiTones: Record<string, string> = {
+ "˧": "mid",
+ "˨˩": "low",
+ "˥˩": "falling",
+ "˦˥": "high",
+ "˩˩˦": "rising",
+};
+export const thaiToneNums: Record<string, number> = {
+ "˧": 33,
+ "˨˩": 21,
+ "˥˩": 41,
+ "˦˥": 45,
+ "˩˩˦": 214,
+};
+
+export type FullWordDataDB = {
+ word_id: number;
+ tone_sequence: string;
+ syllable_count: number;
+ syl_seq: string;
+ spelling: string;
+ ipa: string;
+ frequency: number;
+ senses_array: string;
+};
+export type FullWordData = {
+ word_id: number;
+ tone_sequence: string;
+ syllable_count: number;
+ syl_seq: string;
+ spelling: string;
+ ipa: string;
+ frequency: number;
+ senses: Sense[];
+};
+export type Sense = {
+ confidence: number;
+ examples: Example[];
+ categories: string[];
+ etymology: string;
+ derivation: Derivation[];
+ pos: string;
+ forms: Array<{ form: string; tags: string[] }>;
+ glosses: string[];
+};
+export type Example = { ref: string; text: string };
+export type Derivation = { type: string; text: string; tags: any };
diff --git a/packages/db/src/users.ts b/packages/db/src/users.ts
new file mode 100644
index 0000000..ebf715c
--- /dev/null
+++ b/packages/db/src/users.ts
@@ -0,0 +1,305 @@
+import { Database } from "bun:sqlite";
+
+export interface User {
+ id: number;
+ name: string;
+ creds: string;
+}
+
+export interface Session {
+ id: string;
+ user_id: number;
+ created_at: number;
+ expires_at: number;
+ last_activity?: number;
+ ip_address?: string;
+ user_agent?: string;
+ data?: any;
+}
+
+export interface UserSettings {
+ id?: number;
+ user_id: number;
+ preferred_session_length: number;
+ target_daily_reviews: number;
+ difficulty_preference: number;
+ audio_enabled: boolean;
+ tone_visualization_enabled: boolean;
+ auto_advance: boolean;
+ created_at?: number;
+ updated_at?: number;
+}
+
+export class UserQueries {
+ constructor(private db: Database) {}
+
+ /**
+ * Create a new user
+ */
+ createUser(name: string, creds: string): User {
+ const query = this.db.query(`
+ INSERT INTO users (name, creds)
+ VALUES (?, ?)
+ RETURNING id, name, creds
+ `);
+
+ return query.get(name, creds) as User;
+ }
+
+ /**
+ * Get user by ID
+ */
+ getUserById(id: number): User | null {
+ const query = this.db.query(`
+ SELECT id, name, creds
+ FROM users
+ WHERE id = ?
+ `);
+
+ return query.get(id) as User | null;
+ }
+
+ /**
+ * Get user by name
+ */
+ getUserByName(name: string): User | null {
+ const query = this.db.query(`
+ SELECT id, name, creds
+ FROM users
+ WHERE name = ?
+ `);
+
+ return query.get(name) as User | null;
+ }
+
+ /**
+ * Create a new session for a user
+ */
+ createSession(userId: number, sessionToken: string, expiresAt: number): Session {
+ const query = this.db.query(`
+ INSERT INTO sessions (id, user_id, created_at, expires_at, last_activity)
+ VALUES (?, ?, ?, ?, ?)
+ RETURNING id, user_id, created_at, expires_at, last_activity, ip_address, user_agent, data
+ `);
+
+ const now = Date.now();
+ return query.get(sessionToken, userId, now, expiresAt, now) as Session;
+ }
+
+ /**
+ * Get session by token
+ */
+ getSessionByToken(sessionToken: string): Session | null {
+ const query = this.db.query(`
+ SELECT id, user_id, created_at, expires_at, last_activity, ip_address, user_agent, data
+ FROM sessions
+ WHERE id = ? AND expires_at > ?
+ `);
+
+ return query.get(sessionToken, Date.now()) as Session | null;
+ }
+
+ /**
+ * Get user from session token
+ */
+ getUserFromSession(sessionToken: string): User | null {
+ const query = this.db.query(`
+ SELECT u.id, u.name, u.creds
+ FROM users u
+ JOIN sessions s ON u.id = s.user_id
+ WHERE s.id = ? AND s.expires_at > ?
+ `);
+
+ return query.get(sessionToken, Date.now()) as User | null;
+ }
+
+ /**
+ * Delete session by token
+ */
+ deleteSession(sessionToken: string): boolean {
+ const query = this.db.query(`
+ DELETE FROM sessions
+ WHERE id = ?
+ `);
+
+ const result = query.run(sessionToken);
+ return result.changes > 0;
+ }
+
+ /**
+ * Clean up expired sessions
+ */
+ cleanupExpiredSessions(): number {
+ const query = this.db.query(`
+ DELETE FROM sessions
+ WHERE expires_at <= ?
+ `);
+
+ const result = query.run(Date.now());
+ return result.changes;
+ }
+
+ /**
+ * Update session last activity
+ */
+ updateSessionActivity(sessionToken: string): boolean {
+ const query = this.db.query(`
+ UPDATE sessions
+ SET last_activity = ?
+ WHERE id = ? AND expires_at > ?
+ `);
+
+ const result = query.run(Date.now(), sessionToken, Date.now());
+ return result.changes > 0;
+ }
+
+ /**
+ * Extend session expiry
+ */
+ extendSessionExpiry(sessionToken: string, newExpiry: number): boolean {
+ const query = this.db.query(`
+ UPDATE sessions
+ SET expires_at = ?, last_activity = ?
+ WHERE id = ? AND expires_at > ?
+ `);
+
+ const result = query.run(newExpiry, Date.now(), sessionToken, Date.now());
+ return result.changes > 0;
+ }
+
+ /**
+ * Get all active sessions for a user
+ */
+ getUserSessions(userId: number): Session[] {
+ const query = this.db.query(`
+ SELECT id, user_id, created_at, expires_at, last_activity, ip_address, user_agent, data
+ FROM sessions
+ WHERE user_id = ? AND expires_at > ?
+ ORDER BY last_activity DESC
+ `);
+
+ return query.all(userId, Date.now()) as Session[];
+ }
+
+ /**
+ * Delete all sessions for a user (for logout all devices)
+ */
+ deleteAllUserSessions(userId: number): number {
+ const query = this.db.query(`
+ DELETE FROM sessions
+ WHERE user_id = ?
+ `);
+
+ const result = query.run(userId);
+ return result.changes;
+ }
+
+ /**
+ * Get user settings
+ */
+ getUserSettings(userId: number): UserSettings | null {
+ const query = this.db.query(`
+ SELECT id, user_id, preferred_session_length, target_daily_reviews,
+ difficulty_preference, audio_enabled, tone_visualization_enabled,
+ auto_advance, created_at, updated_at
+ FROM tone_learning_settings
+ WHERE user_id = ?
+ `);
+
+ return query.get(userId) as UserSettings | null;
+ }
+
+ /**
+ * Create or update user settings
+ */
+ upsertUserSettings(
+ settings: Omit<UserSettings, "id" | "created_at" | "updated_at">,
+ ): UserSettings {
+ const query = this.db.query(`
+ INSERT INTO tone_learning_settings
+ (user_id, preferred_session_length, target_daily_reviews,
+ difficulty_preference, audio_enabled, tone_visualization_enabled, auto_advance)
+ VALUES (?, ?, ?, ?, ?, ?, ?)
+ ON CONFLICT (user_id) DO UPDATE SET
+ preferred_session_length = excluded.preferred_session_length,
+ target_daily_reviews = excluded.target_daily_reviews,
+ difficulty_preference = excluded.difficulty_preference,
+ audio_enabled = excluded.audio_enabled,
+ tone_visualization_enabled = excluded.tone_visualization_enabled,
+ auto_advance = excluded.auto_advance,
+ updated_at = (strftime('%s', 'now') * 1000)
+ RETURNING id, user_id, preferred_session_length, target_daily_reviews,
+ difficulty_preference, audio_enabled, tone_visualization_enabled,
+ auto_advance, created_at, updated_at
+ `);
+
+ return query.get(
+ settings.user_id,
+ settings.preferred_session_length,
+ settings.target_daily_reviews,
+ settings.difficulty_preference,
+ settings.audio_enabled,
+ settings.tone_visualization_enabled,
+ settings.auto_advance,
+ ) as UserSettings;
+ }
+
+ /**
+ * Update user settings partially
+ */
+ updateUserSettings(
+ userId: number,
+ updates: Partial<UserSettings>,
+ ): UserSettings | null {
+ const setClause = Object.keys(updates)
+ .filter(
+ (key) => key !== "id" && key !== "user_id" && key !== "created_at",
+ )
+ .map((key) => `${key} = ?`)
+ .join(", ");
+
+ if (!setClause) return null;
+
+ const values = Object.values(updates).filter((_, index) => {
+ const key = Object.keys(updates)[index];
+ return key !== "id" && key !== "user_id" && key !== "created_at";
+ });
+
+ const query = this.db.query(`
+ UPDATE tone_learning_settings
+ SET ${setClause}, updated_at = (strftime('%s', 'now') * 1000)
+ WHERE user_id = ?
+ RETURNING id, user_id, preferred_session_length, target_daily_reviews,
+ difficulty_preference, audio_enabled, tone_visualization_enabled,
+ auto_advance, created_at, updated_at
+ `);
+
+ return query.get(...values, userId) as UserSettings | null;
+ }
+
+ /**
+ * Get all users (admin function)
+ */
+ getAllUsers(): User[] {
+ const query = this.db.query(`
+ SELECT id, name, creds
+ FROM users
+ ORDER BY name
+ `);
+
+ return query.all() as User[];
+ }
+
+ /**
+ * Delete user (admin function)
+ */
+ deleteUser(userId: number): boolean {
+ const query = this.db.query(`
+ DELETE FROM users
+ WHERE id = ?
+ `);
+
+ const result = query.run(userId);
+ return result.changes > 0;
+ }
+}