| field | value |
|---|---|
| author | polwex <polwex@sortug.com> 2025-11-23 01:12:53 +0700 |
| committer | polwex <polwex@sortug.com> 2025-11-23 01:12:53 +0700 |
| commit | cb1b56f5a0eddbf77446f415f2beda57c8305f85 |
| tree | d333ca5c143063af8ee1b2f9e2d1d25f8ef2007c /packages/db |
wut
Diffstat (limited to 'packages/db')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | packages/db/.gitignore | 34 |
| -rw-r--r-- | packages/db/CLAUDE.md | 106 |
| -rw-r--r-- | packages/db/README.md | 15 |
| -rw-r--r-- | packages/db/bun.lock | 25 |
| -rw-r--r-- | packages/db/index.ts | 3 |
| -rw-r--r-- | packages/db/package.json | 11 |
| -rw-r--r-- | packages/db/src/index.ts | 253 |
| -rw-r--r-- | packages/db/src/migration.ts | 430 |
| -rw-r--r-- | packages/db/src/phonetics.ts | 523 |
| -rw-r--r-- | packages/db/src/schema.sql | 729 |
| -rw-r--r-- | packages/db/src/semantic.ts | 554 |
| -rw-r--r-- | packages/db/src/srs.ts | 402 |
| -rw-r--r-- | packages/db/src/test.ts | 149 |
| -rw-r--r-- | packages/db/src/types.ts | 84 |
| -rw-r--r-- | packages/db/src/users.ts | 305 |
| -rw-r--r-- | packages/db/tsconfig.json | 29 |
16 files changed, 3652 insertions, 0 deletions
diff --git a/packages/db/.gitignore b/packages/db/.gitignore new file mode 100644 index 0000000..a14702c --- /dev/null +++ b/packages/db/.gitignore @@ -0,0 +1,34 @@ +# dependencies (bun install) +node_modules + +# output +out +dist +*.tgz + +# code coverage +coverage +*.lcov + +# logs +logs +_.log +report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# caches +.eslintcache +.cache +*.tsbuildinfo + +# IntelliJ based IDEs +.idea + +# Finder (MacOS) folder config +.DS_Store diff --git a/packages/db/CLAUDE.md b/packages/db/CLAUDE.md new file mode 100644 index 0000000..1ee6890 --- /dev/null +++ b/packages/db/CLAUDE.md @@ -0,0 +1,106 @@ + +Default to using Bun instead of Node.js. + +- Use `bun <file>` instead of `node <file>` or `ts-node <file>` +- Use `bun test` instead of `jest` or `vitest` +- Use `bun build <file.html|file.ts|file.css>` instead of `webpack` or `esbuild` +- Use `bun install` instead of `npm install` or `yarn install` or `pnpm install` +- Use `bun run <script>` instead of `npm run <script>` or `yarn run <script>` or `pnpm run <script>` +- Bun automatically loads .env, so don't use dotenv. + +## APIs + +- `Bun.serve()` supports WebSockets, HTTPS, and routes. Don't use `express`. +- `bun:sqlite` for SQLite. Don't use `better-sqlite3`. +- `Bun.redis` for Redis. Don't use `ioredis`. +- `Bun.sql` for Postgres. Don't use `pg` or `postgres.js`. +- `WebSocket` is built-in. Don't use `ws`. +- Prefer `Bun.file` over `node:fs`'s readFile/writeFile +- Bun.$`ls` instead of execa. + +## Testing + +Use `bun test` to run tests. + +```ts#index.test.ts +import { test, expect } from "bun:test"; + +test("hello world", () => { + expect(1).toBe(1); +}); +``` + +## Frontend + +Use HTML imports with `Bun.serve()`. Don't use `vite`. HTML imports fully support React, CSS, Tailwind. + +Server: + +```ts#index.ts +import index from "./index.html" + +Bun.serve({ + routes: { + "/": index, + "/api/users/:id": { + GET: (req) => { + return new Response(JSON.stringify({ id: req.params.id })); + }, + }, + }, + // optional websocket support + websocket: { + open: (ws) => { + ws.send("Hello, world!"); + }, + message: (ws, message) => { + ws.send(message); + }, + close: (ws) => { + // handle close + } + }, + development: { + hmr: true, + console: true, + } +}) +``` + +HTML files can import .tsx, .jsx or .js files directly and Bun's bundler will transpile & bundle automatically. `<link>` tags can point to stylesheets and Bun's CSS bundler will bundle. + +```html#index.html +<html> + <body> + <h1>Hello, world!</h1> + <script type="module" src="./frontend.tsx"></script> + </body> +</html> +``` + +With the following `frontend.tsx`: + +```tsx#frontend.tsx +import React from "react"; + +// import .css files directly and it works +import './index.css'; + +import { createRoot } from "react-dom/client"; + +const root = createRoot(document.body); + +export default function Frontend() { + return <h1>Hello, world!</h1>; +} + +root.render(<Frontend />); +``` + +Then, run index.ts + +```sh +bun --hot ./index.ts +``` + +For more information, read the Bun API docs in `node_modules/bun-types/docs/**.md`. 
diff --git a/packages/db/README.md b/packages/db/README.md new file mode 100644 index 0000000..94cf634 --- /dev/null +++ b/packages/db/README.md @@ -0,0 +1,15 @@ +# @sortug/sorlang-db + +To install dependencies: + +```bash +bun install +``` + +To run: + +```bash +bun run index.ts +``` + +This project was created using `bun init` in bun v1.3.1. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime. diff --git a/packages/db/bun.lock b/packages/db/bun.lock new file mode 100644 index 0000000..cedc049 --- /dev/null +++ b/packages/db/bun.lock @@ -0,0 +1,25 @@ +{ + "lockfileVersion": 1, + "workspaces": { + "": { + "name": "@sortug/sorlang-db", + "devDependencies": { + "@types/bun": "latest", + }, + "peerDependencies": { + "typescript": "^5", + }, + }, + }, + "packages": { + "@types/bun": ["@types/bun@1.3.3", "", { "dependencies": { "bun-types": "1.3.3" } }, "sha512-ogrKbJ2X5N0kWLLFKeytG0eHDleBYtngtlbu9cyBKFtNL3cnpDZkNdQj8flVf6WTZUX5ulI9AY1oa7ljhSrp+g=="], + + "@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "bun-types": ["bun-types@1.3.3", "", { "dependencies": { "@types/node": "*" } }, "sha512-z3Xwlg7j2l9JY27x5Qn3Wlyos8YAp0kKRlrePAOjgjMGS5IG6E7Jnlx736vH9UVI4wUICwwhC9anYL++XeOgTQ=="], + + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + + "undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + } +} diff --git a/packages/db/index.ts b/packages/db/index.ts new file mode 100644 index 0000000..ceecd52 --- /dev/null +++ b/packages/db/index.ts @@ -0,0 +1,3 @@ +import DB from "./src"; + +export default DB; diff --git a/packages/db/package.json b/packages/db/package.json new file mode 100644 index 0000000..a121b42 --- /dev/null +++ b/packages/db/package.json @@ -0,0 +1,11 @@ +{ + "name": "@sortug/sorlang-db", + "module": "index.ts", + "type": "module", + "devDependencies": { + "@types/bun": "latest" + }, + "peerDependencies": { + "typescript": "^5" + } +} diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts new file mode 100644 index 0000000..6a89ee2 --- /dev/null +++ b/packages/db/src/index.ts @@ -0,0 +1,253 @@ +import { Database } from "bun:sqlite"; +import type { FullWordDataDB } from "./types"; +import { SRSQueries } from "./srs"; + +export class Queries { + db: Database; + srs: SRSQueries; + constructor() { + const db = new Database("/home/y/code/bun/sorlang/bulkdata/unified.db"); + db.exec("PRAGMA journal_mode = WAL"); // Enable Write-Ahead Logging for better performance + db.exec("PRAGMA foreign_keys = ON"); + db.exec("PRAGMA cache_size = -8000"); // Increase cache size to 8MB + db.exec("PRAGMA mmap_size = 30000000000"); + db.exec("PRAGMA temp_store = MEMORY"); // Store temp tables in memory + db.exec("PRAGMA synchronous = NORMAL"); // Slightly less safe but faster + + this.db = db; + this.srs = new SRSQueries(db); + } + fetchExpressionById(id: number) { + const query = this.db.query( + ` + SELECT * FROM expressions WHERE id = ? 
+ `, + ); + return query.get(id) as any; + } + fetchWordsByToneAndSyls1(tones: Array<string | null>) { + const toneString = tones + .reduce((acc: string, item) => { + if (!item) return `${acc},%`; + else return `${acc},${item}`; + }, "") + .slice(1); + const query = this.db.query( + ` + WITH word_tone_sequences AS ( + SELECT + wp.ipa, + GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq, + GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, + COUNT(sw.syl_id) as syllable_count + FROM word_phonetics wp + JOIN syllables_words sw ON wp.id = sw.word_id + JOIN syllables sy ON sw.syl_id = sy.id + JOIN tones t ON sy.tone = t.id + GROUP BY wp.ipa + ) + SELECT * + FROM word_tone_sequences + WHERE tone_sequence LIKE ? + AND syllable_count = ? + `, + ); + return query.all(toneString, tones.length) as any[]; + } + fetchWordsByToneAndSylsO(tones: Array<string | null>) { + const toneString = tones + .reduce((acc: string, item) => { + if (!item) return `${acc},%`; + else return `${acc},${item}`; + }, "") + .slice(1); + const query = this.db.query( + ` + WITH word_tone_sequences AS ( + SELECT + w.id as word_id, + w.spelling, + wp.ipa, + w.frequency, + GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq, + GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, + COUNT(sw.syl_id) as syllable_count + FROM expressions w + JOIN word_phonetics wp ON w.id = wp.word_id + JOIN syllables_words sw ON wp.id = sw.word_id + JOIN syllables sy ON sw.syl_id = sy.id + JOIN tones t ON sy.tone = t.id + GROUP BY w.id, w.spelling, w.lang, w.frequency + ) + SELECT word_id, + spelling, + ipa, + frequency, + syl_seq, + tone_sequence, + syllable_count + FROM word_tone_sequences + WHERE tone_sequence LIKE ? + AND syllable_count = ? + ORDER BY frequency ASC NULLS LAST; + `, + ); + // TODO combine with this old query to get both phonetic and semantic data + // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table + + return query.all(toneString, tones.length) as any[]; + } + fetchWordsByToneAndSyls(tones: Array<string | null>): FullWordDataDB[] { + const toneString = tones + .reduce((acc: string, item) => { + if (!item) return `${acc},%`; + else return `${acc},${item}`; + }, "") + .slice(1); + const query = this.db.query( + ` + WITH word_tone_sequences AS ( + SELECT + w.id as word_id, + w.spelling, + wp.ipa, + w.frequency, + GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq, + GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, + COUNT(sw.syl_id) as syllable_count, + (SELECT + json_group_array(json_object( + 'id', s.id, + 'pos', s.pos, + 'etymology', s.etymology, + 'confidence', s.confidence, + 'glosses', ( + SELECT json_group_array(ss.gloss) + FROM subsenses ss + WHERE ss.sid = s.id + ), + 'examples', ( + SELECT json_group_array(json_object( + 'example', ex.example, + 'ref', ex.ref + )) + FROM examples ex + WHERE ex.sid = s.id + ), + 'derivation', ( + SELECT json_group_array(json_object( + 'type', d.type, + 'text', d.text, + 'tags', d.tags + )) + FROM derivation d + WHERE d.sid = s.id + ), + 'categories', ( + SELECT json_group_array(wc.category) + FROM word_categories wc + WHERE wc.word_id = s.id + ) + )) + FROM senses s + WHERE s.parent_id = w.id + ) as senses_array + FROM expressions w + JOIN word_phonetics wp ON w.id = wp.word_id + JOIN syllables_words sw ON wp.id = sw.word_id + JOIN syllables sy ON sw.syl_id = sy.id + JOIN tones t ON sy.tone = t.id + GROUP BY w.id, w.spelling, w.lang, 
w.frequency + ) + SELECT * + FROM word_tone_sequences + WHERE tone_sequence LIKE ? + AND syllable_count = ? + ORDER BY frequency ASC NULLS LAST; + `, + ); + // TODO combine with this old query to get both phonetic and semantic data + // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table + + return query.all(toneString, tones.length) as any[]; + } + // TODO combine with this old query to get both phonetic and semantic data + // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table + + fetchSenses(spelling: string, lang: string) { + const query = this.db.query(` + WITH sense_data AS ( + SELECT + s.*, + GROUP_CONCAT(DISTINCT ss.id || ':' || ss.gloss, '|') as subsenses_data, + GROUP_CONCAT(DISTINCT ex.id || ':' || ex.example || ':' || COALESCE(ex.ref, ''), '|') as examples_data, + GROUP_CONCAT(DISTINCT d.id || ':' || d.type || ':' || d.text, '|') as derivation_data, + GROUP_CONCAT(DISTINCT wc.category, '|') as categories_data + FROM senses s + LEFT JOIN subsenses ss ON ss.sid = s.id + LEFT JOIN examples ex ON ex.sid = s.id + LEFT JOIN derivation d ON d.sid = s.id + LEFT JOIN word_categories wc ON wc.word_id = s.id + GROUP BY s.id + ) + SELECT e.*, + (SELECT + json_group_array(json_object( + 'id', sd.id, + 'pos', sd.pos, + 'etymology', sd.etymology, + 'confidence', sd.confidence, + 'subsenses_data', sd.subsenses_data, + 'examples_data', sd.examples_data, + 'derivation_data', sd.derivation_data, + 'categories_data', sd.categories_data + )) + FROM sense_data sd + WHERE sd.parent_id = e.id + ) as senses_array + FROM expressions e + WHERE e.spelling = ? AND e.lang = ? + ORDER BY e.frequency DESC`); + return query.all(spelling, lang); + } +} +export default Queries; +// ` +// WITH word_tone_sequences AS ( +// SELECT +// w.id as word_id, +// w.spelling, +// wp.ipa, +// w.frequency, +// s.etymology, +// s.pos, +// GROUP_CONCAT(s.text ORDER BY sw.idx) as syl_seq, +// GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, +// // GROUP_CONCAT(t.name ORDER BY s.id) as senses, +// // GROUP_CONCAT(ex.example ORDER BY s.id) as examples, +// // GROUP_CONCAT(cat.category) as tags, +// COUNT(sw.syl_id) as syllable_count +// FROM expressions w +// JOIN word_phonetics wp ON w.id = wp.word_id +// JOIN syllables_words sw ON wp.id = sw.word_id +// JOIN syllables s ON sw.syl_id = s.id +// JOIN tones t ON s.tone = t.id +// JOIN senses s ON s.parent_id = w.id +// JOIN word_categories cat ON s.id = cat.word_id +// JOIN subsenses ss ON ss.sid = s.id +// JOIN examples ex ON ex.sid = s.id +// GROUP BY w.id, w.spelling, w.lang, w.frequency +// ) +// SELECT +// word_id, +// spelling, +// ipa, +// frequency, +// syl_seq, +// tone_sequence, +// syllable_count +// FROM word_tone_sequences +// WHERE tone_sequence LIKE ? +// AND syllable_count = ? 
+// ORDER BY frequency ASC NULLS LAST; +// ` diff --git a/packages/db/src/migration.ts b/packages/db/src/migration.ts new file mode 100644 index 0000000..c4150f8 --- /dev/null +++ b/packages/db/src/migration.ts @@ -0,0 +1,430 @@ +import { Database } from "bun:sqlite"; + +const PROSODY_DB = "/home/y/code/bun/ssr/claudesorlang/bulkdata/prosody.db"; +const THAIPHON_DB = "/home/y/code/bun/ssr/claudesorlang/bulkdata/thaiphon.db"; +const UNIFIED_DB = "/home/y/code/bun/ssr/claudesorlang/bulkdata/unified.db"; + +async function migrateSenses() { + console.log("Starting database migration to unified.db...\n"); + const expressionIdMap: Map<number, number> = new Map(); + const senseIdMap: Map<number, number> = new Map(); + const wordExpressionMap: Map<number, number> = new Map(); + + // Open all databases + const prosodyDb = new Database(PROSODY_DB, { readonly: true }); + const thaiphonDb = new Database(THAIPHON_DB, { readonly: true }); + const unifiedDb = new Database(UNIFIED_DB); + + // Enable foreign keys and WAL mode for unified database + unifiedDb.run("PRAGMA foreign_keys = ON"); + unifiedDb.run("PRAGMA journal_mode = WAL"); + + try { + // Start transaction for atomic migration + unifiedDb.run("BEGIN TRANSACTION"); + + console.log("📦 Migrating from prosody.db..."); + + // 1. Migrate languages (combining both sources, preferring prosody.db) + console.log(" → Migrating categories..."); + // categories + const prosodyCats = prosodyDb + .query("SELECT * FROM categories") + .all() as any[]; + const insertCat = unifiedDb.prepare( + `INSERT INTO categories (name) VALUES (?) + ON CONFLICT(name) DO NOTHING + RETURNING rowid + `, + ); + for (const cat of prosodyCats) { + const cr = insertCat.run(cat.name.toLowerCase().trim()); + // console.log({ cr }); + if (cr.changes !== 1) console.log({ cr }); + } + console.log(` ✓ Migrated ${prosodyCats.length} categories`); + + console.log(" → Migrating languages..."); + const prosodyLangs = prosodyDb + .query("SELECT * FROM languages") + .all() as any[]; + + // Add thaiphon languages first + // Override with prosody languages (they have more complete data) + for (const lang of prosodyLangs) { + // Insert merged languages + const insertLang = unifiedDb.prepare( + "INSERT OR IGNORE INTO languages (code, name, native_name, iso6392) VALUES (?, ?, ?, ?)", + ); + + insertLang.run(lang.code, lang.name, lang.native_name, null); + } + console.log(` ✓ Migrated ${prosodyLangs.length} languages`); + + // 2. 
Migrate expressions from prosody.db + console.log(" → Migrating expressions..."); + const expressions = prosodyDb + .query("SELECT * FROM expressions") + .all() as any[]; + const insertExpression = unifiedDb.prepare( + "INSERT INTO expressions (spelling, lang, frequency, type, notes) VALUES (?, ?, ?, ?, ?)", + ); + + for (const expr of expressions) { + const ei = insertExpression.run( + expr.spelling, + expr.lang, + expr.frequency, + expr.type, + expr.notes, + ); + if (ei.changes > 0) + expressionIdMap.set(expr.id, Number(ei.lastInsertRowid)); + } + + const insertExample = unifiedDb.prepare( + "INSERT INTO examples(sid, example, ref) VALUES (?, ?, ?)", + ); + const insertSubsense = unifiedDb.prepare( + "INSERT INTO subsenses (sid, gloss) VALUES (?, ?)", + ); + const insertDerivation = unifiedDb.prepare( + "INSERT INTO derivation (sid, type, text) VALUES (?, ?, ?)", + ); + const insertCatw = unifiedDb.prepare( + "INSERT OR IGNORE INTO word_categories(category, word_id) VALUES (?, ?)", + ); + const insertWp = unifiedDb.prepare( + "INSERT INTO word_phonetics(word_id, sense_id, ipa, ipa_sequence, syllable_count, syllable_sequence, stressed, tone_sequence, tag, notes) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ); + console.log(" → Migrating senses..."); + const senses = prosodyDb.query("SELECT * FROM senses").all() as any[]; + const insertSense = unifiedDb.prepare( + "INSERT INTO senses (parent_id, spelling, pos, etymology, confidence) VALUES (?, ?, ?, ?, ?)", + ); + + for (const sense of senses) { + const parentId = expressionIdMap.get(sense.parent_id); + const senseRow = insertSense.run( + parentId, + sense.spelling, + sense.pos, + sense.etymology, + sense.confidence, + ); + const senseId = senseRow.lastInsertRowid; + senseIdMap.set(sense.id, Number(senseId)); + const deriv = ( + name: string, + type: Array<{ word: string }> | undefined, + ) => { + if (type) + for (const t of type) insertDerivation.run(senseId, name, t.word); + }; + const subsenses = JSON.parse(sense.senses); + for (const ss of subsenses) { + const { + examples, + glosses, + tags, + synonyms, + antonyms, + hypernyms, + meronyms, + hyponyms, + holonyms, + } = ss; + if (examples) + for (const ex of examples) { + const ref = ex.ref || null; + + if (ex.text) insertExample.run(senseId, ex.text, ref); + // else console.log("bad example", ex); + } + if (glosses) + for (const gloss of glosses) insertSubsense.run(senseId, gloss); + if (tags) + for (const tag of tags) { + const lw = tag.toLowerCase().trim(); + insertCat.run(lw); + insertCatw.run(lw, senseId); + } + if (synonyms) deriv("synonym", synonyms); + if (antonyms) deriv("antonym", antonyms); + if (hypernyms) deriv("hypernym", hypernyms); + if (meronyms) deriv("meronym", meronyms); + if (hyponyms) deriv("hyponym", hyponyms); + if (holonyms) deriv("holonym", holonyms); + } + const prosody = JSON.parse(sense.prosody); + const stressed = prosody?.stressedSyllable || null; + if (!sense.ipa) { + console.log("no ipa", sense); + continue; + } + const ipa = JSON.parse(sense.ipa); + for (const ip of ipa) { + insertWp.run( + parentId, + senseId, + ip.ipa, + ip.ipa, + 0, + "", + stressed, + "", + ip.tag, + null, + ); + } + } + console.log(` ✓ Migrated ${senses.length} senses`); + + console.log("\n📦 Migrating from thaiphon.db..."); + + // 6. 
Migrate phonetic components from thaiphon.db + console.log(" → Migrating phonetic components..."); + + // Tones + const tones = thaiphonDb.query("SELECT * FROM tones").all() as any; + const insertTone = unifiedDb.prepare( + "INSERT INTO tones (id, ipa, lang, name, nums) VALUES (?, ?, ?, ?, ?)", + ); + for (const tone of tones) { + insertTone.run(tone.id, tone.ipa, tone.lang, tone.name, tone.nums); + } + console.log(` ✓ Migrated ${tones.length} tones`); + + // Onsets + const onsets = thaiphonDb.query("SELECT * FROM onsets").all() as any; + const insertOnset = unifiedDb.prepare( + "INSERT INTO onsets (id, ipa, text, lang) VALUES (?, ?, ?, ?)", + ); + for (const onset of onsets) { + insertOnset.run(onset.id, onset.ipa, onset.text, onset.lang); + } + console.log(` ✓ Migrated ${onsets.length} onsets`); + + // Medials + const medials = thaiphonDb.query("SELECT * FROM medials").all() as any; + const insertMedial = unifiedDb.prepare( + "INSERT INTO medials (id, ipa, text, lang) VALUES (?, ?, ?, ?)", + ); + for (const medial of medials) { + insertMedial.run(medial.id, medial.ipa, medial.text, medial.lang) as any; + } + console.log(` ✓ Migrated ${medials.length} medials`); + + // Nucleus + const nucleus = thaiphonDb.query("SELECT * FROM nucleus").all() as any; + const insertNucleus = unifiedDb.prepare( + "INSERT INTO nucleus (id, ipa, text, lang) VALUES (?, ?, ?, ?)", + ); + for (const nucl of nucleus) { + insertNucleus.run(nucl.id, nucl.ipa, nucl.text, nucl.lang); + } + console.log(` ✓ Migrated ${nucleus.length} nucleus`); + + // Codas + const codas = thaiphonDb.query("SELECT * FROM codas").all() as any; + const insertCoda = unifiedDb.prepare( + "INSERT INTO codas (id, ipa, text, lang) VALUES (?, ?, ?, ?)", + ); + for (const coda of codas) { + insertCoda.run(coda.id, coda.ipa, coda.text, coda.lang) as any; + } + console.log(` ✓ Migrated ${codas.length} codas`); + + // Rhymes + const rhymes = thaiphonDb.query("SELECT * FROM rhymes").all() as any; + const insertRhyme = unifiedDb.prepare( + "INSERT INTO rhymes (id, ipa, text, lang) VALUES (?, ?, ?, ?)", + ); + for (const rhyme of rhymes) { + insertRhyme.run(rhyme.id, rhyme.ipa, rhyme.text, rhyme.lang); + } + console.log(` ✓ Migrated ${rhymes.length} rhymes`); + + // 7. Migrate syllables from thaiphon.db + console.log(" → Migrating syllables..."); + const syllables = thaiphonDb.query("SELECT * FROM syllables").all() as any; + const insertSyllable = unifiedDb.prepare( + "INSERT INTO syllables (id, lang, ipa, long, text, onset, medial, nucleus, coda, rhyme, tone, notes) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ); + for (const syl of syllables) { + insertSyllable.run( + syl.id, + syl.lang, + syl.ipa, + syl.long, + syl.text, + syl.onset, + syl.medial, + syl.nucleus, + syl.coda, + syl.rhyme, + syl.tone, + syl.notes, + ); + } + console.log(` ✓ Migrated ${syllables.length} syllables`); + + // 8. Migrate words from thaiphon.db (as expressions if not exists) + console.log(" → Migrating words as expressions..."); + const words = thaiphonDb.query("SELECT * FROM words").all() as any[]; + const insertWordAsExpr = unifiedDb.query( + `INSERT INTO expressions (spelling, lang, frequency, type, notes) VALUES (?, ?, ?, ?, ?) 
+ ON CONFLICT(spelling, lang) DO UPDATE SET spelling = excluded.spelling + RETURNING rowid`, + ); + + // Also keep track for syllables_words mapping + let prev: any = null; + for (const word of words) { + const result = insertWordAsExpr.get( + word.spelling, + word.lang, + word.frequency, + "word", + word.notes, + ) as { id: number }; + // 41085 + console.log({ result }); + // if (prev === result.id) { + // console.log(word, result); + // throw new Error("fucked up again"); + // } + // prev = result.id; + wordExpressionMap.set(word.id, result.id); + } + console.log(` ✓ Processed ${words.length} words`); + + console.log(" → Migrating word phonetics..."); + const wordPhonetics = thaiphonDb + .query("SELECT * FROM word_phonetics") + .all() as any[]; + const insertWordPhonetic = unifiedDb.prepare( + "INSERT INTO word_phonetics (word_id, sense_id, ipa, syllable_count, syllable_sequence, tone_sequence, ipa_sequence, tag, notes) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ); + // TODO find the sense id later I guess + const wpMap: Map<number, number> = new Map(); + for (const wp of wordPhonetics) { + const newWordId = wordExpressionMap.get(wp.word_id); + if (newWordId) { + const wr = insertWordPhonetic.run( + newWordId, + null, + wp.ipa, + wp.syllable_count, + wp.syllable_sequence, + wp.tone_sequence, + wp.ipa_sequence, + wp.tag, + wp.notes, + ); + if (wr.changes !== 0) wpMap.set(wp.word_id, Number(wr.lastInsertRowid)); + } + } + + // 9. Migrate syllables_words junction + console.log(" → Migrating syllables_words..."); + const sylWords = thaiphonDb + .query("SELECT * FROM syllables_words") + .all() as any; + const insertSylWord = unifiedDb.prepare( + "INSERT OR IGNORE INTO syllables_words (syl_id, word_id, idx, stressed) VALUES (?, ?, ?, ?)", + ); + for (const sw of sylWords) { + const newWordId = wpMap.get(sw.word_id); + if (newWordId) + insertSylWord.run(sw.syl_id, newWordId, sw.idx, sw.stressed); + } + console.log( + ` ✓ Migrated ${sylWords.length} syllable-word relationships`, + ); + + unifiedDb.run("COMMIT"); + + console.log("\n✅ Migration completed successfully!"); + console.log(" Unified database is ready at:", UNIFIED_DB); + } catch (error) { + console.error("\n❌ Migration failed:", error); + unifiedDb.run("ROLLBACK"); + throw error; + } finally { + // Close all databases + prosodyDb.close(); + // thaiphonDb.close(); + unifiedDb.close(); + } +} + +// Run the migration +migrateSenses(); +// migrateTones(); +// test(); +// + +async function test() { + console.log("Starting database migration to unified.db...\n"); + // Open all databases + const prosodyDb = new Database(PROSODY_DB, { readonly: true }); + const thaiphonDb = new Database(THAIPHON_DB, { readonly: true }); + const unifiedDb = new Database(UNIFIED_DB, { readonly: true }); + + // Enable foreign keys and WAL mode for unified database + unifiedDb.run("PRAGMA foreign_keys = ON"); + unifiedDb.run("PRAGMA journal_mode = WAL"); + const oldE = prosodyDb.query("SELECT COUNT(*) FROM expressions").get(); + const oldS = prosodyDb.query("SELECT COUNT(*) FROM senses").get(); + const oldW = thaiphonDb.query("SELECT COUNT(*) FROM words").get(); + const oldWp = thaiphonDb.query("SELECT COUNT(*) FROM word_phonetics").get(); + const oldSw = thaiphonDb.query("SELECT COUNT(*) FROM syllables_words").get(); + const oldSy = thaiphonDb.query("SELECT COUNT(*) FROM syllables").get(); + const oldTo = thaiphonDb.query("SELECT COUNT(*) FROM tones").get(); + const oldCod = thaiphonDb.query("SELECT COUNT(*) FROM codas").get(); + const oldNuc = 
thaiphonDb.query("SELECT COUNT(*) FROM nucleus").get(); + const oldOns = thaiphonDb.query("SELECT COUNT(*) FROM onsets").get(); + const oldRhy = thaiphonDb.query("SELECT COUNT(*) FROM rhymes").get(); + const oldWrh = thaiphonDb.query("SELECT COUNT(*) FROM word_rhymes").get(); + const oldWry = thaiphonDb.query("SELECT COUNT(*) FROM words_wrhymes").get(); + const newSw = unifiedDb.query("SELECT COUNT(*) FROM syllables_words").get(); + const newSy = unifiedDb.query("SELECT COUNT(*) FROM syllables").get(); + const newTo = unifiedDb.query("SELECT COUNT(*) FROM tones").get(); + const newCod = unifiedDb.query("SELECT COUNT(*) FROM codas").get(); + const newNuc = unifiedDb.query("SELECT COUNT(*) FROM nucleus").get(); + const newOns = unifiedDb.query("SELECT COUNT(*) FROM onsets").get(); + const newRhy = unifiedDb.query("SELECT COUNT(*) FROM rhymes").get(); + const newWrh = unifiedDb.query("SELECT COUNT(*) FROM word_rhymes").get(); + const newWry = unifiedDb.query("SELECT COUNT(*) FROM words_wrhymes").get(); + const newE = unifiedDb.query("SELECT COUNT(*) FROM expressions").get(); + const newS = unifiedDb.query("SELECT COUNT(*) FROM senses").get(); + const newWp = unifiedDb.query("SELECT COUNT(*) FROM word_phonetics").get(); + console.log({ + oldE, + oldS, + oldW, + oldWp, + newE, + newS, + newWp, + oldSw, + oldSy, + oldTo, + oldCod, + oldNuc, + oldOns, + oldRhy, + oldWrh, + oldWry, + newSw, + newSy, + newTo, + newCod, + newNuc, + newOns, + newRhy, + newWrh, + newWry, + }); +} diff --git a/packages/db/src/phonetics.ts b/packages/db/src/phonetics.ts new file mode 100644 index 0000000..cf62434 --- /dev/null +++ b/packages/db/src/phonetics.ts @@ -0,0 +1,523 @@ +import { Database } from 'bun:sqlite'; + +export interface Syllable { + id: number; + lang: string; + ipa: string; + long: number; + text: string; + onset: number; + medial: number; + nucleus: number; + coda: number; + rhyme: number; + tone: number; + notes?: string; +} + +export interface Tone { + id: number; + ipa: string; + lang: string; + name: string; + nums: number; +} + +export interface Onset { + id: number; + ipa: string; + text: string; + lang: string; +} + +export interface Medial { + id: number; + ipa: string; + text: string; + lang: string; +} + +export interface Nucleus { + id: number; + ipa: string; + text: string; + lang: string; +} + +export interface Coda { + id: number; + ipa: string; + text: string; + lang: string; +} + +export interface Rhyme { + id: number; + ipa: string; + text: string; + lang: string; +} + +export interface WordPhonetics { + id?: number; + word_id: number; + ipa: string; + syllable_count: number; + syllable_sequence: string; + tone_sequence: string; + ipa_sequence: string; + tag?: string; + notes?: string; +} + +export interface WordRhyme { + id: number; + text: string; + lang: string; + notes?: string; +} + +export interface Idiom { + id: number; + spelling: string; + lang: string; + frequency?: number; +} + +export interface SyllableWordMapping { + syl_id: number; + word_id: number; + idx: number; + stressed?: number; +} + +export class PhoneticsQueries { + constructor(private db: Database) {} + + // Tone operations + getTones(lang?: string): Tone[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, lang, name, nums + FROM tones + WHERE lang = ? 
+ ORDER BY nums + `); + return query.all(lang) as Tone[]; + } else { + const query = this.db.query(` + SELECT id, ipa, lang, name, nums + FROM tones + ORDER BY lang, nums + `); + return query.all() as Tone[]; + } + } + + getToneById(id: number): Tone | null { + const query = this.db.query(` + SELECT id, ipa, lang, name, nums + FROM tones + WHERE id = ? + `); + return query.get(id) as Tone | null; + } + + getToneByName(name: string, lang: string): Tone | null { + const query = this.db.query(` + SELECT id, ipa, lang, name, nums + FROM tones + WHERE name = ? AND lang = ? + `); + return query.get(name, lang) as Tone | null; + } + + // Syllable component operations + getOnsets(lang?: string): Onset[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM onsets + WHERE lang = ? + ORDER BY text + `); + return query.all(lang) as Onset[]; + } else { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM onsets + ORDER BY lang, text + `); + return query.all() as Onset[]; + } + } + + getMedials(lang?: string): Medial[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM medials + WHERE lang = ? + ORDER BY text + `); + return query.all(lang) as Medial[]; + } else { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM medials + ORDER BY lang, text + `); + return query.all() as Medial[]; + } + } + + getNucleus(lang?: string): Nucleus[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM nucleus + WHERE lang = ? + ORDER BY text + `); + return query.all(lang) as Nucleus[]; + } else { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM nucleus + ORDER BY lang, text + `); + return query.all() as Nucleus[]; + } + } + + getCodas(lang?: string): Coda[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM codas + WHERE lang = ? + ORDER BY text + `); + return query.all(lang) as Coda[]; + } else { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM codas + ORDER BY lang, text + `); + return query.all() as Coda[]; + } + } + + getRhymes(lang?: string): Rhyme[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM rhymes + WHERE lang = ? + ORDER BY text + `); + return query.all(lang) as Rhyme[]; + } else { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM rhymes + ORDER BY lang, text + `); + return query.all() as Rhyme[]; + } + } + + // Complete syllable operations + getSyllables(lang?: string, tone?: number): Syllable[] { + if (lang && tone !== undefined) { + const query = this.db.query(` + SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes, + o.text as onset_text, m.text as medial_text, n.text as nucleus_text, + c.text as coda_text, r.text as rhyme_text, t.name as tone_name + FROM syllables s + LEFT JOIN onsets o ON s.onset = o.id + LEFT JOIN medials m ON s.medial = m.id + LEFT JOIN nucleus n ON s.nucleus = n.id + LEFT JOIN codas c ON s.coda = c.id + LEFT JOIN rhymes r ON s.rhyme = r.id + LEFT JOIN tones t ON s.tone = t.id + WHERE s.lang = ? AND s.tone = ? 
+ ORDER BY s.text + `); + return query.all(lang, tone) as Syllable[]; + } else if (lang) { + const query = this.db.query(` + SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes, + o.text as onset_text, m.text as medial_text, n.text as nucleus_text, + c.text as coda_text, r.text as rhyme_text, t.name as tone_name + FROM syllables s + LEFT JOIN onsets o ON s.onset = o.id + LEFT JOIN medials m ON s.medial = m.id + LEFT JOIN nucleus n ON s.nucleus = n.id + LEFT JOIN codas c ON s.coda = c.id + LEFT JOIN rhymes r ON s.rhyme = r.id + LEFT JOIN tones t ON s.tone = t.id + WHERE s.lang = ? + ORDER BY s.text, s.tone + `); + return query.all(lang) as Syllable[]; + } else { + const query = this.db.query(` + SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes, + o.text as onset_text, m.text as medial_text, n.text as nucleus_text, + c.text as coda_text, r.text as rhyme_text, t.name as tone_name + FROM syllables s + LEFT JOIN onsets o ON s.onset = o.id + LEFT JOIN medials m ON s.medial = m.id + LEFT JOIN nucleus n ON s.nucleus = n.id + LEFT JOIN codas c ON s.coda = c.id + LEFT JOIN rhymes r ON s.rhyme = r.id + LEFT JOIN tones t ON s.tone = t.id + ORDER BY s.lang, s.text, s.tone + `); + return query.all() as Syllable[]; + } + } + + getSyllableById(id: number): Syllable | null { + const query = this.db.query(` + SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes, + o.text as onset_text, m.text as medial_text, n.text as nucleus_text, + c.text as coda_text, r.text as rhyme_text, t.name as tone_name + FROM syllables s + LEFT JOIN onsets o ON s.onset = o.id + LEFT JOIN medials m ON s.medial = m.id + LEFT JOIN nucleus n ON s.nucleus = n.id + LEFT JOIN codas c ON s.coda = c.id + LEFT JOIN rhymes r ON s.rhyme = r.id + LEFT JOIN tones t ON s.tone = t.id + WHERE s.id = ? + `); + return query.get(id) as Syllable | null; + } + + // Word phonetics operations + getWordPhonetics(wordId: number): WordPhonetics | null { + const query = this.db.query(` + SELECT id, word_id, ipa, syllable_count, syllable_sequence, tone_sequence, ipa_sequence, tag, notes + FROM word_phonetics + WHERE word_id = ? + `); + return query.get(wordId) as WordPhonetics | null; + } + + getWordsByTonePattern(toneSequence: string, syllableCount?: number, limit: number = 50): number[] { + if (syllableCount !== undefined) { + const query = this.db.query(` + SELECT word_id + FROM word_phonetics + WHERE tone_sequence = ? AND syllable_count = ? + ORDER BY id + LIMIT ? + `); + const results = query.all(toneSequence, syllableCount, limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } else { + const query = this.db.query(` + SELECT word_id + FROM word_phonetics + WHERE tone_sequence = ? + ORDER BY syllable_count, id + LIMIT ? + `); + const results = query.all(toneSequence, limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } + } + + getWordsBySyllablePattern(syllableSequence: string, limit: number = 50): number[] { + const query = this.db.query(` + SELECT word_id + FROM word_phonetics + WHERE syllable_sequence = ? + ORDER BY id + LIMIT ? 
+ `); + const results = query.all(syllableSequence, limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + searchWordsByPhoneticPattern(pattern: { + toneSequence?: string; + syllableCount?: number; + minSyllableCount?: number; + maxSyllableCount?: number; + limit?: number; + }): number[] { + let conditions: string[] = []; + let params: any[] = []; + + if (pattern.toneSequence) { + conditions.push('tone_sequence LIKE ?'); + params.push(`%${pattern.toneSequence}%`); + } + + if (pattern.syllableCount !== undefined) { + conditions.push('syllable_count = ?'); + params.push(pattern.syllableCount); + } + + if (pattern.minSyllableCount !== undefined) { + conditions.push('syllable_count >= ?'); + params.push(pattern.minSyllableCount); + } + + if (pattern.maxSyllableCount !== undefined) { + conditions.push('syllable_count <= ?'); + params.push(pattern.maxSyllableCount); + } + + const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : ''; + const limit = pattern.limit || 50; + params.push(limit); + + const query = this.db.query(` + SELECT word_id + FROM word_phonetics + ${whereClause} + ORDER BY syllable_count, id + LIMIT ? + `); + + const results = query.all(...params) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + // Word-syllable mapping operations + getSyllablesForWord(wordId: number): SyllableWordMapping[] { + const query = this.db.query(` + SELECT sw.syl_id, sw.word_id, sw.idx, sw.stressed, + s.text as syllable_text, s.ipa as syllable_ipa, s.tone, s.long, + t.name as tone_name + FROM syllables_words sw + JOIN syllables s ON sw.syl_id = s.id + LEFT JOIN tones t ON s.tone = t.id + WHERE sw.word_id = ? + ORDER BY sw.idx + `); + return query.all(wordId) as SyllableWordMapping[]; + } + + getWordsForSyllable(syllableId: number): number[] { + const query = this.db.query(` + SELECT DISTINCT word_id + FROM syllables_words + WHERE syl_id = ? + ORDER BY word_id + `); + const results = query.all(syllableId) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + // Rhyme operations + getWordRhymes(wordId: number): WordRhyme[] { + const query = this.db.query(` + SELECT wr.id, wr.text, wr.lang, wr.notes + FROM word_rhymes wr + JOIN words_wrhymes wwr ON wr.id = wwr.wrhyme_id + WHERE wwr.word_id = ? + `); + return query.all(wordId) as WordRhyme[]; + } + + getWordsByRhyme(rhymeText: string, lang: string, limit: number = 50): number[] { + const query = this.db.query(` + SELECT DISTINCT ww.word_id + FROM word_rhymes wr + JOIN words_wrhymes ww ON wr.id = ww.wrhyme_id + WHERE wr.text = ? AND wr.lang = ? + LIMIT ? + `); + const results = query.all(rhymeText, lang, limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + // Idiom operations + getIdioms(lang?: string): Idiom[] { + if (lang) { + const query = this.db.query(` + SELECT id, spelling, lang, frequency + FROM idioms + WHERE lang = ? + ORDER BY frequency DESC, spelling + `); + return query.all(lang) as Idiom[]; + } else { + const query = this.db.query(` + SELECT id, spelling, lang, frequency + FROM idioms + ORDER BY lang, frequency DESC, spelling + `); + return query.all() as Idiom[]; + } + } + + getIdiomsForWord(wordId: number): Idiom[] { + const query = this.db.query(` + SELECT i.id, i.spelling, i.lang, i.frequency + FROM idioms i + JOIN words_idioms wi ON i.id = wi.idiom_id + WHERE wi.word_id = ? 
+ ORDER BY i.frequency DESC, i.spelling + `); + return query.all(wordId) as Idiom[]; + } + + // Phonetic statistics and analysis + getPhoneticStats(lang: string) { + const query = this.db.query(` + SELECT + COUNT(DISTINCT s.id) as total_syllables, + COUNT(DISTINCT s.tone) as unique_tones, + AVG(s.long) as avg_vowel_length, + COUNT(DISTINCT s.onset) as unique_onsets, + COUNT(DISTINCT s.medial) as unique_medials, + COUNT(DISTINCT s.nucleus) as unique_nucleus, + COUNT(DISTINCT s.coda) as unique_codas, + COUNT(DISTINCT s.rhyme) as unique_rhymes, + COUNT(DISTINCT wp.id) as total_word_phonetics, + AVG(wp.syllable_count) as avg_syllables_per_word + FROM syllables s + LEFT JOIN word_phonetics wp ON 1=1 + WHERE s.lang = ? + `); + return query.get(lang); + } + + getToneDistribution(lang: string) { + const query = this.db.query(` + SELECT + t.name as tone_name, + t.nums as tone_number, + COUNT(s.id) as syllable_count, + ROUND(COUNT(s.id) * 100.0 / (SELECT COUNT(*) FROM syllables WHERE lang = ?), 2) as percentage + FROM syllables s + JOIN tones t ON s.tone = t.id + WHERE s.lang = ? + GROUP BY t.id, t.name, t.nums + ORDER BY t.nums + `); + return query.all(lang, lang); + } + + getSyllableComplexityStats(lang: string) { + const query = this.db.query(` + SELECT + syllable_count, + COUNT(*) as word_count, + ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM word_phonetics), 2) as percentage + FROM word_phonetics wp + JOIN expressions e ON wp.word_id = e.id + WHERE e.lang = ? + GROUP BY syllable_count + ORDER BY syllable_count + `); + return query.all(lang); + } +}
\ No newline at end of file diff --git a/packages/db/src/schema.sql b/packages/db/src/schema.sql new file mode 100644 index 0000000..c4a7c76 --- /dev/null +++ b/packages/db/src/schema.sql @@ -0,0 +1,729 @@ +/** + * UNIFIED DATABASE SCHEMA FOR THAI LANGUAGE LEARNING APPLICATION + * + * This file consolidates 3 redundant database schemas: + * - schema.sql (main schema with courses and SRS) + * - prosodyschema.sql (phonetic analysis with syllable breakdown) + * - senseschema.sql (semantic analysis with subsenses and derivations) + * + * REDUNDANCY ANALYSIS: + * ==================== + * + ** MAJOR CONFLICTS RESOLVED: + * 1. Languages table: + * - schema.sql: uses 'code' as PRIMARY KEY + * - prosodyschema.sql: uses 'iso6392' as PRIMARY KEY + * - RESOLVED: Unified with both code systems supported + * + * 2. Senses table: + * - schema.sql: includes ipa/prosody JSONB fields + * - senseschema.sql: missing phonetic fields + * - RESOLVED: Enhanced version with all fields + * + * 3. Word/Expression entities: + * - schema.sql: uses 'expressions' table + * - prosodyschema.sql: uses 'words' table + * - RESOLVED: Standardized on 'expressions' terminology + * + * 4. Categories tables: + * - EXACT DUPLICATES in schema.sql and senseschema.sql + * - RESOLVED: Keep one instance, remove duplicate + * + * UNIQUE FEATURES PRESERVED: + * ======================== + * + * FROM schema.sql (Main Course Learning): + * - User management: users, cookies + * - Course structure: lessons, cards, cards_lessons, cards_expressions + * - SRS tracking: user_progress, attempts + * - Bookmarks and user features + * + * FROM prosodyschema.sql (Phonetic Analysis): + * - Detailed syllable breakdown: tones, onsets, medials, nucleus, codas, rhymes + * - Phonetic patterns: word_phonetics, syllables_words + * - Rhyme analysis: word_rhymes, words_wrhymes + * + * FROM senseschema.sql (Semantic Analysis): + * - Detailed definitions: subsenses + * - Etymology tracking: derivation + * + * FOREIGN KEY RELATIONSHIPS: + * ========================= + * - Updated all references from 'words.id' to 'expressions.id' + * - Unified language references to use languages.code + * - Maintained proper cascade relationships + * + * BENEFITS OF UNIFICATION: + * ======================= + * 1. Single source of truth for database structure + * 2. Eliminated conflicting table definitions + * 3. Preserved all unique functionality + * 4. Improved foreign key consistency + * 5. 
Better support for comprehensive Thai language analysis + */ + +-- Enable foreign key support and performance optimizations +PRAGMA foreign_keys = ON; +PRAGMA journal_mode = WAL; +PRAGMA cache_size = -2000; +PRAGMA mmap_size = 30000000000; + +/** + * UNIFIED LANGUAGES TABLE + * + * RESOLVES CONFLICT between: + * - schema.sql: code (PRIMARY KEY), name, native_name + * - prosodyschema.sql: iso6392 (PRIMARY KEY), english + * + * NOW SUPPORTS multiple language code systems for maximum compatibility + */ +CREATE TABLE IF NOT EXISTS languages ( + code TEXT PRIMARY KEY, -- Primary language code (ISO 639-1 preferred) + name TEXT NOT NULL, -- English name + native_name TEXT, -- Native name + iso6392 TEXT UNIQUE, -- ISO 639-2 code alternative + CONSTRAINT name_unique UNIQUE (name) +); + +/** + * CORE CONTENT TABLES + * Standardized on 'expressions' terminology from schema.sql + * Enhanced with fields from both schemas + */ + +-- Main expressions table (formerly 'words' in prosodyschema.sql) +-- UNIFIED from schema.sql expressions and prosodyschema.sql words +CREATE TABLE IF NOT EXISTS expressions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + spelling TEXT NOT NULL, + lang TEXT NOT NULL, + frequency INTEGER, + type TEXT NOT NULL, -- word | idiom | w/e + notes TEXT, -- Additional notes (from prosodyschema.sql) + FOREIGN KEY (lang) REFERENCES languages(code), + CONSTRAINT spell_unique UNIQUE (spelling, lang) +); + +-- Enhanced senses table with phonetic capabilities +-- MERGED from schema.sql senses (with ipa/prosody) and senseschema.sql senses +CREATE TABLE IF NOT EXISTS senses ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + parent_id INTEGER NOT NULL, + spelling TEXT NOT NULL, + pos TEXT, + etymology TEXT, + confidence INTEGER NOT NULL DEFAULT 0, + FOREIGN KEY (parent_id) REFERENCES expressions(id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS categories ( + name TEXT PRIMARY KEY +); + +-- Word-category relationships +CREATE TABLE IF NOT EXISTS word_categories ( + word_id INTEGER NOT NULL, + category TEXT NOT NULL, + PRIMARY KEY (word_id, category), + FOREIGN KEY (word_id) REFERENCES senses(id) ON DELETE CASCADE, + FOREIGN KEY (category) REFERENCES categories(name) ON DELETE CASCADE +); + +-- Semantic analysis tables (FROM senseschema.sql) +CREATE TABLE IF NOT EXISTS examples( + id INTEGER PRIMARY KEY AUTOINCREMENT, + sid INTEGER NOT NULL, + example TEXT NOT NULL, -- Example sentence + ref TEXT, -- source of the quote, llm if generated etc. + FOREIGN KEY (sid) REFERENCES senses(id) ON DELETE CASCADE +); +CREATE TABLE IF NOT EXISTS subsenses ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + sid INTEGER NOT NULL, + gloss TEXT NOT NULL, -- Definition/explanation + FOREIGN KEY (sid) REFERENCES senses(id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS derivation ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + sid INTEGER NOT NULL, + type TEXT NOT NULL, -- Type of derivation (prefix, suffix, compound, etc.) 
+ text TEXT NOT NULL, -- Derivative text + tags JSONB, -- Additional metadata + FOREIGN KEY (sid) REFERENCES senses(id) ON DELETE CASCADE +); + +/** + * PHONETIC ANALYSIS TABLES (FROM prosodyschema.sql) + * + * Comprehensive syllable breakdown system for Thai language analysis + * These tables provide detailed phonetic analysis beyond basic JSONB fields + */ + + +-- Syllable component tables +CREATE TABLE IF NOT EXISTS tones ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + lang TEXT NOT NULL, + name TEXT NOT NULL, + nums INTEGER NOT NULL, -- Tone number (1-5 for Thai) + CONSTRAINT tone_unique UNIQUE (ipa, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS onsets ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + text TEXT NOT NULL, + lang TEXT NOT NULL, + CONSTRAINT onset_unique UNIQUE (ipa, text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS medials ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + text TEXT NOT NULL, + lang TEXT NOT NULL, + CONSTRAINT medial_unique UNIQUE (ipa, text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS nucleus ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + text TEXT NOT NULL, + lang TEXT NOT NULL, + CONSTRAINT nucleus_unique UNIQUE (ipa, text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS codas ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + text TEXT NOT NULL, + lang TEXT NOT NULL, + CONSTRAINT coda_unique UNIQUE (ipa, text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS rhymes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + text TEXT NOT NULL, + lang TEXT NOT NULL, + CONSTRAINT rhyme_unique UNIQUE (ipa, text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +-- Complete syllable breakdown (UNIQUE to prosodyschema.sql) +CREATE TABLE IF NOT EXISTS syllables ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + lang TEXT NOT NULL, + ipa TEXT NOT NULL, + long INTEGER NOT NULL, -- Vowel length (0=short, 1=long) + text TEXT NOT NULL, + onset INTEGER NOT NULL, + medial INTEGER NOT NULL, + nucleus INTEGER NOT NULL, + coda INTEGER NOT NULL, + rhyme INTEGER NOT NULL, + tone INTEGER NOT NULL, + notes TEXT, + FOREIGN KEY (lang) REFERENCES languages(code), + FOREIGN KEY (onset) REFERENCES onsets(id), + FOREIGN KEY (medial) REFERENCES medials(id), + FOREIGN KEY (nucleus) REFERENCES nucleus(id), + FOREIGN KEY (coda) REFERENCES codas(id), + FOREIGN KEY (rhyme) REFERENCES rhymes(id), + FOREIGN KEY (tone) REFERENCES tones(id), + CONSTRAINT syllable_unique UNIQUE (text, ipa, lang) +); + +-- Phonetic pattern storage (UNIQUE to prosodyschema.sql) +CREATE TABLE IF NOT EXISTS word_phonetics ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + word_id INTEGER NOT NULL, + sense_id INTEGER, + ipa TEXT NOT NULL, + ipa_sequence TEXT NOT NULL, -- IPA representation sequence + syllable_count INTEGER NOT NULL, + syllable_sequence TEXT NOT NULL, -- Comma-separated syllables + stressed INTEGER, -- index of stressed syllable + tone_sequence TEXT, -- Comma-separated tones + tag JSONB, -- Pattern/usage tag + notes TEXT, + FOREIGN KEY (word_id) REFERENCES expressions(id), + FOREIGN KEY (sense_id) REFERENCES senses(id) +); + +-- Rhyme analysis tables (UNIQUE to prosodyschema.sql) +CREATE TABLE IF NOT EXISTS word_rhymes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + text TEXT NOT NULL, + lang TEXT NOT NULL, 
+ notes TEXT, + CONSTRAINT wrhyme_unique UNIQUE (text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS words_wrhymes ( + word_id INTEGER NOT NULL, + wrhyme_id INTEGER NOT NULL, + FOREIGN KEY (word_id) REFERENCES word_phonetics(id), + FOREIGN KEY (wrhyme_id) REFERENCES word_rhymes(id), + PRIMARY KEY (word_id, wrhyme_id) +); + +-- Junction tables for relationships + +-- Expressions to syllables mapping +CREATE TABLE IF NOT EXISTS syllables_words ( + syl_id INTEGER NOT NULL, + word_id INTEGER NOT NULL, + idx INTEGER NOT NULL, -- Position in word + stressed INTEGER, -- Stress accent (0=none, 1=stressed) + FOREIGN KEY (syl_id) REFERENCES syllables(id), + FOREIGN KEY (word_id) REFERENCES word_phonetics(id), + PRIMARY KEY (syl_id, word_id, idx) +); + +/** + * COURSE AND LEARNING TABLES (FROM schema.sql) + * + * Complete course structure and lesson management system + */ + +-- User management +CREATE TABLE IF NOT EXISTS users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + creds TEXT NOT NULL, + CONSTRAINT name_unique UNIQUE (name) +); + +CREATE TABLE sessions ( + id TEXT PRIMARY KEY, -- Session ID/token (usually UUID or random hash) + user_id INTEGER NOT NULL, + created_at INTEGER NOT NULL, -- Timestamp when created + expires_at INTEGER NOT NULL, -- Timestamp when expires + last_activity INTEGER, -- Last activity timestamp + ip_address TEXT, -- Optional: track IP + user_agent TEXT, -- Optional: track browser/client + data JSONB, -- Optional: session-specific data + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE +); + +CREATE INDEX idx_sessions_user ON sessions(user_id); +CREATE INDEX idx_sessions_expires ON sessions(expires_at); + + -- The separation is cleaner: + -- - sessions table - Server-side session management + -- - cookies are just the client-side token that references the session ID + + -- Some apps also add: + -- - refresh_tokens table for JWT refresh tokens + -- - remember_tokens table for "remember me" functionality + -- - active_sessions view for currently valid sessions + + -- The cookie itself just stores the session ID, while all the actual session data lives in the database. This is more secure and gives you better control over session + -- invalidation. 
+-- Course structure +CREATE TABLE IF NOT EXISTS lessons ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + position INTEGER NOT NULL DEFAULT 0, + description TEXT +); + +CREATE TABLE IF NOT EXISTS lang_lessons ( + lesson_id INTEGER NOT NULL, + lang TEXT NOT NULL, + PRIMARY KEY (lang, lesson_id), + FOREIGN KEY (lang) REFERENCES languages(code), + FOREIGN KEY (lesson_id) REFERENCES lessons(id) +); + +CREATE TABLE IF NOT EXISTS cards ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + text TEXT NOT NULL, + note TEXT +); + +CREATE TABLE IF NOT EXISTS cards_expressions ( + expression_id INTEGER NOT NULL, + card_id INTEGER NOT NULL, + PRIMARY KEY (card_id, expression_id), + FOREIGN KEY (card_id) REFERENCES cards(id), + FOREIGN KEY (expression_id) REFERENCES expressions(id) +); + +CREATE TABLE IF NOT EXISTS cards_lessons ( + lesson_id INTEGER, + card_id INTEGER NOT NULL, + PRIMARY KEY (card_id, lesson_id), + FOREIGN KEY (card_id) REFERENCES cards(id), + FOREIGN KEY (lesson_id) REFERENCES lessons(id) +); + + +/** + * USER PROGRESS AND SRS TRACKING (FROM schema.sql) + * + * Complete spaced repetition system with attempt tracking + */ + +CREATE TABLE IF NOT EXISTS user_progress ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + card_id INTEGER NOT NULL, + repetition_count INTEGER DEFAULT 0, + ease_factor REAL DEFAULT 2.5, + interval INTEGER DEFAULT 1, + next_review_date INTEGER, + last_reviewed INTEGER, + is_mastered BOOLEAN DEFAULT FALSE, + FOREIGN KEY (user_id) REFERENCES users(id), + FOREIGN KEY (card_id) REFERENCES cards(id), + CONSTRAINT progress_unique UNIQUE (user_id, card_id) +); + +CREATE TABLE IF NOT EXISTS attempts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + timestamp INTEGER NOT NULL, + card_id INTEGER NOT NULL, + good INTEGER NOT NULL, -- 0 or 1 for success/failure + FOREIGN KEY (user_id) REFERENCES users(id), + FOREIGN KEY (card_id) REFERENCES cards(id) +); + +/** + * USER FEATURES (FROM schema.sql) + */ + +CREATE TABLE IF NOT EXISTS bookmarks ( + word_id INTEGER NOT NULL, + user_id INTEGER NOT NULL, + notes TEXT, + created INTEGER NOT NULL, + PRIMARY KEY (word_id, user_id), + FOREIGN KEY (word_id) REFERENCES expressions(id), + FOREIGN KEY (user_id) REFERENCES users(id) +); + +/** + * INDEXES FOR PERFORMANCE OPTIMIZATION + * + * Comprehensive indexes based on analysis of query patterns + * from all three schemas + */ + +-- Language and expression indexes +CREATE INDEX IF NOT EXISTS idx_expressions_spelling ON expressions(spelling); +CREATE INDEX IF NOT EXISTS idx_expressions_type ON expressions(type); +CREATE INDEX IF NOT EXISTS idx_expressions_lang ON expressions(lang); +CREATE INDEX IF NOT EXISTS idx_expressions_lang_freq ON expressions(lang, frequency DESC); + +-- Sense and semantic indexes +CREATE INDEX IF NOT EXISTS idx_senses_parent ON senses(parent_id); +CREATE INDEX IF NOT EXISTS idx_senses_pos ON senses(pos); +CREATE INDEX IF NOT EXISTS idx_subsenses_sid ON subsenses(sid); +CREATE INDEX IF NOT EXISTS idx_derivation_sid ON derivation(sid); + +-- Phonetic analysis indexes (FROM prosodyschema.sql) +CREATE INDEX IF NOT EXISTS idx_tones_name_lang ON tones(name, lang); +CREATE INDEX IF NOT EXISTS idx_tones_nums_lang ON tones(nums, lang); +CREATE INDEX IF NOT EXISTS idx_syllables_text_lang ON syllables(text, lang); +CREATE INDEX IF NOT EXISTS idx_syllables_tone ON syllables(tone); + +CREATE INDEX IF NOT EXISTS idx_word_phonetics_word_id ON word_phonetics(word_id); +CREATE INDEX IF NOT EXISTS idx_word_phonetics_syllables ON 
word_phonetics(syllable_sequence); +CREATE INDEX IF NOT EXISTS idx_word_phonetics_tones ON word_phonetics(tone_sequence); +CREATE INDEX IF NOT EXISTS idx_word_phonetics_count ON word_phonetics(syllable_count); + +-- Junction table indexes +CREATE INDEX IF NOT EXISTS idx_syllables_words_word_idx ON syllables_words(word_id, idx); +CREATE INDEX IF NOT EXISTS idx_syllables_words_syl ON syllables_words(syl_id); +CREATE INDEX IF NOT EXISTS idx_cards_expressions ON cards_expressions(expression_id, card_id); + +-- User progress and SRS indexes +CREATE INDEX IF NOT EXISTS idx_user_progress_user ON user_progress(user_id); +CREATE INDEX IF NOT EXISTS idx_user_progress_card ON user_progress(card_id); +CREATE INDEX IF NOT EXISTS idx_user_progress_next_review ON user_progress(next_review_date); +CREATE INDEX IF NOT EXISTS idx_attempts_user ON attempts(user_id); +CREATE INDEX IF NOT EXISTS idx_attempts_card ON attempts(card_id); +CREATE INDEX IF NOT EXISTS idx_attempts_user_resource ON attempts(user_id, card_id); + +-- User feature indexes +CREATE INDEX IF NOT EXISTS idx_bookmarks ON bookmarks(word_id); +CREATE INDEX IF NOT EXISTS idx_word_categories_category ON word_categories(category); + +-- Composite indexes for common query patterns +CREATE INDEX IF NOT EXISTS idx_syllables_compound ON syllables(lang, text, tone); +CREATE INDEX IF NOT EXISTS idx_syllables_words_compound ON syllables_words(word_id, idx, syl_id); +CREATE INDEX IF NOT EXISTS idx_word_patterns_mixed ON word_phonetics(syllable_count, syllable_sequence, tone_sequence); + +/** + * TONE-SPECIFIC SRS SYSTEM + * + * Advanced tone pattern learning with detailed progress tracking + * and analytics for effective Thai tone acquisition + */ + +-- Tone pattern progress tracking +CREATE TABLE IF NOT EXISTS tone_pattern_progress ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + word_id INTEGER NOT NULL, + tone_sequence TEXT NOT NULL, + syllable_sequence TEXT NOT NULL, + repetition_count INTEGER DEFAULT 0, + ease_factor REAL DEFAULT 2.5, + interval INTEGER DEFAULT 1, + next_review_date INTEGER NOT NULL, + last_reviewed INTEGER NOT NULL, + is_mastered BOOLEAN DEFAULT FALSE, + difficulty REAL DEFAULT 2.5, + tone_accuracy REAL DEFAULT 0.0, + created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + updated_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + FOREIGN KEY (word_id) REFERENCES expressions(id) ON DELETE CASCADE, + UNIQUE(user_id, word_id) +); + +-- Tone pattern attempt tracking with detailed metrics +CREATE TABLE IF NOT EXISTS tone_attempts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + word_id INTEGER NOT NULL, + timestamp INTEGER NOT NULL, + accuracy REAL NOT NULL CHECK (accuracy >= 0 AND accuracy <= 1), + tone_accuracy REAL NOT NULL CHECK (tone_accuracy >= 0 AND tone_accuracy <= 1), + pronunciation_score REAL DEFAULT 0.0 CHECK (pronunciation_score >= 0 AND pronunciation_score <= 1), + review_time INTEGER NOT NULL, + difficulty REAL NOT NULL, + mode TEXT DEFAULT 'exploration' CHECK (mode IN ('exploration', 'practice', 'review')), + created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + FOREIGN KEY (word_id) REFERENCES expressions(id) ON DELETE CASCADE +); + +-- User tone learning preferences and settings +CREATE TABLE IF NOT EXISTS tone_learning_settings ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + preferred_session_length INTEGER 
DEFAULT 20 CHECK (preferred_session_length > 0), + target_daily_reviews INTEGER DEFAULT 50 CHECK (target_daily_reviews > 0), + difficulty_preference REAL DEFAULT 2.5 CHECK (difficulty_preference >= 1.0 AND difficulty_preference <= 4.0), + audio_enabled BOOLEAN DEFAULT TRUE, + tone_visualization_enabled BOOLEAN DEFAULT TRUE, + auto_advance BOOLEAN DEFAULT FALSE, + created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + updated_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + UNIQUE(user_id) +); + +-- Tone pattern mastery statistics and analytics +CREATE TABLE IF NOT EXISTS tone_mastery_stats ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + tone_pattern TEXT NOT NULL, + syllable_count INTEGER NOT NULL CHECK (syllable_count > 0), + total_attempts INTEGER DEFAULT 0, + successful_attempts INTEGER DEFAULT 0, + avg_accuracy REAL DEFAULT 0.0 CHECK (avg_accuracy >= 0 AND avg_accuracy <= 1), + avg_review_time REAL DEFAULT 0.0, + best_accuracy REAL DEFAULT 0.0 CHECK (best_accuracy >= 0 AND best_accuracy <= 1), + last_attempted INTEGER, + mastered_at INTEGER, + created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + updated_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + UNIQUE(user_id, tone_pattern, syllable_count) +); + +/** + * TONE-SPECIFIC INDEXES + * Optimized for tone pattern queries and SRS operations + */ + +CREATE INDEX IF NOT EXISTS idx_tone_progress_user_word ON tone_pattern_progress(user_id, word_id); +CREATE INDEX IF NOT EXISTS idx_tone_progress_next_review ON tone_pattern_progress(next_review_date); +CREATE INDEX IF NOT EXISTS idx_tone_progress_user ON tone_pattern_progress(user_id); +CREATE INDEX IF NOT EXISTS idx_tone_progress_difficulty ON tone_pattern_progress(difficulty); +CREATE INDEX IF NOT EXISTS idx_tone_progress_mastered ON tone_pattern_progress(is_mastered); + +CREATE INDEX IF NOT EXISTS idx_tone_attempts_user_word ON tone_attempts(user_id, word_id); +CREATE INDEX IF NOT EXISTS idx_tone_attempts_timestamp ON tone_attempts(timestamp); +CREATE INDEX IF NOT EXISTS idx_tone_attempts_user ON tone_attempts(user_id); +CREATE INDEX IF NOT EXISTS idx_tone_attempts_mode ON tone_attempts(mode); +CREATE INDEX IF NOT EXISTS idx_tone_attempts_accuracy ON tone_attempts(accuracy, tone_accuracy); + +CREATE INDEX IF NOT EXISTS idx_tone_mastery_pattern ON tone_mastery_stats(tone_pattern, syllable_count); +CREATE INDEX IF NOT EXISTS idx_tone_mastery_user ON tone_mastery_stats(user_id); +CREATE INDEX IF NOT EXISTS idx_tone_mastery_accuracy ON tone_mastery_stats(avg_accuracy); + +/** + * TRIGGERS FOR TONE LEARNING SYSTEM + * Automatic timestamp updates and statistics calculation + */ + +-- Update timestamps automatically +CREATE TRIGGER IF NOT EXISTS update_tone_progress_updated_at +AFTER UPDATE ON tone_pattern_progress +FOR EACH ROW +BEGIN + UPDATE tone_pattern_progress SET updated_at = (strftime('%s', 'now') * 1000) WHERE id = NEW.id; +END; + +CREATE TRIGGER IF NOT EXISTS update_tone_learning_settings_updated_at +AFTER UPDATE ON tone_learning_settings +FOR EACH ROW +BEGIN + UPDATE tone_learning_settings SET updated_at = (strftime('%s', 'now') * 1000) WHERE id = NEW.id; +END; + +CREATE TRIGGER IF NOT EXISTS update_tone_mastery_updated_at +AFTER UPDATE ON tone_mastery_stats +FOR EACH ROW +BEGIN + UPDATE tone_mastery_stats SET updated_at = (strftime('%s', 'now') * 1000) WHERE id = NEW.id; +END; + +-- Update mastery stats when a 
pattern is mastered +CREATE TRIGGER IF NOT EXISTS update_tone_mastery_on_progress +AFTER UPDATE ON tone_pattern_progress +FOR EACH ROW +WHEN NEW.is_mastered = 1 AND OLD.is_mastered = 0 +BEGIN + INSERT OR REPLACE INTO tone_mastery_stats + (user_id, tone_pattern, syllable_count, total_attempts, successful_attempts, avg_accuracy, mastered_at, updated_at) + VALUES ( + NEW.user_id, + NEW.tone_sequence, + (SELECT COUNT(*) FROM (SELECT value FROM json_each(NEW.syllable_sequence))), + NEW.repetition_count, + CAST(NEW.repetition_count * NEW.tone_accuracy AS INTEGER), + NEW.tone_accuracy, + (strftime('%s', 'now') * 1000), + (strftime('%s', 'now') * 1000) + ); +END; + +-- Update tone mastery stats after each attempt +CREATE TRIGGER IF NOT EXISTS update_tone_mastery_after_attempt +AFTER INSERT ON tone_attempts +FOR EACH ROW +BEGIN + -- Get the current tone progress + INSERT OR REPLACE INTO tone_mastery_stats + (user_id, tone_pattern, syllable_count, total_attempts, successful_attempts, avg_accuracy, avg_review_time, best_accuracy, last_attempted, updated_at) + SELECT + ta.user_id, + tpp.tone_sequence, + (SELECT COUNT(*) FROM (SELECT value FROM json_each(tpp.syllable_sequence))) as syllable_count, + COALESCE(tms.total_attempts + 1, 1) as total_attempts, + COALESCE(tms.successful_attempts + CAST(ta.tone_accuracy >= 0.8 AS INTEGER), CAST(ta.tone_accuracy >= 0.8 AS INTEGER)) as successful_attempts, + CASE + WHEN tms.total_attempts IS NULL THEN ta.tone_accuracy + ELSE (tms.avg_accuracy * tms.total_attempts + ta.tone_accuracy) / (tms.total_attempts + 1) + END as avg_accuracy, + CASE + WHEN tms.total_attempts IS NULL THEN ta.review_time + ELSE (tms.avg_review_time * tms.total_attempts + ta.review_time) / (tms.total_attempts + 1) + END as avg_review_time, + CASE + WHEN tms.best_accuracy IS NULL THEN ta.tone_accuracy + WHEN ta.tone_accuracy > tms.best_accuracy THEN ta.tone_accuracy + ELSE tms.best_accuracy + END as best_accuracy, + (strftime('%s', 'now') * 1000) as last_attempted, + (strftime('%s', 'now') * 1000) as updated_at + FROM tone_attempts ta + JOIN tone_pattern_progress tpp ON tpp.word_id = ta.word_id AND tpp.user_id = ta.user_id + LEFT JOIN tone_mastery_stats tms ON tms.user_id = ta.user_id + AND tms.tone_pattern = tpp.tone_sequence + AND tms.syllable_count = (SELECT COUNT(*) FROM (SELECT value FROM json_each(tpp.syllable_sequence))) + WHERE ta.rowid = NEW.rowid; +END; + +/** + * VIEWS FOR TONE LEARNING ANALYTICS + * Common queries for dashboard and reporting + */ + +-- User dashboard view combining traditional SRS and tone learning +CREATE VIEW IF NOT EXISTS user_dashboard AS +SELECT + u.id as user_id, + u.name as username, + COUNT(DISTINCT tpp.word_id) as total_tone_words, + COUNT(DISTINCT CASE WHEN tpp.is_mastered = 1 THEN tpp.word_id END) as mastered_tone_words, + COUNT(DISTINCT CASE WHEN tpp.next_review_date <= (strftime('%s', 'now') * 1000) THEN tpp.word_id END) as due_tone_words, + COUNT(DISTINCT up.card_id) as total_cards, + COUNT(DISTINCT CASE WHEN up.is_mastered = 1 THEN up.card_id END) as mastered_cards, + COUNT(DISTINCT CASE WHEN up.next_review_date <= (strftime('%s', 'now') * 1000) THEN up.card_id END) as due_cards, + (SELECT COUNT(*) FROM attempts WHERE user_id = u.id AND timestamp > (strftime('%s', 'now') * 1000 - 86400000)) as reviews_today, + (SELECT AVG(ta.tone_accuracy) FROM tone_attempts ta WHERE ta.user_id = u.id AND ta.timestamp > (strftime('%s', 'now') * 1000 - 604800000)) as weekly_tone_accuracy +FROM users u +LEFT JOIN tone_pattern_progress tpp ON tpp.user_id = u.id +LEFT 
JOIN user_progress up ON up.user_id = u.id +GROUP BY u.id, u.name; + +-- Tone learning statistics view +CREATE VIEW IF NOT EXISTS tone_learning_stats_view AS +SELECT + tms.user_id, + tms.tone_pattern, + tms.syllable_count, + tms.total_attempts, + tms.successful_attempts, + tms.avg_accuracy, + tms.avg_review_time, + tms.best_accuracy, + tms.last_attempted, + tms.mastered_at, + COUNT(tpp.word_id) as word_count_with_pattern, + AVG(tpp.difficulty) as avg_difficulty, + AVG(tpp.tone_accuracy) as avg_current_accuracy +FROM tone_mastery_stats tms +LEFT JOIN tone_pattern_progress tpp ON tpp.user_id = tms.user_id + AND tpp.tone_sequence = tms.tone_pattern +GROUP BY tms.user_id, tms.tone_pattern, tms.syllable_count, tms.total_attempts, + tms.successful_attempts, tms.avg_accuracy, tms.avg_review_time, + tms.best_accuracy, tms.last_attempted, tms.mastered_at; + +/** + * MIGRATION NOTES: + * =============== + * + * TONE SRS SYSTEM ADDITIONS: + * - Added comprehensive tone learning tables to the unified schema + * - Maintains consistency with existing phonetic analysis structure + * - Supports both traditional SRS and tone-specific learning + * - Provides detailed analytics for tone pattern mastery + * + * BACKWARD COMPATIBILITY: + * - All existing tables and relationships preserved + * - New tables are additive and don't break existing functionality + * - Views provide unified dashboard with both learning systems + * + * PERFORMANCE: + * - Added specialized indexes for tone pattern queries + * - Optimized for Thai tone learning analytics + * - Supports real-time progress tracking + * + * This enhanced unified schema now provides comprehensive support for: + * - Thai language learning with detailed phonetic analysis + * - Course management and lesson structure + * - Traditional spaced repetition system + * - Advanced tone pattern learning with SRS + * - User management and personalization + * - Comprehensive learning analytics + * + * TOTAL: Unified 4 schemas while preserving all unique functionality + */ diff --git a/packages/db/src/semantic.ts b/packages/db/src/semantic.ts new file mode 100644 index 0000000..a3fe44b --- /dev/null +++ b/packages/db/src/semantic.ts @@ -0,0 +1,554 @@ +import { Database } from 'bun:sqlite'; + +export interface Expression { + id: number; + spelling: string; + lang: string; + frequency?: number; + type: string; + syllables?: number; + notes?: string; + ipa?: any; // JSONB + prosody?: any; // JSONB + confidence: number; +} + +export interface Sense { + id: number; + parent_id: number; + spelling: string; + pos?: string; + etymology?: string; + senses?: any; // JSONB + forms?: any; // JSONB + related?: any; // JSONB + ipa?: any; // JSONB + prosody?: any; // JSONB + confidence: number; +} + +export interface Subsense { + id: number; + sid: number; + gloss: string; + examples?: any; // JSONB +} + +export interface Derivation { + id: number; + sid: number; + type: string; + text: string; + tags?: any; // JSONB +} + +export interface Category { + name: string; +} + +export interface WordCategory { + word_id: number; + category: string; +} + +export interface Bookmark { + word_id: number; + user_id: number; + notes?: string; + created: number; +} + +export interface Language { + code: string; + name: string; + native_name?: string; + iso6392?: string; + english?: string; +} + +export class SemanticQueries { + constructor(private db: Database) {} + + // Language operations + getLanguages(): Language[] { + const query = this.db.query(` + SELECT code, name, native_name, iso6392, 
english + FROM languages + ORDER BY name + `); + return query.all() as Language[]; + } + + getLanguageByCode(code: string): Language | null { + const query = this.db.query(` + SELECT code, name, native_name, iso6392, english + FROM languages + WHERE code = ? + `); + return query.get(code) as Language | null; + } + + // Expression operations + getExpressionById(id: number): Expression | null { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE id = ? + `); + const result = query.get(id) as Expression | null; + if (result) { + // Parse JSON fields + result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined; + result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined; + } + return result; + } + + getExpressionsByLanguage(lang: string, limit: number = 100, offset: number = 0): Expression[] { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE lang = ? + ORDER BY frequency DESC, spelling + LIMIT ? OFFSET ? + `); + const results = query.all(lang, limit, offset) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + + searchExpressions(searchTerm: string, lang?: string, limit: number = 50): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE lang = ? AND spelling LIKE ? + ORDER BY + CASE + WHEN spelling = ? THEN 1 + WHEN spelling LIKE ? THEN 2 + ELSE 3 + END, + frequency DESC + LIMIT ? + `); + const results = query.all(lang, `%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE spelling LIKE ? + ORDER BY + CASE + WHEN spelling = ? THEN 1 + WHEN spelling LIKE ? THEN 2 + ELSE 3 + END, + frequency DESC + LIMIT ? + `); + const results = query.all(`%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + getExpressionsByType(type: string, lang?: string, limit: number = 100): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE type = ? AND lang = ? + ORDER BY frequency DESC, spelling + LIMIT ? + `); + const results = query.all(type, lang, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE type = ? + ORDER BY frequency DESC, spelling + LIMIT ? 
+ `); + const results = query.all(type, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + getExpressionsByFrequency(lang: string, minFrequency: number = 0, maxFrequency: number = 1000, limit: number = 100): Expression[] { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE lang = ? AND frequency BETWEEN ? AND ? + ORDER BY frequency DESC, spelling + LIMIT ? + `); + const results = query.all(lang, minFrequency, maxFrequency, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + + // Sense operations + getSensesForExpression(expressionId: number): Sense[] { + const query = this.db.query(` + SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence + FROM senses + WHERE parent_id = ? + ORDER BY confidence DESC + `); + const results = query.all(expressionId) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + + getSenseById(id: number): Sense | null { + const query = this.db.query(` + SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence + FROM senses + WHERE id = ? + `); + const result = query.get(id) as Sense | null; + if (result) { + result.senses = result.senses ? JSON.parse(result.senses) : undefined; + result.forms = result.forms ? JSON.parse(result.forms) : undefined; + result.related = result.related ? JSON.parse(result.related) : undefined; + result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined; + result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined; + } + return result; + } + + searchSenses(searchTerm: string, lang?: string, limit: number = 50): Sense[] { + if (lang) { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE e.lang = ? AND (s.spelling LIKE ? OR s.pos LIKE ?) + ORDER BY s.confidence DESC + LIMIT ? + `); + const results = query.all(lang, `%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE s.spelling LIKE ? OR s.pos LIKE ? + ORDER BY s.confidence DESC + LIMIT ? 
+ `); + const results = query.all(`%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + // Subsense operations + getSubsensesForSense(senseId: number): Subsense[] { + const query = this.db.query(` + SELECT id, sid, gloss, examples + FROM subsenses + WHERE sid = ? + ORDER BY id + `); + const results = query.all(senseId) as Subsense[]; + return results.map(result => ({ + ...result, + examples: result.examples ? JSON.parse(result.examples) : undefined, + })); + } + + // Derivation operations + getDerivationsForSense(senseId: number): Derivation[] { + const query = this.db.query(` + SELECT id, sid, type, text, tags + FROM derivation + WHERE sid = ? + ORDER BY type, text + `); + const results = query.all(senseId) as Derivation[]; + return results.map(result => ({ + ...result, + tags: result.tags ? JSON.parse(result.tags) : undefined, + })); + } + + // Category operations + getCategories(): Category[] { + const query = this.db.query(` + SELECT name + FROM categories + ORDER BY name + `); + return query.all() as Category[]; + } + + getCategoriesForWord(wordId: number): Category[] { + const query = this.db.query(` + SELECT c.name + FROM categories c + JOIN word_categories wc ON c.name = wc.category + WHERE wc.word_id = ? + ORDER BY c.name + `); + return query.all(wordId) as Category[]; + } + + getWordsByCategory(category: string, lang?: string, limit: number = 100): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN word_categories wc ON e.id = wc.word_id + WHERE wc.category = ? AND e.lang = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(category, lang, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN word_categories wc ON e.id = wc.word_id + WHERE wc.category = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(category, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + // Bookmark operations + getBookmarksForUser(userId: number): Bookmark[] { + const query = this.db.query(` + SELECT word_id, user_id, notes, created + FROM bookmarks + WHERE user_id = ? + ORDER BY created DESC + `); + return query.all(userId) as Bookmark[]; + } + + createBookmark(userId: number, wordId: number, notes?: string): Bookmark { + const query = this.db.query(` + INSERT INTO bookmarks (word_id, user_id, notes, created) + VALUES (?, ?, ?, ?) 
+ RETURNING word_id, user_id, notes, created + `); + return query.get(wordId, userId, notes, Date.now()) as Bookmark; + } + + updateBookmark(userId: number, wordId: number, notes?: string): Bookmark | null { + const query = this.db.query(` + UPDATE bookmarks + SET notes = ? + WHERE user_id = ? AND word_id = ? + RETURNING word_id, user_id, notes, created + `); + return query.get(notes, userId, wordId) as Bookmark | null; + } + + deleteBookmark(userId: number, wordId: number): boolean { + const query = this.db.query(` + DELETE FROM bookmarks + WHERE user_id = ? AND word_id = ? + `); + const result = query.run(userId, wordId); + return result.changes > 0; + } + + isBookmarked(userId: number, wordId: number): boolean { + const query = this.db.query(` + SELECT 1 + FROM bookmarks + WHERE user_id = ? AND word_id = ? + LIMIT 1 + `); + return query.get(userId, wordId) !== undefined; + } + + // Advanced search operations + searchByPOS(pos: string, lang?: string, limit: number = 50): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN senses s ON e.id = s.parent_id + WHERE e.lang = ? AND s.pos = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(lang, pos, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN senses s ON e.id = s.parent_id + WHERE s.pos = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(pos, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + searchByEtymology(term: string, lang?: string, limit: number = 50): Sense[] { + if (lang) { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE e.lang = ? AND s.etymology LIKE ? + ORDER BY s.confidence DESC + LIMIT ? + `); + const results = query.all(lang, `%${term}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE s.etymology LIKE ? + ORDER BY s.confidence DESC + LIMIT ? + `); + const results = query.all(`%${term}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? 
JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + // Statistics operations + getSemanticStats(lang: string) { + const query = this.db.query(` + SELECT + COUNT(DISTINCT e.id) as total_expressions, + COUNT(DISTINCT s.id) as total_senses, + COUNT(DISTINCT s.pos) as unique_pos, + COUNT(DISTINCT c.name) as unique_categories, + AVG(e.confidence) as avg_confidence, + COUNT(DISTINCT e.type) as unique_types + FROM expressions e + LEFT JOIN senses s ON e.id = s.parent_id + LEFT JOIN word_categories wc ON e.id = wc.word_id + LEFT JOIN categories c ON wc.category = c.name + WHERE e.lang = ? + `); + return query.get(lang); + } + + getPOSTDistribution(lang: string) { + const query = this.db.query(` + SELECT + s.pos, + COUNT(DISTINCT s.parent_id) as expression_count, + COUNT(s.id) as sense_count, + ROUND(COUNT(s.id) * 100.0 / (SELECT COUNT(*) FROM senses WHERE pos IS NOT NULL), 2) as percentage + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE e.lang = ? AND s.pos IS NOT NULL + GROUP BY s.pos + ORDER BY sense_count DESC + `); + return query.all(lang); + } + + getCategoryDistribution(lang: string) { + const query = this.db.query(` + SELECT + c.name as category, + COUNT(DISTINCT wc.word_id) as word_count, + ROUND(COUNT(DISTINCT wc.word_id) * 100.0 / (SELECT COUNT(*) FROM word_categories), 2) as percentage + FROM categories c + JOIN word_categories wc ON c.name = wc.category + JOIN expressions e ON wc.word_id = e.id + WHERE e.lang = ? + GROUP BY c.name + ORDER BY word_count DESC + `); + return query.all(lang); + } +}
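A minimal lookup sketch for the `SemanticQueries` class above, assuming the unified schema has already been applied to a local SQLite file; the database path, language code, and search term are illustrative only.

```ts
import { Database } from "bun:sqlite";
import { SemanticQueries } from "./semantic";

// Open the dictionary database read-only (the path is an assumption for this sketch).
const db = new Database("dictionary.sqlite", { readonly: true });
const semantic = new SemanticQueries(db);

// Find Thai expressions matching a search term, then pull their senses and glosses.
for (const expr of semantic.searchExpressions("น้ำ", "th", 10)) {
  for (const sense of semantic.getSensesForExpression(expr.id)) {
    const subsenses = semantic.getSubsensesForSense(sense.id);
    console.log(expr.spelling, sense.pos, subsenses.map((s) => s.gloss));
  }
}
```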
\ No newline at end of file diff --git a/packages/db/src/srs.ts b/packages/db/src/srs.ts new file mode 100644 index 0000000..0b80960 --- /dev/null +++ b/packages/db/src/srs.ts @@ -0,0 +1,402 @@ +import { Database } from 'bun:sqlite'; + +// Traditional SRS Types +export interface SRSProgress { + id?: number; + user_id: number; + card_id: number; + repetition_count: number; + ease_factor: number; + interval: number; + next_review_date: number | null; + last_reviewed: number | null; + is_mastered: boolean; + created_at?: number; + updated_at?: number; +} + +export interface SRSAttempt { + id?: number; + user_id: number; + timestamp: number; + card_id: number; + good: number; // 0 or 1 + created_at?: number; +} + +// Tone SRS Types +export interface ToneProgress { + id?: number; + user_id: number; + word_id: number; + tone_sequence: string; + syllable_sequence: string; + repetition_count: number; + ease_factor: number; + interval: number; + next_review_date: number; + last_reviewed: number; + is_mastered: boolean; + difficulty: number; + tone_accuracy: number; + created_at?: number; + updated_at?: number; +} + +export interface ToneAttempt { + id?: number; + user_id: number; + word_id: number; + timestamp: number; + accuracy: number; + tone_accuracy: number; + pronunciation_score: number; + review_time: number; + difficulty: number; + mode: 'exploration' | 'practice' | 'review'; + created_at?: number; +} + +export interface ToneMasteryStats { + id?: number; + user_id: number; + tone_pattern: string; + syllable_count: number; + total_attempts: number; + successful_attempts: number; + avg_accuracy: number; + avg_review_time: number; + best_accuracy: number; + last_attempted: number | null; + mastered_at: number | null; + created_at?: number; + updated_at?: number; +} + +export class SRSQueries { + constructor(private db: Database) {} + + // Traditional SRS Methods + + /** + * Get or create SRS progress for a user and card + */ + getOrCreateSRSProgress(userId: number, cardId: number): SRSProgress { + const query = this.db.query(` + INSERT INTO user_progress (user_id, card_id) + VALUES (?, ?) + ON CONFLICT (user_id, card_id) DO UPDATE SET + user_id = excluded.user_id, + card_id = excluded.card_id + RETURNING id, user_id, card_id, repetition_count, ease_factor, interval, + next_review_date, last_reviewed, is_mastered, created_at, updated_at + `); + + return query.get(userId, cardId) as SRSProgress; + } + + /** + * Update SRS progress after a review + */ + updateSRSProgress(progress: Omit<SRSProgress, 'id' | 'created_at' | 'updated_at'>): SRSProgress { + const query = this.db.query(` + UPDATE user_progress + SET repetition_count = ?, + ease_factor = ?, + interval = ?, + next_review_date = ?, + last_reviewed = ?, + is_mastered = ?, + updated_at = (strftime('%s', 'now') * 1000) + WHERE user_id = ? AND card_id = ? + RETURNING id, user_id, card_id, repetition_count, ease_factor, interval, + next_review_date, last_reviewed, is_mastered, created_at, updated_at + `); + + return query.get( + progress.repetition_count, + progress.ease_factor, + progress.interval, + progress.next_review_date, + progress.last_reviewed, + progress.is_mastered, + progress.user_id, + progress.card_id + ) as SRSProgress; + } + + /** + * Record a traditional SRS attempt + */ + recordSRSAttempt(attempt: Omit<SRSAttempt, 'id' | 'created_at'>): SRSAttempt { + const query = this.db.query(` + INSERT INTO attempts (user_id, timestamp, card_id, good) + VALUES (?, ?, ?, ?) 
+ RETURNING id, user_id, timestamp, card_id, good, created_at + `); + + return query.get( + attempt.user_id, + attempt.timestamp, + attempt.card_id, + attempt.good + ) as SRSAttempt; + } + + /** + * Get due cards for a user + */ + getDueCards(userId: number, limit: number = 20): number[] { + const query = this.db.query(` + SELECT card_id + FROM user_progress + WHERE user_id = ? + AND (next_review_date IS NULL OR next_review_date <= ?) + AND is_mastered = 0 + ORDER BY next_review_date ASC + LIMIT ? + `); + + const results = query.all(userId, Date.now(), limit) as { card_id: number }[]; + return results.map(r => r.card_id); + } + + /** + * Get SRS progress statistics for a user + */ + getSRSStats(userId: number) { + const query = this.db.query(` + SELECT + COUNT(*) as total_cards, + COUNT(CASE WHEN is_mastered = 1 THEN 1 END) as mastered_cards, + COUNT(CASE WHEN next_review_date <= ? THEN 1 END) as due_cards, + COUNT(CASE WHEN next_review_date > ? THEN 1 END) as future_cards, + AVG(ease_factor) as avg_ease_factor, + AVG(interval) as avg_interval + FROM user_progress + WHERE user_id = ? + `); + + return query.get(Date.now(), Date.now(), userId); + } + + // Tone SRS Methods + + /** + * Get or create tone progress for a user and word + */ + getOrCreateToneProgress(userId: number, wordId: number, toneSequence: string, syllableSequence: string): ToneProgress { + const query = this.db.query(` + INSERT INTO tone_pattern_progress (user_id, word_id, tone_sequence, syllable_sequence, next_review_date, last_reviewed) + VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT (user_id, word_id) DO UPDATE SET + tone_sequence = excluded.tone_sequence, + syllable_sequence = excluded.syllable_sequence + RETURNING id, user_id, word_id, tone_sequence, syllable_sequence, repetition_count, + ease_factor, interval, next_review_date, last_reviewed, is_mastered, + difficulty, tone_accuracy, created_at, updated_at + `); + + return query.get( + userId, + wordId, + toneSequence, + syllableSequence, + Date.now(), // next_review_date + Date.now() // last_reviewed + ) as ToneProgress; + } + + /** + * Update tone progress after a practice session + */ + updateToneProgress(progress: Omit<ToneProgress, 'id' | 'created_at' | 'updated_at'>): ToneProgress { + const query = this.db.query(` + UPDATE tone_pattern_progress + SET repetition_count = ?, + ease_factor = ?, + interval = ?, + next_review_date = ?, + last_reviewed = ?, + is_mastered = ?, + difficulty = ?, + tone_accuracy = ?, + updated_at = (strftime('%s', 'now') * 1000) + WHERE user_id = ? AND word_id = ? + RETURNING id, user_id, word_id, tone_sequence, syllable_sequence, repetition_count, + ease_factor, interval, next_review_date, last_reviewed, is_mastered, + difficulty, tone_accuracy, created_at, updated_at + `); + + return query.get( + progress.repetition_count, + progress.ease_factor, + progress.interval, + progress.next_review_date, + progress.last_reviewed, + progress.is_mastered, + progress.difficulty, + progress.tone_accuracy, + progress.user_id, + progress.word_id + ) as ToneProgress; + } + + /** + * Record a tone attempt + */ + recordToneAttempt(attempt: Omit<ToneAttempt, 'id' | 'created_at'>): ToneAttempt { + const query = this.db.query(` + INSERT INTO tone_attempts (user_id, word_id, timestamp, accuracy, tone_accuracy, + pronunciation_score, review_time, difficulty, mode) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ RETURNING id, user_id, word_id, timestamp, accuracy, tone_accuracy, + pronunciation_score, review_time, difficulty, mode, created_at + `); + + return query.get( + attempt.user_id, + attempt.word_id, + attempt.timestamp, + attempt.accuracy, + attempt.tone_accuracy, + attempt.pronunciation_score, + attempt.review_time, + attempt.difficulty, + attempt.mode + ) as ToneAttempt; + } + + /** + * Get due tone words for a user + */ + getDueToneWords(userId: number, limit: number = 20): number[] { + const query = this.db.query(` + SELECT word_id + FROM tone_pattern_progress + WHERE user_id = ? + AND next_review_date <= ? + AND is_mastered = 0 + ORDER BY next_review_date ASC, difficulty ASC + LIMIT ? + `); + + const results = query.all(userId, Date.now(), limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + /** + * Get tone progress statistics for a user + */ + getToneStats(userId: number) { + const query = this.db.query(` + SELECT + COUNT(*) as total_tone_words, + COUNT(CASE WHEN is_mastered = 1 THEN 1 END) as mastered_tone_words, + COUNT(CASE WHEN next_review_date <= ? THEN 1 END) as due_tone_words, + AVG(difficulty) as avg_difficulty, + AVG(tone_accuracy) as avg_tone_accuracy, + AVG(ease_factor) as avg_ease_factor + FROM tone_pattern_progress + WHERE user_id = ? + `); + + return query.get(Date.now(), userId); + } + + /** + * Get recent tone attempts for a user + */ + getRecentToneAttempts(userId: number, limit: number = 10): ToneAttempt[] { + const query = this.db.query(` + SELECT id, user_id, word_id, timestamp, accuracy, tone_accuracy, + pronunciation_score, review_time, difficulty, mode, created_at + FROM tone_attempts + WHERE user_id = ? + ORDER BY timestamp DESC + LIMIT ? + `); + + return query.all(userId, limit) as ToneAttempt[]; + } + + /** + * Get tone mastery statistics for a user + */ + getToneMasteryStats(userId: number): ToneMasteryStats[] { + const query = this.db.query(` + SELECT id, user_id, tone_pattern, syllable_count, total_attempts, successful_attempts, + avg_accuracy, avg_review_time, best_accuracy, last_attempted, mastered_at, + created_at, updated_at + FROM tone_mastery_stats + WHERE user_id = ? + ORDER BY avg_accuracy DESC, last_attempted DESC + `); + + return query.all(userId) as ToneMasteryStats[]; + } + + /** + * Get tone words for practice (prioritizes due words, then random new words) + */ + getToneWordsForPractice(userId: number, limit: number = 20): number[] { + const query = this.db.query(` + WITH due_words AS ( + SELECT word_id, 0 as priority + FROM tone_pattern_progress + WHERE user_id = ? AND next_review_date <= ? AND is_mastered = 0 + LIMIT ? + ), + new_words AS ( + SELECT e.id as word_id, 1 as priority + FROM expressions e + LEFT JOIN tone_pattern_progress tpp ON e.id = tpp.word_id AND tpp.user_id = ? + WHERE tpp.word_id IS NULL + AND e.lang = 'th' + AND e.type = 'word' + ORDER BY e.frequency DESC + LIMIT ? + ) + SELECT word_id + FROM ( + SELECT word_id, priority FROM due_words + UNION ALL + SELECT word_id, priority FROM new_words + ) + ORDER BY priority, RANDOM() + LIMIT ? 
+ `); + + const results = query.all(userId, Date.now(), limit, userId, limit, limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + /** + * Get user dashboard data (combines traditional SRS and tone SRS) + */ + getUserDashboard(userId: number) { + const query = this.db.query(` + SELECT + u.id as user_id, + u.name as username, + COUNT(DISTINCT tpp.word_id) as total_tone_words, + COUNT(DISTINCT CASE WHEN tpp.is_mastered = 1 THEN tpp.word_id END) as mastered_tone_words, + COUNT(DISTINCT CASE WHEN tpp.next_review_date <= ? THEN tpp.word_id END) as due_tone_words, + COUNT(DISTINCT up.card_id) as total_cards, + COUNT(DISTINCT CASE WHEN up.is_mastered = 1 THEN up.card_id END) as mastered_cards, + COUNT(DISTINCT CASE WHEN up.next_review_date <= ? THEN up.card_id END) as due_cards, + (SELECT COUNT(*) FROM attempts WHERE user_id = u.id AND timestamp > ?) as reviews_today, + (SELECT AVG(ta.tone_accuracy) FROM tone_attempts ta WHERE ta.user_id = u.id AND ta.timestamp > ?) as weekly_tone_accuracy + FROM users u + LEFT JOIN tone_pattern_progress tpp ON tpp.user_id = u.id + LEFT JOIN user_progress up ON up.user_id = u.id + WHERE u.id = ? + GROUP BY u.id, u.name + `); + + const weekAgo = Date.now() - (7 * 24 * 60 * 60 * 1000); + const today = Date.now() - (24 * 60 * 60 * 1000); + + return query.get(Date.now(), Date.now(), today, weekAgo, userId); + } +}
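A sketch of one tone-practice step built on the `SRSQueries` class above. The SM-2-style constants, the 0.8 success cutoff, and the mastery rule are assumptions for illustration, and the placeholder tone/syllable sequences would normally come from the phonetics queries rather than being hard-coded.

```ts
import { Database } from "bun:sqlite";
import { SRSQueries } from "./srs";

const db = new Database("dictionary.sqlite");
const srs = new SRSQueries(db);

// Record one tone attempt for a word and reschedule it with a rough SM-2-style update.
// Constants and thresholds are illustrative; the app's real scheduler may differ.
function practiceToneWord(userId: number, wordId: number, toneAccuracy: number, reviewTimeMs: number) {
  const now = Date.now();
  // Placeholder sequences; in practice these come from the phonetics layer.
  const progress = srs.getOrCreateToneProgress(userId, wordId, '["mid","low"]', '["กา","กับ"]');

  srs.recordToneAttempt({
    user_id: userId,
    word_id: wordId,
    timestamp: now,
    accuracy: toneAccuracy,
    tone_accuracy: toneAccuracy,
    pronunciation_score: 0,
    review_time: reviewTimeMs,
    difficulty: progress.difficulty,
    mode: "practice",
  });

  const good = toneAccuracy >= 0.8;
  const ease = Math.max(1.3, progress.ease_factor + (good ? 0.1 : -0.2));
  const reps = good ? progress.repetition_count + 1 : 0;
  const interval = good ? Math.max(1, Math.round(progress.interval * ease)) : 1;

  return srs.updateToneProgress({
    user_id: userId,
    word_id: wordId,
    tone_sequence: progress.tone_sequence,
    syllable_sequence: progress.syllable_sequence,
    repetition_count: reps,
    ease_factor: ease,
    interval,
    next_review_date: now + interval * 86_400_000, // days → ms
    last_reviewed: now,
    is_mastered: reps >= 8 && toneAccuracy >= 0.9, // arbitrary mastery rule for the sketch
    difficulty: progress.difficulty,
    tone_accuracy: toneAccuracy,
  });
}

// Example: work through the next batch of due or new words for user 1.
for (const wordId of srs.getToneWordsForPractice(1, 5)) {
  practiceToneWord(1, wordId, 0.85, 4200);
}
```

The 0.8 cutoff mirrors the success threshold used by the `update_tone_mastery_after_attempt` trigger in the schema, so attempts recorded this way feed the `tone_mastery_stats` aggregates consistently.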
\ No newline at end of file diff --git a/packages/db/src/test.ts b/packages/db/src/test.ts new file mode 100644 index 0000000..6767c13 --- /dev/null +++ b/packages/db/src/test.ts @@ -0,0 +1,149 @@ +import ndb from "."; +import toneDb from "@/lib/db/prosodydb"; + +// function test() { +// const tones = ["low", "mid"]; +// const ndb = new BigQueries(); +// const nq = ndb.fetchWordsByToneAndSyls(tones); +// const oq = toneDb.fetchWordsByToneAndSyls(tones); +// // const senses = JSON.parse(res[0].senses_array); +// console.log({ nq }); +// console.log(nq.length, oq.length); +// } +function test() { + const tones = ["low", "mid"]; + console.log("wtf"); + // const qq = ndb.fetchWordsByToneAndSyls1(tones); + // const oq = ndb.fetchWordsByToneAndSylsO(tones); + const nq = ndb.fetchWordsByToneAndSyls(tones); + // const roq = toneDb.fetchWordsByToneAndSyls(tones); + // const senses = JSON.parse(res[0].senses_array); + // console.log({ nq }); + // console.log("old", oq.length); + // console.log("old db", roq.length); + console.log("new", nq); + // console.log("test", qq.length); +} +test(); + +// ` +// ` +// WITH word_tone_sequences AS ( +// SELECT +// w.id as word_id, +// w.spelling, +// wp.ipa, +// w.frequency, +// GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq, +// GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, +// COUNT(sw.syl_id) as syllable_count, +// (SELECT +// json_group_array(json_object( +// 'id', s.id, +// 'pos', s.pos, +// 'etymology', s.etymology, +// 'confidence', s.confidence, +// 'subsenses', ( +// SELECT json_group_array(json_object( +// 'id', ss.id, +// 'gloss', ss.gloss +// )) +// FROM subsenses ss +// WHERE ss.sid = s.id +// ), +// 'examples', ( +// SELECT json_group_array(json_object( +// 'id', ex.id, +// 'example', ex.example, +// 'ref', ex.ref +// )) +// FROM examples ex +// WHERE ex.sid = s.id +// ), +// 'derivation', ( +// SELECT json_group_array(json_object( +// 'id', d.id, +// 'type', d.type, +// 'text', d.text, +// 'tags', d.tags +// )) +// FROM derivation d +// WHERE d.sid = s.id +// ), +// 'categories', ( +// SELECT json_group_array(wc.category) +// FROM word_categories wc +// WHERE wc.word_id = s.id +// ) +// )) +// FROM senses s +// WHERE s.parent_id = w.id +// ) as senses_array +// FROM expressions w +// JOIN word_phonetics wp ON w.id = wp.word_id +// JOIN syllables_words sw ON wp.id = sw.word_id +// JOIN syllables sy ON sw.syl_id = sy.id +// JOIN tones t ON sy.tone = t.id +// GROUP BY w.id, w.spelling, w.lang, w.frequency +// ) +// SELECT * +// FROM word_tone_sequences +// WHERE tone_sequence LIKE ? +// AND syllable_count = ? 
+// ORDER BY frequency ASC NULLS LAST; +// ` +// +// +// +// ` +// WITH word_tone_sequences AS ( +// SELECT +// w.id as word_id, +// w.spelling, +// wp.ipa, +// w.frequency, +// GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq, +// GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, +// COUNT(sw.syl_id) as syllable_count +// FROM expressions w +// JOIN word_phonetics wp ON w.id = wp.word_id +// JOIN syllables_words sw ON wp.id = sw.word_id +// JOIN syllables sy ON sw.syl_id = sy.id +// JOIN tones t ON sy.tone = t.id +// GROUP BY w.id, w.spelling, w.lang, w.frequency, wp.ipa +// ), +// sense_data AS ( +// SELECT +// s.*, +// GROUP_CONCAT(DISTINCT ss.id || ':' || ss.gloss, '|') as subsenses_data, +// GROUP_CONCAT(DISTINCT ex.id || ':' || ex.example || ':' || COALESCE(ex.ref, ''), '|') as examples_data, +// GROUP_CONCAT(DISTINCT d.id || ':' || d.type || ':' || d.text, '|') as derivation_data, +// GROUP_CONCAT(DISTINCT wc.category, '|') as categories_data +// FROM senses s +// LEFT JOIN subsenses ss ON ss.sid = s.id +// LEFT JOIN examples ex ON ex.sid = s.id +// LEFT JOIN derivation d ON d.sid = s.id +// LEFT JOIN word_categories wc ON wc.word_id = s.id +// GROUP BY s.id +// ) +// SELECT +// wts.*, +// (SELECT +// json_group_array(json_object( +// 'id', sd.id, +// 'pos', sd.pos, +// 'etymology', sd.etymology, +// 'confidence', sd.confidence, +// 'subsenses_data', sd.subsenses_data, +// 'examples_data', sd.examples_data, +// 'derivation_data', sd.derivation_data, +// 'categories_data', sd.categories_data +// )) +// FROM sense_data sd +// WHERE sd.parent_id = wts.word_id +// ) as senses_array +// FROM word_tone_sequences wts +// WHERE wts.tone_sequence LIKE ? +// AND wts.syllable_count = ? +// ORDER BY wts.frequency ASC NULLS LAST; +// ` diff --git a/packages/db/src/types.ts b/packages/db/src/types.ts new file mode 100644 index 0000000..66c2826 --- /dev/null +++ b/packages/db/src/types.ts @@ -0,0 +1,84 @@ +export type Tone = { + letters: string; + numbers: number; + name: string; +}; + +export type Phoneme = { + ipa: string; + spelling: string; +}; +export type Syllable = { + stressed: boolean; + long: boolean; + spelling: string; + ipa: string; + nucleus: Phoneme; + onset: Phoneme; + medial: Phoneme; + coda: Phoneme; + rhyme: Phoneme; + tone: Tone; +}; + +export type ToneQuery = Array<string | null>; +export type MutationType = { change: string } | { keep: string }; +export type MutationOrder = MutationType[]; + +export type PhoneticData = { + word_id: number; + tone_sequence: string; + syllable_count: number; + syl_seq: string; + spelling: string; + ipa: string; + frequency: number; +}; + +export const thaiTones: Record<string, string> = { + "˧": "mid", + "˨˩": "low", + "˥˩": "falling", + "˦˥": "high", + "˩˩˦": "rising", +}; +export const thaiToneNums: Record<string, number> = { + "˧": 33, + "˨˩": 21, + "˥˩": 41, + "˦˥": 45, + "˩˩˦": 214, +}; + +export type FullWordDataDB = { + word_id: number; + tone_sequence: string; + syllable_count: number; + syl_seq: string; + spelling: string; + ipa: string; + frequency: number; + senses_array: string; +}; +export type FullWordData = { + word_id: number; + tone_sequence: string; + syllable_count: number; + syl_seq: string; + spelling: string; + ipa: string; + frequency: number; + senses: Sense[]; +}; +export type Sense = { + confidence: number; + examples: Example[]; + categories: string[]; + etymology: string; + derivation: Derivation[]; + pos: string; + forms: Array<{ form: string; tags: string[] }>; + glosses: string[]; +}; +export type Example = { 
ref: string; text: string }; +export type Derivation = { type: string; text: string; tags: any }; diff --git a/packages/db/src/users.ts b/packages/db/src/users.ts new file mode 100644 index 0000000..ebf715c --- /dev/null +++ b/packages/db/src/users.ts @@ -0,0 +1,305 @@ +import { Database } from "bun:sqlite"; + +export interface User { + id: number; + name: string; + creds: string; +} + +export interface Session { + id: string; + user_id: number; + created_at: number; + expires_at: number; + last_activity?: number; + ip_address?: string; + user_agent?: string; + data?: any; +} + +export interface UserSettings { + id?: number; + user_id: number; + preferred_session_length: number; + target_daily_reviews: number; + difficulty_preference: number; + audio_enabled: boolean; + tone_visualization_enabled: boolean; + auto_advance: boolean; + created_at?: number; + updated_at?: number; +} + +export class UserQueries { + constructor(private db: Database) {} + + /** + * Create a new user + */ + createUser(name: string, creds: string): User { + const query = this.db.query(` + INSERT INTO users (name, creds) + VALUES (?, ?) + RETURNING id, name, creds + `); + + return query.get(name, creds) as User; + } + + /** + * Get user by ID + */ + getUserById(id: number): User | null { + const query = this.db.query(` + SELECT id, name, creds + FROM users + WHERE id = ? + `); + + return query.get(id) as User | null; + } + + /** + * Get user by name + */ + getUserByName(name: string): User | null { + const query = this.db.query(` + SELECT id, name, creds + FROM users + WHERE name = ? + `); + + return query.get(name) as User | null; + } + + /** + * Create a new session for a user + */ + createSession(userId: number, sessionToken: string, expiresAt: number): Session { + const query = this.db.query(` + INSERT INTO sessions (id, user_id, created_at, expires_at, last_activity) + VALUES (?, ?, ?, ?, ?) + RETURNING id, user_id, created_at, expires_at, last_activity, ip_address, user_agent, data + `); + + const now = Date.now(); + return query.get(sessionToken, userId, now, expiresAt, now) as Session; + } + + /** + * Get session by token + */ + getSessionByToken(sessionToken: string): Session | null { + const query = this.db.query(` + SELECT id, user_id, created_at, expires_at, last_activity, ip_address, user_agent, data + FROM sessions + WHERE id = ? AND expires_at > ? + `); + + return query.get(sessionToken, Date.now()) as Session | null; + } + + /** + * Get user from session token + */ + getUserFromSession(sessionToken: string): User | null { + const query = this.db.query(` + SELECT u.id, u.name, u.creds + FROM users u + JOIN sessions s ON u.id = s.user_id + WHERE s.id = ? AND s.expires_at > ? + `); + + return query.get(sessionToken, Date.now()) as User | null; + } + + /** + * Delete session by token + */ + deleteSession(sessionToken: string): boolean { + const query = this.db.query(` + DELETE FROM sessions + WHERE id = ? + `); + + const result = query.run(sessionToken); + return result.changes > 0; + } + + /** + * Clean up expired sessions + */ + cleanupExpiredSessions(): number { + const query = this.db.query(` + DELETE FROM sessions + WHERE expires_at <= ? + `); + + const result = query.run(Date.now()); + return result.changes; + } + + /** + * Update session last activity + */ + updateSessionActivity(sessionToken: string): boolean { + const query = this.db.query(` + UPDATE sessions + SET last_activity = ? + WHERE id = ? AND expires_at > ? 
+ `); + + const result = query.run(Date.now(), sessionToken, Date.now()); + return result.changes > 0; + } + + /** + * Extend session expiry + */ + extendSessionExpiry(sessionToken: string, newExpiry: number): boolean { + const query = this.db.query(` + UPDATE sessions + SET expires_at = ?, last_activity = ? + WHERE id = ? AND expires_at > ? + `); + + const result = query.run(newExpiry, Date.now(), sessionToken, Date.now()); + return result.changes > 0; + } + + /** + * Get all active sessions for a user + */ + getUserSessions(userId: number): Session[] { + const query = this.db.query(` + SELECT id, user_id, created_at, expires_at, last_activity, ip_address, user_agent, data + FROM sessions + WHERE user_id = ? AND expires_at > ? + ORDER BY last_activity DESC + `); + + return query.all(userId, Date.now()) as Session[]; + } + + /** + * Delete all sessions for a user (for logout all devices) + */ + deleteAllUserSessions(userId: number): number { + const query = this.db.query(` + DELETE FROM sessions + WHERE user_id = ? + `); + + const result = query.run(userId); + return result.changes; + } + + /** + * Get user settings + */ + getUserSettings(userId: number): UserSettings | null { + const query = this.db.query(` + SELECT id, user_id, preferred_session_length, target_daily_reviews, + difficulty_preference, audio_enabled, tone_visualization_enabled, + auto_advance, created_at, updated_at + FROM tone_learning_settings + WHERE user_id = ? + `); + + return query.get(userId) as UserSettings | null; + } + + /** + * Create or update user settings + */ + upsertUserSettings( + settings: Omit<UserSettings, "id" | "created_at" | "updated_at">, + ): UserSettings { + const query = this.db.query(` + INSERT INTO tone_learning_settings + (user_id, preferred_session_length, target_daily_reviews, + difficulty_preference, audio_enabled, tone_visualization_enabled, auto_advance) + VALUES (?, ?, ?, ?, ?, ?, ?) + ON CONFLICT (user_id) DO UPDATE SET + preferred_session_length = excluded.preferred_session_length, + target_daily_reviews = excluded.target_daily_reviews, + difficulty_preference = excluded.difficulty_preference, + audio_enabled = excluded.audio_enabled, + tone_visualization_enabled = excluded.tone_visualization_enabled, + auto_advance = excluded.auto_advance, + updated_at = (strftime('%s', 'now') * 1000) + RETURNING id, user_id, preferred_session_length, target_daily_reviews, + difficulty_preference, audio_enabled, tone_visualization_enabled, + auto_advance, created_at, updated_at + `); + + return query.get( + settings.user_id, + settings.preferred_session_length, + settings.target_daily_reviews, + settings.difficulty_preference, + settings.audio_enabled, + settings.tone_visualization_enabled, + settings.auto_advance, + ) as UserSettings; + } + + /** + * Update user settings partially + */ + updateUserSettings( + userId: number, + updates: Partial<UserSettings>, + ): UserSettings | null { + const setClause = Object.keys(updates) + .filter( + (key) => key !== "id" && key !== "user_id" && key !== "created_at", + ) + .map((key) => `${key} = ?`) + .join(", "); + + if (!setClause) return null; + + const values = Object.values(updates).filter((_, index) => { + const key = Object.keys(updates)[index]; + return key !== "id" && key !== "user_id" && key !== "created_at"; + }); + + const query = this.db.query(` + UPDATE tone_learning_settings + SET ${setClause}, updated_at = (strftime('%s', 'now') * 1000) + WHERE user_id = ? 
+ RETURNING id, user_id, preferred_session_length, target_daily_reviews, + difficulty_preference, audio_enabled, tone_visualization_enabled, + auto_advance, created_at, updated_at + `); + + return query.get(...values, userId) as UserSettings | null; + } + + /** + * Get all users (admin function) + */ + getAllUsers(): User[] { + const query = this.db.query(` + SELECT id, name, creds + FROM users + ORDER BY name + `); + + return query.all() as User[]; + } + + /** + * Delete user (admin function) + */ + deleteUser(userId: number): boolean { + const query = this.db.query(` + DELETE FROM users + WHERE id = ? + `); + + const result = query.run(userId); + return result.changes > 0; + } +} diff --git a/packages/db/tsconfig.json b/packages/db/tsconfig.json new file mode 100644 index 0000000..bfa0fea --- /dev/null +++ b/packages/db/tsconfig.json @@ -0,0 +1,29 @@ +{ + "compilerOptions": { + // Environment setup & latest features + "lib": ["ESNext"], + "target": "ESNext", + "module": "Preserve", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + + // Bundler mode + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedIndexedAccess": true, + "noImplicitOverride": true, + + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + "noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + } +} |
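A rough end-to-end sketch of wiring this package together with `bun:sqlite`. The file paths, credential string, token generator, and 30-day expiry are assumptions for illustration, not part of the package.

```ts
import { Database } from "bun:sqlite";
import { SemanticQueries } from "./src/semantic";
import { SRSQueries } from "./src/srs";
import { UserQueries } from "./src/users";

// Open (or create) the database and apply the unified schema.
// Assumes exec() accepts the multi-statement schema file; split it into statements otherwise.
const db = new Database("dictionary.sqlite");
db.exec(await Bun.file("./src/schema.sql").text());

const users = new UserQueries(db);
const semantic = new SemanticQueries(db);
const srs = new SRSQueries(db);

// Register a user and open a session (token and expiry are illustrative).
const user = users.createUser("alice", "argon2-hash-goes-here");
const session = users.createSession(user.id, crypto.randomUUID(), Date.now() + 30 * 86_400_000);

// A later request resolves the user from the session token and fetches work to do.
const current = users.getUserFromSession(session.id);
if (current) {
  console.log(current.name, "languages:", semantic.getLanguages().length);
  console.log("due cards:", srs.getDueCards(current.id, 20).length);
  console.log("due tone words:", srs.getDueToneWords(current.id, 20).length);
}
```

A periodic call to `users.cleanupExpiredSessions()` would keep the sessions table from growing unbounded.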
