| field | value |
|---|---|
| author | polwex <polwex@sortug.com> 2025-11-23 01:12:53 +0700 |
| committer | polwex <polwex@sortug.com> 2025-11-23 01:12:53 +0700 |
| commit | cb1b56f5a0eddbf77446f415f2beda57c8305f85 |
| tree | d333ca5c143063af8ee1b2f9e2d1d25f8ef2007c /packages/db |
wut
Diffstat (limited to 'packages/db')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | packages/db/.gitignore | 34 |
| -rw-r--r-- | packages/db/CLAUDE.md | 106 |
| -rw-r--r-- | packages/db/README.md | 15 |
| -rw-r--r-- | packages/db/bun.lock | 25 |
| -rw-r--r-- | packages/db/index.ts | 3 |
| -rw-r--r-- | packages/db/package.json | 11 |
| -rw-r--r-- | packages/db/src/index.ts | 253 |
| -rw-r--r-- | packages/db/src/migration.ts | 430 |
| -rw-r--r-- | packages/db/src/phonetics.ts | 523 |
| -rw-r--r-- | packages/db/src/schema.sql | 729 |
| -rw-r--r-- | packages/db/src/semantic.ts | 554 |
| -rw-r--r-- | packages/db/src/srs.ts | 402 |
| -rw-r--r-- | packages/db/src/test.ts | 149 |
| -rw-r--r-- | packages/db/src/types.ts | 84 |
| -rw-r--r-- | packages/db/src/users.ts | 305 |
| -rw-r--r-- | packages/db/tsconfig.json | 29 |
16 files changed, 3652 insertions, 0 deletions
diff --git a/packages/db/.gitignore b/packages/db/.gitignore new file mode 100644 index 0000000..a14702c --- /dev/null +++ b/packages/db/.gitignore @@ -0,0 +1,34 @@ +# dependencies (bun install) +node_modules + +# output +out +dist +*.tgz + +# code coverage +coverage +*.lcov + +# logs +logs +_.log +report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# caches +.eslintcache +.cache +*.tsbuildinfo + +# IntelliJ based IDEs +.idea + +# Finder (MacOS) folder config +.DS_Store diff --git a/packages/db/CLAUDE.md b/packages/db/CLAUDE.md new file mode 100644 index 0000000..1ee6890 --- /dev/null +++ b/packages/db/CLAUDE.md @@ -0,0 +1,106 @@ + +Default to using Bun instead of Node.js. + +- Use `bun <file>` instead of `node <file>` or `ts-node <file>` +- Use `bun test` instead of `jest` or `vitest` +- Use `bun build <file.html|file.ts|file.css>` instead of `webpack` or `esbuild` +- Use `bun install` instead of `npm install` or `yarn install` or `pnpm install` +- Use `bun run <script>` instead of `npm run <script>` or `yarn run <script>` or `pnpm run <script>` +- Bun automatically loads .env, so don't use dotenv. + +## APIs + +- `Bun.serve()` supports WebSockets, HTTPS, and routes. Don't use `express`. +- `bun:sqlite` for SQLite. Don't use `better-sqlite3`. +- `Bun.redis` for Redis. Don't use `ioredis`. +- `Bun.sql` for Postgres. Don't use `pg` or `postgres.js`. +- `WebSocket` is built-in. Don't use `ws`. +- Prefer `Bun.file` over `node:fs`'s readFile/writeFile +- Bun.$`ls` instead of execa. + +## Testing + +Use `bun test` to run tests. + +```ts#index.test.ts +import { test, expect } from "bun:test"; + +test("hello world", () => { + expect(1).toBe(1); +}); +``` + +## Frontend + +Use HTML imports with `Bun.serve()`. Don't use `vite`. HTML imports fully support React, CSS, Tailwind. + +Server: + +```ts#index.ts +import index from "./index.html" + +Bun.serve({ + routes: { + "/": index, + "/api/users/:id": { + GET: (req) => { + return new Response(JSON.stringify({ id: req.params.id })); + }, + }, + }, + // optional websocket support + websocket: { + open: (ws) => { + ws.send("Hello, world!"); + }, + message: (ws, message) => { + ws.send(message); + }, + close: (ws) => { + // handle close + } + }, + development: { + hmr: true, + console: true, + } +}) +``` + +HTML files can import .tsx, .jsx or .js files directly and Bun's bundler will transpile & bundle automatically. `<link>` tags can point to stylesheets and Bun's CSS bundler will bundle. + +```html#index.html +<html> + <body> + <h1>Hello, world!</h1> + <script type="module" src="./frontend.tsx"></script> + </body> +</html> +``` + +With the following `frontend.tsx`: + +```tsx#frontend.tsx +import React from "react"; + +// import .css files directly and it works +import './index.css'; + +import { createRoot } from "react-dom/client"; + +const root = createRoot(document.body); + +export default function Frontend() { + return <h1>Hello, world!</h1>; +} + +root.render(<Frontend />); +``` + +Then, run index.ts + +```sh +bun --hot ./index.ts +``` + +For more information, read the Bun API docs in `node_modules/bun-types/docs/**.md`. 
diff --git a/packages/db/README.md b/packages/db/README.md new file mode 100644 index 0000000..94cf634 --- /dev/null +++ b/packages/db/README.md @@ -0,0 +1,15 @@ +# @sortug/sorlang-db + +To install dependencies: + +```bash +bun install +``` + +To run: + +```bash +bun run index.ts +``` + +This project was created using `bun init` in bun v1.3.1. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime. diff --git a/packages/db/bun.lock b/packages/db/bun.lock new file mode 100644 index 0000000..cedc049 --- /dev/null +++ b/packages/db/bun.lock @@ -0,0 +1,25 @@ +{ + "lockfileVersion": 1, + "workspaces": { + "": { + "name": "@sortug/sorlang-db", + "devDependencies": { + "@types/bun": "latest", + }, + "peerDependencies": { + "typescript": "^5", + }, + }, + }, + "packages": { + "@types/bun": ["@types/bun@1.3.3", "", { "dependencies": { "bun-types": "1.3.3" } }, "sha512-ogrKbJ2X5N0kWLLFKeytG0eHDleBYtngtlbu9cyBKFtNL3cnpDZkNdQj8flVf6WTZUX5ulI9AY1oa7ljhSrp+g=="], + + "@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], + + "bun-types": ["bun-types@1.3.3", "", { "dependencies": { "@types/node": "*" } }, "sha512-z3Xwlg7j2l9JY27x5Qn3Wlyos8YAp0kKRlrePAOjgjMGS5IG6E7Jnlx736vH9UVI4wUICwwhC9anYL++XeOgTQ=="], + + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + + "undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="], + } +} diff --git a/packages/db/index.ts b/packages/db/index.ts new file mode 100644 index 0000000..ceecd52 --- /dev/null +++ b/packages/db/index.ts @@ -0,0 +1,3 @@ +import DB from "./src"; + +export default DB; diff --git a/packages/db/package.json b/packages/db/package.json new file mode 100644 index 0000000..a121b42 --- /dev/null +++ b/packages/db/package.json @@ -0,0 +1,11 @@ +{ + "name": "@sortug/sorlang-db", + "module": "index.ts", + "type": "module", + "devDependencies": { + "@types/bun": "latest" + }, + "peerDependencies": { + "typescript": "^5" + } +} diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts new file mode 100644 index 0000000..6a89ee2 --- /dev/null +++ b/packages/db/src/index.ts @@ -0,0 +1,253 @@ +import { Database } from "bun:sqlite"; +import type { FullWordDataDB } from "./types"; +import { SRSQueries } from "./srs"; + +export class Queries { + db: Database; + srs: SRSQueries; + constructor() { + const db = new Database("/home/y/code/bun/sorlang/bulkdata/unified.db"); + db.exec("PRAGMA journal_mode = WAL"); // Enable Write-Ahead Logging for better performance + db.exec("PRAGMA foreign_keys = ON"); + db.exec("PRAGMA cache_size = -8000"); // Increase cache size to 8MB + db.exec("PRAGMA mmap_size = 30000000000"); + db.exec("PRAGMA temp_store = MEMORY"); // Store temp tables in memory + db.exec("PRAGMA synchronous = NORMAL"); // Slightly less safe but faster + + this.db = db; + this.srs = new SRSQueries(db); + } + fetchExpressionById(id: number) { + const query = this.db.query( + ` + SELECT * FROM expressions WHERE id = ? 
+ `, + ); + return query.get(id) as any; + } + fetchWordsByToneAndSyls1(tones: Array<string | null>) { + const toneString = tones + .reduce((acc: string, item) => { + if (!item) return `${acc},%`; + else return `${acc},${item}`; + }, "") + .slice(1); + const query = this.db.query( + ` + WITH word_tone_sequences AS ( + SELECT + wp.ipa, + GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq, + GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, + COUNT(sw.syl_id) as syllable_count + FROM word_phonetics wp + JOIN syllables_words sw ON wp.id = sw.word_id + JOIN syllables sy ON sw.syl_id = sy.id + JOIN tones t ON sy.tone = t.id + GROUP BY wp.ipa + ) + SELECT * + FROM word_tone_sequences + WHERE tone_sequence LIKE ? + AND syllable_count = ? + `, + ); + return query.all(toneString, tones.length) as any[]; + } + fetchWordsByToneAndSylsO(tones: Array<string | null>) { + const toneString = tones + .reduce((acc: string, item) => { + if (!item) return `${acc},%`; + else return `${acc},${item}`; + }, "") + .slice(1); + const query = this.db.query( + ` + WITH word_tone_sequences AS ( + SELECT + w.id as word_id, + w.spelling, + wp.ipa, + w.frequency, + GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq, + GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, + COUNT(sw.syl_id) as syllable_count + FROM expressions w + JOIN word_phonetics wp ON w.id = wp.word_id + JOIN syllables_words sw ON wp.id = sw.word_id + JOIN syllables sy ON sw.syl_id = sy.id + JOIN tones t ON sy.tone = t.id + GROUP BY w.id, w.spelling, w.lang, w.frequency + ) + SELECT word_id, + spelling, + ipa, + frequency, + syl_seq, + tone_sequence, + syllable_count + FROM word_tone_sequences + WHERE tone_sequence LIKE ? + AND syllable_count = ? + ORDER BY frequency ASC NULLS LAST; + `, + ); + // TODO combine with this old query to get both phonetic and semantic data + // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table + + return query.all(toneString, tones.length) as any[]; + } + fetchWordsByToneAndSyls(tones: Array<string | null>): FullWordDataDB[] { + const toneString = tones + .reduce((acc: string, item) => { + if (!item) return `${acc},%`; + else return `${acc},${item}`; + }, "") + .slice(1); + const query = this.db.query( + ` + WITH word_tone_sequences AS ( + SELECT + w.id as word_id, + w.spelling, + wp.ipa, + w.frequency, + GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq, + GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, + COUNT(sw.syl_id) as syllable_count, + (SELECT + json_group_array(json_object( + 'id', s.id, + 'pos', s.pos, + 'etymology', s.etymology, + 'confidence', s.confidence, + 'glosses', ( + SELECT json_group_array(ss.gloss) + FROM subsenses ss + WHERE ss.sid = s.id + ), + 'examples', ( + SELECT json_group_array(json_object( + 'example', ex.example, + 'ref', ex.ref + )) + FROM examples ex + WHERE ex.sid = s.id + ), + 'derivation', ( + SELECT json_group_array(json_object( + 'type', d.type, + 'text', d.text, + 'tags', d.tags + )) + FROM derivation d + WHERE d.sid = s.id + ), + 'categories', ( + SELECT json_group_array(wc.category) + FROM word_categories wc + WHERE wc.word_id = s.id + ) + )) + FROM senses s + WHERE s.parent_id = w.id + ) as senses_array + FROM expressions w + JOIN word_phonetics wp ON w.id = wp.word_id + JOIN syllables_words sw ON wp.id = sw.word_id + JOIN syllables sy ON sw.syl_id = sy.id + JOIN tones t ON sy.tone = t.id + GROUP BY w.id, w.spelling, w.lang, 
w.frequency + ) + SELECT * + FROM word_tone_sequences + WHERE tone_sequence LIKE ? + AND syllable_count = ? + ORDER BY frequency ASC NULLS LAST; + `, + ); + // TODO combine with this old query to get both phonetic and semantic data + // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table + + return query.all(toneString, tones.length) as any[]; + } + // TODO combine with this old query to get both phonetic and semantic data + // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table + + fetchSenses(spelling: string, lang: string) { + const query = this.db.query(` + WITH sense_data AS ( + SELECT + s.*, + GROUP_CONCAT(DISTINCT ss.id || ':' || ss.gloss, '|') as subsenses_data, + GROUP_CONCAT(DISTINCT ex.id || ':' || ex.example || ':' || COALESCE(ex.ref, ''), '|') as examples_data, + GROUP_CONCAT(DISTINCT d.id || ':' || d.type || ':' || d.text, '|') as derivation_data, + GROUP_CONCAT(DISTINCT wc.category, '|') as categories_data + FROM senses s + LEFT JOIN subsenses ss ON ss.sid = s.id + LEFT JOIN examples ex ON ex.sid = s.id + LEFT JOIN derivation d ON d.sid = s.id + LEFT JOIN word_categories wc ON wc.word_id = s.id + GROUP BY s.id + ) + SELECT e.*, + (SELECT + json_group_array(json_object( + 'id', sd.id, + 'pos', sd.pos, + 'etymology', sd.etymology, + 'confidence', sd.confidence, + 'subsenses_data', sd.subsenses_data, + 'examples_data', sd.examples_data, + 'derivation_data', sd.derivation_data, + 'categories_data', sd.categories_data + )) + FROM sense_data sd + WHERE sd.parent_id = e.id + ) as senses_array + FROM expressions e + WHERE e.spelling = ? AND e.lang = ? + ORDER BY e.frequency DESC`); + return query.all(spelling, lang); + } +} +export default Queries; +// ` +// WITH word_tone_sequences AS ( +// SELECT +// w.id as word_id, +// w.spelling, +// wp.ipa, +// w.frequency, +// s.etymology, +// s.pos, +// GROUP_CONCAT(s.text ORDER BY sw.idx) as syl_seq, +// GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, +// // GROUP_CONCAT(t.name ORDER BY s.id) as senses, +// // GROUP_CONCAT(ex.example ORDER BY s.id) as examples, +// // GROUP_CONCAT(cat.category) as tags, +// COUNT(sw.syl_id) as syllable_count +// FROM expressions w +// JOIN word_phonetics wp ON w.id = wp.word_id +// JOIN syllables_words sw ON wp.id = sw.word_id +// JOIN syllables s ON sw.syl_id = s.id +// JOIN tones t ON s.tone = t.id +// JOIN senses s ON s.parent_id = w.id +// JOIN word_categories cat ON s.id = cat.word_id +// JOIN subsenses ss ON ss.sid = s.id +// JOIN examples ex ON ex.sid = s.id +// GROUP BY w.id, w.spelling, w.lang, w.frequency +// ) +// SELECT +// word_id, +// spelling, +// ipa, +// frequency, +// syl_seq, +// tone_sequence, +// syllable_count +// FROM word_tone_sequences +// WHERE tone_sequence LIKE ? +// AND syllable_count = ? 
+// ORDER BY frequency ASC NULLS LAST; +// ` diff --git a/packages/db/src/migration.ts b/packages/db/src/migration.ts new file mode 100644 index 0000000..c4150f8 --- /dev/null +++ b/packages/db/src/migration.ts @@ -0,0 +1,430 @@ +import { Database } from "bun:sqlite"; + +const PROSODY_DB = "/home/y/code/bun/ssr/claudesorlang/bulkdata/prosody.db"; +const THAIPHON_DB = "/home/y/code/bun/ssr/claudesorlang/bulkdata/thaiphon.db"; +const UNIFIED_DB = "/home/y/code/bun/ssr/claudesorlang/bulkdata/unified.db"; + +async function migrateSenses() { + console.log("Starting database migration to unified.db...\n"); + const expressionIdMap: Map<number, number> = new Map(); + const senseIdMap: Map<number, number> = new Map(); + const wordExpressionMap: Map<number, number> = new Map(); + + // Open all databases + const prosodyDb = new Database(PROSODY_DB, { readonly: true }); + const thaiphonDb = new Database(THAIPHON_DB, { readonly: true }); + const unifiedDb = new Database(UNIFIED_DB); + + // Enable foreign keys and WAL mode for unified database + unifiedDb.run("PRAGMA foreign_keys = ON"); + unifiedDb.run("PRAGMA journal_mode = WAL"); + + try { + // Start transaction for atomic migration + unifiedDb.run("BEGIN TRANSACTION"); + + console.log("📦 Migrating from prosody.db..."); + + // 1. Migrate languages (combining both sources, preferring prosody.db) + console.log(" → Migrating categories..."); + // categories + const prosodyCats = prosodyDb + .query("SELECT * FROM categories") + .all() as any[]; + const insertCat = unifiedDb.prepare( + `INSERT INTO categories (name) VALUES (?) + ON CONFLICT(name) DO NOTHING + RETURNING rowid + `, + ); + for (const cat of prosodyCats) { + const cr = insertCat.run(cat.name.toLowerCase().trim()); + // console.log({ cr }); + if (cr.changes !== 1) console.log({ cr }); + } + console.log(` ✓ Migrated ${prosodyCats.length} categories`); + + console.log(" → Migrating languages..."); + const prosodyLangs = prosodyDb + .query("SELECT * FROM languages") + .all() as any[]; + + // Add thaiphon languages first + // Override with prosody languages (they have more complete data) + for (const lang of prosodyLangs) { + // Insert merged languages + const insertLang = unifiedDb.prepare( + "INSERT OR IGNORE INTO languages (code, name, native_name, iso6392) VALUES (?, ?, ?, ?)", + ); + + insertLang.run(lang.code, lang.name, lang.native_name, null); + } + console.log(` ✓ Migrated ${prosodyLangs.length} languages`); + + // 2. 
Migrate expressions from prosody.db + console.log(" → Migrating expressions..."); + const expressions = prosodyDb + .query("SELECT * FROM expressions") + .all() as any[]; + const insertExpression = unifiedDb.prepare( + "INSERT INTO expressions (spelling, lang, frequency, type, notes) VALUES (?, ?, ?, ?, ?)", + ); + + for (const expr of expressions) { + const ei = insertExpression.run( + expr.spelling, + expr.lang, + expr.frequency, + expr.type, + expr.notes, + ); + if (ei.changes > 0) + expressionIdMap.set(expr.id, Number(ei.lastInsertRowid)); + } + + const insertExample = unifiedDb.prepare( + "INSERT INTO examples(sid, example, ref) VALUES (?, ?, ?)", + ); + const insertSubsense = unifiedDb.prepare( + "INSERT INTO subsenses (sid, gloss) VALUES (?, ?)", + ); + const insertDerivation = unifiedDb.prepare( + "INSERT INTO derivation (sid, type, text) VALUES (?, ?, ?)", + ); + const insertCatw = unifiedDb.prepare( + "INSERT OR IGNORE INTO word_categories(category, word_id) VALUES (?, ?)", + ); + const insertWp = unifiedDb.prepare( + "INSERT INTO word_phonetics(word_id, sense_id, ipa, ipa_sequence, syllable_count, syllable_sequence, stressed, tone_sequence, tag, notes) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ); + console.log(" → Migrating senses..."); + const senses = prosodyDb.query("SELECT * FROM senses").all() as any[]; + const insertSense = unifiedDb.prepare( + "INSERT INTO senses (parent_id, spelling, pos, etymology, confidence) VALUES (?, ?, ?, ?, ?)", + ); + + for (const sense of senses) { + const parentId = expressionIdMap.get(sense.parent_id); + const senseRow = insertSense.run( + parentId, + sense.spelling, + sense.pos, + sense.etymology, + sense.confidence, + ); + const senseId = senseRow.lastInsertRowid; + senseIdMap.set(sense.id, Number(senseId)); + const deriv = ( + name: string, + type: Array<{ word: string }> | undefined, + ) => { + if (type) + for (const t of type) insertDerivation.run(senseId, name, t.word); + }; + const subsenses = JSON.parse(sense.senses); + for (const ss of subsenses) { + const { + examples, + glosses, + tags, + synonyms, + antonyms, + hypernyms, + meronyms, + hyponyms, + holonyms, + } = ss; + if (examples) + for (const ex of examples) { + const ref = ex.ref || null; + + if (ex.text) insertExample.run(senseId, ex.text, ref); + // else console.log("bad example", ex); + } + if (glosses) + for (const gloss of glosses) insertSubsense.run(senseId, gloss); + if (tags) + for (const tag of tags) { + const lw = tag.toLowerCase().trim(); + insertCat.run(lw); + insertCatw.run(lw, senseId); + } + if (synonyms) deriv("synonym", synonyms); + if (antonyms) deriv("antonym", antonyms); + if (hypernyms) deriv("hypernym", hypernyms); + if (meronyms) deriv("meronym", meronyms); + if (hyponyms) deriv("hyponym", hyponyms); + if (holonyms) deriv("holonym", holonyms); + } + const prosody = JSON.parse(sense.prosody); + const stressed = prosody?.stressedSyllable || null; + if (!sense.ipa) { + console.log("no ipa", sense); + continue; + } + const ipa = JSON.parse(sense.ipa); + for (const ip of ipa) { + insertWp.run( + parentId, + senseId, + ip.ipa, + ip.ipa, + 0, + "", + stressed, + "", + ip.tag, + null, + ); + } + } + console.log(` ✓ Migrated ${senses.length} senses`); + + console.log("\n📦 Migrating from thaiphon.db..."); + + // 6. 
Migrate phonetic components from thaiphon.db + console.log(" → Migrating phonetic components..."); + + // Tones + const tones = thaiphonDb.query("SELECT * FROM tones").all() as any; + const insertTone = unifiedDb.prepare( + "INSERT INTO tones (id, ipa, lang, name, nums) VALUES (?, ?, ?, ?, ?)", + ); + for (const tone of tones) { + insertTone.run(tone.id, tone.ipa, tone.lang, tone.name, tone.nums); + } + console.log(` ✓ Migrated ${tones.length} tones`); + + // Onsets + const onsets = thaiphonDb.query("SELECT * FROM onsets").all() as any; + const insertOnset = unifiedDb.prepare( + "INSERT INTO onsets (id, ipa, text, lang) VALUES (?, ?, ?, ?)", + ); + for (const onset of onsets) { + insertOnset.run(onset.id, onset.ipa, onset.text, onset.lang); + } + console.log(` ✓ Migrated ${onsets.length} onsets`); + + // Medials + const medials = thaiphonDb.query("SELECT * FROM medials").all() as any; + const insertMedial = unifiedDb.prepare( + "INSERT INTO medials (id, ipa, text, lang) VALUES (?, ?, ?, ?)", + ); + for (const medial of medials) { + insertMedial.run(medial.id, medial.ipa, medial.text, medial.lang) as any; + } + console.log(` ✓ Migrated ${medials.length} medials`); + + // Nucleus + const nucleus = thaiphonDb.query("SELECT * FROM nucleus").all() as any; + const insertNucleus = unifiedDb.prepare( + "INSERT INTO nucleus (id, ipa, text, lang) VALUES (?, ?, ?, ?)", + ); + for (const nucl of nucleus) { + insertNucleus.run(nucl.id, nucl.ipa, nucl.text, nucl.lang); + } + console.log(` ✓ Migrated ${nucleus.length} nucleus`); + + // Codas + const codas = thaiphonDb.query("SELECT * FROM codas").all() as any; + const insertCoda = unifiedDb.prepare( + "INSERT INTO codas (id, ipa, text, lang) VALUES (?, ?, ?, ?)", + ); + for (const coda of codas) { + insertCoda.run(coda.id, coda.ipa, coda.text, coda.lang) as any; + } + console.log(` ✓ Migrated ${codas.length} codas`); + + // Rhymes + const rhymes = thaiphonDb.query("SELECT * FROM rhymes").all() as any; + const insertRhyme = unifiedDb.prepare( + "INSERT INTO rhymes (id, ipa, text, lang) VALUES (?, ?, ?, ?)", + ); + for (const rhyme of rhymes) { + insertRhyme.run(rhyme.id, rhyme.ipa, rhyme.text, rhyme.lang); + } + console.log(` ✓ Migrated ${rhymes.length} rhymes`); + + // 7. Migrate syllables from thaiphon.db + console.log(" → Migrating syllables..."); + const syllables = thaiphonDb.query("SELECT * FROM syllables").all() as any; + const insertSyllable = unifiedDb.prepare( + "INSERT INTO syllables (id, lang, ipa, long, text, onset, medial, nucleus, coda, rhyme, tone, notes) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + ); + for (const syl of syllables) { + insertSyllable.run( + syl.id, + syl.lang, + syl.ipa, + syl.long, + syl.text, + syl.onset, + syl.medial, + syl.nucleus, + syl.coda, + syl.rhyme, + syl.tone, + syl.notes, + ); + } + console.log(` ✓ Migrated ${syllables.length} syllables`); + + // 8. Migrate words from thaiphon.db (as expressions if not exists) + console.log(" → Migrating words as expressions..."); + const words = thaiphonDb.query("SELECT * FROM words").all() as any[]; + const insertWordAsExpr = unifiedDb.query( + `INSERT INTO expressions (spelling, lang, frequency, type, notes) VALUES (?, ?, ?, ?, ?) 
+ ON CONFLICT(spelling, lang) DO UPDATE SET spelling = excluded.spelling + RETURNING rowid`, + ); + + // Also keep track for syllables_words mapping + let prev: any = null; + for (const word of words) { + const result = insertWordAsExpr.get( + word.spelling, + word.lang, + word.frequency, + "word", + word.notes, + ) as { id: number }; + // 41085 + console.log({ result }); + // if (prev === result.id) { + // console.log(word, result); + // throw new Error("fucked up again"); + // } + // prev = result.id; + wordExpressionMap.set(word.id, result.id); + } + console.log(` ✓ Processed ${words.length} words`); + + console.log(" → Migrating word phonetics..."); + const wordPhonetics = thaiphonDb + .query("SELECT * FROM word_phonetics") + .all() as any[]; + const insertWordPhonetic = unifiedDb.prepare( + "INSERT INTO word_phonetics (word_id, sense_id, ipa, syllable_count, syllable_sequence, tone_sequence, ipa_sequence, tag, notes) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", + ); + // TODO find the sense id later I guess + const wpMap: Map<number, number> = new Map(); + for (const wp of wordPhonetics) { + const newWordId = wordExpressionMap.get(wp.word_id); + if (newWordId) { + const wr = insertWordPhonetic.run( + newWordId, + null, + wp.ipa, + wp.syllable_count, + wp.syllable_sequence, + wp.tone_sequence, + wp.ipa_sequence, + wp.tag, + wp.notes, + ); + if (wr.changes !== 0) wpMap.set(wp.word_id, Number(wr.lastInsertRowid)); + } + } + + // 9. Migrate syllables_words junction + console.log(" → Migrating syllables_words..."); + const sylWords = thaiphonDb + .query("SELECT * FROM syllables_words") + .all() as any; + const insertSylWord = unifiedDb.prepare( + "INSERT OR IGNORE INTO syllables_words (syl_id, word_id, idx, stressed) VALUES (?, ?, ?, ?)", + ); + for (const sw of sylWords) { + const newWordId = wpMap.get(sw.word_id); + if (newWordId) + insertSylWord.run(sw.syl_id, newWordId, sw.idx, sw.stressed); + } + console.log( + ` ✓ Migrated ${sylWords.length} syllable-word relationships`, + ); + + unifiedDb.run("COMMIT"); + + console.log("\n✅ Migration completed successfully!"); + console.log(" Unified database is ready at:", UNIFIED_DB); + } catch (error) { + console.error("\n❌ Migration failed:", error); + unifiedDb.run("ROLLBACK"); + throw error; + } finally { + // Close all databases + prosodyDb.close(); + // thaiphonDb.close(); + unifiedDb.close(); + } +} + +// Run the migration +migrateSenses(); +// migrateTones(); +// test(); +// + +async function test() { + console.log("Starting database migration to unified.db...\n"); + // Open all databases + const prosodyDb = new Database(PROSODY_DB, { readonly: true }); + const thaiphonDb = new Database(THAIPHON_DB, { readonly: true }); + const unifiedDb = new Database(UNIFIED_DB, { readonly: true }); + + // Enable foreign keys and WAL mode for unified database + unifiedDb.run("PRAGMA foreign_keys = ON"); + unifiedDb.run("PRAGMA journal_mode = WAL"); + const oldE = prosodyDb.query("SELECT COUNT(*) FROM expressions").get(); + const oldS = prosodyDb.query("SELECT COUNT(*) FROM senses").get(); + const oldW = thaiphonDb.query("SELECT COUNT(*) FROM words").get(); + const oldWp = thaiphonDb.query("SELECT COUNT(*) FROM word_phonetics").get(); + const oldSw = thaiphonDb.query("SELECT COUNT(*) FROM syllables_words").get(); + const oldSy = thaiphonDb.query("SELECT COUNT(*) FROM syllables").get(); + const oldTo = thaiphonDb.query("SELECT COUNT(*) FROM tones").get(); + const oldCod = thaiphonDb.query("SELECT COUNT(*) FROM codas").get(); + const oldNuc = 
thaiphonDb.query("SELECT COUNT(*) FROM nucleus").get(); + const oldOns = thaiphonDb.query("SELECT COUNT(*) FROM onsets").get(); + const oldRhy = thaiphonDb.query("SELECT COUNT(*) FROM rhymes").get(); + const oldWrh = thaiphonDb.query("SELECT COUNT(*) FROM word_rhymes").get(); + const oldWry = thaiphonDb.query("SELECT COUNT(*) FROM words_wrhymes").get(); + const newSw = unifiedDb.query("SELECT COUNT(*) FROM syllables_words").get(); + const newSy = unifiedDb.query("SELECT COUNT(*) FROM syllables").get(); + const newTo = unifiedDb.query("SELECT COUNT(*) FROM tones").get(); + const newCod = unifiedDb.query("SELECT COUNT(*) FROM codas").get(); + const newNuc = unifiedDb.query("SELECT COUNT(*) FROM nucleus").get(); + const newOns = unifiedDb.query("SELECT COUNT(*) FROM onsets").get(); + const newRhy = unifiedDb.query("SELECT COUNT(*) FROM rhymes").get(); + const newWrh = unifiedDb.query("SELECT COUNT(*) FROM word_rhymes").get(); + const newWry = unifiedDb.query("SELECT COUNT(*) FROM words_wrhymes").get(); + const newE = unifiedDb.query("SELECT COUNT(*) FROM expressions").get(); + const newS = unifiedDb.query("SELECT COUNT(*) FROM senses").get(); + const newWp = unifiedDb.query("SELECT COUNT(*) FROM word_phonetics").get(); + console.log({ + oldE, + oldS, + oldW, + oldWp, + newE, + newS, + newWp, + oldSw, + oldSy, + oldTo, + oldCod, + oldNuc, + oldOns, + oldRhy, + oldWrh, + oldWry, + newSw, + newSy, + newTo, + newCod, + newNuc, + newOns, + newRhy, + newWrh, + newWry, + }); +} diff --git a/packages/db/src/phonetics.ts b/packages/db/src/phonetics.ts new file mode 100644 index 0000000..cf62434 --- /dev/null +++ b/packages/db/src/phonetics.ts @@ -0,0 +1,523 @@ +import { Database } from 'bun:sqlite'; + +export interface Syllable { + id: number; + lang: string; + ipa: string; + long: number; + text: string; + onset: number; + medial: number; + nucleus: number; + coda: number; + rhyme: number; + tone: number; + notes?: string; +} + +export interface Tone { + id: number; + ipa: string; + lang: string; + name: string; + nums: number; +} + +export interface Onset { + id: number; + ipa: string; + text: string; + lang: string; +} + +export interface Medial { + id: number; + ipa: string; + text: string; + lang: string; +} + +export interface Nucleus { + id: number; + ipa: string; + text: string; + lang: string; +} + +export interface Coda { + id: number; + ipa: string; + text: string; + lang: string; +} + +export interface Rhyme { + id: number; + ipa: string; + text: string; + lang: string; +} + +export interface WordPhonetics { + id?: number; + word_id: number; + ipa: string; + syllable_count: number; + syllable_sequence: string; + tone_sequence: string; + ipa_sequence: string; + tag?: string; + notes?: string; +} + +export interface WordRhyme { + id: number; + text: string; + lang: string; + notes?: string; +} + +export interface Idiom { + id: number; + spelling: string; + lang: string; + frequency?: number; +} + +export interface SyllableWordMapping { + syl_id: number; + word_id: number; + idx: number; + stressed?: number; +} + +export class PhoneticsQueries { + constructor(private db: Database) {} + + // Tone operations + getTones(lang?: string): Tone[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, lang, name, nums + FROM tones + WHERE lang = ? 
+ ORDER BY nums + `); + return query.all(lang) as Tone[]; + } else { + const query = this.db.query(` + SELECT id, ipa, lang, name, nums + FROM tones + ORDER BY lang, nums + `); + return query.all() as Tone[]; + } + } + + getToneById(id: number): Tone | null { + const query = this.db.query(` + SELECT id, ipa, lang, name, nums + FROM tones + WHERE id = ? + `); + return query.get(id) as Tone | null; + } + + getToneByName(name: string, lang: string): Tone | null { + const query = this.db.query(` + SELECT id, ipa, lang, name, nums + FROM tones + WHERE name = ? AND lang = ? + `); + return query.get(name, lang) as Tone | null; + } + + // Syllable component operations + getOnsets(lang?: string): Onset[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM onsets + WHERE lang = ? + ORDER BY text + `); + return query.all(lang) as Onset[]; + } else { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM onsets + ORDER BY lang, text + `); + return query.all() as Onset[]; + } + } + + getMedials(lang?: string): Medial[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM medials + WHERE lang = ? + ORDER BY text + `); + return query.all(lang) as Medial[]; + } else { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM medials + ORDER BY lang, text + `); + return query.all() as Medial[]; + } + } + + getNucleus(lang?: string): Nucleus[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM nucleus + WHERE lang = ? + ORDER BY text + `); + return query.all(lang) as Nucleus[]; + } else { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM nucleus + ORDER BY lang, text + `); + return query.all() as Nucleus[]; + } + } + + getCodas(lang?: string): Coda[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM codas + WHERE lang = ? + ORDER BY text + `); + return query.all(lang) as Coda[]; + } else { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM codas + ORDER BY lang, text + `); + return query.all() as Coda[]; + } + } + + getRhymes(lang?: string): Rhyme[] { + if (lang) { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM rhymes + WHERE lang = ? + ORDER BY text + `); + return query.all(lang) as Rhyme[]; + } else { + const query = this.db.query(` + SELECT id, ipa, text, lang + FROM rhymes + ORDER BY lang, text + `); + return query.all() as Rhyme[]; + } + } + + // Complete syllable operations + getSyllables(lang?: string, tone?: number): Syllable[] { + if (lang && tone !== undefined) { + const query = this.db.query(` + SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes, + o.text as onset_text, m.text as medial_text, n.text as nucleus_text, + c.text as coda_text, r.text as rhyme_text, t.name as tone_name + FROM syllables s + LEFT JOIN onsets o ON s.onset = o.id + LEFT JOIN medials m ON s.medial = m.id + LEFT JOIN nucleus n ON s.nucleus = n.id + LEFT JOIN codas c ON s.coda = c.id + LEFT JOIN rhymes r ON s.rhyme = r.id + LEFT JOIN tones t ON s.tone = t.id + WHERE s.lang = ? AND s.tone = ? 
+ ORDER BY s.text + `); + return query.all(lang, tone) as Syllable[]; + } else if (lang) { + const query = this.db.query(` + SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes, + o.text as onset_text, m.text as medial_text, n.text as nucleus_text, + c.text as coda_text, r.text as rhyme_text, t.name as tone_name + FROM syllables s + LEFT JOIN onsets o ON s.onset = o.id + LEFT JOIN medials m ON s.medial = m.id + LEFT JOIN nucleus n ON s.nucleus = n.id + LEFT JOIN codas c ON s.coda = c.id + LEFT JOIN rhymes r ON s.rhyme = r.id + LEFT JOIN tones t ON s.tone = t.id + WHERE s.lang = ? + ORDER BY s.text, s.tone + `); + return query.all(lang) as Syllable[]; + } else { + const query = this.db.query(` + SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes, + o.text as onset_text, m.text as medial_text, n.text as nucleus_text, + c.text as coda_text, r.text as rhyme_text, t.name as tone_name + FROM syllables s + LEFT JOIN onsets o ON s.onset = o.id + LEFT JOIN medials m ON s.medial = m.id + LEFT JOIN nucleus n ON s.nucleus = n.id + LEFT JOIN codas c ON s.coda = c.id + LEFT JOIN rhymes r ON s.rhyme = r.id + LEFT JOIN tones t ON s.tone = t.id + ORDER BY s.lang, s.text, s.tone + `); + return query.all() as Syllable[]; + } + } + + getSyllableById(id: number): Syllable | null { + const query = this.db.query(` + SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes, + o.text as onset_text, m.text as medial_text, n.text as nucleus_text, + c.text as coda_text, r.text as rhyme_text, t.name as tone_name + FROM syllables s + LEFT JOIN onsets o ON s.onset = o.id + LEFT JOIN medials m ON s.medial = m.id + LEFT JOIN nucleus n ON s.nucleus = n.id + LEFT JOIN codas c ON s.coda = c.id + LEFT JOIN rhymes r ON s.rhyme = r.id + LEFT JOIN tones t ON s.tone = t.id + WHERE s.id = ? + `); + return query.get(id) as Syllable | null; + } + + // Word phonetics operations + getWordPhonetics(wordId: number): WordPhonetics | null { + const query = this.db.query(` + SELECT id, word_id, ipa, syllable_count, syllable_sequence, tone_sequence, ipa_sequence, tag, notes + FROM word_phonetics + WHERE word_id = ? + `); + return query.get(wordId) as WordPhonetics | null; + } + + getWordsByTonePattern(toneSequence: string, syllableCount?: number, limit: number = 50): number[] { + if (syllableCount !== undefined) { + const query = this.db.query(` + SELECT word_id + FROM word_phonetics + WHERE tone_sequence = ? AND syllable_count = ? + ORDER BY id + LIMIT ? + `); + const results = query.all(toneSequence, syllableCount, limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } else { + const query = this.db.query(` + SELECT word_id + FROM word_phonetics + WHERE tone_sequence = ? + ORDER BY syllable_count, id + LIMIT ? + `); + const results = query.all(toneSequence, limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } + } + + getWordsBySyllablePattern(syllableSequence: string, limit: number = 50): number[] { + const query = this.db.query(` + SELECT word_id + FROM word_phonetics + WHERE syllable_sequence = ? + ORDER BY id + LIMIT ? 
+ `); + const results = query.all(syllableSequence, limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + searchWordsByPhoneticPattern(pattern: { + toneSequence?: string; + syllableCount?: number; + minSyllableCount?: number; + maxSyllableCount?: number; + limit?: number; + }): number[] { + let conditions: string[] = []; + let params: any[] = []; + + if (pattern.toneSequence) { + conditions.push('tone_sequence LIKE ?'); + params.push(`%${pattern.toneSequence}%`); + } + + if (pattern.syllableCount !== undefined) { + conditions.push('syllable_count = ?'); + params.push(pattern.syllableCount); + } + + if (pattern.minSyllableCount !== undefined) { + conditions.push('syllable_count >= ?'); + params.push(pattern.minSyllableCount); + } + + if (pattern.maxSyllableCount !== undefined) { + conditions.push('syllable_count <= ?'); + params.push(pattern.maxSyllableCount); + } + + const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : ''; + const limit = pattern.limit || 50; + params.push(limit); + + const query = this.db.query(` + SELECT word_id + FROM word_phonetics + ${whereClause} + ORDER BY syllable_count, id + LIMIT ? + `); + + const results = query.all(...params) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + // Word-syllable mapping operations + getSyllablesForWord(wordId: number): SyllableWordMapping[] { + const query = this.db.query(` + SELECT sw.syl_id, sw.word_id, sw.idx, sw.stressed, + s.text as syllable_text, s.ipa as syllable_ipa, s.tone, s.long, + t.name as tone_name + FROM syllables_words sw + JOIN syllables s ON sw.syl_id = s.id + LEFT JOIN tones t ON s.tone = t.id + WHERE sw.word_id = ? + ORDER BY sw.idx + `); + return query.all(wordId) as SyllableWordMapping[]; + } + + getWordsForSyllable(syllableId: number): number[] { + const query = this.db.query(` + SELECT DISTINCT word_id + FROM syllables_words + WHERE syl_id = ? + ORDER BY word_id + `); + const results = query.all(syllableId) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + // Rhyme operations + getWordRhymes(wordId: number): WordRhyme[] { + const query = this.db.query(` + SELECT wr.id, wr.text, wr.lang, wr.notes + FROM word_rhymes wr + JOIN words_wrhymes wwr ON wr.id = wwr.wrhyme_id + WHERE wwr.word_id = ? + `); + return query.all(wordId) as WordRhyme[]; + } + + getWordsByRhyme(rhymeText: string, lang: string, limit: number = 50): number[] { + const query = this.db.query(` + SELECT DISTINCT ww.word_id + FROM word_rhymes wr + JOIN words_wrhymes ww ON wr.id = ww.wrhyme_id + WHERE wr.text = ? AND wr.lang = ? + LIMIT ? + `); + const results = query.all(rhymeText, lang, limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + // Idiom operations + getIdioms(lang?: string): Idiom[] { + if (lang) { + const query = this.db.query(` + SELECT id, spelling, lang, frequency + FROM idioms + WHERE lang = ? + ORDER BY frequency DESC, spelling + `); + return query.all(lang) as Idiom[]; + } else { + const query = this.db.query(` + SELECT id, spelling, lang, frequency + FROM idioms + ORDER BY lang, frequency DESC, spelling + `); + return query.all() as Idiom[]; + } + } + + getIdiomsForWord(wordId: number): Idiom[] { + const query = this.db.query(` + SELECT i.id, i.spelling, i.lang, i.frequency + FROM idioms i + JOIN words_idioms wi ON i.id = wi.idiom_id + WHERE wi.word_id = ? 
+ ORDER BY i.frequency DESC, i.spelling + `); + return query.all(wordId) as Idiom[]; + } + + // Phonetic statistics and analysis + getPhoneticStats(lang: string) { + const query = this.db.query(` + SELECT + COUNT(DISTINCT s.id) as total_syllables, + COUNT(DISTINCT s.tone) as unique_tones, + AVG(s.long) as avg_vowel_length, + COUNT(DISTINCT s.onset) as unique_onsets, + COUNT(DISTINCT s.medial) as unique_medials, + COUNT(DISTINCT s.nucleus) as unique_nucleus, + COUNT(DISTINCT s.coda) as unique_codas, + COUNT(DISTINCT s.rhyme) as unique_rhymes, + COUNT(DISTINCT wp.id) as total_word_phonetics, + AVG(wp.syllable_count) as avg_syllables_per_word + FROM syllables s + LEFT JOIN word_phonetics wp ON 1=1 + WHERE s.lang = ? + `); + return query.get(lang); + } + + getToneDistribution(lang: string) { + const query = this.db.query(` + SELECT + t.name as tone_name, + t.nums as tone_number, + COUNT(s.id) as syllable_count, + ROUND(COUNT(s.id) * 100.0 / (SELECT COUNT(*) FROM syllables WHERE lang = ?), 2) as percentage + FROM syllables s + JOIN tones t ON s.tone = t.id + WHERE s.lang = ? + GROUP BY t.id, t.name, t.nums + ORDER BY t.nums + `); + return query.all(lang, lang); + } + + getSyllableComplexityStats(lang: string) { + const query = this.db.query(` + SELECT + syllable_count, + COUNT(*) as word_count, + ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM word_phonetics), 2) as percentage + FROM word_phonetics wp + JOIN expressions e ON wp.word_id = e.id + WHERE e.lang = ? + GROUP BY syllable_count + ORDER BY syllable_count + `); + return query.all(lang); + } +}
\ No newline at end of file diff --git a/packages/db/src/schema.sql b/packages/db/src/schema.sql new file mode 100644 index 0000000..c4a7c76 --- /dev/null +++ b/packages/db/src/schema.sql @@ -0,0 +1,729 @@ +/** + * UNIFIED DATABASE SCHEMA FOR THAI LANGUAGE LEARNING APPLICATION + * + * This file consolidates 3 redundant database schemas: + * - schema.sql (main schema with courses and SRS) + * - prosodyschema.sql (phonetic analysis with syllable breakdown) + * - senseschema.sql (semantic analysis with subsenses and derivations) + * + * REDUNDANCY ANALYSIS: + * ==================== + * + ** MAJOR CONFLICTS RESOLVED: + * 1. Languages table: + * - schema.sql: uses 'code' as PRIMARY KEY + * - prosodyschema.sql: uses 'iso6392' as PRIMARY KEY + * - RESOLVED: Unified with both code systems supported + * + * 2. Senses table: + * - schema.sql: includes ipa/prosody JSONB fields + * - senseschema.sql: missing phonetic fields + * - RESOLVED: Enhanced version with all fields + * + * 3. Word/Expression entities: + * - schema.sql: uses 'expressions' table + * - prosodyschema.sql: uses 'words' table + * - RESOLVED: Standardized on 'expressions' terminology + * + * 4. Categories tables: + * - EXACT DUPLICATES in schema.sql and senseschema.sql + * - RESOLVED: Keep one instance, remove duplicate + * + * UNIQUE FEATURES PRESERVED: + * ======================== + * + * FROM schema.sql (Main Course Learning): + * - User management: users, cookies + * - Course structure: lessons, cards, cards_lessons, cards_expressions + * - SRS tracking: user_progress, attempts + * - Bookmarks and user features + * + * FROM prosodyschema.sql (Phonetic Analysis): + * - Detailed syllable breakdown: tones, onsets, medials, nucleus, codas, rhymes + * - Phonetic patterns: word_phonetics, syllables_words + * - Rhyme analysis: word_rhymes, words_wrhymes + * + * FROM senseschema.sql (Semantic Analysis): + * - Detailed definitions: subsenses + * - Etymology tracking: derivation + * + * FOREIGN KEY RELATIONSHIPS: + * ========================= + * - Updated all references from 'words.id' to 'expressions.id' + * - Unified language references to use languages.code + * - Maintained proper cascade relationships + * + * BENEFITS OF UNIFICATION: + * ======================= + * 1. Single source of truth for database structure + * 2. Eliminated conflicting table definitions + * 3. Preserved all unique functionality + * 4. Improved foreign key consistency + * 5. 
Better support for comprehensive Thai language analysis + */ + +-- Enable foreign key support and performance optimizations +PRAGMA foreign_keys = ON; +PRAGMA journal_mode = WAL; +PRAGMA cache_size = -2000; +PRAGMA mmap_size = 30000000000; + +/** + * UNIFIED LANGUAGES TABLE + * + * RESOLVES CONFLICT between: + * - schema.sql: code (PRIMARY KEY), name, native_name + * - prosodyschema.sql: iso6392 (PRIMARY KEY), english + * + * NOW SUPPORTS multiple language code systems for maximum compatibility + */ +CREATE TABLE IF NOT EXISTS languages ( + code TEXT PRIMARY KEY, -- Primary language code (ISO 639-1 preferred) + name TEXT NOT NULL, -- English name + native_name TEXT, -- Native name + iso6392 TEXT UNIQUE, -- ISO 639-2 code alternative + CONSTRAINT name_unique UNIQUE (name) +); + +/** + * CORE CONTENT TABLES + * Standardized on 'expressions' terminology from schema.sql + * Enhanced with fields from both schemas + */ + +-- Main expressions table (formerly 'words' in prosodyschema.sql) +-- UNIFIED from schema.sql expressions and prosodyschema.sql words +CREATE TABLE IF NOT EXISTS expressions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + spelling TEXT NOT NULL, + lang TEXT NOT NULL, + frequency INTEGER, + type TEXT NOT NULL, -- word | idiom | w/e + notes TEXT, -- Additional notes (from prosodyschema.sql) + FOREIGN KEY (lang) REFERENCES languages(code), + CONSTRAINT spell_unique UNIQUE (spelling, lang) +); + +-- Enhanced senses table with phonetic capabilities +-- MERGED from schema.sql senses (with ipa/prosody) and senseschema.sql senses +CREATE TABLE IF NOT EXISTS senses ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + parent_id INTEGER NOT NULL, + spelling TEXT NOT NULL, + pos TEXT, + etymology TEXT, + confidence INTEGER NOT NULL DEFAULT 0, + FOREIGN KEY (parent_id) REFERENCES expressions(id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS categories ( + name TEXT PRIMARY KEY +); + +-- Word-category relationships +CREATE TABLE IF NOT EXISTS word_categories ( + word_id INTEGER NOT NULL, + category TEXT NOT NULL, + PRIMARY KEY (word_id, category), + FOREIGN KEY (word_id) REFERENCES senses(id) ON DELETE CASCADE, + FOREIGN KEY (category) REFERENCES categories(name) ON DELETE CASCADE +); + +-- Semantic analysis tables (FROM senseschema.sql) +CREATE TABLE IF NOT EXISTS examples( + id INTEGER PRIMARY KEY AUTOINCREMENT, + sid INTEGER NOT NULL, + example TEXT NOT NULL, -- Example sentence + ref TEXT, -- source of the quote, llm if generated etc. + FOREIGN KEY (sid) REFERENCES senses(id) ON DELETE CASCADE +); +CREATE TABLE IF NOT EXISTS subsenses ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + sid INTEGER NOT NULL, + gloss TEXT NOT NULL, -- Definition/explanation + FOREIGN KEY (sid) REFERENCES senses(id) ON DELETE CASCADE +); + +CREATE TABLE IF NOT EXISTS derivation ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + sid INTEGER NOT NULL, + type TEXT NOT NULL, -- Type of derivation (prefix, suffix, compound, etc.) 
+ text TEXT NOT NULL, -- Derivative text + tags JSONB, -- Additional metadata + FOREIGN KEY (sid) REFERENCES senses(id) ON DELETE CASCADE +); + +/** + * PHONETIC ANALYSIS TABLES (FROM prosodyschema.sql) + * + * Comprehensive syllable breakdown system for Thai language analysis + * These tables provide detailed phonetic analysis beyond basic JSONB fields + */ + + +-- Syllable component tables +CREATE TABLE IF NOT EXISTS tones ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + lang TEXT NOT NULL, + name TEXT NOT NULL, + nums INTEGER NOT NULL, -- Tone number (1-5 for Thai) + CONSTRAINT tone_unique UNIQUE (ipa, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS onsets ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + text TEXT NOT NULL, + lang TEXT NOT NULL, + CONSTRAINT onset_unique UNIQUE (ipa, text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS medials ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + text TEXT NOT NULL, + lang TEXT NOT NULL, + CONSTRAINT medial_unique UNIQUE (ipa, text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS nucleus ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + text TEXT NOT NULL, + lang TEXT NOT NULL, + CONSTRAINT nucleus_unique UNIQUE (ipa, text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS codas ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + text TEXT NOT NULL, + lang TEXT NOT NULL, + CONSTRAINT coda_unique UNIQUE (ipa, text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS rhymes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + ipa TEXT NOT NULL, + text TEXT NOT NULL, + lang TEXT NOT NULL, + CONSTRAINT rhyme_unique UNIQUE (ipa, text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +-- Complete syllable breakdown (UNIQUE to prosodyschema.sql) +CREATE TABLE IF NOT EXISTS syllables ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + lang TEXT NOT NULL, + ipa TEXT NOT NULL, + long INTEGER NOT NULL, -- Vowel length (0=short, 1=long) + text TEXT NOT NULL, + onset INTEGER NOT NULL, + medial INTEGER NOT NULL, + nucleus INTEGER NOT NULL, + coda INTEGER NOT NULL, + rhyme INTEGER NOT NULL, + tone INTEGER NOT NULL, + notes TEXT, + FOREIGN KEY (lang) REFERENCES languages(code), + FOREIGN KEY (onset) REFERENCES onsets(id), + FOREIGN KEY (medial) REFERENCES medials(id), + FOREIGN KEY (nucleus) REFERENCES nucleus(id), + FOREIGN KEY (coda) REFERENCES codas(id), + FOREIGN KEY (rhyme) REFERENCES rhymes(id), + FOREIGN KEY (tone) REFERENCES tones(id), + CONSTRAINT syllable_unique UNIQUE (text, ipa, lang) +); + +-- Phonetic pattern storage (UNIQUE to prosodyschema.sql) +CREATE TABLE IF NOT EXISTS word_phonetics ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + word_id INTEGER NOT NULL, + sense_id INTEGER, + ipa TEXT NOT NULL, + ipa_sequence TEXT NOT NULL, -- IPA representation sequence + syllable_count INTEGER NOT NULL, + syllable_sequence TEXT NOT NULL, -- Comma-separated syllables + stressed INTEGER, -- index of stressed syllable + tone_sequence TEXT, -- Comma-separated tones + tag JSONB, -- Pattern/usage tag + notes TEXT, + FOREIGN KEY (word_id) REFERENCES expressions(id), + FOREIGN KEY (sense_id) REFERENCES senses(id) +); + +-- Rhyme analysis tables (UNIQUE to prosodyschema.sql) +CREATE TABLE IF NOT EXISTS word_rhymes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + text TEXT NOT NULL, + lang TEXT NOT NULL, 
+ notes TEXT, + CONSTRAINT wrhyme_unique UNIQUE (text, lang), + FOREIGN KEY (lang) REFERENCES languages(code) +); + +CREATE TABLE IF NOT EXISTS words_wrhymes ( + word_id INTEGER NOT NULL, + wrhyme_id INTEGER NOT NULL, + FOREIGN KEY (word_id) REFERENCES word_phonetics(id), + FOREIGN KEY (wrhyme_id) REFERENCES word_rhymes(id), + PRIMARY KEY (word_id, wrhyme_id) +); + +-- Junction tables for relationships + +-- Expressions to syllables mapping +CREATE TABLE IF NOT EXISTS syllables_words ( + syl_id INTEGER NOT NULL, + word_id INTEGER NOT NULL, + idx INTEGER NOT NULL, -- Position in word + stressed INTEGER, -- Stress accent (0=none, 1=stressed) + FOREIGN KEY (syl_id) REFERENCES syllables(id), + FOREIGN KEY (word_id) REFERENCES word_phonetics(id), + PRIMARY KEY (syl_id, word_id, idx) +); + +/** + * COURSE AND LEARNING TABLES (FROM schema.sql) + * + * Complete course structure and lesson management system + */ + +-- User management +CREATE TABLE IF NOT EXISTS users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + creds TEXT NOT NULL, + CONSTRAINT name_unique UNIQUE (name) +); + +CREATE TABLE sessions ( + id TEXT PRIMARY KEY, -- Session ID/token (usually UUID or random hash) + user_id INTEGER NOT NULL, + created_at INTEGER NOT NULL, -- Timestamp when created + expires_at INTEGER NOT NULL, -- Timestamp when expires + last_activity INTEGER, -- Last activity timestamp + ip_address TEXT, -- Optional: track IP + user_agent TEXT, -- Optional: track browser/client + data JSONB, -- Optional: session-specific data + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE +); + +CREATE INDEX idx_sessions_user ON sessions(user_id); +CREATE INDEX idx_sessions_expires ON sessions(expires_at); + + -- The separation is cleaner: + -- - sessions table - Server-side session management + -- - cookies are just the client-side token that references the session ID + + -- Some apps also add: + -- - refresh_tokens table for JWT refresh tokens + -- - remember_tokens table for "remember me" functionality + -- - active_sessions view for currently valid sessions + + -- The cookie itself just stores the session ID, while all the actual session data lives in the database. This is more secure and gives you better control over session + -- invalidation. 
+-- Course structure +CREATE TABLE IF NOT EXISTS lessons ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + position INTEGER NOT NULL DEFAULT 0, + description TEXT +); + +CREATE TABLE IF NOT EXISTS lang_lessons ( + lesson_id INTEGER NOT NULL, + lang TEXT NOT NULL, + PRIMARY KEY (lang, lesson_id), + FOREIGN KEY (lang) REFERENCES languages(code), + FOREIGN KEY (lesson_id) REFERENCES lessons(id) +); + +CREATE TABLE IF NOT EXISTS cards ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + text TEXT NOT NULL, + note TEXT +); + +CREATE TABLE IF NOT EXISTS cards_expressions ( + expression_id INTEGER NOT NULL, + card_id INTEGER NOT NULL, + PRIMARY KEY (card_id, expression_id), + FOREIGN KEY (card_id) REFERENCES cards(id), + FOREIGN KEY (expression_id) REFERENCES expressions(id) +); + +CREATE TABLE IF NOT EXISTS cards_lessons ( + lesson_id INTEGER, + card_id INTEGER NOT NULL, + PRIMARY KEY (card_id, lesson_id), + FOREIGN KEY (card_id) REFERENCES cards(id), + FOREIGN KEY (lesson_id) REFERENCES lessons(id) +); + + +/** + * USER PROGRESS AND SRS TRACKING (FROM schema.sql) + * + * Complete spaced repetition system with attempt tracking + */ + +CREATE TABLE IF NOT EXISTS user_progress ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + card_id INTEGER NOT NULL, + repetition_count INTEGER DEFAULT 0, + ease_factor REAL DEFAULT 2.5, + interval INTEGER DEFAULT 1, + next_review_date INTEGER, + last_reviewed INTEGER, + is_mastered BOOLEAN DEFAULT FALSE, + FOREIGN KEY (user_id) REFERENCES users(id), + FOREIGN KEY (card_id) REFERENCES cards(id), + CONSTRAINT progress_unique UNIQUE (user_id, card_id) +); + +CREATE TABLE IF NOT EXISTS attempts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + timestamp INTEGER NOT NULL, + card_id INTEGER NOT NULL, + good INTEGER NOT NULL, -- 0 or 1 for success/failure + FOREIGN KEY (user_id) REFERENCES users(id), + FOREIGN KEY (card_id) REFERENCES cards(id) +); + +/** + * USER FEATURES (FROM schema.sql) + */ + +CREATE TABLE IF NOT EXISTS bookmarks ( + word_id INTEGER NOT NULL, + user_id INTEGER NOT NULL, + notes TEXT, + created INTEGER NOT NULL, + PRIMARY KEY (word_id, user_id), + FOREIGN KEY (word_id) REFERENCES expressions(id), + FOREIGN KEY (user_id) REFERENCES users(id) +); + +/** + * INDEXES FOR PERFORMANCE OPTIMIZATION + * + * Comprehensive indexes based on analysis of query patterns + * from all three schemas + */ + +-- Language and expression indexes +CREATE INDEX IF NOT EXISTS idx_expressions_spelling ON expressions(spelling); +CREATE INDEX IF NOT EXISTS idx_expressions_type ON expressions(type); +CREATE INDEX IF NOT EXISTS idx_expressions_lang ON expressions(lang); +CREATE INDEX IF NOT EXISTS idx_expressions_lang_freq ON expressions(lang, frequency DESC); + +-- Sense and semantic indexes +CREATE INDEX IF NOT EXISTS idx_senses_parent ON senses(parent_id); +CREATE INDEX IF NOT EXISTS idx_senses_pos ON senses(pos); +CREATE INDEX IF NOT EXISTS idx_subsenses_sid ON subsenses(sid); +CREATE INDEX IF NOT EXISTS idx_derivation_sid ON derivation(sid); + +-- Phonetic analysis indexes (FROM prosodyschema.sql) +CREATE INDEX IF NOT EXISTS idx_tones_name_lang ON tones(name, lang); +CREATE INDEX IF NOT EXISTS idx_tones_nums_lang ON tones(nums, lang); +CREATE INDEX IF NOT EXISTS idx_syllables_text_lang ON syllables(text, lang); +CREATE INDEX IF NOT EXISTS idx_syllables_tone ON syllables(tone); + +CREATE INDEX IF NOT EXISTS idx_word_phonetics_word_id ON word_phonetics(word_id); +CREATE INDEX IF NOT EXISTS idx_word_phonetics_syllables ON 
word_phonetics(syllable_sequence); +CREATE INDEX IF NOT EXISTS idx_word_phonetics_tones ON word_phonetics(tone_sequence); +CREATE INDEX IF NOT EXISTS idx_word_phonetics_count ON word_phonetics(syllable_count); + +-- Junction table indexes +CREATE INDEX IF NOT EXISTS idx_syllables_words_word_idx ON syllables_words(word_id, idx); +CREATE INDEX IF NOT EXISTS idx_syllables_words_syl ON syllables_words(syl_id); +CREATE INDEX IF NOT EXISTS idx_cards_expressions ON cards_expressions(expression_id, card_id); + +-- User progress and SRS indexes +CREATE INDEX IF NOT EXISTS idx_user_progress_user ON user_progress(user_id); +CREATE INDEX IF NOT EXISTS idx_user_progress_card ON user_progress(card_id); +CREATE INDEX IF NOT EXISTS idx_user_progress_next_review ON user_progress(next_review_date); +CREATE INDEX IF NOT EXISTS idx_attempts_user ON attempts(user_id); +CREATE INDEX IF NOT EXISTS idx_attempts_card ON attempts(card_id); +CREATE INDEX IF NOT EXISTS idx_attempts_user_resource ON attempts(user_id, card_id); + +-- User feature indexes +CREATE INDEX IF NOT EXISTS idx_bookmarks ON bookmarks(word_id); +CREATE INDEX IF NOT EXISTS idx_word_categories_category ON word_categories(category); + +-- Composite indexes for common query patterns +CREATE INDEX IF NOT EXISTS idx_syllables_compound ON syllables(lang, text, tone); +CREATE INDEX IF NOT EXISTS idx_syllables_words_compound ON syllables_words(word_id, idx, syl_id); +CREATE INDEX IF NOT EXISTS idx_word_patterns_mixed ON word_phonetics(syllable_count, syllable_sequence, tone_sequence); + +/** + * TONE-SPECIFIC SRS SYSTEM + * + * Advanced tone pattern learning with detailed progress tracking + * and analytics for effective Thai tone acquisition + */ + +-- Tone pattern progress tracking +CREATE TABLE IF NOT EXISTS tone_pattern_progress ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + word_id INTEGER NOT NULL, + tone_sequence TEXT NOT NULL, + syllable_sequence TEXT NOT NULL, + repetition_count INTEGER DEFAULT 0, + ease_factor REAL DEFAULT 2.5, + interval INTEGER DEFAULT 1, + next_review_date INTEGER NOT NULL, + last_reviewed INTEGER NOT NULL, + is_mastered BOOLEAN DEFAULT FALSE, + difficulty REAL DEFAULT 2.5, + tone_accuracy REAL DEFAULT 0.0, + created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + updated_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + FOREIGN KEY (word_id) REFERENCES expressions(id) ON DELETE CASCADE, + UNIQUE(user_id, word_id) +); + +-- Tone pattern attempt tracking with detailed metrics +CREATE TABLE IF NOT EXISTS tone_attempts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + word_id INTEGER NOT NULL, + timestamp INTEGER NOT NULL, + accuracy REAL NOT NULL CHECK (accuracy >= 0 AND accuracy <= 1), + tone_accuracy REAL NOT NULL CHECK (tone_accuracy >= 0 AND tone_accuracy <= 1), + pronunciation_score REAL DEFAULT 0.0 CHECK (pronunciation_score >= 0 AND pronunciation_score <= 1), + review_time INTEGER NOT NULL, + difficulty REAL NOT NULL, + mode TEXT DEFAULT 'exploration' CHECK (mode IN ('exploration', 'practice', 'review')), + created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + FOREIGN KEY (word_id) REFERENCES expressions(id) ON DELETE CASCADE +); + +-- User tone learning preferences and settings +CREATE TABLE IF NOT EXISTS tone_learning_settings ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + preferred_session_length INTEGER 
DEFAULT 20 CHECK (preferred_session_length > 0), + target_daily_reviews INTEGER DEFAULT 50 CHECK (target_daily_reviews > 0), + difficulty_preference REAL DEFAULT 2.5 CHECK (difficulty_preference >= 1.0 AND difficulty_preference <= 4.0), + audio_enabled BOOLEAN DEFAULT TRUE, + tone_visualization_enabled BOOLEAN DEFAULT TRUE, + auto_advance BOOLEAN DEFAULT FALSE, + created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + updated_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + UNIQUE(user_id) +); + +-- Tone pattern mastery statistics and analytics +CREATE TABLE IF NOT EXISTS tone_mastery_stats ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + user_id INTEGER NOT NULL, + tone_pattern TEXT NOT NULL, + syllable_count INTEGER NOT NULL CHECK (syllable_count > 0), + total_attempts INTEGER DEFAULT 0, + successful_attempts INTEGER DEFAULT 0, + avg_accuracy REAL DEFAULT 0.0 CHECK (avg_accuracy >= 0 AND avg_accuracy <= 1), + avg_review_time REAL DEFAULT 0.0, + best_accuracy REAL DEFAULT 0.0 CHECK (best_accuracy >= 0 AND best_accuracy <= 1), + last_attempted INTEGER, + mastered_at INTEGER, + created_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + updated_at INTEGER DEFAULT (strftime('%s', 'now') * 1000), + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + UNIQUE(user_id, tone_pattern, syllable_count) +); + +/** + * TONE-SPECIFIC INDEXES + * Optimized for tone pattern queries and SRS operations + */ + +CREATE INDEX IF NOT EXISTS idx_tone_progress_user_word ON tone_pattern_progress(user_id, word_id); +CREATE INDEX IF NOT EXISTS idx_tone_progress_next_review ON tone_pattern_progress(next_review_date); +CREATE INDEX IF NOT EXISTS idx_tone_progress_user ON tone_pattern_progress(user_id); +CREATE INDEX IF NOT EXISTS idx_tone_progress_difficulty ON tone_pattern_progress(difficulty); +CREATE INDEX IF NOT EXISTS idx_tone_progress_mastered ON tone_pattern_progress(is_mastered); + +CREATE INDEX IF NOT EXISTS idx_tone_attempts_user_word ON tone_attempts(user_id, word_id); +CREATE INDEX IF NOT EXISTS idx_tone_attempts_timestamp ON tone_attempts(timestamp); +CREATE INDEX IF NOT EXISTS idx_tone_attempts_user ON tone_attempts(user_id); +CREATE INDEX IF NOT EXISTS idx_tone_attempts_mode ON tone_attempts(mode); +CREATE INDEX IF NOT EXISTS idx_tone_attempts_accuracy ON tone_attempts(accuracy, tone_accuracy); + +CREATE INDEX IF NOT EXISTS idx_tone_mastery_pattern ON tone_mastery_stats(tone_pattern, syllable_count); +CREATE INDEX IF NOT EXISTS idx_tone_mastery_user ON tone_mastery_stats(user_id); +CREATE INDEX IF NOT EXISTS idx_tone_mastery_accuracy ON tone_mastery_stats(avg_accuracy); + +/** + * TRIGGERS FOR TONE LEARNING SYSTEM + * Automatic timestamp updates and statistics calculation + */ + +-- Update timestamps automatically +CREATE TRIGGER IF NOT EXISTS update_tone_progress_updated_at +AFTER UPDATE ON tone_pattern_progress +FOR EACH ROW +BEGIN + UPDATE tone_pattern_progress SET updated_at = (strftime('%s', 'now') * 1000) WHERE id = NEW.id; +END; + +CREATE TRIGGER IF NOT EXISTS update_tone_learning_settings_updated_at +AFTER UPDATE ON tone_learning_settings +FOR EACH ROW +BEGIN + UPDATE tone_learning_settings SET updated_at = (strftime('%s', 'now') * 1000) WHERE id = NEW.id; +END; + +CREATE TRIGGER IF NOT EXISTS update_tone_mastery_updated_at +AFTER UPDATE ON tone_mastery_stats +FOR EACH ROW +BEGIN + UPDATE tone_mastery_stats SET updated_at = (strftime('%s', 'now') * 1000) WHERE id = NEW.id; +END; + +-- Update mastery stats when a 
pattern is mastered +CREATE TRIGGER IF NOT EXISTS update_tone_mastery_on_progress +AFTER UPDATE ON tone_pattern_progress +FOR EACH ROW +WHEN NEW.is_mastered = 1 AND OLD.is_mastered = 0 +BEGIN + INSERT OR REPLACE INTO tone_mastery_stats + (user_id, tone_pattern, syllable_count, total_attempts, successful_attempts, avg_accuracy, mastered_at, updated_at) + VALUES ( + NEW.user_id, + NEW.tone_sequence, + (SELECT COUNT(*) FROM (SELECT value FROM json_each(NEW.syllable_sequence))), + NEW.repetition_count, + CAST(NEW.repetition_count * NEW.tone_accuracy AS INTEGER), + NEW.tone_accuracy, + (strftime('%s', 'now') * 1000), + (strftime('%s', 'now') * 1000) + ); +END; + +-- Update tone mastery stats after each attempt +CREATE TRIGGER IF NOT EXISTS update_tone_mastery_after_attempt +AFTER INSERT ON tone_attempts +FOR EACH ROW +BEGIN + -- Get the current tone progress + INSERT OR REPLACE INTO tone_mastery_stats + (user_id, tone_pattern, syllable_count, total_attempts, successful_attempts, avg_accuracy, avg_review_time, best_accuracy, last_attempted, updated_at) + SELECT + ta.user_id, + tpp.tone_sequence, + (SELECT COUNT(*) FROM (SELECT value FROM json_each(tpp.syllable_sequence))) as syllable_count, + COALESCE(tms.total_attempts + 1, 1) as total_attempts, + COALESCE(tms.successful_attempts + CAST(ta.tone_accuracy >= 0.8 AS INTEGER), CAST(ta.tone_accuracy >= 0.8 AS INTEGER)) as successful_attempts, + CASE + WHEN tms.total_attempts IS NULL THEN ta.tone_accuracy + ELSE (tms.avg_accuracy * tms.total_attempts + ta.tone_accuracy) / (tms.total_attempts + 1) + END as avg_accuracy, + CASE + WHEN tms.total_attempts IS NULL THEN ta.review_time + ELSE (tms.avg_review_time * tms.total_attempts + ta.review_time) / (tms.total_attempts + 1) + END as avg_review_time, + CASE + WHEN tms.best_accuracy IS NULL THEN ta.tone_accuracy + WHEN ta.tone_accuracy > tms.best_accuracy THEN ta.tone_accuracy + ELSE tms.best_accuracy + END as best_accuracy, + (strftime('%s', 'now') * 1000) as last_attempted, + (strftime('%s', 'now') * 1000) as updated_at + FROM tone_attempts ta + JOIN tone_pattern_progress tpp ON tpp.word_id = ta.word_id AND tpp.user_id = ta.user_id + LEFT JOIN tone_mastery_stats tms ON tms.user_id = ta.user_id + AND tms.tone_pattern = tpp.tone_sequence + AND tms.syllable_count = (SELECT COUNT(*) FROM (SELECT value FROM json_each(tpp.syllable_sequence))) + WHERE ta.rowid = NEW.rowid; +END; + +/** + * VIEWS FOR TONE LEARNING ANALYTICS + * Common queries for dashboard and reporting + */ + +-- User dashboard view combining traditional SRS and tone learning +CREATE VIEW IF NOT EXISTS user_dashboard AS +SELECT + u.id as user_id, + u.name as username, + COUNT(DISTINCT tpp.word_id) as total_tone_words, + COUNT(DISTINCT CASE WHEN tpp.is_mastered = 1 THEN tpp.word_id END) as mastered_tone_words, + COUNT(DISTINCT CASE WHEN tpp.next_review_date <= (strftime('%s', 'now') * 1000) THEN tpp.word_id END) as due_tone_words, + COUNT(DISTINCT up.card_id) as total_cards, + COUNT(DISTINCT CASE WHEN up.is_mastered = 1 THEN up.card_id END) as mastered_cards, + COUNT(DISTINCT CASE WHEN up.next_review_date <= (strftime('%s', 'now') * 1000) THEN up.card_id END) as due_cards, + (SELECT COUNT(*) FROM attempts WHERE user_id = u.id AND timestamp > (strftime('%s', 'now') * 1000 - 86400000)) as reviews_today, + (SELECT AVG(ta.tone_accuracy) FROM tone_attempts ta WHERE ta.user_id = u.id AND ta.timestamp > (strftime('%s', 'now') * 1000 - 604800000)) as weekly_tone_accuracy +FROM users u +LEFT JOIN tone_pattern_progress tpp ON tpp.user_id = u.id +LEFT 
JOIN user_progress up ON up.user_id = u.id +GROUP BY u.id, u.name; + +-- Tone learning statistics view +CREATE VIEW IF NOT EXISTS tone_learning_stats_view AS +SELECT + tms.user_id, + tms.tone_pattern, + tms.syllable_count, + tms.total_attempts, + tms.successful_attempts, + tms.avg_accuracy, + tms.avg_review_time, + tms.best_accuracy, + tms.last_attempted, + tms.mastered_at, + COUNT(tpp.word_id) as word_count_with_pattern, + AVG(tpp.difficulty) as avg_difficulty, + AVG(tpp.tone_accuracy) as avg_current_accuracy +FROM tone_mastery_stats tms +LEFT JOIN tone_pattern_progress tpp ON tpp.user_id = tms.user_id + AND tpp.tone_sequence = tms.tone_pattern +GROUP BY tms.user_id, tms.tone_pattern, tms.syllable_count, tms.total_attempts, + tms.successful_attempts, tms.avg_accuracy, tms.avg_review_time, + tms.best_accuracy, tms.last_attempted, tms.mastered_at; + +/** + * MIGRATION NOTES: + * =============== + * + * TONE SRS SYSTEM ADDITIONS: + * - Added comprehensive tone learning tables to the unified schema + * - Maintains consistency with existing phonetic analysis structure + * - Supports both traditional SRS and tone-specific learning + * - Provides detailed analytics for tone pattern mastery + * + * BACKWARD COMPATIBILITY: + * - All existing tables and relationships preserved + * - New tables are additive and don't break existing functionality + * - Views provide unified dashboard with both learning systems + * + * PERFORMANCE: + * - Added specialized indexes for tone pattern queries + * - Optimized for Thai tone learning analytics + * - Supports real-time progress tracking + * + * This enhanced unified schema now provides comprehensive support for: + * - Thai language learning with detailed phonetic analysis + * - Course management and lesson structure + * - Traditional spaced repetition system + * - Advanced tone pattern learning with SRS + * - User management and personalization + * - Comprehensive learning analytics + * + * TOTAL: Unified 4 schemas while preserving all unique functionality + */ diff --git a/packages/db/src/semantic.ts b/packages/db/src/semantic.ts new file mode 100644 index 0000000..a3fe44b --- /dev/null +++ b/packages/db/src/semantic.ts @@ -0,0 +1,554 @@ +import { Database } from 'bun:sqlite'; + +export interface Expression { + id: number; + spelling: string; + lang: string; + frequency?: number; + type: string; + syllables?: number; + notes?: string; + ipa?: any; // JSONB + prosody?: any; // JSONB + confidence: number; +} + +export interface Sense { + id: number; + parent_id: number; + spelling: string; + pos?: string; + etymology?: string; + senses?: any; // JSONB + forms?: any; // JSONB + related?: any; // JSONB + ipa?: any; // JSONB + prosody?: any; // JSONB + confidence: number; +} + +export interface Subsense { + id: number; + sid: number; + gloss: string; + examples?: any; // JSONB +} + +export interface Derivation { + id: number; + sid: number; + type: string; + text: string; + tags?: any; // JSONB +} + +export interface Category { + name: string; +} + +export interface WordCategory { + word_id: number; + category: string; +} + +export interface Bookmark { + word_id: number; + user_id: number; + notes?: string; + created: number; +} + +export interface Language { + code: string; + name: string; + native_name?: string; + iso6392?: string; + english?: string; +} + +export class SemanticQueries { + constructor(private db: Database) {} + + // Language operations + getLanguages(): Language[] { + const query = this.db.query(` + SELECT code, name, native_name, iso6392, 
english + FROM languages + ORDER BY name + `); + return query.all() as Language[]; + } + + getLanguageByCode(code: string): Language | null { + const query = this.db.query(` + SELECT code, name, native_name, iso6392, english + FROM languages + WHERE code = ? + `); + return query.get(code) as Language | null; + } + + // Expression operations + getExpressionById(id: number): Expression | null { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE id = ? + `); + const result = query.get(id) as Expression | null; + if (result) { + // Parse JSON fields + result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined; + result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined; + } + return result; + } + + getExpressionsByLanguage(lang: string, limit: number = 100, offset: number = 0): Expression[] { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE lang = ? + ORDER BY frequency DESC, spelling + LIMIT ? OFFSET ? + `); + const results = query.all(lang, limit, offset) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + + searchExpressions(searchTerm: string, lang?: string, limit: number = 50): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE lang = ? AND spelling LIKE ? + ORDER BY + CASE + WHEN spelling = ? THEN 1 + WHEN spelling LIKE ? THEN 2 + ELSE 3 + END, + frequency DESC + LIMIT ? + `); + const results = query.all(lang, `%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE spelling LIKE ? + ORDER BY + CASE + WHEN spelling = ? THEN 1 + WHEN spelling LIKE ? THEN 2 + ELSE 3 + END, + frequency DESC + LIMIT ? + `); + const results = query.all(`%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + getExpressionsByType(type: string, lang?: string, limit: number = 100): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE type = ? AND lang = ? + ORDER BY frequency DESC, spelling + LIMIT ? + `); + const results = query.all(type, lang, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE type = ? + ORDER BY frequency DESC, spelling + LIMIT ? 
+ `); + const results = query.all(type, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + getExpressionsByFrequency(lang: string, minFrequency: number = 0, maxFrequency: number = 1000, limit: number = 100): Expression[] { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE lang = ? AND frequency BETWEEN ? AND ? + ORDER BY frequency DESC, spelling + LIMIT ? + `); + const results = query.all(lang, minFrequency, maxFrequency, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + + // Sense operations + getSensesForExpression(expressionId: number): Sense[] { + const query = this.db.query(` + SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence + FROM senses + WHERE parent_id = ? + ORDER BY confidence DESC + `); + const results = query.all(expressionId) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + + getSenseById(id: number): Sense | null { + const query = this.db.query(` + SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence + FROM senses + WHERE id = ? + `); + const result = query.get(id) as Sense | null; + if (result) { + result.senses = result.senses ? JSON.parse(result.senses) : undefined; + result.forms = result.forms ? JSON.parse(result.forms) : undefined; + result.related = result.related ? JSON.parse(result.related) : undefined; + result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined; + result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined; + } + return result; + } + + searchSenses(searchTerm: string, lang?: string, limit: number = 50): Sense[] { + if (lang) { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE e.lang = ? AND (s.spelling LIKE ? OR s.pos LIKE ?) + ORDER BY s.confidence DESC + LIMIT ? + `); + const results = query.all(lang, `%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE s.spelling LIKE ? OR s.pos LIKE ? + ORDER BY s.confidence DESC + LIMIT ? 
+ `); + const results = query.all(`%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + // Subsense operations + getSubsensesForSense(senseId: number): Subsense[] { + const query = this.db.query(` + SELECT id, sid, gloss, examples + FROM subsenses + WHERE sid = ? + ORDER BY id + `); + const results = query.all(senseId) as Subsense[]; + return results.map(result => ({ + ...result, + examples: result.examples ? JSON.parse(result.examples) : undefined, + })); + } + + // Derivation operations + getDerivationsForSense(senseId: number): Derivation[] { + const query = this.db.query(` + SELECT id, sid, type, text, tags + FROM derivation + WHERE sid = ? + ORDER BY type, text + `); + const results = query.all(senseId) as Derivation[]; + return results.map(result => ({ + ...result, + tags: result.tags ? JSON.parse(result.tags) : undefined, + })); + } + + // Category operations + getCategories(): Category[] { + const query = this.db.query(` + SELECT name + FROM categories + ORDER BY name + `); + return query.all() as Category[]; + } + + getCategoriesForWord(wordId: number): Category[] { + const query = this.db.query(` + SELECT c.name + FROM categories c + JOIN word_categories wc ON c.name = wc.category + WHERE wc.word_id = ? + ORDER BY c.name + `); + return query.all(wordId) as Category[]; + } + + getWordsByCategory(category: string, lang?: string, limit: number = 100): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN word_categories wc ON e.id = wc.word_id + WHERE wc.category = ? AND e.lang = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(category, lang, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN word_categories wc ON e.id = wc.word_id + WHERE wc.category = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(category, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + // Bookmark operations + getBookmarksForUser(userId: number): Bookmark[] { + const query = this.db.query(` + SELECT word_id, user_id, notes, created + FROM bookmarks + WHERE user_id = ? + ORDER BY created DESC + `); + return query.all(userId) as Bookmark[]; + } + + createBookmark(userId: number, wordId: number, notes?: string): Bookmark { + const query = this.db.query(` + INSERT INTO bookmarks (word_id, user_id, notes, created) + VALUES (?, ?, ?, ?) 
+ RETURNING word_id, user_id, notes, created + `); + return query.get(wordId, userId, notes, Date.now()) as Bookmark; + } + + updateBookmark(userId: number, wordId: number, notes?: string): Bookmark | null { + const query = this.db.query(` + UPDATE bookmarks + SET notes = ? + WHERE user_id = ? AND word_id = ? + RETURNING word_id, user_id, notes, created + `); + return query.get(notes, userId, wordId) as Bookmark | null; + } + + deleteBookmark(userId: number, wordId: number): boolean { + const query = this.db.query(` + DELETE FROM bookmarks + WHERE user_id = ? AND word_id = ? + `); + const result = query.run(userId, wordId); + return result.changes > 0; + } + + isBookmarked(userId: number, wordId: number): boolean { + const query = this.db.query(` + SELECT 1 + FROM bookmarks + WHERE user_id = ? AND word_id = ? + LIMIT 1 + `); + return query.get(userId, wordId) !== undefined; + } + + // Advanced search operations + searchByPOS(pos: string, lang?: string, limit: number = 50): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN senses s ON e.id = s.parent_id + WHERE e.lang = ? AND s.pos = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(lang, pos, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN senses s ON e.id = s.parent_id + WHERE s.pos = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(pos, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + searchByEtymology(term: string, lang?: string, limit: number = 50): Sense[] { + if (lang) { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE e.lang = ? AND s.etymology LIKE ? + ORDER BY s.confidence DESC + LIMIT ? + `); + const results = query.all(lang, `%${term}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE s.etymology LIKE ? + ORDER BY s.confidence DESC + LIMIT ? + `); + const results = query.all(`%${term}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? 
JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + // Statistics operations + getSemanticStats(lang: string) { + const query = this.db.query(` + SELECT + COUNT(DISTINCT e.id) as total_expressions, + COUNT(DISTINCT s.id) as total_senses, + COUNT(DISTINCT s.pos) as unique_pos, + COUNT(DISTINCT c.name) as unique_categories, + AVG(e.confidence) as avg_confidence, + COUNT(DISTINCT e.type) as unique_types + FROM expressions e + LEFT JOIN senses s ON e.id = s.parent_id + LEFT JOIN word_categories wc ON e.id = wc.word_id + LEFT JOIN categories c ON wc.category = c.name + WHERE e.lang = ? + `); + return query.get(lang); + } + + getPOSTDistribution(lang: string) { + const query = this.db.query(` + SELECT + s.pos, + COUNT(DISTINCT s.parent_id) as expression_count, + COUNT(s.id) as sense_count, + ROUND(COUNT(s.id) * 100.0 / (SELECT COUNT(*) FROM senses WHERE pos IS NOT NULL), 2) as percentage + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE e.lang = ? AND s.pos IS NOT NULL + GROUP BY s.pos + ORDER BY sense_count DESC + `); + return query.all(lang); + } + + getCategoryDistribution(lang: string) { + const query = this.db.query(` + SELECT + c.name as category, + COUNT(DISTINCT wc.word_id) as word_count, + ROUND(COUNT(DISTINCT wc.word_id) * 100.0 / (SELECT COUNT(*) FROM word_categories), 2) as percentage + FROM categories c + JOIN word_categories wc ON c.name = wc.category + JOIN expressions e ON wc.word_id = e.id + WHERE e.lang = ? + GROUP BY c.name + ORDER BY word_count DESC + `); + return query.all(lang); + } +}
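A minimal lookup sketch for the `SemanticQueries` class above, assuming the unified schema has already been applied to a local SQLite file; the database path, language code, and search term are illustrative only.

```ts
import { Database } from "bun:sqlite";
import { SemanticQueries } from "./semantic";

// Open the dictionary database read-only (the path is an assumption for this sketch).
const db = new Database("dictionary.sqlite", { readonly: true });
const semantic = new SemanticQueries(db);

// Find Thai expressions matching a search term, then pull their senses and glosses.
for (const expr of semantic.searchExpressions("น้ำ", "th", 10)) {
  for (const sense of semantic.getSensesForExpression(expr.id)) {
    const subsenses = semantic.getSubsensesForSense(sense.id);
    console.log(expr.spelling, sense.pos, subsenses.map((s) => s.gloss));
  }
}
```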
\ No newline at end of file diff --git a/packages/db/src/srs.ts b/packages/db/src/srs.ts new file mode 100644 index 0000000..0b80960 --- /dev/null +++ b/packages/db/src/srs.ts @@ -0,0 +1,402 @@ +import { Database } from 'bun:sqlite'; + +// Traditional SRS Types +export interface SRSProgress { + id?: number; + user_id: number; + card_id: number; + repetition_count: number; + ease_factor: number; + interval: number; + next_review_date: number | null; + last_reviewed: number | null; + is_mastered: boolean; + created_at?: number; + updated_at?: number; +} + +export interface SRSAttempt { + id?: number; + user_id: number; + timestamp: number; + card_id: number; + good: number; // 0 or 1 + created_at?: number; +} + +// Tone SRS Types +export interface ToneProgress { + id?: number; + user_id: number; + word_id: number; + tone_sequence: string; + syllable_sequence: string; + repetition_count: number; + ease_factor: number; + interval: number; + next_review_date: number; + last_reviewed: number; + is_mastered: boolean; + difficulty: number; + tone_accuracy: number; + created_at?: number; + updated_at?: number; +} + +export interface ToneAttempt { + id?: number; + user_id: number; + word_id: number; + timestamp: number; + accuracy: number; + tone_accuracy: number; + pronunciation_score: number; + review_time: number; + difficulty: number; + mode: 'exploration' | 'practice' | 'review'; + created_at?: number; +} + +export interface ToneMasteryStats { + id?: number; + user_id: number; + tone_pattern: string; + syllable_count: number; + total_attempts: number; + successful_attempts: number; + avg_accuracy: number; + avg_review_time: number; + best_accuracy: number; + last_attempted: number | null; + mastered_at: number | null; + created_at?: number; + updated_at?: number; +} + +export class SRSQueries { + constructor(private db: Database) {} + + // Traditional SRS Methods + + /** + * Get or create SRS progress for a user and card + */ + getOrCreateSRSProgress(userId: number, cardId: number): SRSProgress { + const query = this.db.query(` + INSERT INTO user_progress (user_id, card_id) + VALUES (?, ?) + ON CONFLICT (user_id, card_id) DO UPDATE SET + user_id = excluded.user_id, + card_id = excluded.card_id + RETURNING id, user_id, card_id, repetition_count, ease_factor, interval, + next_review_date, last_reviewed, is_mastered, created_at, updated_at + `); + + return query.get(userId, cardId) as SRSProgress; + } + + /** + * Update SRS progress after a review + */ + updateSRSProgress(progress: Omit<SRSProgress, 'id' | 'created_at' | 'updated_at'>): SRSProgress { + const query = this.db.query(` + UPDATE user_progress + SET repetition_count = ?, + ease_factor = ?, + interval = ?, + next_review_date = ?, + last_reviewed = ?, + is_mastered = ?, + updated_at = (strftime('%s', 'now') * 1000) + WHERE user_id = ? AND card_id = ? + RETURNING id, user_id, card_id, repetition_count, ease_factor, interval, + next_review_date, last_reviewed, is_mastered, created_at, updated_at + `); + + return query.get( + progress.repetition_count, + progress.ease_factor, + progress.interval, + progress.next_review_date, + progress.last_reviewed, + progress.is_mastered, + progress.user_id, + progress.card_id + ) as SRSProgress; + } + + /** + * Record a traditional SRS attempt + */ + recordSRSAttempt(attempt: Omit<SRSAttempt, 'id' | 'created_at'>): SRSAttempt { + const query = this.db.query(` + INSERT INTO attempts (user_id, timestamp, card_id, good) + VALUES (?, ?, ?, ?) 
+ RETURNING id, user_id, timestamp, card_id, good, created_at + `); + + return query.get( + attempt.user_id, + attempt.timestamp, + attempt.card_id, + attempt.good + ) as SRSAttempt; + } + + /** + * Get due cards for a user + */ + getDueCards(userId: number, limit: number = 20): number[] { + const query = this.db.query(` + SELECT card_id + FROM user_progress + WHERE user_id = ? + AND (next_review_date IS NULL OR next_review_date <= ?) + AND is_mastered = 0 + ORDER BY next_review_date ASC + LIMIT ? + `); + + const results = query.all(userId, Date.now(), limit) as { card_id: number }[]; + return results.map(r => r.card_id); + } + + /** + * Get SRS progress statistics for a user + */ + getSRSStats(userId: number) { + const query = this.db.query(` + SELECT + COUNT(*) as total_cards, + COUNT(CASE WHEN is_mastered = 1 THEN 1 END) as mastered_cards, + COUNT(CASE WHEN next_review_date <= ? THEN 1 END) as due_cards, + COUNT(CASE WHEN next_review_date > ? THEN 1 END) as future_cards, + AVG(ease_factor) as avg_ease_factor, + AVG(interval) as avg_interval + FROM user_progress + WHERE user_id = ? + `); + + return query.get(Date.now(), Date.now(), userId); + } + + // Tone SRS Methods + + /** + * Get or create tone progress for a user and word + */ + getOrCreateToneProgress(userId: number, wordId: number, toneSequence: string, syllableSequence: string): ToneProgress { + const query = this.db.query(` + INSERT INTO tone_pattern_progress (user_id, word_id, tone_sequence, syllable_sequence, next_review_date, last_reviewed) + VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT (user_id, word_id) DO UPDATE SET + tone_sequence = excluded.tone_sequence, + syllable_sequence = excluded.syllable_sequence + RETURNING id, user_id, word_id, tone_sequence, syllable_sequence, repetition_count, + ease_factor, interval, next_review_date, last_reviewed, is_mastered, + difficulty, tone_accuracy, created_at, updated_at + `); + + return query.get( + userId, + wordId, + toneSequence, + syllableSequence, + Date.now(), // next_review_date + Date.now() // last_reviewed + ) as ToneProgress; + } + + /** + * Update tone progress after a practice session + */ + updateToneProgress(progress: Omit<ToneProgress, 'id' | 'created_at' | 'updated_at'>): ToneProgress { + const query = this.db.query(` + UPDATE tone_pattern_progress + SET repetition_count = ?, + ease_factor = ?, + interval = ?, + next_review_date = ?, + last_reviewed = ?, + is_mastered = ?, + difficulty = ?, + tone_accuracy = ?, + updated_at = (strftime('%s', 'now') * 1000) + WHERE user_id = ? AND word_id = ? + RETURNING id, user_id, word_id, tone_sequence, syllable_sequence, repetition_count, + ease_factor, interval, next_review_date, last_reviewed, is_mastered, + difficulty, tone_accuracy, created_at, updated_at + `); + + return query.get( + progress.repetition_count, + progress.ease_factor, + progress.interval, + progress.next_review_date, + progress.last_reviewed, + progress.is_mastered, + progress.difficulty, + progress.tone_accuracy, + progress.user_id, + progress.word_id + ) as ToneProgress; + } + + /** + * Record a tone attempt + */ + recordToneAttempt(attempt: Omit<ToneAttempt, 'id' | 'created_at'>): ToneAttempt { + const query = this.db.query(` + INSERT INTO tone_attempts (user_id, word_id, timestamp, accuracy, tone_accuracy, + pronunciation_score, review_time, difficulty, mode) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ RETURNING id, user_id, word_id, timestamp, accuracy, tone_accuracy, + pronunciation_score, review_time, difficulty, mode, created_at + `); + + return query.get( + attempt.user_id, + attempt.word_id, + attempt.timestamp, + attempt.accuracy, + attempt.tone_accuracy, + attempt.pronunciation_score, + attempt.review_time, + attempt.difficulty, + attempt.mode + ) as ToneAttempt; + } + + /** + * Get due tone words for a user + */ + getDueToneWords(userId: number, limit: number = 20): number[] { + const query = this.db.query(` + SELECT word_id + FROM tone_pattern_progress + WHERE user_id = ? + AND next_review_date <= ? + AND is_mastered = 0 + ORDER BY next_review_date ASC, difficulty ASC + LIMIT ? + `); + + const results = query.all(userId, Date.now(), limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + /** + * Get tone progress statistics for a user + */ + getToneStats(userId: number) { + const query = this.db.query(` + SELECT + COUNT(*) as total_tone_words, + COUNT(CASE WHEN is_mastered = 1 THEN 1 END) as mastered_tone_words, + COUNT(CASE WHEN next_review_date <= ? THEN 1 END) as due_tone_words, + AVG(difficulty) as avg_difficulty, + AVG(tone_accuracy) as avg_tone_accuracy, + AVG(ease_factor) as avg_ease_factor + FROM tone_pattern_progress + WHERE user_id = ? + `); + + return query.get(Date.now(), userId); + } + + /** + * Get recent tone attempts for a user + */ + getRecentToneAttempts(userId: number, limit: number = 10): ToneAttempt[] { + const query = this.db.query(` + SELECT id, user_id, word_id, timestamp, accuracy, tone_accuracy, + pronunciation_score, review_time, difficulty, mode, created_at + FROM tone_attempts + WHERE user_id = ? + ORDER BY timestamp DESC + LIMIT ? + `); + + return query.all(userId, limit) as ToneAttempt[]; + } + + /** + * Get tone mastery statistics for a user + */ + getToneMasteryStats(userId: number): ToneMasteryStats[] { + const query = this.db.query(` + SELECT id, user_id, tone_pattern, syllable_count, total_attempts, successful_attempts, + avg_accuracy, avg_review_time, best_accuracy, last_attempted, mastered_at, + created_at, updated_at + FROM tone_mastery_stats + WHERE user_id = ? + ORDER BY avg_accuracy DESC, last_attempted DESC + `); + + return query.all(userId) as ToneMasteryStats[]; + } + + /** + * Get tone words for practice (prioritizes due words, then random new words) + */ + getToneWordsForPractice(userId: number, limit: number = 20): number[] { + const query = this.db.query(` + WITH due_words AS ( + SELECT word_id, 0 as priority + FROM tone_pattern_progress + WHERE user_id = ? AND next_review_date <= ? AND is_mastered = 0 + LIMIT ? + ), + new_words AS ( + SELECT e.id as word_id, 1 as priority + FROM expressions e + LEFT JOIN tone_pattern_progress tpp ON e.id = tpp.word_id AND tpp.user_id = ? + WHERE tpp.word_id IS NULL + AND e.lang = 'th' + AND e.type = 'word' + ORDER BY e.frequency DESC + LIMIT ? + ) + SELECT word_id + FROM ( + SELECT word_id, priority FROM due_words + UNION ALL + SELECT word_id, priority FROM new_words + ) + ORDER BY priority, RANDOM() + LIMIT ? 
+ `); + + const results = query.all(userId, Date.now(), limit, userId, limit, limit) as { word_id: number }[]; + return results.map(r => r.word_id); + } + + /** + * Get user dashboard data (combines traditional SRS and tone SRS) + */ + getUserDashboard(userId: number) { + const query = this.db.query(` + SELECT + u.id as user_id, + u.name as username, + COUNT(DISTINCT tpp.word_id) as total_tone_words, + COUNT(DISTINCT CASE WHEN tpp.is_mastered = 1 THEN tpp.word_id END) as mastered_tone_words, + COUNT(DISTINCT CASE WHEN tpp.next_review_date <= ? THEN tpp.word_id END) as due_tone_words, + COUNT(DISTINCT up.card_id) as total_cards, + COUNT(DISTINCT CASE WHEN up.is_mastered = 1 THEN up.card_id END) as mastered_cards, + COUNT(DISTINCT CASE WHEN up.next_review_date <= ? THEN up.card_id END) as due_cards, + (SELECT COUNT(*) FROM attempts WHERE user_id = u.id AND timestamp > ?) as reviews_today, + (SELECT AVG(ta.tone_accuracy) FROM tone_attempts ta WHERE ta.user_id = u.id AND ta.timestamp > ?) as weekly_tone_accuracy + FROM users u + LEFT JOIN tone_pattern_progress tpp ON tpp.user_id = u.id + LEFT JOIN user_progress up ON up.user_id = u.id + WHERE u.id = ? + GROUP BY u.id, u.name + `); + + const weekAgo = Date.now() - (7 * 24 * 60 * 60 * 1000); + const today = Date.now() - (24 * 60 * 60 * 1000); + + return query.get(Date.now(), Date.now(), today, weekAgo, userId); + } +}
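A sketch of one tone-practice step built on the `SRSQueries` class above. The SM-2-style constants, the 0.8 success cutoff, and the mastery rule are assumptions for illustration, and the placeholder tone/syllable sequences would normally come from the phonetics queries rather than being hard-coded.

```ts
import { Database } from "bun:sqlite";
import { SRSQueries } from "./srs";

const db = new Database("dictionary.sqlite");
const srs = new SRSQueries(db);

// Record one tone attempt for a word and reschedule it with a rough SM-2-style update.
// Constants and thresholds are illustrative; the app's real scheduler may differ.
function practiceToneWord(userId: number, wordId: number, toneAccuracy: number, reviewTimeMs: number) {
  const now = Date.now();
  // Placeholder sequences; in practice these come from the phonetics layer.
  const progress = srs.getOrCreateToneProgress(userId, wordId, '["mid","low"]', '["กา","กับ"]');

  srs.recordToneAttempt({
    user_id: userId,
    word_id: wordId,
    timestamp: now,
    accuracy: toneAccuracy,
    tone_accuracy: toneAccuracy,
    pronunciation_score: 0,
    review_time: reviewTimeMs,
    difficulty: progress.difficulty,
    mode: "practice",
  });

  const good = toneAccuracy >= 0.8;
  const ease = Math.max(1.3, progress.ease_factor + (good ? 0.1 : -0.2));
  const reps = good ? progress.repetition_count + 1 : 0;
  const interval = good ? Math.max(1, Math.round(progress.interval * ease)) : 1;

  return srs.updateToneProgress({
    user_id: userId,
    word_id: wordId,
    tone_sequence: progress.tone_sequence,
    syllable_sequence: progress.syllable_sequence,
    repetition_count: reps,
    ease_factor: ease,
    interval,
    next_review_date: now + interval * 86_400_000, // days → ms
    last_reviewed: now,
    is_mastered: reps >= 8 && toneAccuracy >= 0.9, // arbitrary mastery rule for the sketch
    difficulty: progress.difficulty,
    tone_accuracy: toneAccuracy,
  });
}

// Example: work through the next batch of due or new words for user 1.
for (const wordId of srs.getToneWordsForPractice(1, 5)) {
  practiceToneWord(1, wordId, 0.85, 4200);
}
```

The 0.8 cutoff mirrors the success threshold used by the `update_tone_mastery_after_attempt` trigger in the schema, so attempts recorded this way feed the `tone_mastery_stats` aggregates consistently.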
\ No newline at end of file diff --git a/packages/db/src/test.ts b/packages/db/src/test.ts new file mode 100644 index 0000000..6767c13 --- /dev/null +++ b/packages/db/src/test.ts @@ -0,0 +1,149 @@ +import ndb from "."; +import toneDb from "@/lib/db/prosodydb"; + +// function test() { +// const tones = ["low", "mid"]; +// const ndb = new BigQueries(); +// const nq = ndb.fetchWordsByToneAndSyls(tones); +// const oq = toneDb.fetchWordsByToneAndSyls(tones); +// // const senses = JSON.parse(res[0].senses_array); +// console.log({ nq }); +// console.log(nq.length, oq.length); +// } +function test() { + const tones = ["low", "mid"]; + console.log("wtf"); + // const qq = ndb.fetchWordsByToneAndSyls1(tones); + // const oq = ndb.fetchWordsByToneAndSylsO(tones); + const nq = ndb.fetchWordsByToneAndSyls(tones); + // const roq = toneDb.fetchWordsByToneAndSyls(tones); + // const senses = JSON.parse(res[0].senses_array); + // console.log({ nq }); + // console.log("old", oq.length); + // console.log("old db", roq.length); + console.log("new", nq); + // console.log("test", qq.length); +} +test(); + +// ` +// ` +// WITH word_tone_sequences AS ( +// SELECT +// w.id as word_id, +// w.spelling, +// wp.ipa, +// w.frequency, +// GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq, +// GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, +// COUNT(sw.syl_id) as syllable_count, +// (SELECT +// json_group_array(json_object( +// 'id', s.id, +// 'pos', s.pos, +// 'etymology', s.etymology, +// 'confidence', s.confidence, +// 'subsenses', ( +// SELECT json_group_array(json_object( +// 'id', ss.id, +// 'gloss', ss.gloss +// )) +// FROM subsenses ss +// WHERE ss.sid = s.id +// ), +// 'examples', ( +// SELECT json_group_array(json_object( +// 'id', ex.id, +// 'example', ex.example, +// 'ref', ex.ref +// )) +// FROM examples ex +// WHERE ex.sid = s.id +// ), +// 'derivation', ( +// SELECT json_group_array(json_object( +// 'id', d.id, +// 'type', d.type, +// 'text', d.text, +// 'tags', d.tags +// )) +// FROM derivation d +// WHERE d.sid = s.id +// ), +// 'categories', ( +// SELECT json_group_array(wc.category) +// FROM word_categories wc +// WHERE wc.word_id = s.id +// ) +// )) +// FROM senses s +// WHERE s.parent_id = w.id +// ) as senses_array +// FROM expressions w +// JOIN word_phonetics wp ON w.id = wp.word_id +// JOIN syllables_words sw ON wp.id = sw.word_id +// JOIN syllables sy ON sw.syl_id = sy.id +// JOIN tones t ON sy.tone = t.id +// GROUP BY w.id, w.spelling, w.lang, w.frequency +// ) +// SELECT * +// FROM word_tone_sequences +// WHERE tone_sequence LIKE ? +// AND syllable_count = ? 
+// ORDER BY frequency ASC NULLS LAST; +// ` +// +// +// +// ` +// WITH word_tone_sequences AS ( +// SELECT +// w.id as word_id, +// w.spelling, +// wp.ipa, +// w.frequency, +// GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq, +// GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence, +// COUNT(sw.syl_id) as syllable_count +// FROM expressions w +// JOIN word_phonetics wp ON w.id = wp.word_id +// JOIN syllables_words sw ON wp.id = sw.word_id +// JOIN syllables sy ON sw.syl_id = sy.id +// JOIN tones t ON sy.tone = t.id +// GROUP BY w.id, w.spelling, w.lang, w.frequency, wp.ipa +// ), +// sense_data AS ( +// SELECT +// s.*, +// GROUP_CONCAT(DISTINCT ss.id || ':' || ss.gloss, '|') as subsenses_data, +// GROUP_CONCAT(DISTINCT ex.id || ':' || ex.example || ':' || COALESCE(ex.ref, ''), '|') as examples_data, +// GROUP_CONCAT(DISTINCT d.id || ':' || d.type || ':' || d.text, '|') as derivation_data, +// GROUP_CONCAT(DISTINCT wc.category, '|') as categories_data +// FROM senses s +// LEFT JOIN subsenses ss ON ss.sid = s.id +// LEFT JOIN examples ex ON ex.sid = s.id +// LEFT JOIN derivation d ON d.sid = s.id +// LEFT JOIN word_categories wc ON wc.word_id = s.id +// GROUP BY s.id +// ) +// SELECT +// wts.*, +// (SELECT +// json_group_array(json_object( +// 'id', sd.id, +// 'pos', sd.pos, +// 'etymology', sd.etymology, +// 'confidence', sd.confidence, +// 'subsenses_data', sd.subsenses_data, +// 'examples_data', sd.examples_data, +// 'derivation_data', sd.derivation_data, +// 'categories_data', sd.categories_data +// )) +// FROM sense_data sd +// WHERE sd.parent_id = wts.word_id +// ) as senses_array +// FROM word_tone_sequences wts +// WHERE wts.tone_sequence LIKE ? +// AND wts.syllable_count = ? +// ORDER BY wts.frequency ASC NULLS LAST; +// ` diff --git a/packages/db/src/types.ts b/packages/db/src/types.ts new file mode 100644 index 0000000..66c2826 --- /dev/null +++ b/packages/db/src/types.ts @@ -0,0 +1,84 @@ +export type Tone = { + letters: string; + numbers: number; + name: string; +}; + +export type Phoneme = { + ipa: string; + spelling: string; +}; +export type Syllable = { + stressed: boolean; + long: boolean; + spelling: string; + ipa: string; + nucleus: Phoneme; + onset: Phoneme; + medial: Phoneme; + coda: Phoneme; + rhyme: Phoneme; + tone: Tone; +}; + +export type ToneQuery = Array<string | null>; +export type MutationType = { change: string } | { keep: string }; +export type MutationOrder = MutationType[]; + +export type PhoneticData = { + word_id: number; + tone_sequence: string; + syllable_count: number; + syl_seq: string; + spelling: string; + ipa: string; + frequency: number; +}; + +export const thaiTones: Record<string, string> = { + "˧": "mid", + "˨˩": "low", + "˥˩": "falling", + "˦˥": "high", + "˩˩˦": "rising", +}; +export const thaiToneNums: Record<string, number> = { + "˧": 33, + "˨˩": 21, + "˥˩": 41, + "˦˥": 45, + "˩˩˦": 214, +}; + +export type FullWordDataDB = { + word_id: number; + tone_sequence: string; + syllable_count: number; + syl_seq: string; + spelling: string; + ipa: string; + frequency: number; + senses_array: string; +}; +export type FullWordData = { + word_id: number; + tone_sequence: string; + syllable_count: number; + syl_seq: string; + spelling: string; + ipa: string; + frequency: number; + senses: Sense[]; +}; +export type Sense = { + confidence: number; + examples: Example[]; + categories: string[]; + etymology: string; + derivation: Derivation[]; + pos: string; + forms: Array<{ form: string; tags: string[] }>; + glosses: string[]; +}; +export type Example = { 
ref: string; text: string }; +export type Derivation = { type: string; text: string; tags: any }; diff --git a/packages/db/src/users.ts b/packages/db/src/users.ts new file mode 100644 index 0000000..ebf715c --- /dev/null +++ b/packages/db/src/users.ts @@ -0,0 +1,305 @@ +import { Database } from "bun:sqlite"; + +export interface User { + id: number; + name: string; + creds: string; +} + +export interface Session { + id: string; + user_id: number; + created_at: number; + expires_at: number; + last_activity?: number; + ip_address?: string; + user_agent?: string; + data?: any; +} + +export interface UserSettings { + id?: number; + user_id: number; + preferred_session_length: number; + target_daily_reviews: number; + difficulty_preference: number; + audio_enabled: boolean; + tone_visualization_enabled: boolean; + auto_advance: boolean; + created_at?: number; + updated_at?: number; +} + +export class UserQueries { + constructor(private db: Database) {} + + /** + * Create a new user + */ + createUser(name: string, creds: string): User { + const query = this.db.query(` + INSERT INTO users (name, creds) + VALUES (?, ?) + RETURNING id, name, creds + `); + + return query.get(name, creds) as User; + } + + /** + * Get user by ID + */ + getUserById(id: number): User | null { + const query = this.db.query(` + SELECT id, name, creds + FROM users + WHERE id = ? + `); + + return query.get(id) as User | null; + } + + /** + * Get user by name + */ + getUserByName(name: string): User | null { + const query = this.db.query(` + SELECT id, name, creds + FROM users + WHERE name = ? + `); + + return query.get(name) as User | null; + } + + /** + * Create a new session for a user + */ + createSession(userId: number, sessionToken: string, expiresAt: number): Session { + const query = this.db.query(` + INSERT INTO sessions (id, user_id, created_at, expires_at, last_activity) + VALUES (?, ?, ?, ?, ?) + RETURNING id, user_id, created_at, expires_at, last_activity, ip_address, user_agent, data + `); + + const now = Date.now(); + return query.get(sessionToken, userId, now, expiresAt, now) as Session; + } + + /** + * Get session by token + */ + getSessionByToken(sessionToken: string): Session | null { + const query = this.db.query(` + SELECT id, user_id, created_at, expires_at, last_activity, ip_address, user_agent, data + FROM sessions + WHERE id = ? AND expires_at > ? + `); + + return query.get(sessionToken, Date.now()) as Session | null; + } + + /** + * Get user from session token + */ + getUserFromSession(sessionToken: string): User | null { + const query = this.db.query(` + SELECT u.id, u.name, u.creds + FROM users u + JOIN sessions s ON u.id = s.user_id + WHERE s.id = ? AND s.expires_at > ? + `); + + return query.get(sessionToken, Date.now()) as User | null; + } + + /** + * Delete session by token + */ + deleteSession(sessionToken: string): boolean { + const query = this.db.query(` + DELETE FROM sessions + WHERE id = ? + `); + + const result = query.run(sessionToken); + return result.changes > 0; + } + + /** + * Clean up expired sessions + */ + cleanupExpiredSessions(): number { + const query = this.db.query(` + DELETE FROM sessions + WHERE expires_at <= ? + `); + + const result = query.run(Date.now()); + return result.changes; + } + + /** + * Update session last activity + */ + updateSessionActivity(sessionToken: string): boolean { + const query = this.db.query(` + UPDATE sessions + SET last_activity = ? + WHERE id = ? AND expires_at > ? 
+ `); + + const result = query.run(Date.now(), sessionToken, Date.now()); + return result.changes > 0; + } + + /** + * Extend session expiry + */ + extendSessionExpiry(sessionToken: string, newExpiry: number): boolean { + const query = this.db.query(` + UPDATE sessions + SET expires_at = ?, last_activity = ? + WHERE id = ? AND expires_at > ? + `); + + const result = query.run(newExpiry, Date.now(), sessionToken, Date.now()); + return result.changes > 0; + } + + /** + * Get all active sessions for a user + */ + getUserSessions(userId: number): Session[] { + const query = this.db.query(` + SELECT id, user_id, created_at, expires_at, last_activity, ip_address, user_agent, data + FROM sessions + WHERE user_id = ? AND expires_at > ? + ORDER BY last_activity DESC + `); + + return query.all(userId, Date.now()) as Session[]; + } + + /** + * Delete all sessions for a user (for logout all devices) + */ + deleteAllUserSessions(userId: number): number { + const query = this.db.query(` + DELETE FROM sessions + WHERE user_id = ? + `); + + const result = query.run(userId); + return result.changes; + } + + /** + * Get user settings + */ + getUserSettings(userId: number): UserSettings | null { + const query = this.db.query(` + SELECT id, user_id, preferred_session_length, target_daily_reviews, + difficulty_preference, audio_enabled, tone_visualization_enabled, + auto_advance, created_at, updated_at + FROM tone_learning_settings + WHERE user_id = ? + `); + + return query.get(userId) as UserSettings | null; + } + + /** + * Create or update user settings + */ + upsertUserSettings( + settings: Omit<UserSettings, "id" | "created_at" | "updated_at">, + ): UserSettings { + const query = this.db.query(` + INSERT INTO tone_learning_settings + (user_id, preferred_session_length, target_daily_reviews, + difficulty_preference, audio_enabled, tone_visualization_enabled, auto_advance) + VALUES (?, ?, ?, ?, ?, ?, ?) + ON CONFLICT (user_id) DO UPDATE SET + preferred_session_length = excluded.preferred_session_length, + target_daily_reviews = excluded.target_daily_reviews, + difficulty_preference = excluded.difficulty_preference, + audio_enabled = excluded.audio_enabled, + tone_visualization_enabled = excluded.tone_visualization_enabled, + auto_advance = excluded.auto_advance, + updated_at = (strftime('%s', 'now') * 1000) + RETURNING id, user_id, preferred_session_length, target_daily_reviews, + difficulty_preference, audio_enabled, tone_visualization_enabled, + auto_advance, created_at, updated_at + `); + + return query.get( + settings.user_id, + settings.preferred_session_length, + settings.target_daily_reviews, + settings.difficulty_preference, + settings.audio_enabled, + settings.tone_visualization_enabled, + settings.auto_advance, + ) as UserSettings; + } + + /** + * Update user settings partially + */ + updateUserSettings( + userId: number, + updates: Partial<UserSettings>, + ): UserSettings | null { + const setClause = Object.keys(updates) + .filter( + (key) => key !== "id" && key !== "user_id" && key !== "created_at", + ) + .map((key) => `${key} = ?`) + .join(", "); + + if (!setClause) return null; + + const values = Object.values(updates).filter((_, index) => { + const key = Object.keys(updates)[index]; + return key !== "id" && key !== "user_id" && key !== "created_at"; + }); + + const query = this.db.query(` + UPDATE tone_learning_settings + SET ${setClause}, updated_at = (strftime('%s', 'now') * 1000) + WHERE user_id = ? 
+ RETURNING id, user_id, preferred_session_length, target_daily_reviews, + difficulty_preference, audio_enabled, tone_visualization_enabled, + auto_advance, created_at, updated_at + `); + + return query.get(...values, userId) as UserSettings | null; + } + + /** + * Get all users (admin function) + */ + getAllUsers(): User[] { + const query = this.db.query(` + SELECT id, name, creds + FROM users + ORDER BY name + `); + + return query.all() as User[]; + } + + /** + * Delete user (admin function) + */ + deleteUser(userId: number): boolean { + const query = this.db.query(` + DELETE FROM users + WHERE id = ? + `); + + const result = query.run(userId); + return result.changes > 0; + } +} diff --git a/packages/db/tsconfig.json b/packages/db/tsconfig.json new file mode 100644 index 0000000..bfa0fea --- /dev/null +++ b/packages/db/tsconfig.json @@ -0,0 +1,29 @@ +{ + "compilerOptions": { + // Environment setup & latest features + "lib": ["ESNext"], + "target": "ESNext", + "module": "Preserve", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + + // Bundler mode + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedIndexedAccess": true, + "noImplicitOverride": true, + + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + "noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + } +} |
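A rough end-to-end sketch of wiring this package together with `bun:sqlite`. The file paths, credential string, token generator, and 30-day expiry are assumptions for illustration, not part of the package.

```ts
import { Database } from "bun:sqlite";
import { SemanticQueries } from "./src/semantic";
import { SRSQueries } from "./src/srs";
import { UserQueries } from "./src/users";

// Open (or create) the database and apply the unified schema.
// Assumes exec() accepts the multi-statement schema file; split it into statements otherwise.
const db = new Database("dictionary.sqlite");
db.exec(await Bun.file("./src/schema.sql").text());

const users = new UserQueries(db);
const semantic = new SemanticQueries(db);
const srs = new SRSQueries(db);

// Register a user and open a session (token and expiry are illustrative).
const user = users.createUser("alice", "argon2-hash-goes-here");
const session = users.createSession(user.id, crypto.randomUUID(), Date.now() + 30 * 86_400_000);

// A later request resolves the user from the session token and fetches work to do.
const current = users.getUserFromSession(session.id);
if (current) {
  console.log(current.name, "languages:", semantic.getLanguages().length);
  console.log("due cards:", srs.getDueCards(current.id, 20).length);
  console.log("due tone words:", srs.getDueToneWords(current.id, 20).length);
}
```

A periodic call to `users.cleanupExpiredSessions()` would keep the sessions table from growing unbounded.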
