1 files changed, 244 insertions, 83 deletions
diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts
index 6a89ee2..1425a43 100644
--- a/packages/db/src/index.ts
+++ b/packages/db/src/index.ts
@@ -1,39 +1,242 @@
-import { Database } from "bun:sqlite";
-import type { FullWordDataDB } from "./types";
+import { SQL } from "bun";
 import { SRSQueries } from "./srs";
+// NEW API
+//
+// https://bun.com/docs/runtime/sql
+// import { sql } from "bun";
+
+// // Basic insert with direct values
+// const [user] = await sql`
+//   INSERT INTO users (name, email)
+//   VALUES (${name}, ${email})
+//   RETURNING *
+// `;
+
+// // Using object helper for cleaner syntax
+// const userData = {
+//   name: "Alice",
+//   email: "alice@example.com",
+// };
+
+// const [newUser] = await sql`
+//   INSERT INTO users ${sql(userData)}
+//   RETURNING *
+// `;
+// // Expands to: INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com')
+
+const DB_PATH = "/home/y/code/bun/sorlang/bulkdata/db.db";
+import type { FullWordDataDB } from "@sortug/langlib";
 
 export class Queries {
-  db: Database;
-  srs: SRSQueries;
+  db: SQL;
+  ready = false;
+  // srs: SRSQueries;
   constructor() {
-    const db = new Database("/home/y/code/bun/sorlang/bulkdata/unified.db");
-    db.exec("PRAGMA journal_mode = WAL"); // Enable Write-Ahead Logging for better performance
-    db.exec("PRAGMA foreign_keys = ON");
-    db.exec("PRAGMA cache_size = -8000"); // Increase cache size to 8MB
-    db.exec("PRAGMA mmap_size = 30000000000");
-    db.exec("PRAGMA temp_store = MEMORY"); // Store temp tables in memory
-    db.exec("PRAGMA synchronous = NORMAL"); // Slightly less safe but faster
-
+    console.log("setting sorlang DB");
+    const db = new SQL({
+      adapter: "sqlite",
+      filename: DB_PATH,
+      create: false,
+    });
     this.db = db;
-    this.srs = new SRSQueries(db);
+    // this.srs = new SRSQueries(db);
+  }
+  async init() {
+    try {
+      await this.db`PRAGMA journal_mode = WAL`; // Enable Write-Ahead Logging for better performance
+      await this.db`PRAGMA foreign_keys = ON`;
+      await this.db`PRAGMA cache_size = -8000`; // Increase cache size to 8MB
+      await this.db`PRAGMA mmap_size = 30000000000`;
+      await this.db`PRAGMA temp_store = MEMORY`; // Store temp tables in memory
+      await this.db`PRAGMA synchronous = NORMAL`; // Slightly less safe but faster
+      this.ready = true;
+    } catch (e) {
+      console.error("error starting db", e);
+    }
+  }
+
+  async fetchExpressionById(id: number) {
+    const query = await this.db`
+      SELECT * FROM expressions WHERE id = ${id}
+      `;
+    return query;
+  }
+  // TODO word_phonetics is MANY TO ONE vs expressions, so it's possible for word_phonetics to lack entries for syllables
+  async fetchExpressionBySpelling(
+    spelling: string,
+    lang: string,
+  ): Promise<FullWordDataDB | null> {
+    const data = await this.db`
+SELECT 
+    json_object(
+        'id', e.id,
+        'spelling', e.spelling,
+        'frequency', e.frequency,
+        'phonetic', (
+            SELECT json_object(
+                'id', wp.id,
+                'tone_sequence', wp.tone_sequence,
+                'syl_seq', wp.syllable_sequence,
+                'syllable_count', wp.syllable_count,
+                'ipa', wp.ipa,
+                'syllables', (
+                    SELECT json_group_array(
+                        json_object(
+                            'stressed', CASE WHEN sw.stressed = 1 THEN json('true') ELSE json('false') END,
+                            'long', CASE WHEN s.long = 1 THEN json('true') ELSE json('false') END,
+                            'spelling', s.text,
+                            'ipa', s.ipa,
+                            'onset', (
+                                SELECT json_object('ipa', o.ipa, 'spelling', o.text) 
+                                FROM onsets o WHERE o.id = s.onset
+                            ),
+                            'nucleus', (
+                                SELECT json_object('ipa', n.ipa, 'spelling', n.text) 
+                                FROM nucleus n WHERE n.id = s.nucleus
+                            ),
+                            'medial', (
+                                SELECT json_object('ipa', m.ipa, 'spelling', m.text) 
+                                FROM medials m WHERE m.id = s.medial
+                            ),
+                            'coda', (
+                                SELECT json_object('ipa', c.ipa, 'spelling', c.text) 
+                                FROM codas c WHERE c.id = s.coda
+                            ),
+                            'rhyme', (
+                                SELECT json_object('ipa', r.ipa, 'spelling', r.text) 
+                                FROM rhymes r WHERE r.id = s.rhyme
+                            ),
+                            'tone', (
+                                SELECT json_object('letters', t.ipa, 'numbers', t.nums, 'name', t.name) 
+                                FROM tones t WHERE t.id = s.tone
+                            )
+                        )
+                    )
+                    FROM syllables_words sw
+                    JOIN syllables s ON sw.syl_id = s.id
+                    WHERE sw.word_id = wp.id
+                    ORDER BY sw.idx ASC
+                )
+            )
+            FROM word_phonetics wp
+            WHERE wp.word_id = e.id
+            -- Select the most general pronunciation (sense_id IS NULL) or the first available
+            ORDER BY (wp.sense_id IS NULL) DESC, wp.id ASC
+            LIMIT 1
+        ),
+        'senses', (
+            SELECT json_group_array(
+                json_object(
+                    'id', sn.id,
+                    'confidence', sn.confidence,
+                    'etymology', sn.etymology,
+                    'pos', sn.pos,
+                    'glosses', (
+                        SELECT json_group_array(sub.gloss)
+                        FROM subsenses sub
+                        WHERE sub.sid = sn.id
+                    ),
+                    'examples', (
+                        SELECT json_group_array(
+                            json_object(
+                                'example', ex.example, 
+                                'ref', ex.ref
+                            )
+                        )
+                        FROM examples ex
+                        WHERE ex.sid = sn.id
+                    ),
+                    'categories', (
+                        SELECT json_group_array(wc.category)
+                        FROM word_categories wc
+                        WHERE wc.word_id = sn.id
+                    ),
+                    'derivation', (
+                        SELECT json_group_array(
+                            json_object(
+                                'type', d.type, 
+                                'text', d.text, 
+                                'tags', json(d.tags)
+                            )
+                        )
+                        FROM derivation d
+                        WHERE d.sid = sn.id
+                    )
+                )
+            )
+            FROM senses sn
+            WHERE sn.parent_id = e.id
+        )
+    ) AS full_word_data
+FROM expressions e
+WHERE e.spelling = ${spelling} AND e.lang = ${lang};
+      `;
+
+    if (data.length > 1) {
+      console.log({ spelling, lang, data });
+      throw new Error("more rows than 1 wtf");
+    }
+    if (data.length === 0) {
+      console.log({ spelling, lang, data });
+      return null;
+    }
+    const row = data[0];
+    const json = JSON.parse(row.full_word_data);
+    const phonetic = JSON.parse(json.phonetic);
+    const obj = { ...json, phonetic };
+    return obj;
   }
-  fetchExpressionById(id: number) {
-    const query = this.db.query(
-      `
-      SELECT * FROM expressions WHERE id = ?
-      `,
-    );
-    return query.get(id) as any;
+
+  // TODO combine with this old query to get both phonetic and semantic data
+  // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table
+
+  async fetchSenses(spelling: string, lang: string) {
+    const rows = await this.db`
+      WITH sense_data AS (
+        SELECT
+          s.*,
+          GROUP_CONCAT(DISTINCT ss.id || ':' || ss.gloss, '|') as subsenses_data,
+          GROUP_CONCAT(DISTINCT ex.id || ':' || ex.example || ':' || COALESCE(ex.ref, ''), '|') as examples_data,
+          GROUP_CONCAT(DISTINCT d.id || ':' || d.type || ':' || d.text, '|') as derivation_data,
+          GROUP_CONCAT(DISTINCT wc.category, '|') as categories_data
+        FROM senses s
+        LEFT JOIN subsenses ss ON ss.sid = s.id
+        LEFT JOIN examples ex ON ex.sid = s.id
+        LEFT JOIN derivation d ON d.sid = s.id
+        LEFT JOIN word_categories wc ON wc.word_id = s.id
+        GROUP BY s.id
+      )
+      SELECT e.*,
+        (SELECT
+          json_group_array(json_object(
+            'id', sd.id,
+            'pos', sd.pos,
+            'etymology', sd.etymology,
+            'confidence', sd.confidence,
+            'subsenses_data', sd.subsenses_data,
+            'examples_data', sd.examples_data,
+            'derivation_data', sd.derivation_data,
+            'categories_data', sd.categories_data
+          ))
+          FROM sense_data sd
+          WHERE sd.parent_id = e.id
+        ) as senses_array
+      FROM expressions e
+      WHERE e.spelling = ${spelling} AND e.lang = ${lang}
+      ORDER BY e.frequency DESC`;
+
+    return rows;
   }
-  fetchWordsByToneAndSyls1(tones: Array<string | null>) {
+
+  // Tones and syls
+  async fetchWordsByToneAndSyls1(tones: Array<string | null>) {
     const toneString = tones
       .reduce((acc: string, item) => {
         if (!item) return `${acc},%`;
         else return `${acc},${item}`;
       }, "")
       .slice(1);
-    const query = this.db.query(
-      `
+    const data = await this.db`
       WITH word_tone_sequences AS (
           SELECT 
               wp.ipa,
@@ -48,21 +251,19 @@ export class Queries {
       )        
         SELECT *
         FROM word_tone_sequences
-        WHERE tone_sequence LIKE ?
-        AND syllable_count = ?
-      `,
-    );
-    return query.all(toneString, tones.length) as any[];
+        WHERE tone_sequence LIKE ${toneString}
+        AND syllable_count = ${tones.length}
+      `;
+    return data;
   }
-  fetchWordsByToneAndSylsO(tones: Array<string | null>) {
+  async fetchWordsByToneAndSylsO(tones: Array<string | null>) {
     const toneString = tones
       .reduce((acc: string, item) => {
         if (!item) return `${acc},%`;
         else return `${acc},${item}`;
       }, "")
       .slice(1);
-    const query = this.db.query(
-      `
+    const data = await this.db`
       WITH word_tone_sequences AS (
           SELECT 
               w.id as word_id,
@@ -87,25 +288,25 @@ export class Queries {
                tone_sequence,
                syllable_count
         FROM word_tone_sequences
-        WHERE tone_sequence LIKE ?
-        AND syllable_count = ?
+        WHERE tone_sequence LIKE ${toneString}
+        AND syllable_count = ${tones.length}
         ORDER BY frequency ASC NULLS LAST;
-      `,
-    );
+      `;
     // TODO combine with this old query to get both phonetic and semantic data
     // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table
 
-    return query.all(toneString, tones.length) as any[];
+    return data;
   }
-  fetchWordsByToneAndSyls(tones: Array<string | null>): FullWordDataDB[] {
+  async fetchWordsByToneAndSyls(
+    tones: Array<string | null>,
+  ): Promise<FullWordDataDB[]> {
     const toneString = tones
       .reduce((acc: string, item) => {
         if (!item) return `${acc},%`;
         else return `${acc},${item}`;
       }, "")
       .slice(1);
-    const query = this.db.query(
-      `
+    const data = await this.db`
       WITH word_tone_sequences AS (
           SELECT 
               w.id as word_id,
@@ -161,54 +362,14 @@ export class Queries {
       )        
         SELECT *
         FROM word_tone_sequences
-        WHERE tone_sequence LIKE ?
-        AND syllable_count = ?
+        WHERE tone_sequence LIKE ${toneString}
+        AND syllable_count = ${tones.length}
         ORDER BY frequency ASC NULLS LAST;
-      `,
-    );
+      `;
     // TODO combine with this old query to get both phonetic and semantic data
     // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table
 
-    return query.all(toneString, tones.length) as any[];
-  }
-  // TODO combine with this old query to get both phonetic and semantic data
-  // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table
-
-  fetchSenses(spelling: string, lang: string) {
-    const query = this.db.query(`
-      WITH sense_data AS (
-        SELECT
-          s.*,
-          GROUP_CONCAT(DISTINCT ss.id || ':' || ss.gloss, '|') as subsenses_data,
-          GROUP_CONCAT(DISTINCT ex.id || ':' || ex.example || ':' || COALESCE(ex.ref, ''), '|') as examples_data,
-          GROUP_CONCAT(DISTINCT d.id || ':' || d.type || ':' || d.text, '|') as derivation_data,
-          GROUP_CONCAT(DISTINCT wc.category, '|') as categories_data
-        FROM senses s
-        LEFT JOIN subsenses ss ON ss.sid = s.id
-        LEFT JOIN examples ex ON ex.sid = s.id
-        LEFT JOIN derivation d ON d.sid = s.id
-        LEFT JOIN word_categories wc ON wc.word_id = s.id
-        GROUP BY s.id
-      )
-      SELECT e.*,
-        (SELECT
-          json_group_array(json_object(
-            'id', sd.id,
-            'pos', sd.pos,
-            'etymology', sd.etymology,
-            'confidence', sd.confidence,
-            'subsenses_data', sd.subsenses_data,
-            'examples_data', sd.examples_data,
-            'derivation_data', sd.derivation_data,
-            'categories_data', sd.categories_data
-          ))
-          FROM sense_data sd
-          WHERE sd.parent_id = e.id
-        ) as senses_array
-      FROM expressions e
-      WHERE e.spelling = ? AND e.lang = ?
-      ORDER BY e.frequency DESC`);
-    return query.all(spelling, lang);
+    return data;
   }
 }
 export default Queries;