summaryrefslogtreecommitdiff
path: root/packages/db/src/index.ts
diff options
context:
space:
mode:
Diffstat (limited to 'packages/db/src/index.ts')
-rw-r--r--packages/db/src/index.ts327
1 files changed, 244 insertions, 83 deletions
diff --git a/packages/db/src/index.ts b/packages/db/src/index.ts
index 6a89ee2..1425a43 100644
--- a/packages/db/src/index.ts
+++ b/packages/db/src/index.ts
@@ -1,39 +1,242 @@
-import { Database } from "bun:sqlite";
-import type { FullWordDataDB } from "./types";
+import { SQL } from "bun";
import { SRSQueries } from "./srs";
+// NEW API
+//
+// https://bun.com/docs/runtime/sql
+// import { sql } from "bun";
+
+// // Basic insert with direct values
+// const [user] = await sql`
+// INSERT INTO users (name, email)
+// VALUES (${name}, ${email})
+// RETURNING *
+// `;
+
+// // Using object helper for cleaner syntax
+// const userData = {
+// name: "Alice",
+// email: "alice@example.com",
+// };
+
+// const [newUser] = await sql`
+// INSERT INTO users ${sql(userData)}
+// RETURNING *
+// `;
+// // Expands to: INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com')
+
+const DB_PATH = "/home/y/code/bun/sorlang/bulkdata/db.db";
+import type { FullWordDataDB } from "@sortug/langlib";
export class Queries {
- db: Database;
- srs: SRSQueries;
+ db: SQL;
+ ready = false;
+ // srs: SRSQueries;
constructor() {
- const db = new Database("/home/y/code/bun/sorlang/bulkdata/unified.db");
- db.exec("PRAGMA journal_mode = WAL"); // Enable Write-Ahead Logging for better performance
- db.exec("PRAGMA foreign_keys = ON");
- db.exec("PRAGMA cache_size = -8000"); // Increase cache size to 8MB
- db.exec("PRAGMA mmap_size = 30000000000");
- db.exec("PRAGMA temp_store = MEMORY"); // Store temp tables in memory
- db.exec("PRAGMA synchronous = NORMAL"); // Slightly less safe but faster
-
+ console.log("setting sorlang DB");
+ const db = new SQL({
+ adapter: "sqlite",
+ filename: DB_PATH,
+ create: false,
+ });
this.db = db;
- this.srs = new SRSQueries(db);
+ // this.srs = new SRSQueries(db);
+ }
+ async init() {
+ try {
+ await this.db`PRAGMA journal_mode = WAL`; // Enable Write-Ahead Logging for better performance
+ await this.db`PRAGMA foreign_keys = ON`;
+ await this.db`PRAGMA cache_size = -8000`; // Increase cache size to 8MB
+ await this.db`PRAGMA mmap_size = 30000000000`;
+ await this.db`PRAGMA temp_store = MEMORY`; // Store temp tables in memory
+ await this.db`PRAGMA synchronous = NORMAL`; // Slightly less safe but faster
+ this.ready = true;
+ } catch (e) {
+ console.error("error starting db", e);
+ }
+ }
+
+ async fetchExpressionById(id: number) {
+ const query = await this.db`
+ SELECT * FROM expressions WHERE id = ${id}
+ `;
+ return query;
+ }
+ // TODO word_phonetics is MANY TO ONE vs expressions, so it's possible for word_phonetics to lack entries for syllables
+ async fetchExpressionBySpelling(
+ spelling: string,
+ lang: string,
+ ): Promise<FullWordDataDB | null> {
+ const data = await this.db`
+SELECT
+ json_object(
+ 'id', e.id,
+ 'spelling', e.spelling,
+ 'frequency', e.frequency,
+ 'phonetic', (
+ SELECT json_object(
+ 'id', wp.id,
+ 'tone_sequence', wp.tone_sequence,
+ 'syl_seq', wp.syllable_sequence,
+ 'syllable_count', wp.syllable_count,
+ 'ipa', wp.ipa,
+ 'syllables', (
+ SELECT json_group_array(
+ json_object(
+ 'stressed', CASE WHEN sw.stressed = 1 THEN json('true') ELSE json('false') END,
+ 'long', CASE WHEN s.long = 1 THEN json('true') ELSE json('false') END,
+ 'spelling', s.text,
+ 'ipa', s.ipa,
+ 'onset', (
+ SELECT json_object('ipa', o.ipa, 'spelling', o.text)
+ FROM onsets o WHERE o.id = s.onset
+ ),
+ 'nucleus', (
+ SELECT json_object('ipa', n.ipa, 'spelling', n.text)
+ FROM nucleus n WHERE n.id = s.nucleus
+ ),
+ 'medial', (
+ SELECT json_object('ipa', m.ipa, 'spelling', m.text)
+ FROM medials m WHERE m.id = s.medial
+ ),
+ 'coda', (
+ SELECT json_object('ipa', c.ipa, 'spelling', c.text)
+ FROM codas c WHERE c.id = s.coda
+ ),
+ 'rhyme', (
+ SELECT json_object('ipa', r.ipa, 'spelling', r.text)
+ FROM rhymes r WHERE r.id = s.rhyme
+ ),
+ 'tone', (
+ SELECT json_object('letters', t.ipa, 'numbers', t.nums, 'name', t.name)
+ FROM tones t WHERE t.id = s.tone
+ )
+ )
+ )
+ FROM syllables_words sw
+ JOIN syllables s ON sw.syl_id = s.id
+ WHERE sw.word_id = wp.id
+ ORDER BY sw.idx ASC
+ )
+ )
+ FROM word_phonetics wp
+ WHERE wp.word_id = e.id
+ -- Select the most general pronunciation (sense_id IS NULL) or the first available
+ ORDER BY (wp.sense_id IS NULL) DESC, wp.id ASC
+ LIMIT 1
+ ),
+ 'senses', (
+ SELECT json_group_array(
+ json_object(
+ 'id', sn.id,
+ 'confidence', sn.confidence,
+ 'etymology', sn.etymology,
+ 'pos', sn.pos,
+ 'glosses', (
+ SELECT json_group_array(sub.gloss)
+ FROM subsenses sub
+ WHERE sub.sid = sn.id
+ ),
+ 'examples', (
+ SELECT json_group_array(
+ json_object(
+ 'example', ex.example,
+ 'ref', ex.ref
+ )
+ )
+ FROM examples ex
+ WHERE ex.sid = sn.id
+ ),
+ 'categories', (
+ SELECT json_group_array(wc.category)
+ FROM word_categories wc
+ WHERE wc.word_id = sn.id
+ ),
+ 'derivation', (
+ SELECT json_group_array(
+ json_object(
+ 'type', d.type,
+ 'text', d.text,
+ 'tags', json(d.tags)
+ )
+ )
+ FROM derivation d
+ WHERE d.sid = sn.id
+ )
+ )
+ )
+ FROM senses sn
+ WHERE sn.parent_id = e.id
+ )
+ ) AS full_word_data
+FROM expressions e
+WHERE e.spelling = ${spelling} AND e.lang = ${lang};
+ `;
+
+ if (data.length > 1) {
+ console.log({ spelling, lang, data });
+ throw new Error("more rows than 1 wtf");
+ }
+ if (data.length === 0) {
+ console.log({ spelling, lang, data });
+ return null;
+ }
+ const row = data[0];
+ const json = JSON.parse(row.full_word_data);
+ const phonetic = JSON.parse(json.phonetic);
+ const obj = { ...json, phonetic };
+ return obj;
}
- fetchExpressionById(id: number) {
- const query = this.db.query(
- `
- SELECT * FROM expressions WHERE id = ?
- `,
- );
- return query.get(id) as any;
+
+ // TODO combine with this old query to get both phonetic and semantic data
+ // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table
+
+ async fetchSenses(spelling: string, lang: string) {
+ const rows = await this.db`
+ WITH sense_data AS (
+ SELECT
+ s.*,
+ GROUP_CONCAT(DISTINCT ss.id || ':' || ss.gloss, '|') as subsenses_data,
+ GROUP_CONCAT(DISTINCT ex.id || ':' || ex.example || ':' || COALESCE(ex.ref, ''), '|') as examples_data,
+ GROUP_CONCAT(DISTINCT d.id || ':' || d.type || ':' || d.text, '|') as derivation_data,
+ GROUP_CONCAT(DISTINCT wc.category, '|') as categories_data
+ FROM senses s
+ LEFT JOIN subsenses ss ON ss.sid = s.id
+ LEFT JOIN examples ex ON ex.sid = s.id
+ LEFT JOIN derivation d ON d.sid = s.id
+ LEFT JOIN word_categories wc ON wc.word_id = s.id
+ GROUP BY s.id
+ )
+ SELECT e.*,
+ (SELECT
+ json_group_array(json_object(
+ 'id', sd.id,
+ 'pos', sd.pos,
+ 'etymology', sd.etymology,
+ 'confidence', sd.confidence,
+ 'subsenses_data', sd.subsenses_data,
+ 'examples_data', sd.examples_data,
+ 'derivation_data', sd.derivation_data,
+ 'categories_data', sd.categories_data
+ ))
+ FROM sense_data sd
+ WHERE sd.parent_id = e.id
+ ) as senses_array
+ FROM expressions e
+ WHERE e.spelling = ${spelling} AND e.lang = ${lang}
+ ORDER BY e.frequency DESC`;
+
+ return rows;
}
- fetchWordsByToneAndSyls1(tones: Array<string | null>) {
+
+ // Tones and syls
+ async fetchWordsByToneAndSyls1(tones: Array<string | null>) {
const toneString = tones
.reduce((acc: string, item) => {
if (!item) return `${acc},%`;
else return `${acc},${item}`;
}, "")
.slice(1);
- const query = this.db.query(
- `
+ const data = await this.db`
WITH word_tone_sequences AS (
SELECT
wp.ipa,
@@ -48,21 +251,19 @@ export class Queries {
)
SELECT *
FROM word_tone_sequences
- WHERE tone_sequence LIKE ?
- AND syllable_count = ?
- `,
- );
- return query.all(toneString, tones.length) as any[];
+ WHERE tone_sequence LIKE ${toneString}
+ AND syllable_count = ${tones.length}
+ `;
+ return data;
}
- fetchWordsByToneAndSylsO(tones: Array<string | null>) {
+ async fetchWordsByToneAndSylsO(tones: Array<string | null>) {
const toneString = tones
.reduce((acc: string, item) => {
if (!item) return `${acc},%`;
else return `${acc},${item}`;
}, "")
.slice(1);
- const query = this.db.query(
- `
+ const data = await this.db`
WITH word_tone_sequences AS (
SELECT
w.id as word_id,
@@ -87,25 +288,25 @@ export class Queries {
tone_sequence,
syllable_count
FROM word_tone_sequences
- WHERE tone_sequence LIKE ?
- AND syllable_count = ?
+ WHERE tone_sequence LIKE ${toneString}
+ AND syllable_count = ${tones.length}
ORDER BY frequency ASC NULLS LAST;
- `,
- );
+ `;
// TODO combine with this old query to get both phonetic and semantic data
// Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table
- return query.all(toneString, tones.length) as any[];
+ return data;
}
- fetchWordsByToneAndSyls(tones: Array<string | null>): FullWordDataDB[] {
+ async fetchWordsByToneAndSyls(
+ tones: Array<string | null>,
+ ): Promise<FullWordDataDB[]> {
const toneString = tones
.reduce((acc: string, item) => {
if (!item) return `${acc},%`;
else return `${acc},${item}`;
}, "")
.slice(1);
- const query = this.db.query(
- `
+ const data = await this.db`
WITH word_tone_sequences AS (
SELECT
w.id as word_id,
@@ -161,54 +362,14 @@ export class Queries {
)
SELECT *
FROM word_tone_sequences
- WHERE tone_sequence LIKE ?
- AND syllable_count = ?
+ WHERE tone_sequence LIKE ${toneString}
+ AND syllable_count = ${tones.length}
ORDER BY frequency ASC NULLS LAST;
- `,
- );
+ `;
// TODO combine with this old query to get both phonetic and semantic data
// Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table
- return query.all(toneString, tones.length) as any[];
- }
- // TODO combine with this old query to get both phonetic and semantic data
- // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table
-
- fetchSenses(spelling: string, lang: string) {
- const query = this.db.query(`
- WITH sense_data AS (
- SELECT
- s.*,
- GROUP_CONCAT(DISTINCT ss.id || ':' || ss.gloss, '|') as subsenses_data,
- GROUP_CONCAT(DISTINCT ex.id || ':' || ex.example || ':' || COALESCE(ex.ref, ''), '|') as examples_data,
- GROUP_CONCAT(DISTINCT d.id || ':' || d.type || ':' || d.text, '|') as derivation_data,
- GROUP_CONCAT(DISTINCT wc.category, '|') as categories_data
- FROM senses s
- LEFT JOIN subsenses ss ON ss.sid = s.id
- LEFT JOIN examples ex ON ex.sid = s.id
- LEFT JOIN derivation d ON d.sid = s.id
- LEFT JOIN word_categories wc ON wc.word_id = s.id
- GROUP BY s.id
- )
- SELECT e.*,
- (SELECT
- json_group_array(json_object(
- 'id', sd.id,
- 'pos', sd.pos,
- 'etymology', sd.etymology,
- 'confidence', sd.confidence,
- 'subsenses_data', sd.subsenses_data,
- 'examples_data', sd.examples_data,
- 'derivation_data', sd.derivation_data,
- 'categories_data', sd.categories_data
- ))
- FROM sense_data sd
- WHERE sd.parent_id = e.id
- ) as senses_array
- FROM expressions e
- WHERE e.spelling = ? AND e.lang = ?
- ORDER BY e.frequency DESC`);
- return query.all(spelling, lang);
+ return data;
}
}
export default Queries;