summaryrefslogtreecommitdiff
path: root/packages/db/src/semantic.ts
diff options
context:
space:
mode:
Diffstat (limited to 'packages/db/src/semantic.ts')
-rw-r--r--packages/db/src/semantic.ts554
1 files changed, 554 insertions, 0 deletions
diff --git a/packages/db/src/semantic.ts b/packages/db/src/semantic.ts
new file mode 100644
index 0000000..a3fe44b
--- /dev/null
+++ b/packages/db/src/semantic.ts
@@ -0,0 +1,554 @@
+import { Database } from 'bun:sqlite';
+
+export interface Expression {
+ id: number;
+ spelling: string;
+ lang: string;
+ frequency?: number;
+ type: string;
+ syllables?: number;
+ notes?: string;
+ ipa?: any; // JSONB
+ prosody?: any; // JSONB
+ confidence: number;
+}
+
+export interface Sense {
+ id: number;
+ parent_id: number;
+ spelling: string;
+ pos?: string;
+ etymology?: string;
+ senses?: any; // JSONB
+ forms?: any; // JSONB
+ related?: any; // JSONB
+ ipa?: any; // JSONB
+ prosody?: any; // JSONB
+ confidence: number;
+}
+
+export interface Subsense {
+ id: number;
+ sid: number;
+ gloss: string;
+ examples?: any; // JSONB
+}
+
+export interface Derivation {
+ id: number;
+ sid: number;
+ type: string;
+ text: string;
+ tags?: any; // JSONB
+}
+
+export interface Category {
+ name: string;
+}
+
+export interface WordCategory {
+ word_id: number;
+ category: string;
+}
+
+export interface Bookmark {
+ word_id: number;
+ user_id: number;
+ notes?: string;
+ created: number;
+}
+
+export interface Language {
+ code: string;
+ name: string;
+ native_name?: string;
+ iso6392?: string;
+ english?: string;
+}
+
+export class SemanticQueries {
+ constructor(private db: Database) {}
+
+ // Language operations
+ getLanguages(): Language[] {
+ const query = this.db.query(`
+ SELECT code, name, native_name, iso6392, english
+ FROM languages
+ ORDER BY name
+ `);
+ return query.all() as Language[];
+ }
+
+ getLanguageByCode(code: string): Language | null {
+ const query = this.db.query(`
+ SELECT code, name, native_name, iso6392, english
+ FROM languages
+ WHERE code = ?
+ `);
+ return query.get(code) as Language | null;
+ }
+
+ // Expression operations
+ getExpressionById(id: number): Expression | null {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE id = ?
+ `);
+ const result = query.get(id) as Expression | null;
+ if (result) {
+ // Parse JSON fields
+ result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined;
+ result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined;
+ }
+ return result;
+ }
+
+ getExpressionsByLanguage(lang: string, limit: number = 100, offset: number = 0): Expression[] {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE lang = ?
+ ORDER BY frequency DESC, spelling
+ LIMIT ? OFFSET ?
+ `);
+ const results = query.all(lang, limit, offset) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+
+ searchExpressions(searchTerm: string, lang?: string, limit: number = 50): Expression[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE lang = ? AND spelling LIKE ?
+ ORDER BY
+ CASE
+ WHEN spelling = ? THEN 1
+ WHEN spelling LIKE ? THEN 2
+ ELSE 3
+ END,
+ frequency DESC
+ LIMIT ?
+ `);
+ const results = query.all(lang, `%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE spelling LIKE ?
+ ORDER BY
+ CASE
+ WHEN spelling = ? THEN 1
+ WHEN spelling LIKE ? THEN 2
+ ELSE 3
+ END,
+ frequency DESC
+ LIMIT ?
+ `);
+ const results = query.all(`%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ getExpressionsByType(type: string, lang?: string, limit: number = 100): Expression[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE type = ? AND lang = ?
+ ORDER BY frequency DESC, spelling
+ LIMIT ?
+ `);
+ const results = query.all(type, lang, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE type = ?
+ ORDER BY frequency DESC, spelling
+ LIMIT ?
+ `);
+ const results = query.all(type, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ getExpressionsByFrequency(lang: string, minFrequency: number = 0, maxFrequency: number = 1000, limit: number = 100): Expression[] {
+ const query = this.db.query(`
+ SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence
+ FROM expressions
+ WHERE lang = ? AND frequency BETWEEN ? AND ?
+ ORDER BY frequency DESC, spelling
+ LIMIT ?
+ `);
+ const results = query.all(lang, minFrequency, maxFrequency, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+
+ // Sense operations
+ getSensesForExpression(expressionId: number): Sense[] {
+ const query = this.db.query(`
+ SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence
+ FROM senses
+ WHERE parent_id = ?
+ ORDER BY confidence DESC
+ `);
+ const results = query.all(expressionId) as Sense[];
+ return results.map(result => ({
+ ...result,
+ senses: result.senses ? JSON.parse(result.senses) : undefined,
+ forms: result.forms ? JSON.parse(result.forms) : undefined,
+ related: result.related ? JSON.parse(result.related) : undefined,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+
+ getSenseById(id: number): Sense | null {
+ const query = this.db.query(`
+ SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence
+ FROM senses
+ WHERE id = ?
+ `);
+ const result = query.get(id) as Sense | null;
+ if (result) {
+ result.senses = result.senses ? JSON.parse(result.senses) : undefined;
+ result.forms = result.forms ? JSON.parse(result.forms) : undefined;
+ result.related = result.related ? JSON.parse(result.related) : undefined;
+ result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined;
+ result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined;
+ }
+ return result;
+ }
+
+ searchSenses(searchTerm: string, lang?: string, limit: number = 50): Sense[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence,
+ e.lang
+ FROM senses s
+ JOIN expressions e ON s.parent_id = e.id
+ WHERE e.lang = ? AND (s.spelling LIKE ? OR s.pos LIKE ?)
+ ORDER BY s.confidence DESC
+ LIMIT ?
+ `);
+ const results = query.all(lang, `%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[];
+ return results.map(result => ({
+ ...result,
+ senses: result.senses ? JSON.parse(result.senses) : undefined,
+ forms: result.forms ? JSON.parse(result.forms) : undefined,
+ related: result.related ? JSON.parse(result.related) : undefined,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence,
+ e.lang
+ FROM senses s
+ JOIN expressions e ON s.parent_id = e.id
+ WHERE s.spelling LIKE ? OR s.pos LIKE ?
+ ORDER BY s.confidence DESC
+ LIMIT ?
+ `);
+ const results = query.all(`%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[];
+ return results.map(result => ({
+ ...result,
+ senses: result.senses ? JSON.parse(result.senses) : undefined,
+ forms: result.forms ? JSON.parse(result.forms) : undefined,
+ related: result.related ? JSON.parse(result.related) : undefined,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ // Subsense operations
+ getSubsensesForSense(senseId: number): Subsense[] {
+ const query = this.db.query(`
+ SELECT id, sid, gloss, examples
+ FROM subsenses
+ WHERE sid = ?
+ ORDER BY id
+ `);
+ const results = query.all(senseId) as Subsense[];
+ return results.map(result => ({
+ ...result,
+ examples: result.examples ? JSON.parse(result.examples) : undefined,
+ }));
+ }
+
+ // Derivation operations
+ getDerivationsForSense(senseId: number): Derivation[] {
+ const query = this.db.query(`
+ SELECT id, sid, type, text, tags
+ FROM derivation
+ WHERE sid = ?
+ ORDER BY type, text
+ `);
+ const results = query.all(senseId) as Derivation[];
+ return results.map(result => ({
+ ...result,
+ tags: result.tags ? JSON.parse(result.tags) : undefined,
+ }));
+ }
+
+ // Category operations
+ getCategories(): Category[] {
+ const query = this.db.query(`
+ SELECT name
+ FROM categories
+ ORDER BY name
+ `);
+ return query.all() as Category[];
+ }
+
+ getCategoriesForWord(wordId: number): Category[] {
+ const query = this.db.query(`
+ SELECT c.name
+ FROM categories c
+ JOIN word_categories wc ON c.name = wc.category
+ WHERE wc.word_id = ?
+ ORDER BY c.name
+ `);
+ return query.all(wordId) as Category[];
+ }
+
+ getWordsByCategory(category: string, lang?: string, limit: number = 100): Expression[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence
+ FROM expressions e
+ JOIN word_categories wc ON e.id = wc.word_id
+ WHERE wc.category = ? AND e.lang = ?
+ ORDER BY e.frequency DESC, e.spelling
+ LIMIT ?
+ `);
+ const results = query.all(category, lang, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence
+ FROM expressions e
+ JOIN word_categories wc ON e.id = wc.word_id
+ WHERE wc.category = ?
+ ORDER BY e.frequency DESC, e.spelling
+ LIMIT ?
+ `);
+ const results = query.all(category, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ // Bookmark operations
+ getBookmarksForUser(userId: number): Bookmark[] {
+ const query = this.db.query(`
+ SELECT word_id, user_id, notes, created
+ FROM bookmarks
+ WHERE user_id = ?
+ ORDER BY created DESC
+ `);
+ return query.all(userId) as Bookmark[];
+ }
+
+ createBookmark(userId: number, wordId: number, notes?: string): Bookmark {
+ const query = this.db.query(`
+ INSERT INTO bookmarks (word_id, user_id, notes, created)
+ VALUES (?, ?, ?, ?)
+ RETURNING word_id, user_id, notes, created
+ `);
+ return query.get(wordId, userId, notes, Date.now()) as Bookmark;
+ }
+
+ updateBookmark(userId: number, wordId: number, notes?: string): Bookmark | null {
+ const query = this.db.query(`
+ UPDATE bookmarks
+ SET notes = ?
+ WHERE user_id = ? AND word_id = ?
+ RETURNING word_id, user_id, notes, created
+ `);
+ return query.get(notes, userId, wordId) as Bookmark | null;
+ }
+
+ deleteBookmark(userId: number, wordId: number): boolean {
+ const query = this.db.query(`
+ DELETE FROM bookmarks
+ WHERE user_id = ? AND word_id = ?
+ `);
+ const result = query.run(userId, wordId);
+ return result.changes > 0;
+ }
+
+ isBookmarked(userId: number, wordId: number): boolean {
+ const query = this.db.query(`
+ SELECT 1
+ FROM bookmarks
+ WHERE user_id = ? AND word_id = ?
+ LIMIT 1
+ `);
+ return query.get(userId, wordId) !== undefined;
+ }
+
+ // Advanced search operations
+ searchByPOS(pos: string, lang?: string, limit: number = 50): Expression[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence
+ FROM expressions e
+ JOIN senses s ON e.id = s.parent_id
+ WHERE e.lang = ? AND s.pos = ?
+ ORDER BY e.frequency DESC, e.spelling
+ LIMIT ?
+ `);
+ const results = query.all(lang, pos, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence
+ FROM expressions e
+ JOIN senses s ON e.id = s.parent_id
+ WHERE s.pos = ?
+ ORDER BY e.frequency DESC, e.spelling
+ LIMIT ?
+ `);
+ const results = query.all(pos, limit) as Expression[];
+ return results.map(result => ({
+ ...result,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ searchByEtymology(term: string, lang?: string, limit: number = 50): Sense[] {
+ if (lang) {
+ const query = this.db.query(`
+ SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence,
+ e.lang
+ FROM senses s
+ JOIN expressions e ON s.parent_id = e.id
+ WHERE e.lang = ? AND s.etymology LIKE ?
+ ORDER BY s.confidence DESC
+ LIMIT ?
+ `);
+ const results = query.all(lang, `%${term}%`, limit) as Sense[];
+ return results.map(result => ({
+ ...result,
+ senses: result.senses ? JSON.parse(result.senses) : undefined,
+ forms: result.forms ? JSON.parse(result.forms) : undefined,
+ related: result.related ? JSON.parse(result.related) : undefined,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ } else {
+ const query = this.db.query(`
+ SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence,
+ e.lang
+ FROM senses s
+ JOIN expressions e ON s.parent_id = e.id
+ WHERE s.etymology LIKE ?
+ ORDER BY s.confidence DESC
+ LIMIT ?
+ `);
+ const results = query.all(`%${term}%`, limit) as Sense[];
+ return results.map(result => ({
+ ...result,
+ senses: result.senses ? JSON.parse(result.senses) : undefined,
+ forms: result.forms ? JSON.parse(result.forms) : undefined,
+ related: result.related ? JSON.parse(result.related) : undefined,
+ ipa: result.ipa ? JSON.parse(result.ipa) : undefined,
+ prosody: result.prosody ? JSON.parse(result.prosody) : undefined,
+ }));
+ }
+ }
+
+ // Statistics operations
+ getSemanticStats(lang: string) {
+ const query = this.db.query(`
+ SELECT
+ COUNT(DISTINCT e.id) as total_expressions,
+ COUNT(DISTINCT s.id) as total_senses,
+ COUNT(DISTINCT s.pos) as unique_pos,
+ COUNT(DISTINCT c.name) as unique_categories,
+ AVG(e.confidence) as avg_confidence,
+ COUNT(DISTINCT e.type) as unique_types
+ FROM expressions e
+ LEFT JOIN senses s ON e.id = s.parent_id
+ LEFT JOIN word_categories wc ON e.id = wc.word_id
+ LEFT JOIN categories c ON wc.category = c.name
+ WHERE e.lang = ?
+ `);
+ return query.get(lang);
+ }
+
+ getPOSTDistribution(lang: string) {
+ const query = this.db.query(`
+ SELECT
+ s.pos,
+ COUNT(DISTINCT s.parent_id) as expression_count,
+ COUNT(s.id) as sense_count,
+ ROUND(COUNT(s.id) * 100.0 / (SELECT COUNT(*) FROM senses WHERE pos IS NOT NULL), 2) as percentage
+ FROM senses s
+ JOIN expressions e ON s.parent_id = e.id
+ WHERE e.lang = ? AND s.pos IS NOT NULL
+ GROUP BY s.pos
+ ORDER BY sense_count DESC
+ `);
+ return query.all(lang);
+ }
+
+ getCategoryDistribution(lang: string) {
+ const query = this.db.query(`
+ SELECT
+ c.name as category,
+ COUNT(DISTINCT wc.word_id) as word_count,
+ ROUND(COUNT(DISTINCT wc.word_id) * 100.0 / (SELECT COUNT(*) FROM word_categories), 2) as percentage
+ FROM categories c
+ JOIN word_categories wc ON c.name = wc.category
+ JOIN expressions e ON wc.word_id = e.id
+ WHERE e.lang = ?
+ GROUP BY c.name
+ ORDER BY word_count DESC
+ `);
+ return query.all(lang);
+ }
+} \ No newline at end of file