import { Database } from 'bun:sqlite'; export interface Expression { id: number; spelling: string; lang: string; frequency?: number; type: string; syllables?: number; notes?: string; ipa?: any; // JSONB prosody?: any; // JSONB confidence: number; } export interface Sense { id: number; parent_id: number; spelling: string; pos?: string; etymology?: string; senses?: any; // JSONB forms?: any; // JSONB related?: any; // JSONB ipa?: any; // JSONB prosody?: any; // JSONB confidence: number; } export interface Subsense { id: number; sid: number; gloss: string; examples?: any; // JSONB } export interface Derivation { id: number; sid: number; type: string; text: string; tags?: any; // JSONB } export interface Category { name: string; } export interface WordCategory { word_id: number; category: string; } export interface Bookmark { word_id: number; user_id: number; notes?: string; created: number; } export interface Language { code: string; name: string; native_name?: string; iso6392?: string; english?: string; } export class SemanticQueries { constructor(private db: Database) {} // Language operations getLanguages(): Language[] { const query = this.db.query(` SELECT code, name, native_name, iso6392, english FROM languages ORDER BY name `); return query.all() as Language[]; } getLanguageByCode(code: string): Language | null { const query = this.db.query(` SELECT code, name, native_name, iso6392, english FROM languages WHERE code = ? `); return query.get(code) as Language | null; } // Expression operations getExpressionById(id: number): Expression | null { const query = this.db.query(` SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence FROM expressions WHERE id = ? `); const result = query.get(id) as Expression | null; if (result) { // Parse JSON fields result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined; result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined; } return result; } getExpressionsByLanguage(lang: string, limit: number = 100, offset: number = 0): Expression[] { const query = this.db.query(` SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence FROM expressions WHERE lang = ? ORDER BY frequency DESC, spelling LIMIT ? OFFSET ? `); const results = query.all(lang, limit, offset) as Expression[]; return results.map(result => ({ ...result, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } searchExpressions(searchTerm: string, lang?: string, limit: number = 50): Expression[] { if (lang) { const query = this.db.query(` SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence FROM expressions WHERE lang = ? AND spelling LIKE ? ORDER BY CASE WHEN spelling = ? THEN 1 WHEN spelling LIKE ? THEN 2 ELSE 3 END, frequency DESC LIMIT ? `); const results = query.all(lang, `%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[]; return results.map(result => ({ ...result, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } else { const query = this.db.query(` SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence FROM expressions WHERE spelling LIKE ? ORDER BY CASE WHEN spelling = ? THEN 1 WHEN spelling LIKE ? THEN 2 ELSE 3 END, frequency DESC LIMIT ? `); const results = query.all(`%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[]; return results.map(result => ({ ...result, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } } getExpressionsByType(type: string, lang?: string, limit: number = 100): Expression[] { if (lang) { const query = this.db.query(` SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence FROM expressions WHERE type = ? AND lang = ? ORDER BY frequency DESC, spelling LIMIT ? `); const results = query.all(type, lang, limit) as Expression[]; return results.map(result => ({ ...result, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } else { const query = this.db.query(` SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence FROM expressions WHERE type = ? ORDER BY frequency DESC, spelling LIMIT ? `); const results = query.all(type, limit) as Expression[]; return results.map(result => ({ ...result, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } } getExpressionsByFrequency(lang: string, minFrequency: number = 0, maxFrequency: number = 1000, limit: number = 100): Expression[] { const query = this.db.query(` SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence FROM expressions WHERE lang = ? AND frequency BETWEEN ? AND ? ORDER BY frequency DESC, spelling LIMIT ? `); const results = query.all(lang, minFrequency, maxFrequency, limit) as Expression[]; return results.map(result => ({ ...result, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } // Sense operations getSensesForExpression(expressionId: number): Sense[] { const query = this.db.query(` SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence FROM senses WHERE parent_id = ? ORDER BY confidence DESC `); const results = query.all(expressionId) as Sense[]; return results.map(result => ({ ...result, senses: result.senses ? JSON.parse(result.senses) : undefined, forms: result.forms ? JSON.parse(result.forms) : undefined, related: result.related ? JSON.parse(result.related) : undefined, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } getSenseById(id: number): Sense | null { const query = this.db.query(` SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence FROM senses WHERE id = ? `); const result = query.get(id) as Sense | null; if (result) { result.senses = result.senses ? JSON.parse(result.senses) : undefined; result.forms = result.forms ? JSON.parse(result.forms) : undefined; result.related = result.related ? JSON.parse(result.related) : undefined; result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined; result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined; } return result; } searchSenses(searchTerm: string, lang?: string, limit: number = 50): Sense[] { if (lang) { const query = this.db.query(` SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, e.lang FROM senses s JOIN expressions e ON s.parent_id = e.id WHERE e.lang = ? AND (s.spelling LIKE ? OR s.pos LIKE ?) ORDER BY s.confidence DESC LIMIT ? `); const results = query.all(lang, `%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[]; return results.map(result => ({ ...result, senses: result.senses ? JSON.parse(result.senses) : undefined, forms: result.forms ? JSON.parse(result.forms) : undefined, related: result.related ? JSON.parse(result.related) : undefined, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } else { const query = this.db.query(` SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, e.lang FROM senses s JOIN expressions e ON s.parent_id = e.id WHERE s.spelling LIKE ? OR s.pos LIKE ? ORDER BY s.confidence DESC LIMIT ? `); const results = query.all(`%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[]; return results.map(result => ({ ...result, senses: result.senses ? JSON.parse(result.senses) : undefined, forms: result.forms ? JSON.parse(result.forms) : undefined, related: result.related ? JSON.parse(result.related) : undefined, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } } // Subsense operations getSubsensesForSense(senseId: number): Subsense[] { const query = this.db.query(` SELECT id, sid, gloss, examples FROM subsenses WHERE sid = ? ORDER BY id `); const results = query.all(senseId) as Subsense[]; return results.map(result => ({ ...result, examples: result.examples ? JSON.parse(result.examples) : undefined, })); } // Derivation operations getDerivationsForSense(senseId: number): Derivation[] { const query = this.db.query(` SELECT id, sid, type, text, tags FROM derivation WHERE sid = ? ORDER BY type, text `); const results = query.all(senseId) as Derivation[]; return results.map(result => ({ ...result, tags: result.tags ? JSON.parse(result.tags) : undefined, })); } // Category operations getCategories(): Category[] { const query = this.db.query(` SELECT name FROM categories ORDER BY name `); return query.all() as Category[]; } getCategoriesForWord(wordId: number): Category[] { const query = this.db.query(` SELECT c.name FROM categories c JOIN word_categories wc ON c.name = wc.category WHERE wc.word_id = ? ORDER BY c.name `); return query.all(wordId) as Category[]; } getWordsByCategory(category: string, lang?: string, limit: number = 100): Expression[] { if (lang) { const query = this.db.query(` SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence FROM expressions e JOIN word_categories wc ON e.id = wc.word_id WHERE wc.category = ? AND e.lang = ? ORDER BY e.frequency DESC, e.spelling LIMIT ? `); const results = query.all(category, lang, limit) as Expression[]; return results.map(result => ({ ...result, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } else { const query = this.db.query(` SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence FROM expressions e JOIN word_categories wc ON e.id = wc.word_id WHERE wc.category = ? ORDER BY e.frequency DESC, e.spelling LIMIT ? `); const results = query.all(category, limit) as Expression[]; return results.map(result => ({ ...result, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } } // Bookmark operations getBookmarksForUser(userId: number): Bookmark[] { const query = this.db.query(` SELECT word_id, user_id, notes, created FROM bookmarks WHERE user_id = ? ORDER BY created DESC `); return query.all(userId) as Bookmark[]; } createBookmark(userId: number, wordId: number, notes?: string): Bookmark { const query = this.db.query(` INSERT INTO bookmarks (word_id, user_id, notes, created) VALUES (?, ?, ?, ?) RETURNING word_id, user_id, notes, created `); return query.get(wordId, userId, notes, Date.now()) as Bookmark; } updateBookmark(userId: number, wordId: number, notes?: string): Bookmark | null { const query = this.db.query(` UPDATE bookmarks SET notes = ? WHERE user_id = ? AND word_id = ? RETURNING word_id, user_id, notes, created `); return query.get(notes, userId, wordId) as Bookmark | null; } deleteBookmark(userId: number, wordId: number): boolean { const query = this.db.query(` DELETE FROM bookmarks WHERE user_id = ? AND word_id = ? `); const result = query.run(userId, wordId); return result.changes > 0; } isBookmarked(userId: number, wordId: number): boolean { const query = this.db.query(` SELECT 1 FROM bookmarks WHERE user_id = ? AND word_id = ? LIMIT 1 `); return query.get(userId, wordId) !== undefined; } // Advanced search operations searchByPOS(pos: string, lang?: string, limit: number = 50): Expression[] { if (lang) { const query = this.db.query(` SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence FROM expressions e JOIN senses s ON e.id = s.parent_id WHERE e.lang = ? AND s.pos = ? ORDER BY e.frequency DESC, e.spelling LIMIT ? `); const results = query.all(lang, pos, limit) as Expression[]; return results.map(result => ({ ...result, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } else { const query = this.db.query(` SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence FROM expressions e JOIN senses s ON e.id = s.parent_id WHERE s.pos = ? ORDER BY e.frequency DESC, e.spelling LIMIT ? `); const results = query.all(pos, limit) as Expression[]; return results.map(result => ({ ...result, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } } searchByEtymology(term: string, lang?: string, limit: number = 50): Sense[] { if (lang) { const query = this.db.query(` SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, e.lang FROM senses s JOIN expressions e ON s.parent_id = e.id WHERE e.lang = ? AND s.etymology LIKE ? ORDER BY s.confidence DESC LIMIT ? `); const results = query.all(lang, `%${term}%`, limit) as Sense[]; return results.map(result => ({ ...result, senses: result.senses ? JSON.parse(result.senses) : undefined, forms: result.forms ? JSON.parse(result.forms) : undefined, related: result.related ? JSON.parse(result.related) : undefined, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } else { const query = this.db.query(` SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, e.lang FROM senses s JOIN expressions e ON s.parent_id = e.id WHERE s.etymology LIKE ? ORDER BY s.confidence DESC LIMIT ? `); const results = query.all(`%${term}%`, limit) as Sense[]; return results.map(result => ({ ...result, senses: result.senses ? JSON.parse(result.senses) : undefined, forms: result.forms ? JSON.parse(result.forms) : undefined, related: result.related ? JSON.parse(result.related) : undefined, ipa: result.ipa ? JSON.parse(result.ipa) : undefined, prosody: result.prosody ? JSON.parse(result.prosody) : undefined, })); } } // Statistics operations getSemanticStats(lang: string) { const query = this.db.query(` SELECT COUNT(DISTINCT e.id) as total_expressions, COUNT(DISTINCT s.id) as total_senses, COUNT(DISTINCT s.pos) as unique_pos, COUNT(DISTINCT c.name) as unique_categories, AVG(e.confidence) as avg_confidence, COUNT(DISTINCT e.type) as unique_types FROM expressions e LEFT JOIN senses s ON e.id = s.parent_id LEFT JOIN word_categories wc ON e.id = wc.word_id LEFT JOIN categories c ON wc.category = c.name WHERE e.lang = ? `); return query.get(lang); } getPOSTDistribution(lang: string) { const query = this.db.query(` SELECT s.pos, COUNT(DISTINCT s.parent_id) as expression_count, COUNT(s.id) as sense_count, ROUND(COUNT(s.id) * 100.0 / (SELECT COUNT(*) FROM senses WHERE pos IS NOT NULL), 2) as percentage FROM senses s JOIN expressions e ON s.parent_id = e.id WHERE e.lang = ? AND s.pos IS NOT NULL GROUP BY s.pos ORDER BY sense_count DESC `); return query.all(lang); } getCategoryDistribution(lang: string) { const query = this.db.query(` SELECT c.name as category, COUNT(DISTINCT wc.word_id) as word_count, ROUND(COUNT(DISTINCT wc.word_id) * 100.0 / (SELECT COUNT(*) FROM word_categories), 2) as percentage FROM categories c JOIN word_categories wc ON c.name = wc.category JOIN expressions e ON wc.word_id = e.id WHERE e.lang = ? GROUP BY c.name ORDER BY word_count DESC `); return query.all(lang); } }