diff options
Diffstat (limited to 'packages/db/src/semantic.ts')
| -rw-r--r-- | packages/db/src/semantic.ts | 554 |
1 files changed, 554 insertions, 0 deletions
diff --git a/packages/db/src/semantic.ts b/packages/db/src/semantic.ts new file mode 100644 index 0000000..a3fe44b --- /dev/null +++ b/packages/db/src/semantic.ts @@ -0,0 +1,554 @@ +import { Database } from 'bun:sqlite'; + +export interface Expression { + id: number; + spelling: string; + lang: string; + frequency?: number; + type: string; + syllables?: number; + notes?: string; + ipa?: any; // JSONB + prosody?: any; // JSONB + confidence: number; +} + +export interface Sense { + id: number; + parent_id: number; + spelling: string; + pos?: string; + etymology?: string; + senses?: any; // JSONB + forms?: any; // JSONB + related?: any; // JSONB + ipa?: any; // JSONB + prosody?: any; // JSONB + confidence: number; +} + +export interface Subsense { + id: number; + sid: number; + gloss: string; + examples?: any; // JSONB +} + +export interface Derivation { + id: number; + sid: number; + type: string; + text: string; + tags?: any; // JSONB +} + +export interface Category { + name: string; +} + +export interface WordCategory { + word_id: number; + category: string; +} + +export interface Bookmark { + word_id: number; + user_id: number; + notes?: string; + created: number; +} + +export interface Language { + code: string; + name: string; + native_name?: string; + iso6392?: string; + english?: string; +} + +export class SemanticQueries { + constructor(private db: Database) {} + + // Language operations + getLanguages(): Language[] { + const query = this.db.query(` + SELECT code, name, native_name, iso6392, english + FROM languages + ORDER BY name + `); + return query.all() as Language[]; + } + + getLanguageByCode(code: string): Language | null { + const query = this.db.query(` + SELECT code, name, native_name, iso6392, english + FROM languages + WHERE code = ? + `); + return query.get(code) as Language | null; + } + + // Expression operations + getExpressionById(id: number): Expression | null { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE id = ? + `); + const result = query.get(id) as Expression | null; + if (result) { + // Parse JSON fields + result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined; + result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined; + } + return result; + } + + getExpressionsByLanguage(lang: string, limit: number = 100, offset: number = 0): Expression[] { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE lang = ? + ORDER BY frequency DESC, spelling + LIMIT ? OFFSET ? + `); + const results = query.all(lang, limit, offset) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + + searchExpressions(searchTerm: string, lang?: string, limit: number = 50): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE lang = ? AND spelling LIKE ? + ORDER BY + CASE + WHEN spelling = ? THEN 1 + WHEN spelling LIKE ? THEN 2 + ELSE 3 + END, + frequency DESC + LIMIT ? + `); + const results = query.all(lang, `%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE spelling LIKE ? + ORDER BY + CASE + WHEN spelling = ? THEN 1 + WHEN spelling LIKE ? THEN 2 + ELSE 3 + END, + frequency DESC + LIMIT ? + `); + const results = query.all(`%${searchTerm}%`, searchTerm, `${searchTerm}%`, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + getExpressionsByType(type: string, lang?: string, limit: number = 100): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE type = ? AND lang = ? + ORDER BY frequency DESC, spelling + LIMIT ? + `); + const results = query.all(type, lang, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE type = ? + ORDER BY frequency DESC, spelling + LIMIT ? + `); + const results = query.all(type, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + getExpressionsByFrequency(lang: string, minFrequency: number = 0, maxFrequency: number = 1000, limit: number = 100): Expression[] { + const query = this.db.query(` + SELECT id, spelling, lang, frequency, type, syllables, notes, ipa, prosody, confidence + FROM expressions + WHERE lang = ? AND frequency BETWEEN ? AND ? + ORDER BY frequency DESC, spelling + LIMIT ? + `); + const results = query.all(lang, minFrequency, maxFrequency, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + + // Sense operations + getSensesForExpression(expressionId: number): Sense[] { + const query = this.db.query(` + SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence + FROM senses + WHERE parent_id = ? + ORDER BY confidence DESC + `); + const results = query.all(expressionId) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + + getSenseById(id: number): Sense | null { + const query = this.db.query(` + SELECT id, parent_id, spelling, pos, etymology, senses, forms, related, ipa, prosody, confidence + FROM senses + WHERE id = ? + `); + const result = query.get(id) as Sense | null; + if (result) { + result.senses = result.senses ? JSON.parse(result.senses) : undefined; + result.forms = result.forms ? JSON.parse(result.forms) : undefined; + result.related = result.related ? JSON.parse(result.related) : undefined; + result.ipa = result.ipa ? JSON.parse(result.ipa) : undefined; + result.prosody = result.prosody ? JSON.parse(result.prosody) : undefined; + } + return result; + } + + searchSenses(searchTerm: string, lang?: string, limit: number = 50): Sense[] { + if (lang) { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE e.lang = ? AND (s.spelling LIKE ? OR s.pos LIKE ?) + ORDER BY s.confidence DESC + LIMIT ? + `); + const results = query.all(lang, `%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE s.spelling LIKE ? OR s.pos LIKE ? + ORDER BY s.confidence DESC + LIMIT ? + `); + const results = query.all(`%${searchTerm}%`, `%${searchTerm}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + // Subsense operations + getSubsensesForSense(senseId: number): Subsense[] { + const query = this.db.query(` + SELECT id, sid, gloss, examples + FROM subsenses + WHERE sid = ? + ORDER BY id + `); + const results = query.all(senseId) as Subsense[]; + return results.map(result => ({ + ...result, + examples: result.examples ? JSON.parse(result.examples) : undefined, + })); + } + + // Derivation operations + getDerivationsForSense(senseId: number): Derivation[] { + const query = this.db.query(` + SELECT id, sid, type, text, tags + FROM derivation + WHERE sid = ? + ORDER BY type, text + `); + const results = query.all(senseId) as Derivation[]; + return results.map(result => ({ + ...result, + tags: result.tags ? JSON.parse(result.tags) : undefined, + })); + } + + // Category operations + getCategories(): Category[] { + const query = this.db.query(` + SELECT name + FROM categories + ORDER BY name + `); + return query.all() as Category[]; + } + + getCategoriesForWord(wordId: number): Category[] { + const query = this.db.query(` + SELECT c.name + FROM categories c + JOIN word_categories wc ON c.name = wc.category + WHERE wc.word_id = ? + ORDER BY c.name + `); + return query.all(wordId) as Category[]; + } + + getWordsByCategory(category: string, lang?: string, limit: number = 100): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN word_categories wc ON e.id = wc.word_id + WHERE wc.category = ? AND e.lang = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(category, lang, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN word_categories wc ON e.id = wc.word_id + WHERE wc.category = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(category, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + // Bookmark operations + getBookmarksForUser(userId: number): Bookmark[] { + const query = this.db.query(` + SELECT word_id, user_id, notes, created + FROM bookmarks + WHERE user_id = ? + ORDER BY created DESC + `); + return query.all(userId) as Bookmark[]; + } + + createBookmark(userId: number, wordId: number, notes?: string): Bookmark { + const query = this.db.query(` + INSERT INTO bookmarks (word_id, user_id, notes, created) + VALUES (?, ?, ?, ?) + RETURNING word_id, user_id, notes, created + `); + return query.get(wordId, userId, notes, Date.now()) as Bookmark; + } + + updateBookmark(userId: number, wordId: number, notes?: string): Bookmark | null { + const query = this.db.query(` + UPDATE bookmarks + SET notes = ? + WHERE user_id = ? AND word_id = ? + RETURNING word_id, user_id, notes, created + `); + return query.get(notes, userId, wordId) as Bookmark | null; + } + + deleteBookmark(userId: number, wordId: number): boolean { + const query = this.db.query(` + DELETE FROM bookmarks + WHERE user_id = ? AND word_id = ? + `); + const result = query.run(userId, wordId); + return result.changes > 0; + } + + isBookmarked(userId: number, wordId: number): boolean { + const query = this.db.query(` + SELECT 1 + FROM bookmarks + WHERE user_id = ? AND word_id = ? + LIMIT 1 + `); + return query.get(userId, wordId) !== undefined; + } + + // Advanced search operations + searchByPOS(pos: string, lang?: string, limit: number = 50): Expression[] { + if (lang) { + const query = this.db.query(` + SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN senses s ON e.id = s.parent_id + WHERE e.lang = ? AND s.pos = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(lang, pos, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT DISTINCT e.id, e.spelling, e.lang, e.frequency, e.type, e.syllables, e.notes, e.ipa, e.prosody, e.confidence + FROM expressions e + JOIN senses s ON e.id = s.parent_id + WHERE s.pos = ? + ORDER BY e.frequency DESC, e.spelling + LIMIT ? + `); + const results = query.all(pos, limit) as Expression[]; + return results.map(result => ({ + ...result, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + searchByEtymology(term: string, lang?: string, limit: number = 50): Sense[] { + if (lang) { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE e.lang = ? AND s.etymology LIKE ? + ORDER BY s.confidence DESC + LIMIT ? + `); + const results = query.all(lang, `%${term}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } else { + const query = this.db.query(` + SELECT s.id, s.parent_id, s.spelling, s.pos, s.etymology, s.senses, s.forms, s.related, s.ipa, s.prosody, s.confidence, + e.lang + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE s.etymology LIKE ? + ORDER BY s.confidence DESC + LIMIT ? + `); + const results = query.all(`%${term}%`, limit) as Sense[]; + return results.map(result => ({ + ...result, + senses: result.senses ? JSON.parse(result.senses) : undefined, + forms: result.forms ? JSON.parse(result.forms) : undefined, + related: result.related ? JSON.parse(result.related) : undefined, + ipa: result.ipa ? JSON.parse(result.ipa) : undefined, + prosody: result.prosody ? JSON.parse(result.prosody) : undefined, + })); + } + } + + // Statistics operations + getSemanticStats(lang: string) { + const query = this.db.query(` + SELECT + COUNT(DISTINCT e.id) as total_expressions, + COUNT(DISTINCT s.id) as total_senses, + COUNT(DISTINCT s.pos) as unique_pos, + COUNT(DISTINCT c.name) as unique_categories, + AVG(e.confidence) as avg_confidence, + COUNT(DISTINCT e.type) as unique_types + FROM expressions e + LEFT JOIN senses s ON e.id = s.parent_id + LEFT JOIN word_categories wc ON e.id = wc.word_id + LEFT JOIN categories c ON wc.category = c.name + WHERE e.lang = ? + `); + return query.get(lang); + } + + getPOSTDistribution(lang: string) { + const query = this.db.query(` + SELECT + s.pos, + COUNT(DISTINCT s.parent_id) as expression_count, + COUNT(s.id) as sense_count, + ROUND(COUNT(s.id) * 100.0 / (SELECT COUNT(*) FROM senses WHERE pos IS NOT NULL), 2) as percentage + FROM senses s + JOIN expressions e ON s.parent_id = e.id + WHERE e.lang = ? AND s.pos IS NOT NULL + GROUP BY s.pos + ORDER BY sense_count DESC + `); + return query.all(lang); + } + + getCategoryDistribution(lang: string) { + const query = this.db.query(` + SELECT + c.name as category, + COUNT(DISTINCT wc.word_id) as word_count, + ROUND(COUNT(DISTINCT wc.word_id) * 100.0 / (SELECT COUNT(*) FROM word_categories), 2) as percentage + FROM categories c + JOIN word_categories wc ON c.name = wc.category + JOIN expressions e ON wc.word_id = e.id + WHERE e.lang = ? + GROUP BY c.name + ORDER BY word_count DESC + `); + return query.all(lang); + } +}
\ No newline at end of file |
