summary | refs | log | tree | commit | diff
path: root/packages/db/src/phonetics.ts
diff options
context:
space:
mode:
Diffstat (limited to 'packages/db/src/phonetics.ts')
-rw-r--r-- packages/db/src/phonetics.ts 523
1 files changed, 523 insertions, 0 deletions
diff --git a/packages/db/src/phonetics.ts b/packages/db/src/phonetics.ts
new file mode 100644
index 0000000..cf62434
--- /dev/null
+++ b/packages/db/src/phonetics.ts
@@ -0,0 +1,523 @@
+import { Database } from 'bun:sqlite';
+
/**
 * One row of the `syllables` table, as selected by the syllable queries of
 * `PhoneticsQueries` in this file.
 *
 * `onset`, `medial`, `nucleus`, `coda`, `rhyme` and `tone` are numeric ids:
 * those queries join them against the `id` column of the `onsets`, `medials`,
 * `nucleus`, `codas`, `rhymes` and `tones` tables respectively.
 */
export interface Syllable {
  id: number;
  lang: string;
  ipa: string;
  /**
   * Averaged as `avg_vowel_length` by the statistics query in this file;
   * presumably a vowel-length flag or measure — confirm against the schema.
   */
  long: number;
  text: string;

  /** Id joined against `onsets.id`. */
  onset: number;
  /** Id joined against `medials.id`. */
  medial: number;
  /** Id joined against `nucleus.id`. */
  nucleus: number;
  /** Id joined against `codas.id`. */
  coda: number;
  /** Id joined against `rhymes.id`. */
  rhyme: number;
  /** Id joined against `tones.id`. */
  tone: number;

  notes?: string;
}
+
/**
 * One row of the `tones` table.
 *
 * The tone queries in this file sort by `nums`, and the tone-distribution
 * query exposes the same column under the alias `tone_number`.
 */
export interface Tone {
  id: number;
  ipa: string;
  lang: string;
  name: string;
  /** Sort key of the tone listings (`ORDER BY nums`). */
  nums: number;
}
+
/** One row of the `onsets` table; referenced by `Syllable.onset`. */
export interface Onset {
  id: number;
  /** IPA rendering of the onset. */
  ipa: string;
  /** Written form; the onset listing is sorted by this column. */
  text: string;
  lang: string;
}
+
/** One row of the `medials` table; referenced by `Syllable.medial`. */
export interface Medial {
  id: number;
  /** IPA rendering of the medial. */
  ipa: string;
  /** Written form; the medial listing is sorted by this column. */
  text: string;
  lang: string;
}
+
/** One row of the `nucleus` table; referenced by `Syllable.nucleus`. */
export interface Nucleus {
  id: number;
  /** IPA rendering of the nucleus. */
  ipa: string;
  /** Written form; the nucleus listing is sorted by this column. */
  text: string;
  lang: string;
}
+
/** One row of the `codas` table; referenced by `Syllable.coda`. */
export interface Coda {
  id: number;
  /** IPA rendering of the coda. */
  ipa: string;
  /** Written form; the coda listing is sorted by this column. */
  text: string;
  lang: string;
}
+
/**
 * One row of the `rhymes` table; referenced by `Syllable.rhyme`.
 *
 * This is the syllable-level rhyme. Word-level rhymes live in a separate
 * table and are described by `WordRhyme`.
 */
export interface Rhyme {
  id: number;
  /** IPA rendering of the rhyme. */
  ipa: string;
  /** Written form; the rhyme listing is sorted by this column. */
  text: string;
  lang: string;
}
+
/**
 * One row of the `word_phonetics` table: the phonetic summary of a word.
 *
 * The queries in this file match `tone_sequence` either exactly or as a
 * substring, match `syllable_sequence` exactly, and compare `syllable_count`
 * numerically. The encoding of the three `*_sequence` strings is not visible
 * in this file.
 */
export interface WordPhonetics {
  /** Row id; optional on this type. */
  id?: number;
  /** Id of the word this record describes. */
  word_id: number;
  ipa: string;

  syllable_count: number;
  syllable_sequence: string;
  tone_sequence: string;
  ipa_sequence: string;

  tag?: string;
  notes?: string;
}
+
/**
 * One row of the `word_rhymes` table.
 *
 * The rhyme queries in this file link these rows to words through the
 * `words_wrhymes` join table (`wrhyme_id` → `word_rhymes.id`).
 */
export interface WordRhyme {
  id: number;
  /** Rhyme text; looked up together with `lang` by the rhyme search. */
  text: string;
  lang: string;
  notes?: string;
}
+
/**
 * One row of the `idioms` table.
 *
 * The idiom queries in this file link idioms to words through the
 * `words_idioms` join table and list them by descending `frequency`,
 * then by `spelling`.
 */
export interface Idiom {
  id: number;
  spelling: string;
  lang: string;
  /** Primary (descending) sort key of the idiom listings. */
  frequency?: number;
}
+
/**
 * One row of the `syllables_words` join table, tying a syllable to a word.
 */
export interface SyllableWordMapping {
  /** Id of the syllable (`syllables.id`). */
  syl_id: number;
  /** Id of the word the syllable belongs to. */
  word_id: number;
  /** Ordering key: a word's syllables are returned sorted by this column. */
  idx: number;
  stressed?: number;
}
+
/**
 * Tables that each store one kind of syllable component and share the
 * `(id, ipa, text, lang)` column layout.
 */
type ComponentTable = 'onsets' | 'medials' | 'nucleus' | 'codas' | 'rhymes';

/** Row shape of the queries that project only `word_id`. */
interface WordIdRow {
  word_id: number;
}

/**
 * Read-only query helpers over the phonetics tables (tones, syllable
 * components, syllables, word phonetics, rhymes and idioms).
 *
 * All caller-supplied values are bound as statement parameters. The only
 * strings interpolated into SQL are fixed fragments defined in this class.
 */
export class PhoneticsQueries {
  /**
   * Column list and joins shared by every syllable lookup. Besides the
   * `syllables` columns it projects the text of each component and the tone
   * name (`onset_text`, `medial_text`, `nucleus_text`, `coda_text`,
   * `rhyme_text`, `tone_name`).
   */
  private static readonly SYLLABLE_SELECT = `
    SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes,
           o.text as onset_text, m.text as medial_text, n.text as nucleus_text,
           c.text as coda_text, r.text as rhyme_text, t.name as tone_name
    FROM syllables s
    LEFT JOIN onsets o ON s.onset = o.id
    LEFT JOIN medials m ON s.medial = m.id
    LEFT JOIN nucleus n ON s.nucleus = n.id
    LEFT JOIN codas c ON s.coda = c.id
    LEFT JOIN rhymes r ON s.rhyme = r.id
    LEFT JOIN tones t ON s.tone = t.id
  `;

  constructor(private readonly db: Database) {}

  /** Extracts the `word_id` column from a result set. */
  private static wordIds(rows: WordIdRow[]): number[] {
    return rows.map((row) => row.word_id);
  }

  /**
   * Lists the rows of one syllable-component table.
   *
   * @param table - One of the fixed component tables. It is a compile-time
   *   constant, never caller input, so interpolating it into the SQL is safe.
   * @param lang - When truthy, restricts the listing to that language and
   *   sorts by `text`; otherwise all rows are returned sorted by `lang, text`.
   */
  private listComponents<T>(table: ComponentTable, lang: string | undefined): T[] {
    if (lang) {
      return this.db
        .query(`SELECT id, ipa, text, lang FROM ${table} WHERE lang = ? ORDER BY text`)
        .all(lang) as T[];
    }
    return this.db
      .query(`SELECT id, ipa, text, lang FROM ${table} ORDER BY lang, text`)
      .all() as T[];
  }

  // Tone operations

  /**
   * Lists tones ordered by `nums`.
   *
   * @param lang - When truthy, only tones of that language are returned;
   *   otherwise every tone is returned, grouped by language.
   */
  getTones(lang?: string): Tone[] {
    if (lang) {
      return this.db
        .query('SELECT id, ipa, lang, name, nums FROM tones WHERE lang = ? ORDER BY nums')
        .all(lang) as Tone[];
    }
    return this.db
      .query('SELECT id, ipa, lang, name, nums FROM tones ORDER BY lang, nums')
      .all() as Tone[];
  }

  /** Looks a tone up by its id. */
  getToneById(id: number): Tone | null {
    return this.db
      .query('SELECT id, ipa, lang, name, nums FROM tones WHERE id = ?')
      .get(id) as Tone | null;
  }

  /** Looks a tone up by its name within one language. */
  getToneByName(name: string, lang: string): Tone | null {
    return this.db
      .query('SELECT id, ipa, lang, name, nums FROM tones WHERE name = ? AND lang = ?')
      .get(name, lang) as Tone | null;
  }

  // Syllable component operations

  /** Lists onsets, optionally restricted to one language. */
  getOnsets(lang?: string): Onset[] {
    return this.listComponents<Onset>('onsets', lang);
  }

  /** Lists medials, optionally restricted to one language. */
  getMedials(lang?: string): Medial[] {
    return this.listComponents<Medial>('medials', lang);
  }

  /** Lists nuclei, optionally restricted to one language. */
  getNucleus(lang?: string): Nucleus[] {
    return this.listComponents<Nucleus>('nucleus', lang);
  }

  /** Lists codas, optionally restricted to one language. */
  getCodas(lang?: string): Coda[] {
    return this.listComponents<Coda>('codas', lang);
  }

  /** Lists syllable rhymes, optionally restricted to one language. */
  getRhymes(lang?: string): Rhyme[] {
    return this.listComponents<Rhyme>('rhymes', lang);
  }

  // Complete syllable operations

  /**
   * Lists syllables together with the text of their components.
   *
   * Each filter is applied independently, so a tone can be requested without
   * a language. Columns that are not fixed by a filter are used as sort keys,
   * in the order `lang`, `text`, `tone`.
   *
   * The returned rows also carry the joined `*_text` and `tone_name` columns,
   * which are not declared on `Syllable`.
   *
   * @param lang - When truthy, restricts the result to that language.
   * @param tone - When defined, restricts the result to that tone id.
   */
  getSyllables(lang?: string, tone?: number): Syllable[] {
    const conditions: string[] = [];
    const params: (string | number)[] = [];
    const sortKeys: string[] = [];

    if (lang) {
      conditions.push('s.lang = ?');
      params.push(lang);
    } else {
      sortKeys.push('s.lang');
    }

    sortKeys.push('s.text');

    if (tone !== undefined) {
      conditions.push('s.tone = ?');
      params.push(tone);
    } else {
      sortKeys.push('s.tone');
    }

    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';

    return this.db
      .query(`
        ${PhoneticsQueries.SYLLABLE_SELECT}
        ${whereClause}
        ORDER BY ${sortKeys.join(', ')}
      `)
      .all(...params) as Syllable[];
  }

  /** Looks a syllable up by id, with the same joined columns as {@link getSyllables}. */
  getSyllableById(id: number): Syllable | null {
    return this.db
      .query(`
        ${PhoneticsQueries.SYLLABLE_SELECT}
        WHERE s.id = ?
      `)
      .get(id) as Syllable | null;
  }

  // Word phonetics operations

  /** Returns the phonetics record of a word, or `null` when it has none. */
  getWordPhonetics(wordId: number): WordPhonetics | null {
    return this.db
      .query(`
        SELECT id, word_id, ipa, syllable_count, syllable_sequence, tone_sequence, ipa_sequence, tag, notes
        FROM word_phonetics
        WHERE word_id = ?
      `)
      .get(wordId) as WordPhonetics | null;
  }

  /**
   * Finds the ids of words whose tone sequence equals `toneSequence`.
   *
   * @param toneSequence - Exact value of `tone_sequence` to match.
   * @param syllableCount - When defined, additionally requires this syllable count.
   * @param limit - Maximum number of ids returned.
   */
  getWordsByTonePattern(toneSequence: string, syllableCount?: number, limit: number = 50): number[] {
    if (syllableCount !== undefined) {
      const rows = this.db
        .query(`
          SELECT word_id
          FROM word_phonetics
          WHERE tone_sequence = ? AND syllable_count = ?
          ORDER BY id
          LIMIT ?
        `)
        .all(toneSequence, syllableCount, limit) as WordIdRow[];
      return PhoneticsQueries.wordIds(rows);
    }

    const rows = this.db
      .query(`
        SELECT word_id
        FROM word_phonetics
        WHERE tone_sequence = ?
        ORDER BY syllable_count, id
        LIMIT ?
      `)
      .all(toneSequence, limit) as WordIdRow[];
    return PhoneticsQueries.wordIds(rows);
  }

  /** Finds the ids of words whose syllable sequence equals `syllableSequence`. */
  getWordsBySyllablePattern(syllableSequence: string, limit: number = 50): number[] {
    const rows = this.db
      .query(`
        SELECT word_id
        FROM word_phonetics
        WHERE syllable_sequence = ?
        ORDER BY id
        LIMIT ?
      `)
      .all(syllableSequence, limit) as WordIdRow[];
    return PhoneticsQueries.wordIds(rows);
  }

  /**
   * Searches word phonetics with any combination of optional criteria.
   *
   * @param pattern - `toneSequence` is matched as a substring (`LIKE %…%`);
   *   the syllable-count fields are compared numerically; `limit` defaults to
   *   50 only when it is omitted, so an explicit `0` is honoured.
   * @returns Matching word ids ordered by syllable count, then record id.
   */
  searchWordsByPhoneticPattern(pattern: {
    toneSequence?: string;
    syllableCount?: number;
    minSyllableCount?: number;
    maxSyllableCount?: number;
    limit?: number;
  }): number[] {
    const conditions: string[] = [];
    const params: (string | number)[] = [];

    if (pattern.toneSequence) {
      conditions.push('tone_sequence LIKE ?');
      params.push(`%${pattern.toneSequence}%`);
    }

    if (pattern.syllableCount !== undefined) {
      conditions.push('syllable_count = ?');
      params.push(pattern.syllableCount);
    }

    if (pattern.minSyllableCount !== undefined) {
      conditions.push('syllable_count >= ?');
      params.push(pattern.minSyllableCount);
    }

    if (pattern.maxSyllableCount !== undefined) {
      conditions.push('syllable_count <= ?');
      params.push(pattern.maxSyllableCount);
    }

    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
    // `??` rather than `||`: a caller passing 0 must not be given the default.
    params.push(pattern.limit ?? 50);

    const rows = this.db
      .query(`
        SELECT word_id
        FROM word_phonetics
        ${whereClause}
        ORDER BY syllable_count, id
        LIMIT ?
      `)
      .all(...params) as WordIdRow[];
    return PhoneticsQueries.wordIds(rows);
  }

  // Word-syllable mapping operations

  /**
   * Lists the syllables of a word in `idx` order.
   *
   * The returned rows also carry `syllable_text`, `syllable_ipa`, `tone`,
   * `long` and `tone_name`, which are not declared on `SyllableWordMapping`.
   */
  getSyllablesForWord(wordId: number): SyllableWordMapping[] {
    return this.db
      .query(`
        SELECT sw.syl_id, sw.word_id, sw.idx, sw.stressed,
               s.text as syllable_text, s.ipa as syllable_ipa, s.tone, s.long,
               t.name as tone_name
        FROM syllables_words sw
        JOIN syllables s ON sw.syl_id = s.id
        LEFT JOIN tones t ON s.tone = t.id
        WHERE sw.word_id = ?
        ORDER BY sw.idx
      `)
      .all(wordId) as SyllableWordMapping[];
  }

  /** Lists the distinct ids of the words that contain a syllable, in ascending order. */
  getWordsForSyllable(syllableId: number): number[] {
    const rows = this.db
      .query(`
        SELECT DISTINCT word_id
        FROM syllables_words
        WHERE syl_id = ?
        ORDER BY word_id
      `)
      .all(syllableId) as WordIdRow[];
    return PhoneticsQueries.wordIds(rows);
  }

  // Rhyme operations

  /** Lists the word-level rhymes attached to a word. */
  getWordRhymes(wordId: number): WordRhyme[] {
    return this.db
      .query(`
        SELECT wr.id, wr.text, wr.lang, wr.notes
        FROM word_rhymes wr
        JOIN words_wrhymes wwr ON wr.id = wwr.wrhyme_id
        WHERE wwr.word_id = ?
      `)
      .all(wordId) as WordRhyme[];
  }

  /**
   * Finds the distinct ids of words attached to a given word-level rhyme.
   *
   * Results are ordered by word id so that `limit` always selects the same
   * subset for the same data.
   */
  getWordsByRhyme(rhymeText: string, lang: string, limit: number = 50): number[] {
    const rows = this.db
      .query(`
        SELECT DISTINCT ww.word_id
        FROM word_rhymes wr
        JOIN words_wrhymes ww ON wr.id = ww.wrhyme_id
        WHERE wr.text = ? AND wr.lang = ?
        ORDER BY ww.word_id
        LIMIT ?
      `)
      .all(rhymeText, lang, limit) as WordIdRow[];
    return PhoneticsQueries.wordIds(rows);
  }

  // Idiom operations

  /**
   * Lists idioms by descending frequency, then spelling.
   *
   * @param lang - When truthy, only idioms of that language are returned;
   *   otherwise every idiom is returned, grouped by language.
   */
  getIdioms(lang?: string): Idiom[] {
    if (lang) {
      return this.db
        .query(`
          SELECT id, spelling, lang, frequency
          FROM idioms
          WHERE lang = ?
          ORDER BY frequency DESC, spelling
        `)
        .all(lang) as Idiom[];
    }
    return this.db
      .query(`
        SELECT id, spelling, lang, frequency
        FROM idioms
        ORDER BY lang, frequency DESC, spelling
      `)
      .all() as Idiom[];
  }

  /** Lists the idioms linked to a word, by descending frequency, then spelling. */
  getIdiomsForWord(wordId: number): Idiom[] {
    return this.db
      .query(`
        SELECT i.id, i.spelling, i.lang, i.frequency
        FROM idioms i
        JOIN words_idioms wi ON i.id = wi.idiom_id
        WHERE wi.word_id = ?
        ORDER BY i.frequency DESC, i.spelling
      `)
      .all(wordId) as Idiom[];
  }

  // Phonetic statistics and analysis

  /**
   * Returns a single row of aggregate figures for one language.
   *
   * Syllable figures are computed over `syllables` rows of `lang`. Word
   * figures are computed in scalar subqueries over `word_phonetics` rows whose
   * word belongs to `lang`, resolved through `expressions` exactly as
   * {@link getSyllableComplexityStats} does. Keeping the two aggregations
   * separate avoids pairing every syllable with every word record.
   */
  getPhoneticStats(lang: string) {
    const query = this.db.query(`
      SELECT
        COUNT(DISTINCT s.id) as total_syllables,
        COUNT(DISTINCT s.tone) as unique_tones,
        AVG(s.long) as avg_vowel_length,
        COUNT(DISTINCT s.onset) as unique_onsets,
        COUNT(DISTINCT s.medial) as unique_medials,
        COUNT(DISTINCT s.nucleus) as unique_nucleus,
        COUNT(DISTINCT s.coda) as unique_codas,
        COUNT(DISTINCT s.rhyme) as unique_rhymes,
        (
          SELECT COUNT(DISTINCT wp.id)
          FROM word_phonetics wp
          JOIN expressions e ON wp.word_id = e.id
          WHERE e.lang = ?
        ) as total_word_phonetics,
        (
          SELECT AVG(wp.syllable_count)
          FROM word_phonetics wp
          JOIN expressions e ON wp.word_id = e.id
          WHERE e.lang = ?
        ) as avg_syllables_per_word
      FROM syllables s
      WHERE s.lang = ?
    `);
    // One binding per placeholder, in textual order: two subqueries, then the outer filter.
    return query.get(lang, lang, lang);
  }

  /**
   * Returns, per tone of a language, the number of syllables carrying it and
   * that number as a percentage of the language's syllables.
   */
  getToneDistribution(lang: string) {
    const query = this.db.query(`
      SELECT
        t.name as tone_name,
        t.nums as tone_number,
        COUNT(s.id) as syllable_count,
        ROUND(COUNT(s.id) * 100.0 / (SELECT COUNT(*) FROM syllables WHERE lang = ?), 2) as percentage
      FROM syllables s
      JOIN tones t ON s.tone = t.id
      WHERE s.lang = ?
      GROUP BY t.id, t.name, t.nums
      ORDER BY t.nums
    `);
    return query.all(lang, lang);
  }

  /**
   * Returns, per syllable count, the number of words of a language having
   * that count and that number as a percentage of the language's words.
   *
   * The denominator is restricted to the same language as the numerator, so
   * the percentages of one language add up to 100.
   */
  getSyllableComplexityStats(lang: string) {
    const query = this.db.query(`
      SELECT
        syllable_count,
        COUNT(*) as word_count,
        ROUND(COUNT(*) * 100.0 / (
          SELECT COUNT(*)
          FROM word_phonetics wp2
          JOIN expressions e2 ON wp2.word_id = e2.id
          WHERE e2.lang = ?
        ), 2) as percentage
      FROM word_phonetics wp
      JOIN expressions e ON wp.word_id = e.id
      WHERE e.lang = ?
      GROUP BY syllable_count
      ORDER BY syllable_count
    `);
    return query.all(lang, lang);
  }
}