diff options
Diffstat (limited to 'packages/db/src/phonetics.ts')
| -rw-r--r-- | packages/db/src/phonetics.ts | 523 |
1 files changed, 523 insertions, 0 deletions
import type { Database } from 'bun:sqlite';

/**
 * Row shape selected from the `syllables` table.
 *
 * `onset`, `medial`, `nucleus`, `coda`, `rhyme` and `tone` are joined against
 * the `id` column of the corresponding component tables by the queries below.
 */
export interface Syllable {
  id: number;
  lang: string;
  ipa: string;
  // NOTE(review): aggregated as `avg_vowel_length` in getPhoneticStats;
  // presumably a 0/1 long-vowel flag — confirm against the schema.
  long: number;
  text: string;
  onset: number;
  medial: number;
  nucleus: number;
  coda: number;
  rhyme: number;
  tone: number;
  notes?: string;
}

/** Row shape selected from the `tones` table. */
export interface Tone {
  id: number;
  ipa: string;
  lang: string;
  name: string;
  nums: number;
}

/** Row shape selected from the `onsets` table. */
export interface Onset {
  id: number;
  ipa: string;
  text: string;
  lang: string;
}

/** Row shape selected from the `medials` table. */
export interface Medial {
  id: number;
  ipa: string;
  text: string;
  lang: string;
}

/** Row shape selected from the `nucleus` table. */
export interface Nucleus {
  id: number;
  ipa: string;
  text: string;
  lang: string;
}

/** Row shape selected from the `codas` table. */
export interface Coda {
  id: number;
  ipa: string;
  text: string;
  lang: string;
}

/** Row shape selected from the `rhymes` table. */
export interface Rhyme {
  id: number;
  ipa: string;
  text: string;
  lang: string;
}

/** Row shape selected from the `word_phonetics` table. */
export interface WordPhonetics {
  id?: number;
  word_id: number;
  ipa: string;
  syllable_count: number;
  syllable_sequence: string;
  tone_sequence: string;
  ipa_sequence: string;
  tag?: string;
  notes?: string;
}

/** Row shape selected from the `word_rhymes` table. */
export interface WordRhyme {
  id: number;
  text: string;
  lang: string;
  notes?: string;
}

/** Row shape selected from the `idioms` table. */
export interface Idiom {
  id: number;
  spelling: string;
  lang: string;
  frequency?: number;
}

/** Row shape selected from the `syllables_words` link table. */
export interface SyllableWordMapping {
  syl_id: number;
  word_id: number;
  idx: number;
  stressed?: number;
}

/** Tables that share the `(id, ipa, text, lang)` column layout. */
type ComponentTable = 'onsets' | 'medials' | 'nucleus' | 'codas' | 'rhymes';

/**
 * SELECT list and joins shared by every syllable lookup. Callers append their
 * own WHERE / ORDER BY clauses.
 */
const SYLLABLE_SELECT = `
  SELECT s.id, s.lang, s.ipa, s.long, s.text, s.onset, s.medial, s.nucleus, s.coda, s.rhyme, s.tone, s.notes,
         o.text as onset_text, m.text as medial_text, n.text as nucleus_text,
         c.text as coda_text, r.text as rhyme_text, t.name as tone_name
  FROM syllables s
  LEFT JOIN onsets o ON s.onset = o.id
  LEFT JOIN medials m ON s.medial = m.id
  LEFT JOIN nucleus n ON s.nucleus = n.id
  LEFT JOIN codas c ON s.coda = c.id
  LEFT JOIN rhymes r ON s.rhyme = r.id
  LEFT JOIN tones t ON s.tone = t.id
`;

/**
 * Read-only query helpers over the phonetics tables (tones, syllable
 * components, syllables, word phonetics, rhymes and idioms).
 */
export class PhoneticsQueries {
  /** @param db SQLite handle every query in this class is issued against. */
  constructor(private readonly db: Database) {}

  /**
   * Lists every row of one component table, optionally restricted to a
   * language.
   *
   * `table` is interpolated into the SQL text; this is safe only because it is
   * limited to the fixed `ComponentTable` literals and never comes from a caller.
   * A falsy `lang` (undefined or empty string) means "all languages".
   */
  private listComponents<T>(table: ComponentTable, lang?: string): T[] {
    if (lang) {
      const query = this.db.query(`
        SELECT id, ipa, text, lang
        FROM ${table}
        WHERE lang = ?
        ORDER BY text
      `);
      return query.all(lang) as T[];
    }

    const query = this.db.query(`
      SELECT id, ipa, text, lang
      FROM ${table}
      ORDER BY lang, text
    `);
    return query.all() as T[];
  }

  // Tone operations

  /**
   * Returns tones ordered by `nums`; without `lang`, all tones ordered by
   * language first.
   */
  getTones(lang?: string): Tone[] {
    if (lang) {
      const query = this.db.query(`
        SELECT id, ipa, lang, name, nums
        FROM tones
        WHERE lang = ?
        ORDER BY nums
      `);
      return query.all(lang) as Tone[];
    }

    const query = this.db.query(`
      SELECT id, ipa, lang, name, nums
      FROM tones
      ORDER BY lang, nums
    `);
    return query.all() as Tone[];
  }

  /** Looks up a single tone by primary key; `null` when no row matches. */
  getToneById(id: number): Tone | null {
    const query = this.db.query(`
      SELECT id, ipa, lang, name, nums
      FROM tones
      WHERE id = ?
    `);
    return query.get(id) as Tone | null;
  }

  /** Looks up a tone by its name within one language; `null` when no row matches. */
  getToneByName(name: string, lang: string): Tone | null {
    const query = this.db.query(`
      SELECT id, ipa, lang, name, nums
      FROM tones
      WHERE name = ? AND lang = ?
    `);
    return query.get(name, lang) as Tone | null;
  }

  // Syllable component operations

  /** Lists onsets ordered by text (by language, then text, when `lang` is omitted). */
  getOnsets(lang?: string): Onset[] {
    return this.listComponents<Onset>('onsets', lang);
  }

  /** Lists medials ordered by text (by language, then text, when `lang` is omitted). */
  getMedials(lang?: string): Medial[] {
    return this.listComponents<Medial>('medials', lang);
  }

  /** Lists nuclei ordered by text (by language, then text, when `lang` is omitted). */
  getNucleus(lang?: string): Nucleus[] {
    return this.listComponents<Nucleus>('nucleus', lang);
  }

  /** Lists codas ordered by text (by language, then text, when `lang` is omitted). */
  getCodas(lang?: string): Coda[] {
    return this.listComponents<Coda>('codas', lang);
  }

  /** Lists rhymes ordered by text (by language, then text, when `lang` is omitted). */
  getRhymes(lang?: string): Rhyme[] {
    return this.listComponents<Rhyme>('rhymes', lang);
  }

  // Complete syllable operations

  /**
   * Lists syllables joined with the text of their components and tone name.
   *
   * Both filters are optional and independent: a falsy `lang` means all
   * languages, an undefined `tone` means all tones. Earlier revisions ignored
   * `tone` unless `lang` was also given.
   *
   * Rows additionally carry `onset_text`, `medial_text`, `nucleus_text`,
   * `coda_text`, `rhyme_text` and `tone_name` at runtime; those columns are
   * not declared on `Syllable`.
   *
   * @param lang Language code to restrict to.
   * @param tone Tone id (`syllables.tone`) to restrict to.
   */
  getSyllables(lang?: string, tone?: number): Syllable[] {
    const conditions: string[] = [];
    const params: (string | number)[] = [];

    if (lang) {
      conditions.push('s.lang = ?');
      params.push(lang);
    }

    if (tone !== undefined) {
      conditions.push('s.tone = ?');
      params.push(tone);
    }

    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';

    // A single ordering covers every filter combination: a column pinned by
    // the WHERE clause is constant and therefore does not affect the sort.
    const query = this.db.query(`
      ${SYLLABLE_SELECT}
      ${whereClause}
      ORDER BY s.lang, s.text, s.tone
    `);
    return query.all(...params) as Syllable[];
  }

  /**
   * Looks up one syllable (with the same joined columns as `getSyllables`);
   * `null` when no row matches.
   */
  getSyllableById(id: number): Syllable | null {
    const query = this.db.query(`
      ${SYLLABLE_SELECT}
      WHERE s.id = ?
    `);
    return query.get(id) as Syllable | null;
  }

  // Word phonetics operations

  /** Returns the first `word_phonetics` row for a word, or `null` when there is none. */
  getWordPhonetics(wordId: number): WordPhonetics | null {
    const query = this.db.query(`
      SELECT id, word_id, ipa, syllable_count, syllable_sequence, tone_sequence, ipa_sequence, tag, notes
      FROM word_phonetics
      WHERE word_id = ?
    `);
    return query.get(wordId) as WordPhonetics | null;
  }

  /**
   * Returns ids of words whose tone sequence matches exactly.
   *
   * @param toneSequence Exact `tone_sequence` value to match.
   * @param syllableCount When given, also requires this exact syllable count.
   * @param limit Maximum number of ids returned.
   */
  getWordsByTonePattern(toneSequence: string, syllableCount?: number, limit: number = 50): number[] {
    if (syllableCount !== undefined) {
      const query = this.db.query(`
        SELECT word_id
        FROM word_phonetics
        WHERE tone_sequence = ? AND syllable_count = ?
        ORDER BY id
        LIMIT ?
      `);
      const rows = query.all(toneSequence, syllableCount, limit) as { word_id: number }[];
      return rows.map((row) => row.word_id);
    }

    const query = this.db.query(`
      SELECT word_id
      FROM word_phonetics
      WHERE tone_sequence = ?
      ORDER BY syllable_count, id
      LIMIT ?
    `);
    const rows = query.all(toneSequence, limit) as { word_id: number }[];
    return rows.map((row) => row.word_id);
  }

  /** Returns ids of words whose `syllable_sequence` matches exactly, ordered by row id. */
  getWordsBySyllablePattern(syllableSequence: string, limit: number = 50): number[] {
    const query = this.db.query(`
      SELECT word_id
      FROM word_phonetics
      WHERE syllable_sequence = ?
      ORDER BY id
      LIMIT ?
    `);
    const rows = query.all(syllableSequence, limit) as { word_id: number }[];
    return rows.map((row) => row.word_id);
  }

  /**
   * Searches `word_phonetics` by any combination of criteria; all supplied
   * criteria must hold.
   *
   * `toneSequence` is matched as a substring via `LIKE '%…%'`, so `%` and `_`
   * inside it act as SQL wildcards. An empty `toneSequence` is treated as
   * "no tone filter". `limit` defaults to 50 only when it is omitted; an
   * explicit `0` is passed through to SQLite.
   *
   * @returns Matching word ids ordered by syllable count, then row id.
   */
  searchWordsByPhoneticPattern(pattern: {
    toneSequence?: string;
    syllableCount?: number;
    minSyllableCount?: number;
    maxSyllableCount?: number;
    limit?: number;
  }): number[] {
    const conditions: string[] = [];
    const params: (string | number)[] = [];

    if (pattern.toneSequence) {
      conditions.push('tone_sequence LIKE ?');
      params.push(`%${pattern.toneSequence}%`);
    }

    if (pattern.syllableCount !== undefined) {
      conditions.push('syllable_count = ?');
      params.push(pattern.syllableCount);
    }

    if (pattern.minSyllableCount !== undefined) {
      conditions.push('syllable_count >= ?');
      params.push(pattern.minSyllableCount);
    }

    if (pattern.maxSyllableCount !== undefined) {
      conditions.push('syllable_count <= ?');
      params.push(pattern.maxSyllableCount);
    }

    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';

    // `??` rather than `||`: only a missing limit falls back to the default.
    params.push(pattern.limit ?? 50);

    const query = this.db.query(`
      SELECT word_id
      FROM word_phonetics
      ${whereClause}
      ORDER BY syllable_count, id
      LIMIT ?
    `);

    const rows = query.all(...params) as { word_id: number }[];
    return rows.map((row) => row.word_id);
  }

  // Word-syllable mapping operations

  /**
   * Returns the syllables of a word in `idx` order.
   *
   * Rows additionally carry `syllable_text`, `syllable_ipa`, `tone`, `long`
   * and `tone_name` at runtime; those columns are not declared on
   * `SyllableWordMapping`.
   */
  getSyllablesForWord(wordId: number): SyllableWordMapping[] {
    const query = this.db.query(`
      SELECT sw.syl_id, sw.word_id, sw.idx, sw.stressed,
             s.text as syllable_text, s.ipa as syllable_ipa, s.tone, s.long,
             t.name as tone_name
      FROM syllables_words sw
      JOIN syllables s ON sw.syl_id = s.id
      LEFT JOIN tones t ON s.tone = t.id
      WHERE sw.word_id = ?
      ORDER BY sw.idx
    `);
    return query.all(wordId) as SyllableWordMapping[];
  }

  /** Returns the distinct ids of words containing the given syllable, ascending. */
  getWordsForSyllable(syllableId: number): number[] {
    const query = this.db.query(`
      SELECT DISTINCT word_id
      FROM syllables_words
      WHERE syl_id = ?
      ORDER BY word_id
    `);
    const rows = query.all(syllableId) as { word_id: number }[];
    return rows.map((row) => row.word_id);
  }

  // Rhyme operations

  /** Returns the word-level rhymes linked to a word through `words_wrhymes`. */
  getWordRhymes(wordId: number): WordRhyme[] {
    const query = this.db.query(`
      SELECT wr.id, wr.text, wr.lang, wr.notes
      FROM word_rhymes wr
      JOIN words_wrhymes wwr ON wr.id = wwr.wrhyme_id
      WHERE wwr.word_id = ?
    `);
    return query.all(wordId) as WordRhyme[];
  }

  /**
   * Returns distinct ids of words linked to the rhyme with the given text and
   * language.
   *
   * Results are ordered by word id so that `limit` always selects the same
   * subset; without an ORDER BY the rows kept by LIMIT are unspecified.
   */
  getWordsByRhyme(rhymeText: string, lang: string, limit: number = 50): number[] {
    const query = this.db.query(`
      SELECT DISTINCT ww.word_id
      FROM word_rhymes wr
      JOIN words_wrhymes ww ON wr.id = ww.wrhyme_id
      WHERE wr.text = ? AND wr.lang = ?
      ORDER BY ww.word_id
      LIMIT ?
    `);
    const rows = query.all(rhymeText, lang, limit) as { word_id: number }[];
    return rows.map((row) => row.word_id);
  }

  // Idiom operations

  /**
   * Lists idioms, most frequent first, ties broken by spelling; without
   * `lang`, grouped by language first.
   */
  getIdioms(lang?: string): Idiom[] {
    if (lang) {
      const query = this.db.query(`
        SELECT id, spelling, lang, frequency
        FROM idioms
        WHERE lang = ?
        ORDER BY frequency DESC, spelling
      `);
      return query.all(lang) as Idiom[];
    }

    const query = this.db.query(`
      SELECT id, spelling, lang, frequency
      FROM idioms
      ORDER BY lang, frequency DESC, spelling
    `);
    return query.all() as Idiom[];
  }

  /** Lists idioms linked to a word through `words_idioms`, most frequent first. */
  getIdiomsForWord(wordId: number): Idiom[] {
    const query = this.db.query(`
      SELECT i.id, i.spelling, i.lang, i.frequency
      FROM idioms i
      JOIN words_idioms wi ON i.id = wi.idiom_id
      WHERE wi.word_id = ?
      ORDER BY i.frequency DESC, i.spelling
    `);
    return query.all(wordId) as Idiom[];
  }

  // Phonetic statistics and analysis

  /**
   * Returns one row of aggregate counts for the syllable inventory of `lang`.
   *
   * The two word-level figures (`total_word_phonetics`,
   * `avg_syllables_per_word`) are computed with scalar subqueries over the
   * whole `word_phonetics` table. The previous `LEFT JOIN ... ON 1=1` built a
   * syllables x word_phonetics cartesian product to reach the same numbers.
   * One difference remains: for a language with no syllables these two
   * figures now still report the table-wide values instead of 0 / NULL.
   *
   * NOTE(review): as before, the word-level figures are NOT restricted to
   * `lang` — confirm whether they should be scoped via `expressions.lang`.
   */
  getPhoneticStats(lang: string) {
    const query = this.db.query(`
      SELECT
        COUNT(DISTINCT s.id) as total_syllables,
        COUNT(DISTINCT s.tone) as unique_tones,
        AVG(s.long) as avg_vowel_length,
        COUNT(DISTINCT s.onset) as unique_onsets,
        COUNT(DISTINCT s.medial) as unique_medials,
        COUNT(DISTINCT s.nucleus) as unique_nucleus,
        COUNT(DISTINCT s.coda) as unique_codas,
        COUNT(DISTINCT s.rhyme) as unique_rhymes,
        (SELECT COUNT(DISTINCT id) FROM word_phonetics) as total_word_phonetics,
        (SELECT AVG(syllable_count) FROM word_phonetics) as avg_syllables_per_word
      FROM syllables s
      WHERE s.lang = ?
    `);
    return query.get(lang);
  }

  /**
   * Returns, per tone, the number of syllables of `lang` using it and that
   * number as a percentage of all syllables of `lang`, ordered by tone number.
   */
  getToneDistribution(lang: string) {
    const query = this.db.query(`
      SELECT
        t.name as tone_name,
        t.nums as tone_number,
        COUNT(s.id) as syllable_count,
        ROUND(COUNT(s.id) * 100.0 / (SELECT COUNT(*) FROM syllables WHERE lang = ?), 2) as percentage
      FROM syllables s
      JOIN tones t ON s.tone = t.id
      WHERE s.lang = ?
      GROUP BY t.id, t.name, t.nums
      ORDER BY t.nums
    `);
    // `lang` is bound twice: once for the percentage denominator, once for
    // the outer filter.
    return query.all(lang, lang);
  }

  /**
   * Returns, per syllable count, how many words of `lang` have that many
   * syllables and their share of all words of `lang` that have phonetics.
   *
   * The percentage denominator is restricted to `lang` (matching
   * `getToneDistribution`); it previously counted every `word_phonetics` row
   * regardless of language, so the percentages did not add up to 100.
   */
  getSyllableComplexityStats(lang: string) {
    const query = this.db.query(`
      SELECT
        wp.syllable_count as syllable_count,
        COUNT(*) as word_count,
        ROUND(COUNT(*) * 100.0 / (
          SELECT COUNT(*)
          FROM word_phonetics wp2
          JOIN expressions e2 ON wp2.word_id = e2.id
          WHERE e2.lang = ?
        ), 2) as percentage
      FROM word_phonetics wp
      JOIN expressions e ON wp.word_id = e.id
      WHERE e.lang = ?
      GROUP BY wp.syllable_count
      ORDER BY wp.syllable_count
    `);
    // `lang` is bound twice: once for the percentage denominator, once for
    // the outer filter.
    return query.all(lang, lang);
  }
}
\ No newline at end of file |
