summaryrefslogtreecommitdiff
path: root/src/lib/db/prosodyschema.sql
blob: 5554a02f4fc0f091ef3df8e1b7347c8593c29430 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
-- Enable foreign key support
PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL;
PRAGMA cache_size = -2000;
PRAGMA mmap_size = 30000000000;


-- proper prosody now
--
-- 
--
CREATE TABLE IF NOT EXISTS languages(
    iso6392 TEXT PRIMARY KEY,
    -- bcp47 TEXT PRIMARY KEY,
    -- iso6393 TEXT NOT NULL,
    english TEXT NOT NULL
    -- native TEXT,
    -- iso6391 TEXT,
    -- iso6395 TEXT,
    -- glottolog TEXT
);

CREATE TABLE IF NOT EXISTS idioms(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    spelling TEXT NOT NULL,
    lang TEXT NOT NULL,
    frequency INTEGER,
    FOREIGN KEY (lang) REFERENCES languages(iso6392),
    CONSTRAINT spell_unique UNIQUE (spelling, lang)
);

CREATE INDEX IF NOT EXISTS idx_idioms_spelling ON idioms(spelling);
CREATE TABLE IF NOT EXISTS words(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    spelling TEXT NOT NULL,
    lang TEXT NOT NULL,
    frequency INTEGER,
    notes TEXT,
    FOREIGN KEY (lang) REFERENCES languages(iso6392),
    CONSTRAINT spell_unique UNIQUE (spelling, lang)
);

CREATE INDEX IF NOT EXISTS idx_words_spelling ON words(spelling);

CREATE TABLE IF NOT EXISTS word_rhymes(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    text TEXT NOT NULL,
    lang TEXT NOT NULL,
    notes TEXT,
    CONSTRAINT wrhyme_unique UNIQUE (text, lang)
);
CREATE TABLE IF NOT EXISTS words_wrhymes(
    word_id INTEGER NOT NULL,
    wrhyme_id INTEGER NOT NULL,
    FOREIGN KEY (word_id) REFERENCES words(id),
    FOREIGN KEY (wrhyme_id) REFERENCES word_rhymes(id)
);

-- break up syllables
CREATE TABLE IF NOT EXISTS tones(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    ipa TEXT NOT NULL,
    lang TEXT NOT NULL,
    name TEXT NOT NULL,
    nums INTEGER NOT NULL,
    CONSTRAINT tone_unique UNIQUE (ipa, lang)
);
CREATE TABLE IF NOT EXISTS onsets(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    ipa  TEXT NOT NULL,
    text TEXT NOT NULL,
    lang TEXT NOT NULL,
    CONSTRAINT onsets_unique UNIQUE (ipa, text, lang)
);
CREATE TABLE IF NOT EXISTS medials(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    ipa  TEXT NOT NULL,
    text TEXT NOT NULL,
    lang TEXT NOT NULL,
    CONSTRAINT onsets_unique UNIQUE (ipa, text, lang)
);
CREATE TABLE IF NOT EXISTS nucleus(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    ipa  TEXT NOT NULL,
    text TEXT NOT NULL,
    lang TEXT NOT NULL,
    CONSTRAINT onsets_unique UNIQUE (ipa, text, lang)
);
CREATE TABLE IF NOT EXISTS codas(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    ipa  TEXT NOT NULL,
    text TEXT NOT NULL,
    lang TEXT NOT NULL,
    CONSTRAINT onsets_unique UNIQUE (ipa, text, lang)
);
CREATE TABLE IF NOT EXISTS rhymes(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    ipa  TEXT NOT NULL,
    text TEXT NOT NULL,
    lang TEXT NOT NULL,
    CONSTRAINT onsets_unique UNIQUE (ipa, text, lang)
);
CREATE TABLE IF NOT EXISTS syllables(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    lang TEXT NOT NULL,
    ipa TEXT NOT NULL,
    long INTEGER NOT NULL,
    text TEXT NOT NULL,
    onset INTEGER  NOT NULL,
    medial INTEGER  NOT NULL,
    nucleus INTEGER  NOT NULL,
    coda INTEGER  NOT NULL,
    rhyme INTEGER  NOT NULL,
    tone INTEGER NOT NULL,
    notes TEXT,
    FOREIGN KEY (lang) REFERENCES languages(iso6392),
    FOREIGN KEY (onset) REFERENCES onsets(id),
    FOREIGN KEY (medial) REFERENCES medials(id),
    FOREIGN KEY (nucleus) REFERENCES nucleus(id),
    FOREIGN KEY (coda) REFERENCES codas(id),
    FOREIGN KEY (rhyme) REFERENCES rhymes(id),
    FOREIGN KEY (tone) REFERENCES tones(id),
    CONSTRAINT syllable_unique UNIQUE (text, ipa, lang)
);


-- join tables

CREATE TABLE IF NOT EXISTS syllables_words(
    syl_id INTEGER NOT NULL,
    word_id INTEGER NOT NULL,
    idx INTEGER NOT NULL,
    stressed INTEGER,
    FOREIGN KEY (syl_id) REFERENCES syllables(id),
    FOREIGN KEY (word_id) REFERENCES words(id)
);


CREATE TABLE IF NOT EXISTS words_idioms(
    word_id INTEGER NOT NULL,
    idiom_id INTEGER NOT NULL,
    FOREIGN KEY (idiom_id) REFERENCES idioms(id),
    FOREIGN KEY (word_id) REFERENCES words(id)
);


-- 

CREATE TABLE IF NOT EXISTS word_phonetics(
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    word_id INTEGER NOT NULL,
    ipa TEXT NOT NULL,
    syllable_count INTEGER NOT NULL,
    syllable_sequence TEXT NOT NULL,  -- "家,鄉"
    tone_sequence TEXT NOT NULL,      -- "rising,rising" 
    ipa_sequence TEXT NOT NULL,       -- IPA representation
    tag TEXT,
    notes TEXT,
    FOREIGN KEY (word_id) REFERENCES words(id)
);
CREATE INDEX IF NOT EXISTS idx_words_ipa ON word_phonetics(ipa, word_id);

-- -- Query 2: Even simpler with pattern table
-- -- Pattern [{ change: "rising" }, { change: "falling" }] - any 2-syllable word with rising,falling tones
-- SELECT 
--     w.spelling,
--     w.frequency,
--     wp.syllable_sequence,
--     wp.tone_sequence
-- FROM words w
-- JOIN word_patterns wp ON w.id = wp.word_id
-- WHERE wp.syllable_count = 2
--   AND wp.tone_sequence = 'rising,falling'
-- ORDER BY w.frequency DESC NULLS LAST;

-- -- Query 3: Mixed pattern [{ keep: "家" }, { change: "falling" }, { keep: "人" }]
-- SELECT DISTINCT
--     w.spelling,
--     w.frequency,
--     wp.syllable_sequence,
--     wp.tone_sequence
-- FROM words w
-- JOIN word_patterns wp ON w.id = wp.word_id
-- WHERE wp.syllable_count = 3
--   AND wp.syllable_sequence LIKE '家,%,人'  -- Simple pattern matching
--   AND EXISTS (
--       SELECT 1 FROM word_syllable_positions wsp
--       WHERE wsp.word_id = w.id 
--         AND wsp.position = 1 
--         AND wsp.tone_name = 'falling'
--   )
-- ORDER BY w.frequency DESC NULLS LAST;

-- -- Query 4: Super fast rhyme finding
-- -- Find all words that end with same syllable as "家鄉" (end with "鄉")
-- SELECT 
--     w.spelling,
--     w.frequency,
--     wp.syllable_sequence
-- FROM words w
-- JOIN word_patterns wp ON w.id = wp.word_id
-- WHERE wp.syllable_sequence LIKE '%,鄉'  -- Ends with 鄉
--   AND wp.syllable_count >= 2
-- ORDER BY w.frequency DESC NULLS LAST;




-- SELECT
--     w.id as word_id,
--     w.spelling,
--     w.lang,
--     w.frequency
-- FROM words w
-- JOIN word_phonetics wp ON wp.word_id= w.id
-- WHERE wp.syllable_sequence LIKE '%,ใจ'
-- AND wp.tone_sequence LIKE 'rising,%'
-- AND wp.syllable_count = 2
-- GROUP BY w.id, w.spelling, w.lang, w.frequency
-- ORDER BY w.frequency DESC NULLS LAST;
-- 
-- Indexes for fast pattern matching
CREATE INDEX IF NOT EXISTS idx_word_patterns_syllables ON word_phonetics(syllable_sequence);
CREATE INDEX IF NOT EXISTS idx_word_patterns_tones ON word_phonetics(tone_sequence);
CREATE INDEX IF NOT EXISTS idx_word_patterns_count ON word_phonetics(syllable_count);
CREATE INDEX IF NOT EXISTS idx_word_patterns_mixed ON word_phonetics(syllable_count, syllable_sequence, tone_sequence);


CREATE INDEX IF NOT EXISTS idx_syllables_words_word_idx ON syllables_words(word_id, idx);
CREATE INDEX IF NOT EXISTS idx_syllables_words_idx_word ON syllables_words(idx, word_id);
CREATE INDEX IF NOT EXISTS idx_syllables_words_syl ON syllables_words(syl_id);

-- 2. Syllables table indexes for text and language lookups
CREATE INDEX IF NOT EXISTS idx_syllables_text_lang ON syllables(text, lang);
CREATE INDEX IF NOT EXISTS idx_syllables_lang_text ON syllables(lang, text);
CREATE INDEX IF NOT EXISTS idx_syllables_tone ON syllables(tone);
CREATE INDEX IF NOT EXISTS idx_syllables_text_tone ON syllables(text, tone);

-- 3. Tones table indexes
CREATE INDEX IF NOT EXISTS idx_tones_name_lang ON tones(name, lang);
CREATE INDEX IF NOT EXISTS idx_tones_nums_lang ON tones(nums, lang);
CREATE INDEX IF NOT EXISTS idx_tones_lang_name ON tones(lang, name);

-- 4. Words table indexes
CREATE INDEX IF NOT EXISTS idx_words_lang_freq ON words(lang, frequency DESC);
CREATE INDEX IF NOT EXISTS idx_words_id_lang ON words(id, lang);

-- 5. Composite indexes for common query patterns
CREATE INDEX IF NOT EXISTS idx_syllables_compound ON syllables(lang, text, tone);
CREATE INDEX IF NOT EXISTS idx_syllables_words_compound ON syllables_words(word_id, idx, syl_id);