summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author polwex <polwex@sortug.com> 2025-06-03 15:41:31 +0700
committer polwex <polwex@sortug.com> 2025-06-03 15:41:31 +0700
commit 175ddca375cef765cec8ca5bbc527a205c40bf25 (patch)
tree f2e47a5d85e4d5e0297613e5a17cebce7d09b09b
parent 2401217a4019938d1c1cc61b6e33ccb233eb6e74 (diff)
preeeeettty much done FUCK YES
-rw-r--r-- src/actions/tones.ts 10
-rw-r--r-- src/components/tones/ToneSelectorClient.tsx 405
-rw-r--r-- src/components/ui/select.tsx 42
-rw-r--r-- src/lib/calls/nlp.ts 1
-rw-r--r-- src/lib/db/prosodydb.ts 215
-rw-r--r-- src/lib/db/prosodyschema.sql 98
-rw-r--r-- src/lib/db/thaiseed.ts 253
-rw-r--r-- src/lib/db/thaiseedold.ts 301
-rw-r--r-- src/lib/types/phonetics.ts 4
-rw-r--r-- src/lib/utils.ts 5
-rw-r--r-- src/pages/api/tts.ts 81
-rw-r--r-- src/pages/tones.tsx 6
12 files changed, 1109 insertions, 312 deletions
diff --git a/src/actions/tones.ts b/src/actions/tones.ts
index 0f28612..7d9cb34 100644
--- a/src/actions/tones.ts
+++ b/src/actions/tones.ts
@@ -1,6 +1,9 @@
"use server";
+import db from "@/lib/db";
import pdb from "@/lib/db/prosodydb";
+import { MutationOrder } from "@/lib/types/phonetics";
+import { randomFromArray } from "@/lib/utils";
import { WordData } from "@/zoom/logic/types";
// Helper to extract tone from prosody - assuming prosody is an array of objects like [{tone: number}, ...]
@@ -10,6 +13,13 @@ const getTonesFromProsody = (prosody: any): number[] | null => {
}
return null;
};
+
+export async function mutateToneSelection(
+ order: MutationOrder, // Array of tones, one for each syllable. null means any tone.
+) {
+ const res = pdb.fetchWordsByToneSylsWords(order);
+ return res;
+}
export async function fetchWordsByToneAndSyllables(
tones: (string | null)[], // Array of tones, one for each syllable. null means any tone.
) {
diff --git a/src/components/tones/ToneSelectorClient.tsx b/src/components/tones/ToneSelectorClient.tsx
index 0ee9433..8a0327c 100644
--- a/src/components/tones/ToneSelectorClient.tsx
+++ b/src/components/tones/ToneSelectorClient.tsx
@@ -1,52 +1,240 @@
-'use client';
+"use client";
-import { useState, useEffect, useTransition } from 'react';
-import { WordData } from '@/zoom/logic/types';
-import { fetchWordsByToneAndSyllables } from '@/actions/tones';
-import { Button } from '@/components/ui/button';
-import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select';
-import { Card, CardContent, CardDescription, CardFooter, CardHeader, CardTitle } from '@/components/ui/card';
-import { Label } from '@/components/ui/label';
-import { Skeleton } from '@/components/ui/skeleton'; // For loading state
+import { useState, useEffect, useTransition, useRef } from "react";
+import { WordData } from "@/zoom/logic/types";
+import {
+ fetchWordsByToneAndSyllables,
+ mutateToneSelection,
+} from "@/actions/tones";
+import { Button } from "@/components/ui/button";
+import {
+ Select,
+ SelectContent,
+ SelectItem,
+ SelectTrigger,
+ SelectValue,
+} from "@/components/ui/select";
+import {
+ Card,
+ CardContent,
+ CardDescription,
+ CardFooter,
+ CardHeader,
+ CardTitle,
+} from "@/components/ui/card";
+import { Label } from "@/components/ui/label";
+import { Skeleton } from "@/components/ui/skeleton"; // For loading state
+import { MutationOrder, ToneQuery } from "@/lib/types/phonetics";
+import { ProsodySyllable } from "@/lib/types/cards";
+import { ArrowLeft, ArrowRight, Loader2, Volume2 } from "lucide-react";
+function getColorByTone(tone: string): string {
+ if (tone === "mid") return "blue";
+ if (tone === "low") return "green";
+ if (tone === "falling") return "gold";
+ if (tone === "high") return "purple";
+ if (tone === "rising") return "black";
+ else return "black";
+}
// Helper to display tones prominently
-const ProminentToneDisplay = ({ wordData }: { wordData: WordData }) => {
- if (!wordData.prosody || !Array.isArray(wordData.prosody)) {
- return <p className="text-gray-500">No prosody data</p>;
+const ProminentToneDisplay = ({ word }: { word: any }) => {
+ const tones: string[] = word.tone_sequence.split(",");
+ const syls: string[] = word.syl_seq.split(",");
+ const [isPending, startTransition] = useTransition();
+ function mutateWord(idx: number) {
+ console.log("changing", idx);
+ const mutationOrder: MutationOrder = syls.map((s, i) => {
+ if (idx === i) return { change: tones[idx]! };
+ else return { keep: syls[i]! };
+ });
+ console.log("hey hey", word);
+ startTransition(async () => {
+ const words = await mutateToneSelection(mutationOrder);
+ console.log({ words });
+ // setCurrentWord(word);
+ });
+ }
+ // playing audio
+ // const sourceRef = useRef<AudioBufferSourceNode>(null);
+ const audioRef = useRef<HTMLAudioElement>(null);
+
+ async function playAudio() {
+ // setLoading(true);
+ // const audioContext = new (window.AudioContext ||
+ // (window as any).webkitAudioContext)();
+ // const response = await fetch(audioUrl);
+ // const arrayBuffer = await response.arrayBuffer();
+ // const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
+ // if (audioContext && audioBuffer) {
+ // setLoading(false);
+ // const source = audioContext.createBufferSource();
+ // source.buffer = audioBuffer;
+ // source.connect(audioContext.destination);
+ // source.start();
+ // sourceRef.current = source;
+ // }
+ const res = await fetch(`/api/tts?word=${word.spelling}&lang=thai`);
+ const audioBlob = await res.blob();
+ const audioURL = URL.createObjectURL(audioBlob);
+ if (audioRef.current) {
+ audioRef.current.src = audioURL;
+ audioRef.current.play();
+ }
}
return (
<div className="flex flex-col items-center mb-4">
- <h1 className="text-6xl font-bold text-blue-600 mb-2">{wordData.spelling}</h1>
- <div className="flex space-x-4">
- {wordData.prosody.map((p, index) => (
- <div key={index} className="text-center">
- <p className="text-sm text-gray-500">Syllable {index + 1}</p>
- <p className="text-5xl font-semibold text-indigo-500">{p.tone ?? '?'}</p>
- </div>
+ <h1 className="text-6xl font-bold mb-2">
+ {syls.map((syl: string, idx: number) => (
+ <span
+ key={syl + idx}
+ onClick={() => mutateWord(idx)}
+ style={{ color: getColorByTone(tones[idx]!) }}
+ className="cursor-pointer hover:text-gray-700"
+ >
+ {syl}
+ </span>
))}
+ </h1>
+ <div className="mt-4 space-x-4">
+ <p className="ipa text-xl text-gray-700 mt-2">{word.ipa}</p>
+ <button
+ className="p-1 text-blue-500 hover:text-blue-700 transition-colors"
+ title="Pronounce"
+ onClick={playAudio}
+ >
+ <Volume2 size={20} />
+ </button>
+ {isPending && <Loader2 />}
+ <audio ref={audioRef} />
+ <p className="ipa text-xl text-gray-700 mt-2">{word.frequency}</p>
+ <p className="ipa text-xl text-gray-700 mt-2">{word.word_id}</p>
</div>
- {wordData.ipa && wordData.ipa.length > 0 && (
- <p className="text-xl text-gray-700 mt-2">
- {wordData.ipa.map(i => i.ipa).join(' / ')}
- </p>
- )}
</div>
);
};
+export default function ToneSelectorClient({
+ initialData,
+ initialTones,
+}: {
+ initialData: any[];
+ initialTones: ToneQuery;
+}) {
+ const [data, setData] = useState<any[]>(initialData);
+ const [currentIdx, setCurrentIdx] = useState(0);
+ const [isLoading, startTransition] = useTransition();
+ const [selectedTones, setTones] = useState<ToneQuery>(initialTones);
+
+ function goPrev() {
+ setCurrentIdx((i) => (i === 0 ? 0 : i - 1));
+ }
+ function goNext() {
+ setCurrentIdx((i) => (i === data.length - 1 ? data.length - 1 : i + 1));
+ }
+
+ const handleFetch = () => {
+ startTransition(async () => {
+ const words = await fetchWordsByToneAndSyllables(selectedTones);
+ setData(words);
+ });
+ };
-export default function ToneSelectorClient({ initialWord }: { initialWord: WordData | null }) {
- const [currentWord, setCurrentWord] = useState<WordData | null>(initialWord);
- const [syllableCount, setSyllableCount] = useState<number>(initialWord?.syllables || 1);
- const [selectedTones, setSelectedTones] = useState<(number | null)[]>(
- initialWord?.prosody?.map(p => p.tone ?? null) || [null]
+ return (
+ <div className="container mx-auto p-4 max-w-2xl">
+ <ToneForm
+ isLoading={isLoading}
+ handleFetch={handleFetch}
+ selectedTones={selectedTones}
+ setTones={setTones}
+ />
+
+ <Inner
+ isLoading={isLoading}
+ currentWord={data[currentIdx]}
+ goPrev={goPrev}
+ goNext={goNext}
+ />
+ </div>
);
- const [isLoading, startTransition] = useTransition();
+}
+type IProps = {
+ isLoading: boolean;
+ currentWord: any;
+ goPrev: () => void;
+ goNext: () => void;
+};
+function Inner({ isLoading, currentWord, goPrev, goNext }: IProps) {
+ return isLoading ? (
+ <Card>
+ <CardHeader>
+ <Skeleton className="h-12 w-3/4" />
+ </CardHeader>
+ <CardContent className="space-y-4">
+ <Skeleton className="h-8 w-1/2" />
+ <Skeleton className="h-20 w-full" />
+ <Skeleton className="h-6 w-full" />
+ </CardContent>
+ </Card>
+ ) : currentWord ? (
+ <Card>
+ <CardHeader>
+ <CardTitle className="text-center">Current Word</CardTitle>
+ </CardHeader>
+ <CardContent>
+ <ProminentToneDisplay word={currentWord} />
+ {/* You can add more details from WordData here if needed, like definitions */}
+ </CardContent>
+ <CardFooter className="justify-between">
+ <ArrowLeft onClick={goPrev} />
+ <ArrowRight onClick={goNext} />
+ </CardFooter>
+ </Card>
+ ) : (
+ <Card>
+ <CardHeader>
+ <CardTitle className="text-center">No Word Found</CardTitle>
+ </CardHeader>
+ <CardContent>
+ <p className="text-center text-gray-600">
+ Could not find a Thai word matching your criteria. Try different
+ selections.
+ </p>
+ </CardContent>
+ </Card>
+ );
+}
+
+type ToneFormProps = {
+ isLoading: boolean;
+ handleFetch: (tones: ToneQuery) => void;
+ selectedTones: ToneQuery;
+ setTones: React.Dispatch<React.SetStateAction<ToneQuery>>;
+};
+function ToneForm({
+ selectedTones,
+ setTones,
+ isLoading,
+ handleFetch,
+}: ToneFormProps) {
+ const thaiTones = [
+ { value: "mid", label: "1 (Mid)" },
+ { value: "low", label: "2 (Low)" },
+ { value: "falling", label: "3 (Falling)" },
+ { value: "high", label: "4 (High)" },
+ { value: "rising", label: "5 (Rising)" },
+ ];
+ const [syllableCount, setSyllableCount] = useState<number>(2);
+ function decrSyl() {
+ setSyllableCount((s) => (s <= 1 ? 1 : s - 1));
+ }
+ function incrSyl() {
+ setSyllableCount((s) => (s >= 5 ? 5 : s + 1));
+ }
useEffect(() => {
// Adjust selectedTones array length when syllableCount changes
- setSelectedTones(prevTones => {
+ setTones((prevTones) => {
const newTones = Array(syllableCount).fill(null);
for (let i = 0; i < Math.min(prevTones.length, syllableCount); i++) {
newTones[i] = prevTones[i];
@@ -55,79 +243,51 @@ export default function ToneSelectorClient({ initialWord }: { initialWord: WordD
});
}, [syllableCount]);
- const handleFetchWord = () => {
- startTransition(async () => {
- const word = await fetchWordsByToneAndSyllables(syllableCount, selectedTones);
- setCurrentWord(word);
- });
- };
-
const handleSyllableCountChange = (value: string) => {
const count = parseInt(value, 10);
- if (!isNaN(count) && count > 0 && count <= 5) { // Max 5 syllables for simplicity
+ if (!isNaN(count) && count > 0 && count <= 5) {
+ // Max 5 syllables for simplicity
setSyllableCount(count);
}
};
const handleToneChange = (syllableIndex: number, value: string) => {
- const tone = value === 'any' ? null : parseInt(value, 10);
- setSelectedTones(prevTones => {
+ const tone = value === "any" ? null : value;
+ setTones((prevTones) => {
const newTones = [...prevTones];
newTones[syllableIndex] = tone;
return newTones;
});
};
-
- const thaiTones = [
- { value: '1', label: '1 (Mid)' },
- { value: '2', label: '2 (Low)' },
- { value: '3', label: '3 (Falling)' },
- { value: '4', label: '4 (High)' },
- { value: '5', label: '5 (Rising)' },
- ];
return (
- <div className="container mx-auto p-4 max-w-2xl">
- <Card className="mb-6">
- <CardHeader>
- <CardTitle>Thai Tone Explorer</CardTitle>
- <CardDescription>Select syllable count and tones to find Thai words.</CardDescription>
- </CardHeader>
- <CardContent className="space-y-6">
- <div>
- <Label htmlFor="syllable-count" className="text-lg font-medium">Number of Syllables</Label>
- <Select
- value={syllableCount.toString()}
- onValueChange={handleSyllableCountChange}
- >
- <SelectTrigger id="syllable-count" className="w-full md:w-1/2 mt-1">
- <SelectValue placeholder="Select number of syllables" />
- </SelectTrigger>
- <SelectContent>
- {[1, 2, 3, 4, 5].map(num => (
- <SelectItem key={num} value={num.toString()}>
- {num} Syllable{num > 1 ? 's' : ''}
- </SelectItem>
- ))}
- </SelectContent>
- </Select>
- </div>
-
+ <Card className="mb-6">
+ <CardHeader>
+ <CardTitle>Thai Tone Explorer</CardTitle>
+ <CardDescription>
+ Select syllable count and tones to find Thai words.
+ </CardDescription>
+ </CardHeader>
+ <CardContent className="space-y-6">
+ <div className="flex gap-10 justify-center">
{Array.from({ length: syllableCount }).map((_, index) => (
- <div key={index}>
- <Label htmlFor={`tone-select-${index}`} className="text-lg font-medium">
- Tone for Syllable {index + 1}
- </Label>
+ <div key={index} className="w-fit">
<Select
- value={selectedTones[index]?.toString() || 'any'}
+ value={selectedTones[index]?.toString() || "any"}
onValueChange={(value) => handleToneChange(index, value)}
>
- <SelectTrigger id={`tone-select-${index}`} className="w-full md:w-1/2 mt-1">
- <SelectValue placeholder={`Select tone for syllable ${index + 1}`} />
+ <SelectTrigger
+ id={`tone-select-${index}`}
+ className="w-full md:w-full mt-1"
+ >
+ <SelectValue
+ className="w-full"
+ placeholder={`Select tone for syllable ${index + 1}`}
+ />
</SelectTrigger>
- <SelectContent>
+ <SelectContent className="lolol md:w-full bg-white w-full">
<SelectItem value="any">Any Tone</SelectItem>
- {thaiTones.map(tone => (
+ {thaiTones.map((tone) => (
<SelectItem key={tone.value} value={tone.value}>
{tone.label}
</SelectItem>
@@ -136,64 +296,23 @@ export default function ToneSelectorClient({ initialWord }: { initialWord: WordD
</Select>
</div>
))}
- </CardContent>
- <CardFooter>
- <Button onClick={handleFetchWord} disabled={isLoading} className="w-full md:w-auto">
- {isLoading ? 'Searching...' : 'Find Word'}
- </Button>
- </CardFooter>
- </Card>
-
- {isLoading && !currentWord && (
- <Card>
- <CardHeader><Skeleton className="h-12 w-3/4" /></CardHeader>
- <CardContent className="space-y-4">
- <Skeleton className="h-8 w-1/2" />
- <Skeleton className="h-20 w-full" />
- <Skeleton className="h-6 w-full" />
- </CardContent>
- </Card>
- )}
-
- {!isLoading && currentWord && (
- <Card>
- <CardHeader>
- <CardTitle className="text-center">Current Word</CardTitle>
- </CardHeader>
- <CardContent>
- <ProminentToneDisplay wordData={currentWord} />
- {/* You can add more details from WordData here if needed, like definitions */}
- {currentWord.senses && currentWord.senses.length > 0 && (
- <div className="mt-4 pt-4 border-t">
- <h3 className="text-lg font-semibold mb-2">Meanings:</h3>
- {currentWord.senses.map((sense, sIdx) => (
- <div key={sIdx} className="mb-2 p-2 border rounded bg-gray-50">
- <p className="font-medium text-indigo-600">{sense.pos}</p>
- {sense.senses && Array.isArray(sense.senses) && sense.senses.map((subSense, ssIdx) => (
- subSense.glosses && Array.isArray(subSense.glosses) && subSense.glosses.map((gloss: string, gIdx: number) => (
- <p key={`${ssIdx}-${gIdx}`} className="text-sm text-gray-700 ml-2">- {gloss}</p>
- ))
- ))}
- </div>
- ))}
- </div>
- )}
- </CardContent>
- </Card>
- )}
-
- {!isLoading && !currentWord && (
- <Card>
- <CardHeader>
- <CardTitle className="text-center">No Word Found</CardTitle>
- </CardHeader>
- <CardContent>
- <p className="text-center text-gray-600">
- Could not find a Thai word matching your criteria. Try different selections.
- </p>
- </CardContent>
- </Card>
- )}
- </div>
+ </div>
+ </CardContent>
+ <CardFooter className="justify-center gap-18">
+ <Button className="" onClick={decrSyl}>
+ -
+ </Button>
+ <Button
+ onClick={() => handleFetch(selectedTones)}
+ disabled={isLoading}
+ className="w-full md:w-auto"
+ >
+ {isLoading ? "Searching..." : "Fetch"}
+ </Button>
+ <Button className="" onClick={incrSyl}>
+ +
+ </Button>
+ </CardFooter>
+ </Card>
);
}
diff --git a/src/components/ui/select.tsx b/src/components/ui/select.tsx
index b624a5b..23e7161 100644
--- a/src/components/ui/select.tsx
+++ b/src/components/ui/select.tsx
@@ -1,25 +1,25 @@
-import * as React from "react"
-import * as SelectPrimitive from "@radix-ui/react-select"
-import { CheckIcon, ChevronDownIcon, ChevronUpIcon } from "lucide-react"
+import * as React from "react";
+import * as SelectPrimitive from "@radix-ui/react-select";
+import { CheckIcon, ChevronDownIcon, ChevronUpIcon } from "lucide-react";
-import { cn } from "@/lib/utils"
+import { cn } from "@/lib/utils";
function Select({
...props
}: React.ComponentProps<typeof SelectPrimitive.Root>) {
- return <SelectPrimitive.Root data-slot="select" {...props} />
+ return <SelectPrimitive.Root data-slot="select" {...props} />;
}
function SelectGroup({
...props
}: React.ComponentProps<typeof SelectPrimitive.Group>) {
- return <SelectPrimitive.Group data-slot="select-group" {...props} />
+ return <SelectPrimitive.Group data-slot="select-group" {...props} />;
}
function SelectValue({
...props
}: React.ComponentProps<typeof SelectPrimitive.Value>) {
- return <SelectPrimitive.Value data-slot="select-value" {...props} />
+ return <SelectPrimitive.Value data-slot="select-value" {...props} />;
}
function SelectTrigger({
@@ -32,7 +32,7 @@ function SelectTrigger({
data-slot="select-trigger"
className={cn(
"border-input data-[placeholder]:text-muted-foreground aria-invalid:border-destructive ring-ring/10 dark:ring-ring/20 dark:outline-ring/40 outline-ring/50 [&_svg:not([class*='text-'])]:text-muted-foreground flex h-9 w-full items-center justify-between rounded-md border bg-transparent px-3 py-2 text-sm shadow-xs transition-[color,box-shadow] focus-visible:ring-4 focus-visible:outline-1 disabled:cursor-not-allowed disabled:opacity-50 aria-invalid:focus-visible:ring-0 *:data-[slot=select-value]:flex *:data-[slot=select-value]:items-center *:data-[slot=select-value]:gap-2 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 [&>span]:line-clamp-1",
- className
+ className,
)}
{...props}
>
@@ -41,7 +41,7 @@ function SelectTrigger({
<ChevronDownIcon className="size-4 opacity-50" />
</SelectPrimitive.Icon>
</SelectPrimitive.Trigger>
- )
+ );
}
function SelectContent({
@@ -58,7 +58,7 @@ function SelectContent({
"bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 relative z-50 max-h-96 min-w-[8rem] overflow-hidden rounded-md border shadow-md",
position === "popper" &&
"data-[side=bottom]:translate-y-1 data-[side=left]:-translate-x-1 data-[side=right]:translate-x-1 data-[side=top]:-translate-y-1",
- className
+ className,
)}
position={position}
{...props}
@@ -68,7 +68,7 @@ function SelectContent({
className={cn(
"p-1",
position === "popper" &&
- "h-[var(--radix-select-trigger-height)] w-full min-w-[var(--radix-select-trigger-width)] scroll-my-1"
+ "h-[var(--radix-select-trigger-height)] w-full min-w-[var(--radix-select-trigger-width)] scroll-my-1",
)}
>
{children}
@@ -76,7 +76,7 @@ function SelectContent({
<SelectScrollDownButton />
</SelectPrimitive.Content>
</SelectPrimitive.Portal>
- )
+ );
}
function SelectLabel({
@@ -89,7 +89,7 @@ function SelectLabel({
className={cn("px-2 py-1.5 text-sm font-semibold", className)}
{...props}
/>
- )
+ );
}
function SelectItem({
@@ -102,7 +102,7 @@ function SelectItem({
data-slot="select-item"
className={cn(
"focus:bg-accent focus:text-accent-foreground [&_svg:not([class*='text-'])]:text-muted-foreground relative flex w-full cursor-default items-center gap-2 rounded-sm py-1.5 pr-8 pl-2 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 *:[span]:last:flex *:[span]:last:items-center *:[span]:last:gap-2",
- className
+ className,
)}
{...props}
>
@@ -113,7 +113,7 @@ function SelectItem({
</span>
<SelectPrimitive.ItemText>{children}</SelectPrimitive.ItemText>
</SelectPrimitive.Item>
- )
+ );
}
function SelectSeparator({
@@ -126,7 +126,7 @@ function SelectSeparator({
className={cn("bg-border pointer-events-none -mx-1 my-1 h-px", className)}
{...props}
/>
- )
+ );
}
function SelectScrollUpButton({
@@ -138,13 +138,13 @@ function SelectScrollUpButton({
data-slot="select-scroll-up-button"
className={cn(
"flex cursor-default items-center justify-center py-1",
- className
+ className,
)}
{...props}
>
<ChevronUpIcon className="size-4" />
</SelectPrimitive.ScrollUpButton>
- )
+ );
}
function SelectScrollDownButton({
@@ -156,13 +156,13 @@ function SelectScrollDownButton({
data-slot="select-scroll-down-button"
className={cn(
"flex cursor-default items-center justify-center py-1",
- className
+ className,
)}
{...props}
>
<ChevronDownIcon className="size-4" />
</SelectPrimitive.ScrollDownButton>
- )
+ );
}
export {
@@ -176,4 +176,4 @@ export {
SelectSeparator,
SelectTrigger,
SelectValue,
-}
+};
diff --git a/src/lib/calls/nlp.ts b/src/lib/calls/nlp.ts
index f19c976..1e84e93 100644
--- a/src/lib/calls/nlp.ts
+++ b/src/lib/calls/nlp.ts
@@ -1,4 +1,5 @@
import { SyllableRes } from "../types/cards";
+import { randomFromArray } from "../utils";
export type ThaiNLPRes = {
word: string;
diff --git a/src/lib/db/prosodydb.ts b/src/lib/db/prosodydb.ts
index d6da389..7c067d2 100644
--- a/src/lib/db/prosodydb.ts
+++ b/src/lib/db/prosodydb.ts
@@ -1,5 +1,5 @@
import Database from "bun:sqlite";
-import { Phoneme, Tone } from "../types/phonetics";
+import { MutationOrder, Phoneme, Tone } from "../types/phonetics";
import { ProsodyWord, ProsodyWordDB } from "../types/cards";
type Str = string | null;
type ItemType = "word" | "syllable" | "idiom";
@@ -113,6 +113,7 @@ class DatabaseHandler {
w.spelling,
wp.ipa,
w.frequency,
+ GROUP_CONCAT(s.text ORDER BY sw.idx) as syl_seq,
GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
COUNT(sw.syl_id) as syllable_count
FROM words w
@@ -127,17 +128,166 @@ class DatabaseHandler {
spelling,
ipa,
frequency,
+ syl_seq,
tone_sequence,
syllable_count
FROM word_tone_sequences
WHERE tone_sequence LIKE ?
AND syllable_count = ?
- ORDER BY frequency DESC NULLS LAST;
+ ORDER BY frequency ASC NULLS LAST;
`,
);
return query.all(toneString.slice(1), tones.length) as any[];
}
+ // fetchWordsByToneAndSyls(tones: Array<string | null>) {
+ // const toneString = tones.reduce((acc: string, item) => {
+ // if (!item) return `${acc},%`;
+ // else return `${acc},${item}`;
+ // }, "");
+ // console.log({ toneString });
+ // const query = this.db.query(
+ // `
+ // WITH word_tone_sequences AS (
+ // SELECT
+ // w.id as word_id,
+ // w.spelling,
+ // wp.ipa,
+ // w.frequency,
+ // GROUP_CONCAT(s.text ORDER BY sw.idx) as syl_seq,
+ // GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
+ // COUNT(sw.syl_id) as syllable_count
+ // FROM words w
+ // JOIN word_phonetics wp ON w.id = wp.word_id
+ // JOIN syllables_words sw ON w.id = sw.word_id
+ // JOIN syllables s ON sw.syl_id = s.id
+ // JOIN tones t ON s.tone = t.id
+ // GROUP BY w.id, w.spelling, w.lang, w.frequency
+ // )
+ // SELECT
+ // word_id,
+ // spelling,
+ // ipa,
+ // frequency,
+ // syl_seq,
+ // tone_sequence,
+ // syllable_count
+ // FROM word_tone_sequences
+ // WHERE tone_sequence LIKE ?
+ // AND syllable_count = ?
+ // ORDER BY frequency DESC NULLS LAST;
+ // `,
+ // );
+ // return query.all(toneString.slice(1), tones.length) as any[];
+ // }
+ fetchWordsByToneSylsWords(order: MutationOrder) {
+ console.log({ order });
+ type Acc = { tones: string; syls: string };
+ const strings = order.reduce(
+ (acc: Acc, item, idx) => {
+ const startString = idx === 0 ? "" : ",";
+ if ("change" in item)
+ return {
+ tones: `${acc.tones}${startString}${item.change}`,
+ syls: `${acc.syls}${startString}%`,
+ };
+ else
+ return {
+ tones: `${acc.tones}${startString}%`,
+ syls: `${acc.syls}${startString}${item.keep}`,
+ };
+ },
+ { tones: "", syls: "" },
+ );
+ const query = this.db.query(`
+ SELECT
+ w.id as word_id,
+ w.spelling,
+ w.lang,
+ w.frequency,
+ wp.ipa,
+ wp.syllable_sequence,
+ wp.tone_sequence,
+ wp.ipa_sequence,
+ GROUP_CONCAT(s.text ORDER BY sw.idx) as syllable_pattern,
+ GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_pattern
+ FROM words w
+ JOIN syllables_words sw ON w.id = sw.word_id
+ JOIN syllables s ON sw.syl_id = s.id
+ JOIN tones t ON s.tone = t.id
+ JOIN word_phonetics wp ON wp.word_id= w.id
+ WHERE wp.syllable_sequence LIKE ?1
+ AND tone_sequence LIKE ?2
+ AND syllable_count = ?3
+ GROUP BY w.id, w.spelling, w.lang, w.frequency
+ ORDER BY w.frequency ASC NULLS LAST; `);
+ return query.all(strings.syls, strings.tones, order.length) as any[];
+ }
// inserts
+ superAdd(p: {
+ word: string;
+ lang: string;
+ frequency: number | null;
+ wordNotes: Str;
+ phonetics: Array<{
+ ipa: string;
+ syllable_count: number;
+ syllable_sequence: string;
+ tone_sequence: string;
+ ipa_sequence: string;
+ tags: Str;
+ notes: Str;
+ wordRhyme: Str;
+ syllables: Array<{
+ idx: number;
+ stressed: boolean | null;
+ spelling: string;
+ ipa: string;
+ long: boolean;
+ onset: Phoneme;
+ medial: Phoneme;
+ nucleus: Phoneme;
+ coda: Phoneme;
+ rhyme: Phoneme;
+ tone: Tone;
+ notes: Str;
+ }>;
+ }>;
+ }) {
+ const tx = this.db.transaction(() => {
+ const wordId = this.addWord(p.word, p.lang, p.frequency, p.wordNotes);
+ for (const ph of p.phonetics) {
+ this.addPronunciation(
+ wordId,
+ ph.ipa,
+ ph.syllable_count,
+ ph.syllable_sequence,
+ ph.tone_sequence,
+ ph.ipa_sequence,
+ ph.tags,
+ ph.notes,
+ );
+ for (const syl of ph.syllables) {
+ this.addSyllable(
+ wordId,
+ syl.idx,
+ syl.stressed,
+ p.lang,
+ syl.ipa,
+ syl.long,
+ syl.spelling,
+ syl.onset,
+ syl.medial,
+ syl.nucleus,
+ syl.coda,
+ syl.rhyme,
+ syl.tone,
+ syl.notes,
+ );
+ }
+ }
+ });
+ tx();
+ }
addLanguage(code: string, name: string) {
const query = this.db
@@ -147,15 +297,44 @@ class DatabaseHandler {
addPronunciation(
wordId: number | bigint,
ipa: string,
- syllables: number,
+ syllable_count: number,
+ syllable_sequence: string,
+ tone_sequence: string,
+ ipa_sequence: string,
tags: Str,
notes: Str,
) {
+ console.log({
+ wordId,
+ ipa,
+ syllable_count,
+ syllable_sequence,
+ tone_sequence,
+ ipa_sequence,
+ });
const query = this.db
.query(
- `INSERT OR IGNORE INTO word_phonetics(word_id,ipa, syllables, tag, notes) VALUES(?, ?, ?, ?, ?)`,
+ `INSERT OR IGNORE INTO word_phonetics(
+ word_id,
+ ipa,
+ syllable_count,
+ syllable_sequence,
+ tone_sequence,
+ ipa_sequence,
+ tag,
+ notes)
+ VALUES(?, ?, ?, ?, ?, ?, ?, ?)`,
)
- .run(wordId, ipa, syllables, tags, notes);
+ .run(
+ wordId,
+ ipa,
+ syllable_count,
+ syllable_sequence,
+ tone_sequence,
+ ipa_sequence,
+ tags,
+ notes,
+ );
}
addWordRhyme(wordId: number | bigint, ipa: string, lang: string, notes: Str) {
const query = this.db
@@ -212,12 +391,14 @@ class DatabaseHandler {
notes: Str,
) {
const query = this.db.query(
- `INSERT OR IGNORE INTO words(spelling, lang, frequency, notes) VALUES(?, ?, ?, ?)`,
- // `INSERT INTO words(spelling, lang) VALUES(?, ?)`,
+ `INSERT INTO words(spelling, lang, frequency, notes) VALUES(?, ?, ?, ?)
+ ON CONFLICT(spelling, lang) DO UPDATE SET
+ lang = excluded.lang
+ RETURNING rowid
+ `,
);
- const res = query.run(spelling, lang, frequency, notes);
- const wordId = res.lastInsertRowid;
- return wordId;
+ const res = query.get(spelling, lang, frequency, notes) as { id: number };
+ return res.id;
}
addSyllable(
wordId: number | bigint,
@@ -292,9 +473,15 @@ class DatabaseHandler {
.get(tone.letters, lang, tone.name, tone.numbers) as { id: number };
const query = this.db.query(
- `INSERT INTO syllables(lang, ipa, long, text, onset, medial, nucleus, coda, rhyme, tone, notes) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+ `INSERT INTO syllables(
+ lang, ipa, long, text, onset, medial, nucleus, coda, rhyme, tone, notes)
+ VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+ ON CONFLICT(text, ipa, lang) DO UPDATE SET
+ lang = excluded.lang
+ RETURNING rowid
+ `,
);
- const res = query.run(
+ const res = query.get(
lang,
ipa,
long,
@@ -306,8 +493,8 @@ class DatabaseHandler {
rhymeId.id,
toneId.id,
notes,
- );
- const sylId = res.lastInsertRowid;
+ ) as { id: number };
+ const sylId = res.id;
//
const res1 = this.db
.query(
diff --git a/src/lib/db/prosodyschema.sql b/src/lib/db/prosodyschema.sql
index c6a04fa..5554a02 100644
--- a/src/lib/db/prosodyschema.sql
+++ b/src/lib/db/prosodyschema.sql
@@ -150,9 +150,103 @@ CREATE TABLE IF NOT EXISTS word_phonetics(
id INTEGER PRIMARY KEY AUTOINCREMENT,
word_id INTEGER NOT NULL,
ipa TEXT NOT NULL,
- syllables INTEGER NOT NULL,
+ syllable_count INTEGER NOT NULL,
+ syllable_sequence TEXT NOT NULL, -- "家,鄉"
+ tone_sequence TEXT NOT NULL, -- "rising,rising"
+ ipa_sequence TEXT NOT NULL, -- IPA representation
tag TEXT,
notes TEXT,
- CONSTRAINT ipa_unique UNIQUE (ipa, word_id)
+ FOREIGN KEY (word_id) REFERENCES words(id)
);
CREATE INDEX IF NOT EXISTS idx_words_ipa ON word_phonetics(ipa, word_id);
+
+-- -- Query 2: Even simpler with pattern table
+-- -- Pattern [{ change: "rising" }, { change: "falling" }] - any 2-syllable word with rising,falling tones
+-- SELECT
+-- w.spelling,
+-- w.frequency,
+-- wp.syllable_sequence,
+-- wp.tone_sequence
+-- FROM words w
+-- JOIN word_patterns wp ON w.id = wp.word_id
+-- WHERE wp.syllable_count = 2
+-- AND wp.tone_sequence = 'rising,falling'
+-- ORDER BY w.frequency DESC NULLS LAST;
+
+-- -- Query 3: Mixed pattern [{ keep: "家" }, { change: "falling" }, { keep: "人" }]
+-- SELECT DISTINCT
+-- w.spelling,
+-- w.frequency,
+-- wp.syllable_sequence,
+-- wp.tone_sequence
+-- FROM words w
+-- JOIN word_patterns wp ON w.id = wp.word_id
+-- WHERE wp.syllable_count = 3
+-- AND wp.syllable_sequence LIKE '家,%,人' -- Simple pattern matching
+-- AND EXISTS (
+-- SELECT 1 FROM word_syllable_positions wsp
+-- WHERE wsp.word_id = w.id
+-- AND wsp.position = 1
+-- AND wsp.tone_name = 'falling'
+-- )
+-- ORDER BY w.frequency DESC NULLS LAST;
+
+-- -- Query 4: Super fast rhyme finding
+-- -- Find all words that end with same syllable as "家鄉" (end with "鄉")
+-- SELECT
+-- w.spelling,
+-- w.frequency,
+-- wp.syllable_sequence
+-- FROM words w
+-- JOIN word_patterns wp ON w.id = wp.word_id
+-- WHERE wp.syllable_sequence LIKE '%,鄉' -- Ends with 鄉
+-- AND wp.syllable_count >= 2
+-- ORDER BY w.frequency DESC NULLS LAST;
+
+
+
+
+-- SELECT
+-- w.id as word_id,
+-- w.spelling,
+-- w.lang,
+-- w.frequency
+-- FROM words w
+-- JOIN word_phonetics wp ON wp.word_id= w.id
+-- WHERE wp.syllable_sequence LIKE '%,ใจ'
+-- AND wp.tone_sequence LIKE 'rising,%'
+-- AND wp.syllable_count = 2
+-- GROUP BY w.id, w.spelling, w.lang, w.frequency
+-- ORDER BY w.frequency DESC NULLS LAST;
+--
+-- Indexes for fast pattern matching
+CREATE INDEX IF NOT EXISTS idx_word_patterns_syllables ON word_phonetics(syllable_sequence);
+CREATE INDEX IF NOT EXISTS idx_word_patterns_tones ON word_phonetics(tone_sequence);
+CREATE INDEX IF NOT EXISTS idx_word_patterns_count ON word_phonetics(syllable_count);
+CREATE INDEX IF NOT EXISTS idx_word_patterns_mixed ON word_phonetics(syllable_count, syllable_sequence, tone_sequence);
+
+
+CREATE INDEX IF NOT EXISTS idx_syllables_words_word_idx ON syllables_words(word_id, idx);
+CREATE INDEX IF NOT EXISTS idx_syllables_words_idx_word ON syllables_words(idx, word_id);
+CREATE INDEX IF NOT EXISTS idx_syllables_words_syl ON syllables_words(syl_id);
+
+-- 2. Syllables table indexes for text and language lookups
+CREATE INDEX IF NOT EXISTS idx_syllables_text_lang ON syllables(text, lang);
+CREATE INDEX IF NOT EXISTS idx_syllables_lang_text ON syllables(lang, text);
+CREATE INDEX IF NOT EXISTS idx_syllables_tone ON syllables(tone);
+CREATE INDEX IF NOT EXISTS idx_syllables_text_tone ON syllables(text, tone);
+
+-- 3. Tones table indexes
+CREATE INDEX IF NOT EXISTS idx_tones_name_lang ON tones(name, lang);
+CREATE INDEX IF NOT EXISTS idx_tones_nums_lang ON tones(nums, lang);
+CREATE INDEX IF NOT EXISTS idx_tones_lang_name ON tones(lang, name);
+
+-- 4. Words table indexes
+CREATE INDEX IF NOT EXISTS idx_words_lang_freq ON words(lang, frequency DESC);
+CREATE INDEX IF NOT EXISTS idx_words_id_lang ON words(id, lang);
+
+-- 5. Composite indexes for common query patterns
+CREATE INDEX IF NOT EXISTS idx_syllables_compound ON syllables(lang, text, tone);
+CREATE INDEX IF NOT EXISTS idx_syllables_words_compound ON syllables_words(word_id, idx, syl_id);
+
+
diff --git a/src/lib/db/thaiseed.ts b/src/lib/db/thaiseed.ts
index 6c69d9c..32434da 100644
--- a/src/lib/db/thaiseed.ts
+++ b/src/lib/db/thaiseed.ts
@@ -11,7 +11,7 @@ import {
import pdb from "./prosodydb";
import { cleanIpa } from "../utils";
import { handleFile } from "./utils";
-import { Tone } from "../types/phonetics";
+import { Phoneme, Tone } from "../types/phonetics";
import { AsyncRes } from "../types";
async function readDump(lang: string) {
@@ -25,7 +25,7 @@ async function readDump(lang: string) {
// langrows = langrows.slice(10);
for (const langrow of langrows) {
count++;
- // console.log(count);
+ console.log(count);
// if (count <= 10000) continue;
// if (count > 100) break;
const j = JSON.parse(langrow.data);
@@ -68,65 +68,101 @@ async function readDump(lang: string) {
async function handleWord(word: string, j: any): AsyncRes<string> {
// TODO add categories but add a tag to see what classifying scheme we're using
//
- const sounds = j.sounds || [];
- const hasIpa = sounds.find((s: any) => "ipa" in s);
- if (!hasIpa) return { error: "meh no ipa" };
- const freq = await getThaiFreq(word);
- const wordId = pdb.addWord(word, "th", freq, null);
- if (wordId == 478 || word === "และ") {
- console.log("wtf man");
- console.dir(j, { depth: null });
- return { error: "i said wtf" };
- }
+ const frequency = await getThaiFreq(word);
const analyzed = await analyzeTHWord(word);
- for (let snd of sounds)
- if ("ipa" in snd) {
- const res = await handleIpa(wordId, j, snd, analyzed);
- if ("error" in res) return res;
- }
+ const phonetics = await Promise.all(getIpa(j, analyzed));
+
+ pdb.superAdd({ word, lang: "th", frequency, wordNotes: null, phonetics });
return { ok: "" };
}
-async function handleIpa(
- wordId: number | bigint,
- j: any,
- snd: any,
- analyzed: ThaiNLPRes,
-): AsyncRes<string> {
+/**
+ * Starts one `getIpaData` computation for every entry of `j.sounds` that
+ * carries an `ipa` field.
+ *
+ * The previous version folded the list with an `async` reducer. An async
+ * callback always returns a Promise, so the accumulator stopped being an array
+ * after the first entry: the function returned a Promise rather than an array
+ * (which `Promise.all` cannot iterate), and spreading the accumulator on a
+ * second entry rejected with a TypeError. A plain filter + map yields the
+ * intended array of promises.
+ *
+ * @param j - Parsed dictionary entry; only `j.sounds` is read.
+ * @param analyzed - NLP analysis of the word, passed through to `getIpaData`.
+ * @returns One un-awaited promise per IPA-bearing sound; empty when there are none.
+ */
+function getIpa(j: any, analyzed: ThaiNLPRes): Promise<IPAData>[] {
+  const sounds: any[] = j.sounds || [];
+  return sounds
+    .filter((snd) => "ipa" in snd)
+    .map((snd) => getIpaData(snd, analyzed));
+}
+/**
+ * One pronunciation of a word, as assembled by `getIpaData`.
+ * The `*_sequence` fields are comma-joined strings with one item per syllable.
+ */
+type IPAData = {
+ // Cleaned IPA of the whole word: the dictionary IPA, or the NLP-derived IPA when that is empty.
+ ipa: string;
+ // Number of pronounced syllables.
+ syllable_count: number;
+ // Pronounced syllables joined with ",".
+ syllable_sequence: string;
+ // Tone names (one per IPA syllable) joined with ",".
+ tone_sequence: string;
+ // IPA syllables joined with ",".
+ ipa_sequence: string;
+ // JSON-stringified `tags` of the sound entry, or null when absent.
+ tags: string | null;
+ // Currently always set to null by `getIpaData`.
+ notes: string | null;
+ // Currently always set to null by `getIpaData`.
+ wordRhyme: string | null;
+ // Per-syllable breakdown, in pronunciation order.
+ syllables: SylData[];
+};
+async function getIpaData(snd: any, analyzed: ThaiNLPRes): Promise<IPAData> {
const tags = JSON.stringify(snd.tags) || null;
// console.log("handleipa", analyzed.syllables.length);
// console.log(analyzed);
const wikiIpa = cleanIpa(snd.ipa);
const nlpIpa = cleanIpa(analyzed.ipa);
const ipa = wikiIpa || nlpIpa;
- if (j.word === "และ") {
- console.log("wtf!!");
- return { error: "wtf is this" };
- }
+ // if (j.word === "และ") {
+ // console.log("wtf!!");
+ // return { error: "wtf is this" };
+ // }
const wikiIpaSplit = wikiIpa.split(".");
const nlpIpaSplit = nlpIpa.split(".");
if (wikiIpaSplit.length !== nlpIpaSplit.length) {
- // console.log("ipa mismatch");
- // console.log(wikiIpa);
- // console.log(nlpIpa);
+ console.log("ipa mismatch");
+ console.log(wikiIpa);
+ console.log(nlpIpa);
}
if (analyzed.realSyls.length !== wikiIpaSplit.length) {
- // console.log("syllable analysis mismatch", j.word);
- // console.log({ syls: analyzed.syllables, ipa: wikiIpaSplit });
- // console.dir(j, { depth: null });
- return { error: "meh syllable analysis mismatch" };
+ console.log("syllable analysis mismatch", analyzed.word);
+ console.log({ syls: analyzed.syllables, ipa: wikiIpaSplit });
+ throw new Error("syllable mismatch");
}
const writtenSyls = analyzed.syllables;
- const pronouncedSyls = analyzed.realSyls;
+ const pronouncedSyls = analyzed.realSyls.map((s) =>
+ s.replace(/\u{E3A}/u, ""),
+ );
+
+ const tone_sequence = wikiIpaSplit
+ .map((s) => parseTone(s, analyzed.word))
+ .map((t) => t.name)
+ .join(",");
+ const syllable_sequence = pronouncedSyls.join(",");
+ const ipa_sequence = wikiIpaSplit.join(",");
+ const syllables = await Promise.all(
+ getSyllables(writtenSyls, pronouncedSyls, wikiIpaSplit),
+ );
+ return {
+ ipa,
+ syllable_count: pronouncedSyls.length,
+ syllable_sequence,
+ tone_sequence,
+ ipa_sequence,
+ tags,
+ notes: null,
+ wordRhyme: null,
+ syllables,
+ };
+}
+function getSyllables(
+ writtenSyls: string[],
+ pronouncedSyls: string[],
+ ipaSyls: string[],
+) {
let badSyls = false;
if (writtenSyls.length !== pronouncedSyls.length) badSyls = true;
-
- pdb.addPronunciation(wordId, ipa, pronouncedSyls.length, tags, null);
-
+ let syls: Promise<SylData>[] = [];
for (let i = 0; i < pronouncedSyls.length; i++) {
- const pronounced = pronouncedSyls[i]!.replace(/\u{E3A}/u, "");
+ const pronounced = pronouncedSyls[i]!;
const written = writtenSyls[i] || "";
const syllable = badSyls ? pronounced : written;
- const ipa = wikiIpaSplit[i]!;
+ const ipa = ipaSyls[i]!;
// TODO insert both??
const notes = pronounced === written ? null : `Pronounced ${pronounced}`;
if (pronounced !== syllable) {
@@ -134,10 +170,10 @@ async function handleIpa(
console.log(pronounced);
console.log(written);
}
- const res = await handleSyllable(syllable, ipa, wordId, i, notes);
- if ("error" in res) return res;
+ const res = getSyllable(syllable, ipa, i, notes);
+ syls.push(res);
}
- return { ok: "" };
+ return syls;
}
const thaiTones: Record<string, string> = {
"˧": "mid",
@@ -153,8 +189,22 @@ const thaiToneNums: Record<string, number> = {
"˦˥": 45,
"˩˩˦": 214,
};
+const toneRegex = new RegExp(Object.keys(thaiToneNums).join("|"));
+
function parseTone(ipa: string, spelling: string): Tone {
try {
+ const match = ipa.match(toneRegex)!;
+ const m = match[0]!;
+ const name = thaiTones[m]!;
+ const numbers = thaiToneNums[m]!;
+ return { letters: ipa, name, numbers };
+ } catch (e) {
+ console.error("meh wrong tones!!", { s: spelling, ipa });
+ throw new Error("");
+ }
+}
+function parseToneS(ipa: string, spelling: string): Tone {
+ try {
const name = thaiTones[ipa]!;
const numbers = thaiToneNums[ipa]!;
return { letters: ipa, name, numbers };
@@ -164,71 +214,44 @@ function parseTone(ipa: string, spelling: string): Tone {
}
}
-async function handleSyllable(
+/** One analysed syllable of a pronunciation, as produced by `getSyllable`. */
+type SylData = {
+ // 1-based position of the syllable within the word (`getSyllable` stores `idx + 1`).
+ idx: number;
+ // Currently always null when built by `getSyllable`.
+ stressed: boolean | null;
+ // Spelling of the syllable as passed to `getSyllable`.
+ spelling: string;
+ // Full IPA of the syllable as returned by the syllable analyser.
+ ipa: string;
+ // Length flag taken from the syllable analyser.
+ long: boolean;
+ // For the five phoneme parts below, `getSyllable` fills both `spelling` and `ipa` with the IPA value.
+ onset: Phoneme;
+ medial: Phoneme;
+ nucleus: Phoneme;
+ coda: Phoneme;
+ rhyme: Phoneme;
+ // Tone looked up from the analyser's tone string.
+ tone: Tone;
+ // Free-form note, e.g. "Pronounced …" when spelling and pronunciation differ; null otherwise.
+ notes: string | null;
+};
+async function getSyllable(
spelling: string,
ipa: string,
- wordId: number | bigint,
idx: number,
notes: string | null,
-): AsyncRes<string> {
+): Promise<SylData> {
const sorsyl = await sorSyl(spelling, "th", ipa);
- const weird = [
- // "a̯n",
- // "a̯",
- // "a̯p",
- // "a̯w",
- // "a̯j",
- // "a̯ŋ",
- // "a̯k",
- // "a̯t",
- // "a̯m",
- // "a̯ʔ",
- // "ʔ",
- "s",
- "l",
- "f",
- "a̯s",
- "js",
- "t͡ɕʰ",
- "ks",
- "ns",
- "a̯l",
- "a̯f",
- "mk",
- ];
- // const weirder = sorsyl.syls.find((s) => weird.includes(s.coda));
- // if (weirder) {
- // console.log("syllable", spelling);
- // // console.dir(sorsyl, { depth: null });
- // // console.dir(j, { depth: null });
- // }
if (sorsyl.syls.length !== 1) throw new Error("wtf sorsyl!");
const syl = sorsyl.syls[0]!.ipa;
- const tone = parseTone(syl.tone, spelling);
- // TODO add actual ortographic data here not just ipa
- try {
- pdb.addSyllable(
- wordId,
- idx + 1,
- null,
- "th",
- syl.all,
- syl.long,
- spelling,
- { spelling: syl.onset, ipa: syl.onset },
- { spelling: syl.medial, ipa: syl.medial },
- { spelling: syl.nucleus, ipa: syl.nucleus },
- { spelling: syl.coda, ipa: syl.coda },
- { spelling: syl.rhyme, ipa: syl.rhyme },
- tone,
- notes,
- );
- return { ok: "" };
- } catch (e) {
- // console.log("well fuck", syl);
- // console.error(e);
- return { error: `meh ${e}` };
- }
+ const tone = parseToneS(syl.tone, spelling);
+ return {
+ idx: idx + 1,
+ stressed: null,
+ spelling,
+ ipa: syl.all,
+ long: syl.long,
+ onset: { spelling: syl.onset, ipa: syl.onset },
+ medial: { spelling: syl.medial, ipa: syl.medial },
+ nucleus: { spelling: syl.nucleus, ipa: syl.nucleus },
+ coda: { spelling: syl.coda, ipa: syl.coda },
+ rhyme: { spelling: syl.rhyme, ipa: syl.rhyme },
+ tone,
+ notes,
+ };
}
async function handleIdiom(idiom: string): AsyncRes<string> {
pdb.addIdiom(idiom, "th");
@@ -236,33 +259,5 @@ async function handleIdiom(idiom: string): AsyncRes<string> {
// console.log();
return { ok: "" };
}
-// ช้า ๆ
-// งก ๆ
-// หงก ๆ
-
-async function getFrequency() {
- const files = [
- "/home/y/code/prosody/resources/langdata/thai/data/1yin_freq.csv",
- "/home/y/code/prosody/resources/langdata/thai/data/2yin_freq.csv",
- "/home/y/code/prosody/resources/langdata/thai/data/3yin_freq.csv",
- "/home/y/code/prosody/resources/langdata/thai/data/4yin_freq.csv",
- "/home/y/code/prosody/resources/langdata/thai/data/5yin_freq.csv",
- "/home/y/code/prosody/resources/langdata/thai/data/6yin_freq.csv",
- ];
- const freqMap = new Map<number, string>();
- for (const file of files) {
- await handleFile(file, (line, idx) => {
- const [spelling, IPA, tone, length, frequency, ...rest] = line.split(",");
- freqMap.set(Number(frequency!), spelling!);
- });
- }
- const orderedMap = new Map<string, number>();
- const keys = Array.from(freqMap.keys()).sort();
- for (let i = 0; i < keys.length; i++) {
- const val = freqMap.get(keys[i]!)!;
- orderedMap.set(val, i + 1);
- }
- return orderedMap;
-}
readDump("th");
diff --git a/src/lib/db/thaiseedold.ts b/src/lib/db/thaiseedold.ts
new file mode 100644
index 0000000..b9522dd
--- /dev/null
+++ b/src/lib/db/thaiseedold.ts
@@ -0,0 +1,301 @@
+import Database from "bun:sqlite";
+import {
+ analyzeTHWord,
+ deconstructSyllable,
+ segmentateThai,
+ type SorSyl,
+ type ThaiNLPRes,
+ sorSyl,
+ getThaiFreq,
+} from "../calls/nlp";
+import pdb from "./prosodydb";
+import { cleanIpa } from "../utils";
+import { handleFile } from "./utils";
+import { Tone } from "../types/phonetics";
+import { AsyncRes } from "../types";
+
+/**
+ * Seeds the prosody database from a per-language dictionary dump.
+ *
+ * Opens `<lang>.db`, iterates every row of its `langs` table, parses the JSON
+ * in `data`, and stores each entry either as a word (`handleWord`) or, when
+ * the headword contains a space, as an idiom (`handleIdiom`).
+ *
+ * NOTE(review): the language registered and used downstream is hard-coded to
+ * "th" even though the dump file is chosen by `lang` — confirm this is intended.
+ *
+ * @param lang - Language code used to pick the dump file name.
+ */
+async function readDump(lang: string) {
+ await pdb.init();
+ pdb.addLanguage("th", "thai");
+ let count = 0;
+ const langdb = new Database(
+ `/home/y/code/prosody/resources/wiktionary/${lang}.db`,
+ );
+ let langrows: any = langdb.query("SELECT data FROM langs");
+ // langrows = langrows.slice(10);
+ for (const langrow of langrows) {
+ count++;
+ // Progress output: prints the running row number for every row.
+ console.log(count);
+ // if (count <= 10000) continue;
+ // if (count > 100) break;
+ const j = JSON.parse(langrow.data);
+ const word = j.word.trim();
+ if (!word) continue;
+
+ // Headwords containing "ๆ" are always treated as words, even if they contain spaces.
+ if (word.includes("ๆ")) {
+ const res = await handleWord(word, j);
+ if ("error" in res) {
+ // Errors tagged "meh" are expected and only skip the entry.
+ if (res.error.includes("meh")) continue;
+ if (res.error.includes("wtf")) {
+ console.error(res.error);
+ console.error(j.sounds);
+ }
+ // Any other error in this branch stops the whole import.
+ break;
+ }
+ } else {
+ const split = word.split(" ");
+ // Multi-token headwords are stored as idioms.
+ if (split.length > 1) {
+ const res = await handleIdiom(word);
+ if ("error" in res) {
+ console.error(res.error);
+ break;
+ }
+ } else {
+ const res = await handleWord(word, j);
+ if ("error" in res) {
+ if (res.error.includes("meh")) continue;
+ if (res.error.includes("wtf")) {
+ console.error(res.error);
+ console.error(j.sounds);
+ }
+ // Unlike the "ๆ" branch above, errors here do not stop the import.
+ // break;
+ }
+ }
+ }
+ }
+}
+
+// if (wordId == 478 || word === "และ") {
+// // console.log("wtf man");
+// // console.dir(j, { depth: null });
+// // return { error: "i said wtf" };
+// }
+/**
+ * Stores a single word and every pronunciation of it that has IPA.
+ *
+ * Entries without any IPA-bearing sound are rejected before anything is
+ * written. Otherwise the word row is created first, then each IPA sound is
+ * handed to `handleIpa`; the first error returned by `handleIpa` is passed
+ * straight back to the caller.
+ *
+ * @param word - Trimmed headword.
+ * @param j - Parsed dictionary entry; `j.sounds` is read here and `j` is forwarded.
+ * @returns `{ ok: "" }` on success, or an `{ error }` result.
+ */
+async function handleWord(word: string, j: any): AsyncRes<string> {
+  // TODO add categories but add a tag to see what classifying scheme we're using
+  const soundList = j.sounds || [];
+  const anyIpa = soundList.some((entry: any) => "ipa" in entry);
+  if (!anyIpa) return { error: "meh no ipa" };
+
+  const frequency = await getThaiFreq(word);
+  const wordId = pdb.addWord(word, "th", frequency, null);
+  const analysis = await analyzeTHWord(word);
+
+  for (const sound of soundList) {
+    if (!("ipa" in sound)) continue;
+    const outcome = await handleIpa(wordId, j, sound, analysis);
+    if ("error" in outcome) return outcome;
+  }
+  return { ok: "" };
+}
+/**
+ * Stores one pronunciation of a word together with its syllables.
+ *
+ * Splits the cleaned dictionary IPA on "." into syllables, checks that the
+ * count matches the NLP syllable analysis, writes the pronunciation row, then
+ * writes each syllable through `handleSyllable`.
+ *
+ * @param wordId - Id of the already-inserted word row.
+ * @param j - Parsed dictionary entry; only `j.word` is read here.
+ * @param snd - The sound entry; `snd.ipa` and `snd.tags` are read.
+ * @param analyzed - NLP analysis of the word (IPA, written and pronounced syllables).
+ * @returns `{ ok: "" }` on success, or the first `{ error }` encountered.
+ *   `parseTone` may also throw; that is not caught here.
+ */
+async function handleIpa(
+ wordId: number | bigint,
+ j: any,
+ snd: any,
+ analyzed: ThaiNLPRes,
+): AsyncRes<string> {
+ // Prints an empty line on every call (debug spacing).
+ console.log();
+ // JSON.stringify(undefined) is undefined, so a missing `tags` becomes null.
+ const tags = JSON.stringify(snd.tags) || null;
+ // console.log("handleipa", analyzed.syllables.length);
+ // console.log(analyzed);
+ const wikiIpa = cleanIpa(snd.ipa);
+ const nlpIpa = cleanIpa(analyzed.ipa);
+ // Prefer the dictionary IPA; fall back to the NLP IPA only when the former is empty.
+ const ipa = wikiIpa || nlpIpa;
+ // if (j.word === "และ") {
+ // console.log("wtf!!");
+ // return { error: "wtf is this" };
+ // }
+ const wikiIpaSplit = wikiIpa.split(".");
+ const nlpIpaSplit = nlpIpa.split(".");
+ // This comparison currently has no effect: its body is fully commented out.
+ if (wikiIpaSplit.length !== nlpIpaSplit.length) {
+ // console.log("ipa mismatch");
+ // console.log(wikiIpa);
+ // console.log(nlpIpa);
+ }
+ // The per-syllable loop below pairs syllables with IPA segments by index,
+ // so a count mismatch is rejected up front (as a skippable "meh" error).
+ if (analyzed.realSyls.length !== wikiIpaSplit.length) {
+ // console.log("syllable analysis mismatch", j.word);
+ // console.log({ syls: analyzed.syllables, ipa: wikiIpaSplit });
+ // console.dir(j, { depth: null });
+ return { error: "meh syllable analysis mismatch" };
+ }
+ const writtenSyls = analyzed.syllables;
+ // Removes the first U+0E3A code point from each pronounced syllable.
+ const pronouncedSyls = analyzed.realSyls.map((s) =>
+ s.replace(/\u{E3A}/u, ""),
+ );
+ // When written and pronounced syllable counts differ, the written
+ // segmentation cannot be aligned, so pronounced forms are stored instead.
+ let badSyls = false;
+ if (writtenSyls.length !== pronouncedSyls.length) badSyls = true;
+
+ // Comma-joined per-syllable summaries stored on the pronunciation row.
+ const tone_sequence = wikiIpaSplit
+ .map((s) => parseTone(s, j.word))
+ .map((t) => t.name)
+ .join(",");
+ const syl_sequence = pronouncedSyls.join(",");
+ const ipa_sequence = wikiIpaSplit.join(",");
+ pdb.addPronunciation(
+ wordId,
+ ipa,
+ pronouncedSyls.length,
+ syl_sequence,
+ tone_sequence,
+ ipa_sequence,
+ tags,
+ null,
+ );
+
+ for (let i = 0; i < pronouncedSyls.length; i++) {
+ const pronounced = pronouncedSyls[i]!;
+ const written = writtenSyls[i] || "";
+ const syllable = badSyls ? pronounced : written;
+ // Shadows the word-level `ipa` with this syllable's IPA segment.
+ const ipa = wikiIpaSplit[i]!;
+ // TODO insert both??
+ const notes = pronounced === written ? null : `Pronounced ${pronounced}`;
+ if (pronounced !== syllable) {
+ console.log("diff");
+ console.log(pronounced);
+ console.log(written);
+ }
+ const res = await handleSyllable(syllable, ipa, wordId, i, notes);
+ if ("error" in res) return res;
+ }
+ return { ok: "" };
+}
+// Maps an IPA tone-letter sequence to the name of the Thai tone it denotes.
+const thaiTones: Record<string, string> = {
+ "˧": "mid",
+ "˨˩": "low",
+ "˥˩": "falling",
+ "˦˥": "high",
+ "˩˩˦": "rising",
+};
+// Maps the same tone-letter sequences to a numeric tone code.
+// NOTE(review): 41 and 214 do not literally transcribe the letters "˥˩" and
+// "˩˩˦" — confirm these are the intended values.
+const thaiToneNums: Record<string, number> = {
+ "˧": 33,
+ "˨˩": 21,
+ "˥˩": 41,
+ "˦˥": 45,
+ "˩˩˦": 214,
+};
+// Alternation of every known tone-letter sequence; used to find a tone inside a longer IPA string.
+const toneRegex = new RegExp(Object.keys(thaiToneNums).join("|"));
+
+/**
+ * Finds the first known Thai tone-letter sequence inside an IPA string and
+ * returns the matching tone.
+ *
+ * The earlier version detected "no tone found" only indirectly, through the
+ * TypeError raised by indexing a null match, and then rethrew an Error with an
+ * empty message. The check is now explicit and the error says what failed.
+ *
+ * @param ipa - IPA text of one syllable, expected to contain a tone-letter sequence.
+ * @param spelling - Spelling of the word; used only in diagnostics.
+ * @returns The tone; `letters` is the full `ipa` argument, as before.
+ * @throws Error when `ipa` contains none of the known tone-letter sequences.
+ */
+function parseTone(ipa: string, spelling: string): Tone {
+  const found = ipa.match(toneRegex)?.[0];
+  const name = found === undefined ? undefined : thaiTones[found];
+  const numbers = found === undefined ? undefined : thaiToneNums[found];
+  if (name === undefined || numbers === undefined) {
+    console.error("meh wrong tones!!", { s: spelling, ipa });
+    throw new Error(
+      `no known Thai tone in IPA "${ipa}" (word "${spelling}")`,
+    );
+  }
+  // NOTE(review): `letters` receives the whole IPA string rather than just
+  // the matched tone letters; kept unchanged — confirm which one is wanted.
+  return { letters: ipa, name, numbers };
+}
+/**
+ * Looks up a tone whose tone-letter sequence is given exactly (no searching
+ * inside a longer string, unlike `parseTone`).
+ *
+ * NOTE(review): indexing the lookup tables never throws, so for an unknown
+ * `ipa` this returns a Tone whose `name` and `numbers` are undefined and the
+ * catch branch below is never reached — confirm whether unknown tones should
+ * be rejected here.
+ *
+ * @param ipa - Tone-letter sequence, expected to be a key of the tone tables.
+ * @param spelling - Spelling of the syllable; used only in the diagnostic log.
+ */
+function parseToneS(ipa: string, spelling: string): Tone {
+ try {
+ const name = thaiTones[ipa]!;
+ const numbers = thaiToneNums[ipa]!;
+ return { letters: ipa, name, numbers };
+ } catch (e) {
+ console.error("meh wrong tones!!", { s: spelling, ipa });
+ throw new Error("");
+ }
+}
+
+/**
+ * Analyses one syllable and stores it for the given word.
+ *
+ * The syllable is decomposed by `sorSyl`; exactly one analysed syllable is
+ * expected back. Its parts are written with `pdb.addSyllable`, using the IPA
+ * value for both the `spelling` and `ipa` of every phoneme part.
+ *
+ * An unused list of "weird" codas and its commented-out debugging code were
+ * removed; they had no effect on the result.
+ *
+ * @param spelling - Spelling of the syllable to store.
+ * @param ipa - IPA of this syllable.
+ * @param wordId - Id of the word row the syllable belongs to.
+ * @param idx - 0-based position in the word; stored as `idx + 1`.
+ * @param notes - Optional note stored with the syllable.
+ * @returns `{ ok: "" }` on success, or a "meh"-prefixed `{ error }` when the insert throws.
+ * @throws Error when the analyser does not return exactly one syllable.
+ */
+async function handleSyllable(
+  spelling: string,
+  ipa: string,
+  wordId: number | bigint,
+  idx: number,
+  notes: string | null,
+): AsyncRes<string> {
+  const sorsyl = await sorSyl(spelling, "th", ipa);
+  if (sorsyl.syls.length !== 1) throw new Error("wtf sorsyl!");
+  const syl = sorsyl.syls[0]!.ipa;
+  const tone = parseToneS(syl.tone, spelling);
+  // TODO add actual orthographic data here not just ipa
+  try {
+    pdb.addSyllable(
+      wordId,
+      idx + 1,
+      null,
+      "th",
+      syl.all,
+      syl.long,
+      spelling,
+      { spelling: syl.onset, ipa: syl.onset },
+      { spelling: syl.medial, ipa: syl.medial },
+      { spelling: syl.nucleus, ipa: syl.nucleus },
+      { spelling: syl.coda, ipa: syl.coda },
+      { spelling: syl.rhyme, ipa: syl.rhyme },
+      tone,
+      notes,
+    );
+    return { ok: "" };
+  } catch (e) {
+    // The "meh" prefix marks the failure as skippable for the import loop.
+    return { error: `meh ${e}` };
+  }
+}
+/**
+ * Records a multi-word headword as an idiom.
+ *
+ * @param idiom - The idiom text exactly as it appears in the dump.
+ * @returns Always `{ ok: "" }`.
+ */
+async function handleIdiom(idiom: string): AsyncRes<string> {
+  const language = "th";
+  pdb.addIdiom(idiom, language);
+  // TODO later set idiom_words once all words are populated
+  return { ok: "" };
+}
+// ช้า ๆ
+// งก ๆ
+// หงก ๆ
+
+/**
+ * Reads the six `<n>yin_freq.csv` files and ranks every spelling by its
+ * frequency column (fifth comma-separated field).
+ *
+ * Fixes over the previous version:
+ * - frequencies are sorted numerically (a bare `.sort()` compares numbers as
+ *   strings, so e.g. 10 sorted before 9);
+ * - entries are no longer keyed by frequency, so two spellings that share a
+ *   frequency no longer overwrite each other;
+ * - rows whose frequency is not a finite number are skipped.
+ *
+ * Ranks stay in ascending frequency order, as before: rank 1 is the smallest
+ * frequency value. NOTE(review): confirm ascending is the intended direction.
+ *
+ * @returns Map from spelling to its 1-based rank. A spelling that occurs more
+ *   than once keeps the rank of its last occurrence in sorted order.
+ */
+async function getFrequency() {
+  const files = [
+    "/home/y/code/prosody/resources/langdata/thai/data/1yin_freq.csv",
+    "/home/y/code/prosody/resources/langdata/thai/data/2yin_freq.csv",
+    "/home/y/code/prosody/resources/langdata/thai/data/3yin_freq.csv",
+    "/home/y/code/prosody/resources/langdata/thai/data/4yin_freq.csv",
+    "/home/y/code/prosody/resources/langdata/thai/data/5yin_freq.csv",
+    "/home/y/code/prosody/resources/langdata/thai/data/6yin_freq.csv",
+  ];
+  const entries: [string, number][] = [];
+  for (const file of files) {
+    await handleFile(file, (line) => {
+      // Columns: spelling, IPA, tone, length, frequency, ...rest
+      const [spelling, , , , frequency] = line.split(",");
+      const value = Number(frequency);
+      if (spelling === undefined || !Number.isFinite(value)) return;
+      entries.push([spelling, value]);
+    });
+  }
+  // Array.prototype.sort is stable, so ties keep their file order.
+  entries.sort((a, b) => a[1] - b[1]);
+  const orderedMap = new Map<string, number>();
+  entries.forEach(([spelling], i) => {
+    orderedMap.set(spelling, i + 1);
+  });
+  return orderedMap;
+}
+
+// Script entry point: importing or running this module starts the import of the "th" dump.
+// The returned promise is not awaited or caught here.
+readDump("th");
diff --git a/src/lib/types/phonetics.ts b/src/lib/types/phonetics.ts
index 0009e78..f7289c7 100644
--- a/src/lib/types/phonetics.ts
+++ b/src/lib/types/phonetics.ts
@@ -20,3 +20,7 @@ export type Syllable = {
rhyme: Phoneme;
tone: Tone;
};
+
+/**
+ * A list of tone names or nulls.
+ * NOTE(review): presumably one entry per syllable, with null meaning "any tone" — confirm against callers.
+ */
+export type ToneQuery = Array<string | null>;
+/**
+ * A single instruction carrying one string under either `change` or `keep`.
+ * NOTE(review): what the string identifies is not visible here — confirm.
+ */
+export type MutationType = { change: string } | { keep: string };
+/** An ordered list of `MutationType` instructions. */
+export type MutationOrder = MutationType[];
diff --git a/src/lib/utils.ts b/src/lib/utils.ts
index 0674dea..0f0c084 100644
--- a/src/lib/utils.ts
+++ b/src/lib/utils.ts
@@ -63,3 +63,8 @@ export function cleanIpa(ipa: string): string {
const r2 = /[\[\]\/]/g;
return ipa.replace(r1, "").replace(r2, "");
}
+
+/**
+ * Returns one element of `arr`, chosen with a single `Math.random()` draw.
+ *
+ * For an empty array the index is 0 and the runtime value is `undefined`,
+ * even though the return type is `T`; callers that may pass an empty array
+ * must check the result themselves.
+ *
+ * @param arr - Candidates to choose from.
+ * @returns The element at the randomly chosen index.
+ */
+export function randomFromArray<T>(arr: T[]): T {
+  const pick = Math.floor(arr.length * Math.random());
+  return arr[pick]!;
+}
diff --git a/src/pages/api/tts.ts b/src/pages/api/tts.ts
new file mode 100644
index 0000000..bd9a697
--- /dev/null
+++ b/src/pages/api/tts.ts
@@ -0,0 +1,81 @@
+// import db from "../../lib/db";
+import { randomFromArray } from "@/lib/utils";
+import { z } from "zod";
+
+/**
+ * GET handler: synthesises speech for `?word=…&lang=…` and returns the
+ * response produced by `tts` unchanged.
+ *
+ * Changes over the previous version:
+ * - `word` and `lang` were non-null-asserted, so a request without them sent
+ *   null values to the speech service; such requests now get a 400;
+ * - failures are logged before the generic 500 is returned instead of being
+ *   silently discarded.
+ *
+ * @param request - Incoming request; only its URL query string is read.
+ * @returns The speech service response, a 400 for missing parameters, or a
+ *   500 JSON body when `tts` throws.
+ */
+export const GET = async (request: Request): Promise<Response> => {
+  const params = new URL(request.url).searchParams;
+  const word = params.get("word");
+  const lang = params.get("lang");
+  if (!word || !lang) {
+    return Response.json(
+      { message: "Missing required query parameters: word, lang" },
+      { status: 400 },
+    );
+  }
+
+  try {
+    // `await` keeps a rejected promise inside this try block.
+    return await tts(word, lang);
+  } catch (error) {
+    console.error("tts request failed", error);
+    return Response.json({ message: "Failure" }, { status: 500 });
+  }
+};
+// Voices available for synthesis. Each entry is a pair:
+//   [0] the voice manifest location, sent as `voice`;
+//   [1] the engine name, sent as `voice_engine`.
+// `tts` picks one entry at random per request.
+const thaiVoices = [
+ [
+ "s3://voice-cloning-zero-shot/4353be7d-8cd3-4452-9e0b-bc4078c240d7/original/manifest.json",
+ "PlayDialog",
+ ],
+ [
+ "s3://voice-cloning-zero-shot/4c495e1a-1352-4187-99eb-6e5dc7d55059/original/manifest.json",
+ "PlayDialog",
+ ],
+ [
+ "s3://voice-cloning-zero-shot/59933136-5aca-4f42-827f-d354649c62a2/original/manifest.json",
+ "PlayDialog",
+ ],
+ [
+ "s3://voice-cloning-zero-shot/ba9eb1c9-8897-4c41-9c79-f2cb428544a8/original/manifest.json",
+ "PlayDialog",
+ ],
+ [
+ "s3://voice-cloning-zero-shot/bb585812-1c85-4a16-90f7-09c24b6c8186/original/manifest.json",
+ "PlayDialog",
+ ],
+ [
+ "s3://voice-cloning-zero-shot/e1357526-c162-441b-afb9-285d3d21b9b4/original/manifest.json",
+ "PlayDialog",
+ ],
+ [
+ "s3://voice-cloning-zero-shot/edd305a3-9cd2-4dd6-873f-9efc1f73aefc/original/manifest.json",
+ "PlayDialog",
+ ],
+ [
+ "s3://voice-cloning-zero-shot/f80c355d-1075-4d2b-a53d-bb26aa4d1453/original/manifest.json",
+ "PlayDialog",
+ ],
+];
+
+/**
+ * Requests streamed speech for `text` from the PlayHT streaming endpoint,
+ * using a randomly chosen entry of `thaiVoices`.
+ *
+ * The credentials were previously non-null-asserted; when unset they were
+ * sent as the literal string "undefined". They are now checked first so a
+ * misconfigured environment fails with a clear error.
+ *
+ * NOTE(review): the voice always comes from `thaiVoices`, whatever `language`
+ * is — confirm that only Thai is expected here.
+ *
+ * @param text - Text to synthesise.
+ * @param language - Language value forwarded in the request body.
+ * @returns The raw `fetch` response; its status is not inspected here.
+ * @throws Error when `PLAYHT_USER_ID` or `PLAYHT_API_KEY` is not set.
+ */
+async function tts(text: string, language: string) {
+  const USER_ID = Bun.env.PLAYHT_USER_ID;
+  const API_KEY = Bun.env.PLAYHT_API_KEY;
+  if (!USER_ID || !API_KEY) {
+    throw new Error("PLAYHT_USER_ID and PLAYHT_API_KEY must be set");
+  }
+  const [voice, voice_engine] = randomFromArray(thaiVoices);
+  console.log("tts", text);
+  const url = "https://api.play.ht/api/v2/tts/stream";
+  const options = {
+    method: "POST",
+    headers: {
+      accept: "*/*",
+      "content-type": "application/json",
+      "X-USER-ID": USER_ID,
+      AUTHORIZATION: API_KEY,
+    },
+    body: JSON.stringify({
+      text,
+      voice,
+      // wav, mp3, ogg, flac, mulaw
+      output_format: "wav",
+      quality: "high",
+      voice_engine,
+      language,
+      temperature: 0.7,
+    }),
+  };
+
+  const res = await fetch(url, options);
+  return res;
+}
diff --git a/src/pages/tones.tsx b/src/pages/tones.tsx
index 96ed56c..732ebd1 100644
--- a/src/pages/tones.tsx
+++ b/src/pages/tones.tsx
@@ -12,9 +12,9 @@ export const getConfig = async () => {
// Function to fetch the initial word on the server
async function InitialWordLoader() {
// Fetch a random 1-syllable Thai word with any tone initially
- const initialWord = await fetchWordsByToneAndSyllables(["rising", "mid"]);
- console.log({ initialWord });
- return <ToneSelectorClient initialWord={initialWord} />;
+ const tones = ["falling", "falling"];
+ const initialWords = await fetchWordsByToneAndSyllables(tones);
+ return <ToneSelectorClient initialData={initialWords} initialTones={tones} />;
}
// Loading fallback component