summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- bun.lock 7
-rw-r--r-- package.json 1
-rw-r--r-- src/actions/lang.ts 10
-rw-r--r-- src/actions/tones.ts 87
-rw-r--r-- src/components/Flashcard/ServerCard.tsx 43
-rw-r--r-- src/components/Flashcard/Syllable.tsx 44
-rw-r--r-- src/components/Flashcard/SyllableModal.tsx 110
-rw-r--r-- src/components/Flashcard/SyllableSpan.tsx 45
-rw-r--r-- src/components/lang/ThaiPhonology.tsx 250
-rw-r--r-- src/components/tones/ToneSelectorClient.tsx 199
-rw-r--r-- src/components/ui/skeleton.tsx 13
-rw-r--r-- src/lib/calls/nlp.ts 54
-rw-r--r-- src/lib/db/codes.js 203
-rw-r--r-- src/lib/db/index.ts 8
-rw-r--r-- src/lib/db/prosodydb.ts 238
-rw-r--r-- src/lib/db/prosodyschema.sql 178
-rw-r--r-- src/lib/db/seed.ts 212
-rw-r--r-- src/lib/db/senseschema.sql 54
-rw-r--r-- src/lib/lang/utils.ts 28
-rw-r--r-- src/lib/types/cards.ts 13
-rw-r--r-- src/pages.gen.ts 3
-rw-r--r-- src/pages/api/nlp.ts 38
-rw-r--r-- src/pages/lesson/[slug].tsx 25
-rw-r--r-- src/pages/tones.tsx 62
-rw-r--r-- src/zoom/ServerSyllable.tsx 84
-rw-r--r-- src/zoom/ServerWord.tsx 2
-rw-r--r-- src/zoom/logic/types.ts 12
27 files changed, 1954 insertions, 69 deletions
diff --git a/bun.lock b/bun.lock
index 32c90fc..a7b9f21 100644
--- a/bun.lock
+++ b/bun.lock
@@ -22,6 +22,7 @@
"react-hook-form": "^7.56.3",
"react-server-dom-webpack": "19.1.0",
"sonner": "^2.0.3",
+ "sorsyl": "file:../../libs/sorsyl",
"sortug-ai": "file:../../libs/models",
"tailwind-merge": "^3.2.0",
"tw-animate-css": "^1.2.9",
@@ -763,6 +764,8 @@
"sonner": ["sonner@2.0.3", "", { "peerDependencies": { "react": "^18.0.0 || ^19.0.0 || ^19.0.0-rc", "react-dom": "^18.0.0 || ^19.0.0 || ^19.0.0-rc" } }, "sha512-njQ4Hht92m0sMqqHVDL32V2Oun9W1+PHO9NDv9FHfJjT3JT22IG4Jpo3FPQy+mouRKCXFWO+r67v6MrHX2zeIA=="],
+ "sorsyl": ["sorsyl@file:../../libs/sorsyl", { "devDependencies": { "@types/bun": "latest" }, "peerDependencies": { "typescript": "^5" } }],
+
"sortug-ai": ["models@file:../../libs/models", { "dependencies": { "@anthropic-ai/sdk": "^0.36.3", "@google/genai": "^0.13.0", "@google/generative-ai": "^0.21.0", "bcp-47": "^2.1.0", "franc-all": "^7.2.0", "groq-sdk": "^0.15.0", "iso-639-3": "^3.0.1", "openai": "^4.84.0", "playht": "^0.16.0", "replicate": "^1.0.1", "sortug": "file://home/y/code/npm/sortug" }, "devDependencies": { "@types/bun": "^1.2.12" }, "peerDependencies": { "typescript": "^5.7.3" } }],
"source-map": ["source-map@0.6.1", "", {}, "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g=="],
@@ -875,6 +878,8 @@
"openai/@types/node": ["@types/node@18.19.100", "", { "dependencies": { "undici-types": "~5.26.4" } }, "sha512-ojmMP8SZBKprc3qGrGk8Ujpo80AXkrP7G2tOT4VWr5jlr5DHjsJF+emXJz+Wm0glmy4Js62oKMdZZ6B9Y+tEcA=="],
+ "sorsyl/@types/bun": ["@types/bun@1.2.14", "", { "dependencies": { "bun-types": "1.2.14" } }, "sha512-VsFZKs8oKHzI7zwvECiAJ5oSorWndIWEVhfbYqZd4HI/45kzW7PN2Rr5biAzvGvRuNmYLSANY+H59ubHq8xw7Q=="],
+
"sortug-ai/sortug": ["sortug@file:../../../npm/sortug", {}],
"@anthropic-ai/sdk/@types/node/undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="],
@@ -884,5 +889,7 @@
"groq-sdk/@types/node/undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="],
"openai/@types/node/undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="],
+
+ "sorsyl/@types/bun/bun-types": ["bun-types@1.2.14", "", { "dependencies": { "@types/node": "*" } }, "sha512-Kuh4Ub28ucMRWeiUUWMHsT9Wcbr4H3kLIO72RZZElSDxSu7vpetRvxIUDUaW6QtaIeixIpm7OXtNnZPf82EzwA=="],
}
}
diff --git a/package.json b/package.json
index 8b39a03..b90bf0a 100644
--- a/package.json
+++ b/package.json
@@ -28,6 +28,7 @@
"react-server-dom-webpack": "19.1.0",
"sonner": "^2.0.3",
"sortug-ai": "file:../../libs/models",
+ "sorsyl": "file:../../libs/sorsyl",
"tailwind-merge": "^3.2.0",
"tw-animate-css": "^1.2.9",
"waku": "0.22.4",
diff --git a/src/actions/lang.ts b/src/actions/lang.ts
index b38b542..e68fbfe 100644
--- a/src/actions/lang.ts
+++ b/src/actions/lang.ts
@@ -2,7 +2,8 @@
import { AsyncRes } from "@/lib/types";
import { NLP } from "sortug-ai";
import ServerWord from "@/zoom/ServerWord";
-import { analyzeTHWord, segmentateThai } from "@/pages/api/nlp";
+import { analyzeTHWord, segmentateThai } from "@/lib/calls/nlp";
+import SyllableModal from "@/components/Flashcard/SyllableModal";
// import db from "../lib/db";
export async function wordAction(
@@ -12,6 +13,13 @@ export async function wordAction(
console.log("");
return ServerWord({ word: text, lang });
}
+/**
+ * Server action that renders the syllable detail modal for one syllable.
+ *
+ * @param text - Spelling of the syllable to look up.
+ * @param lang - Language identifier forwarded to the modal.
+ * @returns The React node produced by `SyllableModal`.
+ */
+export async function syllableAction(
+  text: string,
+  lang: string,
+): Promise<React.ReactNode> {
+  // The former `console.log("")` only wrote an empty line to the server log on
+  // every call, so it has been removed.
+  return SyllableModal({ text, lang });
+}
export async function thaiAnalysis(text: string) {
const res = await segmentateThai(text);
diff --git a/src/actions/tones.ts b/src/actions/tones.ts
new file mode 100644
index 0000000..8089453
--- /dev/null
+++ b/src/actions/tones.ts
@@ -0,0 +1,87 @@
+'use server';
+
+import db from '@/lib/db';
+import { WordData } from '@/zoom/logic/types';
+
+/**
+ * Collects the numeric `tone` of every entry in a prosody array.
+ *
+ * Prosody is assumed to look like `[{ tone: number }, ...]`. Entries that are
+ * null, not objects, or whose `tone` is not a number are skipped instead of
+ * raising a TypeError.
+ *
+ * @param prosody - Parsed prosody value of unknown shape.
+ * @returns The tones in order, or `null` when `prosody` is not a non-empty array.
+ */
+const getTonesFromProsody = (prosody: unknown): number[] | null => {
+  if (!Array.isArray(prosody) || prosody.length === 0) {
+    return null;
+  }
+  const tones: number[] = [];
+  for (const entry of prosody) {
+    // Optional chaining keeps a null/undefined element from throwing.
+    const tone: unknown = entry?.tone;
+    if (typeof tone === 'number') {
+      tones.push(tone);
+    }
+  }
+  return tones;
+};
+
+/** Parses `value` as JSON when it is a string; returns it untouched otherwise. */
+const parseIfString = (value: unknown): any =>
+  typeof value === 'string' ? JSON.parse(value) : value;
+
+/**
+ * Fetches one random Thai word with the given syllable count and tone pattern.
+ *
+ * The query only considers rows of type 'word' whose `prosody` column is valid
+ * JSON with exactly `syllableCount` elements, and compares each requested tone
+ * against `$[i].tone` of that array.
+ *
+ * @param syllableCount - Number of syllables the word must have.
+ * @param tones - One entry per syllable; `null` means any tone for that syllable.
+ * @returns The matching word, or `null` when the arguments are invalid, nothing
+ *   matches, or the query/row parsing fails (the failure is logged).
+ */
+export async function fetchWordsByToneAndSyllables(
+  syllableCount: number,
+  tones: (number | null)[] // Array of tones, one for each syllable. null means any tone.
+): Promise<WordData | null> {
+  // Server actions can be invoked with arbitrary client-supplied arguments, so
+  // verify the runtime shape before reading `tones.length` or building SQL.
+  if (!Number.isInteger(syllableCount) || !Array.isArray(tones)) {
+    console.error("Invalid syllable count or tones argument");
+    return null;
+  }
+  if (syllableCount !== tones.length) {
+    console.error("Syllable count and tones array length mismatch");
+    return null;
+  }
+
+  // lang, syllables (for WHERE), syllables (for json_array_length)
+  const queryParams: (string | number)[] = ['th', syllableCount, syllableCount];
+
+  const toneClauses: string[] = [];
+  tones.forEach((tone, index) => {
+    // Anything that is not a finite number (null, NaN, stray strings) means "any tone".
+    if (typeof tone === 'number' && Number.isFinite(tone)) {
+      // `index` comes from the array position, never from user text, so it is
+      // safe to interpolate into the JSON path; the tone itself is bound.
+      toneClauses.push(`json_extract(prosody, '$[${index}].tone') = ?`);
+      queryParams.push(tone);
+    }
+  });
+  const toneConditions =
+    toneClauses.length > 0 ? `AND ${toneClauses.join(' AND ')}` : "";
+
+  const queryString = `
+    SELECT id, spelling, prosody, syllables, lang, type, frequency, confidence, ipa, senses_array
+    FROM expressions
+    WHERE lang = ?
+      AND syllables = ?
+      AND type = 'word'
+      AND json_valid(prosody)
+      AND json_array_length(prosody) = ? -- Ensures prosody array has correct number of elements
+      ${toneConditions}
+    ORDER BY RANDOM() -- Get a random word matching criteria
+    LIMIT 1
+  `;
+
+  try {
+    const row = db.db.query(queryString).get(...queryParams) as any;
+    if (!row) return null;
+
+    // Simplified mapping onto WordData; extend if callers need full sense data.
+    const word: WordData = {
+      id: row.id,
+      spelling: row.spelling,
+      prosody: JSON.parse(row.prosody),
+      syllables: row.syllables,
+      lang: row.lang,
+      type: row.type,
+      frequency: row.frequency,
+      confidence: row.confidence,
+      ipa: row.ipa ? JSON.parse(row.ipa) : [],
+      senses: row.senses_array
+        ? JSON.parse(row.senses_array).map((s: any) => ({
+            pos: s.pos,
+            // Nested fields may arrive either as JSON text or already parsed.
+            senses: parseIfString(s.senses),
+            forms: parseIfString(s.forms),
+            etymology: s.etymology,
+            related: parseIfString(s.related),
+          }))
+        : [],
+    };
+    return word;
+  } catch (error) {
+    console.error("Error fetching word by tone and syllables:", error);
+    console.error("Query:", queryString);
+    console.error("Params:", queryParams);
+    return null;
+  }
+}
diff --git a/src/components/Flashcard/ServerCard.tsx b/src/components/Flashcard/ServerCard.tsx
index d377dce..df37ba8 100644
--- a/src/components/Flashcard/ServerCard.tsx
+++ b/src/components/Flashcard/ServerCard.tsx
@@ -23,13 +23,21 @@ import {
WordData,
} from "@/zoom/logic/types";
import { CardResponse } from "@/lib/types/cards";
-import { thaiData } from "@/pages/api/nlp";
+import { thaiData } from "@/lib/calls/nlp";
import { getRandomHexColor } from "@/lib/utils";
import { BookmarkIconito } from "./BookmarkButton";
+import SyllableSpan from "./SyllableSpan";
+import SyllableCard from "./Syllable";
-export async function CardFront({ data }: { data: CardResponse }) {
+export async function CardFront({
+ data,
+ needFetch = true,
+}: {
+ data: CardResponse;
+ needFetch?: boolean;
+}) {
// const extraData = data.expression.lang
- const extraData = await thaiData(data.expression.spelling);
+ const extraData = needFetch ? await thaiData(data.expression.spelling) : [];
// console.log({ extraData });
return (
@@ -42,15 +50,26 @@ export async function CardFront({ data }: { data: CardResponse }) {
}
>
<p className="text-5xl cursor-pointer font-semibold text-slate-800 dark:text-slate-100 text-center">
- {extraData[0]?.syllables.map((syl, i) => (
- <span
- key={syl + i}
- style={{ color: getRandomHexColor() }}
- className="m-1 hover:text-6xl"
- >
- {syl}
- </span>
- ))}
+ {needFetch ? (
+ extraData[0]?.syllables.map((syl, i) => (
+ // <span
+ // key={syl + i}
+ // style={{ color: getRandomHexColor() }}
+ // className="m-1 hover:text-6xl"
+ // >
+ // {syl}
+ // </span>
+ <SyllableSpan
+ key={syl + i}
+ spelling={syl}
+ lang={data.expression.lang}
+ />
+ ))
+ ) : (
+ <p className="text-5xl cursor-pointer hover:text-blue-700 font-semibold text-slate-800 dark:text-slate-100 text-center">
+ {data.expression.spelling}
+ </p>
+ )}
</p>
</Suspense>
<IpaDisplay ipaEntries={data.expression.ipa} />
diff --git a/src/components/Flashcard/Syllable.tsx b/src/components/Flashcard/Syllable.tsx
new file mode 100644
index 0000000..e470a4b
--- /dev/null
+++ b/src/components/Flashcard/Syllable.tsx
@@ -0,0 +1,44 @@
+"use client";
+
+import { syllableAction, thaiAnalysis } from "@/actions/lang";
+import { CardResponse } from "@/lib/types/cards";
+import { ReactNode, useState, useTransition } from "react";
+import { Spinner } from "../ui/spinner";
+import Modal from "@/components/Modal";
+import { getRandomHexColor } from "@/lib/utils";
+
+/**
+ * Card face showing an expression's spelling with its IPA entries underneath.
+ */
+const SyllableCard: React.FC<{ data: CardResponse }> = (props) => {
+  const { spelling, ipa } = props.data.expression;
+  return (
+    <div className="absolute w-full h-full bg-white dark:bg-slate-800 rounded-xl backface-hidden flex flex-col justify-center gap-8 items-center p-6">
+      <p className="text-5xl cursor-pointer hover:text-blue-700 font-semibold text-slate-800 dark:text-slate-100 text-center">
+        {spelling}
+      </p>
+      <IpaDisplay ipaEntries={ipa} />
+    </div>
+  );
+};
+
+export default SyllableCard;
+
+/**
+ * Renders each IPA entry, followed by its tags in parentheses when it has any.
+ * Renders nothing when the list is missing or empty.
+ */
+const IpaDisplay = ({
+  ipaEntries,
+}: {
+  ipaEntries: Array<{ ipa: string; tags?: string[] }>;
+}) => {
+  if (!ipaEntries?.length) return null;
+
+  const renderEntry = (
+    entry: { ipa: string; tags?: string[] },
+    index: number,
+  ) => {
+    const tags = entry.tags || [];
+    return (
+      <span key={index} className="text-lg text-blue-600 font-serif">
+        {entry.ipa}{" "}
+        {tags.length > 0 && (
+          <span className="text-xs text-gray-500">({tags.join(", ")})</span>
+        )}
+      </span>
+    );
+  };
+
+  return (
+    <div className="flex items-center space-x-2 flex-wrap">
+      {ipaEntries.map(renderEntry)}
+    </div>
+  );
+};
diff --git a/src/components/Flashcard/SyllableModal.tsx b/src/components/Flashcard/SyllableModal.tsx
new file mode 100644
index 0000000..a00fd10
--- /dev/null
+++ b/src/components/Flashcard/SyllableModal.tsx
@@ -0,0 +1,110 @@
+// This is a Server Component
+import React from "react";
+import db from "@/lib/db";
+import {
+ Card,
+ CardHeader,
+ CardDescription,
+ CardContent,
+ CardFooter,
+ CardTitle,
+} from "@/components/ui/card";
+import { NLP } from "sortug-ai";
+import {
+ BookOpen,
+ Volume2,
+ Link as LinkIcon,
+ ChevronDown,
+ ChevronUp,
+ Search,
+ Info,
+ MessageSquareQuote,
+ Tags,
+ ListTree,
+ Lightbulb,
+} from "lucide-react";
+import {
+ Example,
+ SubSense,
+ RelatedEntry,
+ Sense,
+ WordData,
+} from "@/zoom/logic/types";
+import { isTonal } from "@/lib/lang/utils";
+
+type WordProps = { text: string; lang: string };
+
+/**
+ * Server component rendering the detail card for one syllable/word.
+ *
+ * Looks the spelling up with `db.fetchWordBySpelling`; when nothing is found a
+ * short placeholder paragraph is rendered instead of the card.
+ *
+ * @param props.text - Spelling to look up and display as the card title.
+ * @param props.lang - Language identifier passed to the lookup.
+ */
+export default async function SyllableModal(props: WordProps) {
+  const { text, lang } = props;
+  const data = db.fetchWordBySpelling(text, lang);
+
+  if (!data) return <p>oh...</p>;
+  // The debug `console.log(data.senses[0])` was removed: it logged on every
+  // request and would throw for a row without a `senses` array.
+  return (
+    <Card className="overflow-y-scroll max-h-[80vh]">
+      <CardHeader>
+        <CardTitle>
+          <h1 className="text-5xl">{text}</h1>
+        </CardTitle>
+        <CardDescription>
+          <IpaDisplay ipaEntries={data.ipa} />
+        </CardDescription>
+      </CardHeader>
+      <CardContent>
+        {/* NOTE(review): isTonal receives the spelling, not `lang` — confirm this is intended. */}
+        {isTonal(text) ? <Tones {...props} /> : <NotTones {...props} />}
+      </CardContent>
+      <CardFooter></CardFooter>
+    </Card>
+  );
+}
+
+/**
+ * Helper component for IPA display.
+ *
+ * Renders each IPA entry (with its tags in parentheses when present) followed
+ * by a "Pronounce" icon button. Renders nothing when there are no entries.
+ * The button has no handler yet; pronunciation is not implemented here.
+ */
+const IpaDisplay = ({
+  ipaEntries,
+}: {
+  ipaEntries: Array<{ ipa: string; tags?: string[] }>;
+}) => {
+  if (!ipaEntries || ipaEntries.length === 0) return null;
+  return (
+    <div className="flex items-center space-x-2 flex-wrap">
+      {ipaEntries.map((entry, index) => {
+        const tags = entry.tags ? entry.tags : [];
+        return (
+          <span key={index} className="text-lg text-blue-600 font-serif">
+            {entry.ipa}{" "}
+            {tags.length > 0 && (
+              <span className="text-xs text-gray-500">({tags.join(", ")})</span>
+            )}
+          </span>
+        );
+      })}
+      <button
+        // Explicit type so the button never acts as a form submit if this
+        // component ends up inside a <form>.
+        type="button"
+        className="p-1 text-blue-500 hover:text-blue-700 transition-colors"
+        title="Pronounce"
+        // Icon-only control: give assistive technology an explicit name.
+        aria-label="Pronounce"
+        // onClick={() => {
+        //   /* Pronunciation logic would be client-side or a server roundtrip for audio file. */ alert(
+        //     "Pronunciation feature not implemented for server component.",
+        //   );
+        // }}
+      >
+        <Volume2 size={20} />
+      </button>
+    </div>
+  );
+};
+
+/** Placeholder for the tonal-language section of the card; currently renders an empty div. */
+function Tones({ text, lang }: WordProps) {
+  return <div />;
+}
+/** Placeholder for the non-tonal section of the card; currently renders an empty div. */
+function NotTones({ text, lang }: WordProps) {
+  return <div />;
+}
diff --git a/src/components/Flashcard/SyllableSpan.tsx b/src/components/Flashcard/SyllableSpan.tsx
new file mode 100644
index 0000000..445895e
--- /dev/null
+++ b/src/components/Flashcard/SyllableSpan.tsx
@@ -0,0 +1,45 @@
+"use client";
+
+import { syllableAction, thaiAnalysis } from "@/actions/lang";
+import { CardResponse } from "@/lib/types/cards";
+import { ReactNode, useState, useTransition } from "react";
+import { Spinner } from "../ui/spinner";
+import Modal from "@/components/Modal";
+import { getRandomHexColor } from "@/lib/utils";
+
+/**
+ * Clickable, randomly coloured syllable. Clicking it calls the `syllableAction`
+ * server action and shows the returned node inside a modal.
+ *
+ * @param spelling - Syllable text to display and look up.
+ * @param lang - Language identifier forwarded to the server action.
+ */
+const SyllableSpan: React.FC<{ spelling: string; lang: string }> = ({
+  spelling,
+  lang,
+}) => {
+  const [modalContent, setModalContent] = useState<ReactNode | null>(null);
+  // Choose the colour once per mounted span. Calling getRandomHexColor() during
+  // render produced a new colour on every re-render, so the syllable changed
+  // colour each time the modal opened or closed.
+  const [color] = useState(() => getRandomHexColor());
+  const [, startTransition] = useTransition();
+
+  const closeModal = () => setModalContent(null);
+
+  const handleClick = (e: React.MouseEvent) => {
+    // Keep the click from reaching ancestor click handlers.
+    e.stopPropagation();
+    startTransition(async () => {
+      const modal = await syllableAction(spelling, lang);
+      setModalContent(modal);
+    });
+  };
+
+  return (
+    <>
+      <span
+        onClick={handleClick}
+        className="m-1 hover:text-6xl"
+        style={{ color }}
+      >
+        {spelling}
+      </span>
+      {modalContent && (
+        <Modal onClose={closeModal} isOpen={!!modalContent}>
+          {modalContent}
+        </Modal>
+      )}
+    </>
+  );
+};
+
+export default SyllableSpan;
diff --git a/src/components/lang/ThaiPhonology.tsx b/src/components/lang/ThaiPhonology.tsx
new file mode 100644
index 0000000..199d0b8
--- /dev/null
+++ b/src/components/lang/ThaiPhonology.tsx
@@ -0,0 +1,250 @@
+// import React from "react";
+
+// /**
+// * ThaiConsonantGrid – a visual table of Thai consonants modelled after the
+// * traditional Sanskrit‑style chart. Each cell shows the Thai glyph and its
+// * IPA. Rows are places of articulation, columns are manners. Colours follow
+// * the pedagogical convention from the reference screenshot.
+// */
+// export default function ThaiConsonantGrid() {
+// /** Column headers in display order */
+// const cols = [
+// "stopped",
+// "aspirated",
+// "voiced",
+// "voiced‑aspirated",
+// "nasal",
+// "semiVowel",
+// "sibilant",
+// "H‑aspirate",
+// "throatBase",
+// "others",
+// ] as const;
+
+// /** Row headers in display order */
+// const rows = [
+// "Deep Throat",
+// "guttural",
+// "palatal",
+// "cerebral",
+// "dental",
+// "labial",
+// ] as const;
+// type ArticulationPoint =
+// | "bilabial"
+// | "labiodental"
+// | "dental"
+// | "alveolar"
+// | "postalveolar"
+// | "retroflex"
+// | "palatal"
+// | "velar"
+// | "uvular"
+// | "pharyngeal"
+// | "glottal";
+// type ArticulationMode =
+// | "plosive"
+// | "nasal"
+// | "trill"
+// | "flap"
+// | "fricative"
+// | "affricate"
+// | "lateral fricative"
+// | "approximant"
+// | "lateral approximant";
+// type Voicing = "unvoiced" | "voiced" | "aspirated" | "voiced aspirated";
+// type VowelHeight = "high" | "close-mid" | "open-mid" | "open";
+// type VowelFront = "front" | "central" | "back";
+// type VowelRound = "rounded" | "unrounded";
+
+// /**
+// * Minimal description for each consonant we want to render. Position is
+// * given by its (rowIdx, colIdx). The colour is a Tailwind background class
+// * so you can tweak the palette in one place.
+// */
+// interface Cell {
+// row: number; // 0‑based index into rows
+// col: number; // 0‑based index into cols
+// glyph: string;
+// ipa: string;
+// colour: string; // Tailwind bg‑* class
+// highlight?: boolean; // optional neon border
+// class: "high" | "mid" | "low";
+// }
+
+// const cells: Cell[] = [
+// // ───────────────────── guttural row (index 1) ──────────────────────
+// {
+// row: 1,
+// col: 0,
+// glyph: "ก",
+// class: "high",
+// ipa: "/k/",
+// colour: "bg-sky-500",
+// },
+// {
+// row: 1,
+// col: 1,
+// glyph: "ข",
+// class: "high",
+// ipa: "/kʰ/",
+// colour: "bg-sky-500",
+// },
+// {
+// row: 1,
+// col: 1,
+// glyph: "ฃ",
+// class: "high",
+// ipa: "/kʰ/",
+// colour: "bg-sky-500",
+// },
+// {
+// row: 1,
+// col: 2,
+// glyph: "ค",
+// class: "high",
+// ipa: "/kʰ/",
+// colour: "bg-sky-500",
+// },
+// {
+// row: 1,
+// col: 2,
+// glyph: "ฅ",
+// class: "high",
+// ipa: "/kʰ/",
+// colour: "bg-sky-500",
+// },
+// {
+// row: 1,
+// col: 2,
+// glyph: "ฆ",
+// class: "high",
+// ipa: "/kʰ/",
+// colour: "bg-sky-500",
+// },
+// {
+// row: 1,
+// col: 4,
+// glyph: "ง",
+// ipa: "/ŋ/",
+// colour: "bg-sky-500",
+// highlight: true,
+// },
+
+// // ───────────────────── palatal row (index 2) ───────────────────────
+// { row: 2, col: 0, glyph: "จ", ipa: "/tɕ/", colour: "bg-pink-500" },
+// { row: 2, col: 1, glyph: "ฉ", ipa: "/tɕʰ/", colour: "bg-pink-500" },
+// { row: 2, col: 2, glyph: "ช", ipa: "/tɕʰ/", colour: "bg-pink-500" },
+// { row: 2, col: 2, glyph: "ซ", ipa: "/s/", colour: "bg-pink-500" },
+// { row: 2, col: 3, glyph: "ฌ", ipa: "/tɕʰ/", colour: "bg-pink-500" },
+// { row: 2, col: 5, glyph: "ญ", ipa: "/j/", colour: "bg-pink-500" },
+
+// // ───────────────────── cerebral row (index 3) ──────────────────────
+// { row: 3, col: 0, glyph: "ฎ", ipa: "/d/", colour: "bg-emerald-700" },
+// { row: 3, col: 0, glyph: "ฐ", ipa: "/t/", colour: "bg-emerald-700" },
+// { row: 3, col: 1, glyph: "ฏ", ipa: "/tʰ/", colour: "bg-emerald-700" },
+// {
+// row: 3,
+// col: 4,
+// glyph: "ฑ",
+// ipa: "/tʰ or d/",
+// colour: "bg-emerald-700",
+// highlight: true,
+// },
+// { row: 3, col: 3, glyph: "ฒ", ipa: "/tʰ/", colour: "bg-emerald-700" },
+// { row: 3, col: 4, glyph: "ณ", ipa: "/n/", colour: "bg-emerald-700" },
+// { row: 3, col: 5, glyph: "ศ", ipa: "/s/", colour: "bg-emerald-700" },
+// { row: 3, col: 5, glyph: "ษ", ipa: "/s/", colour: "bg-emerald-700" },
+
+// // ───────────────────── dental row (index 4) ────────────────────────
+// { row: 4, col: 0, glyph: "ต", ipa: "/d/", colour: "bg-emerald-600" },
+// { row: 4, col: 0, glyph: "ถ", ipa: "/t/", colour: "bg-emerald-600" },
+// { row: 4, col: 1, glyph: "ท", ipa: "/tʰ/", colour: "bg-emerald-600" },
+// { row: 4, col: 2, glyph: "ธ", ipa: "/tʰ/", colour: "bg-emerald-600" },
+// { row: 4, col: 4, glyph: "น", ipa: "/n/", colour: "bg-emerald-600" },
+// { row: 4, col: 6, glyph: "ส", ipa: "/s/", colour: "bg-emerald-600" },
+
+// // ───────────────────── labial row (index 5) ────────────────────────
+// { row: 5, col: 0, glyph: "บ", ipa: "/b/", colour: "bg-orange-500" },
+// { row: 5, col: 0, glyph: "ป", ipa: "/p/", colour: "bg-orange-500" },
+// { row: 5, col: 1, glyph: "ผ", ipa: "/pʰ/", colour: "bg-orange-500" },
+// { row: 5, col: 2, glyph: "พ", ipa: "/pʰ/", colour: "bg-orange-500" },
+// { row: 5, col: 2, glyph: "ฟ", ipa: "/f/", colour: "bg-orange-500" },
+// { row: 5, col: 3, glyph: "ภ", ipa: "/pʰ/", colour: "bg-orange-500" },
+// { row: 5, col: 4, glyph: "ม", ipa: "/m/", colour: "bg-orange-500" },
+// {
+// row: 5,
+// col: 9,
+// glyph: "ฟฬ",
+// ipa: "/l/",
+// colour: "bg-emerald-600",
+// highlight: true,
+// },
+
+// // ───────────────────── extra column (index^?) – throat + others ─────
+// { row: 1, col: 7, glyph: "ห", ipa: "/h/", colour: "bg-gray-400" },
+// { row: 1, col: 8, glyph: "อ", ipa: "/ʔ/", colour: "bg-gray-400" },
+// ];
+
+// return (
+// <div className="overflow-x-auto p-4">
+// {/* Column header */}
+// <div
+// className="grid"
+// style={{
+// gridTemplateColumns: `auto repeat(${cols.length}, minmax(4rem, 1fr))`,
+// }}
+// >
+// {/* top‑left empty cell */}
+// <div />
+// {cols.map((c) => (
+// <div
+// key={c}
+// className="bg-neutral-800 text-amber-300 text-center uppercase py-2 text-sm font-semibold border border-neutral-700"
+// >
+// {c}
+// </div>
+// ))}
+
+// {/* rows */}
+// {rows.map((rowLabel, ri) => (
+// <React.Fragment key={rowLabel}>
+// {/* row header */}
+// <div className="bg-neutral-900 text-amber-300 flex items-center justify-center px-2 py-1 text-xs font-bold whitespace-nowrap border border-neutral-700">
+// {rowLabel}
+// </div>
+// {/* cells within the row */}
+// {cols.map((_, ci) => {
+// // We may have multiple consonants per slot; gather them.
+// const here = cells.filter((c) => c.row === ri && c.col === ci);
+// if (here.length === 0)
+// return <div key={ci} className="border border-neutral-700" />;
+
+// return (
+// <div
+// key={ci}
+// className={[
+// "border border-neutral-700 rounded-md flex flex-col items-center justify-center gap-1 p-1 text-white",
+// here[0].colour,
+// here.some((c) => c.highlight)
+// ? "ring-2 ring-green-400"
+// : "",
+// ].join(" ")}
+// >
+// {here.map((c, i) => (
+// <span key={i} className="text-sm leading-tight text-center">
+// <span className="block text-lg font-semibold">
+// {c.glyph}
+// </span>
+// <span className="block text-xs">{c.ipa}</span>
+// </span>
+// ))}
+// </div>
+// );
+// })}
+// </React.Fragment>
+// ))}
+// </div>
+// </div>
+// );
+// }
diff --git a/src/components/tones/ToneSelectorClient.tsx b/src/components/tones/ToneSelectorClient.tsx
new file mode 100644
index 0000000..0ee9433
--- /dev/null
+++ b/src/components/tones/ToneSelectorClient.tsx
@@ -0,0 +1,199 @@
+'use client';
+
+import { useState, useEffect, useTransition } from 'react';
+import { WordData } from '@/zoom/logic/types';
+import { fetchWordsByToneAndSyllables } from '@/actions/tones';
+import { Button } from '@/components/ui/button';
+import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select';
+import { Card, CardContent, CardDescription, CardFooter, CardHeader, CardTitle } from '@/components/ui/card';
+import { Label } from '@/components/ui/label';
+import { Skeleton } from '@/components/ui/skeleton'; // For loading state
+
+/**
+ * Shows a word's spelling in large type, one tone figure per syllable, and the
+ * IPA transcriptions joined with " / ". Falls back to a short notice when the
+ * word carries no prosody array.
+ */
+const ProminentToneDisplay = ({ wordData }: { wordData: WordData }) => {
+  const { spelling, prosody, ipa } = wordData;
+
+  if (!Array.isArray(prosody)) {
+    return <p className="text-gray-500">No prosody data</p>;
+  }
+
+  const toneColumns = prosody.map((p, index) => (
+    <div key={index} className="text-center">
+      <p className="text-sm text-gray-500">Syllable {index + 1}</p>
+      <p className="text-5xl font-semibold text-indigo-500">{p.tone ?? '?'}</p>
+    </div>
+  ));
+
+  // null means "no IPA line"; an empty joined string still renders the paragraph.
+  const ipaLine =
+    ipa && ipa.length > 0 ? ipa.map(i => i.ipa).join(' / ') : null;
+
+  return (
+    <div className="flex flex-col items-center mb-4">
+      <h1 className="text-6xl font-bold text-blue-600 mb-2">{spelling}</h1>
+      <div className="flex space-x-4">{toneColumns}</div>
+      {ipaLine !== null && (
+        <p className="text-xl text-gray-700 mt-2">{ipaLine}</p>
+      )}
+    </div>
+  );
+};
+
+
+/**
+ * Interactive Thai tone explorer.
+ *
+ * Lets the user choose a syllable count (1-5) and an optional tone per
+ * syllable, then asks the `fetchWordsByToneAndSyllables` server action for a
+ * matching word and displays it.
+ *
+ * @param initialWord - Word to show before the first search; `null` starts in
+ *   the "no word" state with a single syllable and no tone selected.
+ */
+export default function ToneSelectorClient({ initialWord }: { initialWord: WordData | null }) {
+  const [currentWord, setCurrentWord] = useState<WordData | null>(initialWord);
+  const [syllableCount, setSyllableCount] = useState<number>(initialWord?.syllables || 1);
+  const [selectedTones, setSelectedTones] = useState<(number | null)[]>(
+    initialWord?.prosody?.map(p => p.tone ?? null) || [null]
+  );
+  const [isLoading, startTransition] = useTransition();
+
+  useEffect(() => {
+    // Keep one tone slot per syllable: preserve the overlapping prefix and
+    // pad any new slots with null ("any tone").
+    setSelectedTones(prevTones => {
+      const newTones = Array(syllableCount).fill(null);
+      for (let i = 0; i < Math.min(prevTones.length, syllableCount); i++) {
+        newTones[i] = prevTones[i];
+      }
+      return newTones;
+    });
+  }, [syllableCount]);
+
+  const handleFetchWord = () => {
+    startTransition(async () => {
+      const word = await fetchWordsByToneAndSyllables(syllableCount, selectedTones);
+      setCurrentWord(word);
+    });
+  };
+
+  const handleSyllableCountChange = (value: string) => {
+    const count = parseInt(value, 10);
+    if (!isNaN(count) && count > 0 && count <= 5) { // Max 5 syllables for simplicity
+      setSyllableCount(count);
+    }
+  };
+
+  const handleToneChange = (syllableIndex: number, value: string) => {
+    const parsed = parseInt(value, 10);
+    // 'any' and anything unparseable both mean "no tone constraint".
+    const tone = value === 'any' || isNaN(parsed) ? null : parsed;
+    setSelectedTones(prevTones => {
+      const newTones = [...prevTones];
+      newTones[syllableIndex] = tone;
+      return newTones;
+    });
+  };
+
+  const thaiTones = [
+    { value: '1', label: '1 (Mid)' },
+    { value: '2', label: '2 (Low)' },
+    { value: '3', label: '3 (Falling)' },
+    { value: '4', label: '4 (High)' },
+    { value: '5', label: '5 (Rising)' },
+  ];
+
+  return (
+    <div className="container mx-auto p-4 max-w-2xl">
+      <Card className="mb-6">
+        <CardHeader>
+          <CardTitle>Thai Tone Explorer</CardTitle>
+          <CardDescription>Select syllable count and tones to find Thai words.</CardDescription>
+        </CardHeader>
+        <CardContent className="space-y-6">
+          <div>
+            <Label htmlFor="syllable-count" className="text-lg font-medium">Number of Syllables</Label>
+            <Select
+              value={syllableCount.toString()}
+              onValueChange={handleSyllableCountChange}
+            >
+              <SelectTrigger id="syllable-count" className="w-full md:w-1/2 mt-1">
+                <SelectValue placeholder="Select number of syllables" />
+              </SelectTrigger>
+              <SelectContent>
+                {[1, 2, 3, 4, 5].map(num => (
+                  <SelectItem key={num} value={num.toString()}>
+                    {num} Syllable{num > 1 ? 's' : ''}
+                  </SelectItem>
+                ))}
+              </SelectContent>
+            </Select>
+          </div>
+
+          {Array.from({ length: syllableCount }).map((_, index) => (
+            <div key={index}>
+              <Label htmlFor={`tone-select-${index}`} className="text-lg font-medium">
+                Tone for Syllable {index + 1}
+              </Label>
+              <Select
+                value={selectedTones[index]?.toString() || 'any'}
+                onValueChange={(value) => handleToneChange(index, value)}
+              >
+                <SelectTrigger id={`tone-select-${index}`} className="w-full md:w-1/2 mt-1">
+                  <SelectValue placeholder={`Select tone for syllable ${index + 1}`} />
+                </SelectTrigger>
+                <SelectContent>
+                  <SelectItem value="any">Any Tone</SelectItem>
+                  {thaiTones.map(tone => (
+                    <SelectItem key={tone.value} value={tone.value}>
+                      {tone.label}
+                    </SelectItem>
+                  ))}
+                </SelectContent>
+              </Select>
+            </div>
+          ))}
+        </CardContent>
+        <CardFooter>
+          <Button onClick={handleFetchWord} disabled={isLoading} className="w-full md:w-auto">
+            {isLoading ? 'Searching...' : 'Find Word'}
+          </Button>
+        </CardFooter>
+      </Card>
+
+      {/* Show the skeleton for every pending search. It used to require
+          `!currentWord`, so a search started while a word was displayed left
+          the result area empty until the response arrived. */}
+      {isLoading && (
+        <Card>
+          <CardHeader><Skeleton className="h-12 w-3/4" /></CardHeader>
+          <CardContent className="space-y-4">
+            <Skeleton className="h-8 w-1/2" />
+            <Skeleton className="h-20 w-full" />
+            <Skeleton className="h-6 w-full" />
+          </CardContent>
+        </Card>
+      )}
+
+      {!isLoading && currentWord && (
+        <Card>
+          <CardHeader>
+            <CardTitle className="text-center">Current Word</CardTitle>
+          </CardHeader>
+          <CardContent>
+            <ProminentToneDisplay wordData={currentWord} />
+            {/* You can add more details from WordData here if needed, like definitions */}
+            {currentWord.senses && currentWord.senses.length > 0 && (
+              <div className="mt-4 pt-4 border-t">
+                <h3 className="text-lg font-semibold mb-2">Meanings:</h3>
+                {currentWord.senses.map((sense, sIdx) => (
+                  <div key={sIdx} className="mb-2 p-2 border rounded bg-gray-50">
+                    <p className="font-medium text-indigo-600">{sense.pos}</p>
+                    {sense.senses && Array.isArray(sense.senses) && sense.senses.map((subSense, ssIdx) => (
+                      subSense.glosses && Array.isArray(subSense.glosses) && subSense.glosses.map((gloss: string, gIdx: number) => (
+                        <p key={`${ssIdx}-${gIdx}`} className="text-sm text-gray-700 ml-2">- {gloss}</p>
+                      ))
+                    ))}
+                  </div>
+                ))}
+              </div>
+            )}
+          </CardContent>
+        </Card>
+      )}
+
+      {!isLoading && !currentWord && (
+        <Card>
+          <CardHeader>
+            <CardTitle className="text-center">No Word Found</CardTitle>
+          </CardHeader>
+          <CardContent>
+            <p className="text-center text-gray-600">
+              Could not find a Thai word matching your criteria. Try different selections.
+            </p>
+          </CardContent>
+        </Card>
+      )}
+    </div>
+  );
+}
diff --git a/src/components/ui/skeleton.tsx b/src/components/ui/skeleton.tsx
new file mode 100644
index 0000000..32ea0ef
--- /dev/null
+++ b/src/components/ui/skeleton.tsx
@@ -0,0 +1,13 @@
+import { cn } from "@/lib/utils"
+
+/**
+ * Pulsing placeholder `div`. The caller's `className` is combined with the
+ * default classes through `cn`; all remaining props are spread onto the element.
+ */
+function Skeleton(props: React.ComponentProps<"div">) {
+  const { className, ...rest } = props
+  const classes = cn("bg-accent animate-pulse rounded-md", className)
+  return <div data-slot="skeleton" className={classes} {...rest} />
+}
+
+export { Skeleton }
diff --git a/src/lib/calls/nlp.ts b/src/lib/calls/nlp.ts
new file mode 100644
index 0000000..28562d0
--- /dev/null
+++ b/src/lib/calls/nlp.ts
@@ -0,0 +1,54 @@
+import { SyllableRes } from "../types/cards";
+
+// JSON shape this module returns from the Thai NLP endpoints (one analysed item).
+// NOTE(review): the responses are not validated at runtime, and the field
+// meanings are inferred from their names — confirm against the service.
+type AnalyzeRes = {
+ word: string;
+ syllables: string[];
+ ipa: string;
+ pos: string;
+};
+
+/**
+ * Runs the whole-word analysis and the segmentation request concurrently and
+ * returns the whole-word result first, followed by the segmentation results.
+ */
+export async function thaiData(word: string): Promise<AnalyzeRes[]> {
+  // Start both requests before awaiting so they overlap.
+  const analysis = analyzeTHWord(word);
+  const segments = segmentateThai(word);
+  const results = await Promise.all([analysis, segments]);
+  return [results[0], ...results[1]];
+}
+
+/**
+ * POSTs `word` to the local NLP service's `/analyze` endpoint and returns the
+ * decoded JSON body.
+ *
+ * @param word - Text sent as `{ word }` in the request body.
+ * @throws Error when the service answers with a non-2xx status, so an error
+ *   payload is never handed to callers as if it were an analysis result.
+ */
+export async function analyzeTHWord(word: string): Promise<AnalyzeRes> {
+  const opts = {
+    method: "POST",
+    headers: { "Content-type": "application/json" },
+    body: JSON.stringify({ word }),
+  };
+  const response = await fetch("http://localhost:8001" + "/analyze", opts);
+  // const r2 = await fetch(`http://192.168.1.110:8000/analyze`, opts);
+  if (!response.ok) {
+    throw new Error(
+      `NLP /analyze request failed: ${response.status} ${response.statusText}`,
+    );
+  }
+  const payload: AnalyzeRes = await response.json();
+  return payload;
+}
+/**
+ * POSTs `sentence` to the local NLP service's `/segmentate` endpoint and
+ * returns the decoded JSON body.
+ *
+ * @param sentence - Text sent as `{ word: sentence }` in the request body.
+ * @throws Error when the service answers with a non-2xx status, so an error
+ *   payload is never handed to callers as if it were a segmentation result.
+ */
+export async function segmentateThai(sentence: string): Promise<AnalyzeRes[]> {
+  const opts = {
+    method: "POST",
+    headers: { "Content-type": "application/json" },
+    body: JSON.stringify({ word: sentence }),
+  };
+  // const r1 = await fetch(`http://localhost:8000/segmentate`, opts);
+  const response = await fetch("http://localhost:8001" + `/segmentate`, opts);
+  if (!response.ok) {
+    throw new Error(
+      `NLP /segmentate request failed: ${response.status} ${response.statusText}`,
+    );
+  }
+  const payload: AnalyzeRes[] = await response.json();
+  return payload;
+}
+
+/**
+ * POSTs an IPA string to the `/lingpy` endpoint (port 8102) and returns the
+ * decoded JSON body.
+ *
+ * @param ipa - IPA text sent as `{ string: ipa }` in the request body.
+ * @throws Error when `SORTUG_NLP_API_KEY` is not set, or when the service
+ *   answers with a non-2xx status.
+ */
+export async function deconstructSyllable(ipa: string): Promise<SyllableRes> {
+  // Fail with a clear message instead of sending a request without a usable key.
+  const apiKey = Bun.env.SORTUG_NLP_API_KEY;
+  if (!apiKey) {
+    throw new Error("SORTUG_NLP_API_KEY is not set");
+  }
+  const opts = {
+    method: "POST",
+    headers: {
+      "Content-type": "application/json",
+      "X-API-KEY": apiKey,
+    },
+    body: JSON.stringify({ string: ipa }),
+  };
+  // const r1 = await fetch(`http://localhost:8000/segmentate`, opts);
+  const response = await fetch("http://localhost:8102" + `/lingpy`, opts);
+  if (!response.ok) {
+    throw new Error(
+      `NLP /lingpy request failed: ${response.status} ${response.statusText}`,
+    );
+  }
+  const payload: SyllableRes = await response.json();
+  return payload;
+}
diff --git a/src/lib/db/codes.js b/src/lib/db/codes.js
new file mode 100644
index 0000000..bef3e1b
--- /dev/null
+++ b/src/lib/db/codes.js
@@ -0,0 +1,203 @@
+
+
+// Accumulator filled in place by the fetchAndParse* functions below.
+// Each list holds `{ code, name }` objects pushed by the matching fetcher.
+const ALL_LANGUAGE_CODES = {
+ iso639_1: [],
+ iso639_2_T: [], // Terminology codes
+ iso639_2_B: [], // Bibliographic codes
+ iso639_3: [],
+ iso639_5: [],
+ bcp47_language_subtags: [], // Primary language subtags from IANA
+};
+
+const LOC_ISO639_2_URL = 'https://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt';
+// For ISO 639-3, SIL provides dated files. This is the structure of the comprehensive file.
+// The actual filename changes with each update (e.g., iso-639-3_20240123.tab).
+// You might need to go to https://iso639-3.sil.org/code_tables/download_tables and get the current link
+// for the "Complete Code Tables" zip, then extract the main .tab file.
+// For this script, I'll use a link to one specific (potentially older) version for demonstration.
+// A more robust solution would involve downloading and unzipping the latest.
+// This link points to the main table that includes mappings.
+const SIL_ISO639_3_URL = 'https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3_20240701.tab'; // Example: replace with current
+const LOC_ISO639_5_URL = 'https://www.loc.gov/standards/iso639-5/iso639-5.tsv'; // TSV format
+const IANA_LANGUAGE_SUBTAG_REGISTRY_URL = 'https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry';
+
+/**
+ * Downloads the Library of Congress ISO 639-2 table and appends its entries to
+ * ALL_LANGUAGE_CODES (`iso639_2_B`, `iso639_2_T` and `iso639_1`).
+ * Any failure is logged and swallowed; nothing is thrown to the caller.
+ */
+async function fetchAndParseISO639_1_2() {
+  try {
+    console.log('Fetching ISO 639-1 & 639-2 codes from LOC...');
+    const response = await fetch(LOC_ISO639_2_URL);
+    if (!response.ok) throw new Error(`Failed to fetch ISO 639-1/2: ${response.statusText}`);
+    const body = await response.text();
+
+    // One record per line: alpha3-b|alpha3-t|alpha2|english_name|french_name
+    for (const record of body.trim().split('\n')) {
+      const fields = record.split('|');
+      if (fields.length < 4) continue; // not a complete record
+
+      const [bibliographic, terminology, alpha2, englishName] = fields.map(f => f.trim());
+      if (bibliographic) {
+        ALL_LANGUAGE_CODES.iso639_2_B.push({ code: bibliographic, name: englishName });
+      }
+      if (terminology) {
+        ALL_LANGUAGE_CODES.iso639_2_T.push({ code: terminology, name: englishName });
+      }
+      if (alpha2) {
+        ALL_LANGUAGE_CODES.iso639_1.push({ code: alpha2, name: englishName });
+      }
+    }
+
+    console.log(`Fetched ${ALL_LANGUAGE_CODES.iso639_1.length} ISO 639-1 codes.`);
+    console.log(`Fetched ${ALL_LANGUAGE_CODES.iso639_2_B.length} ISO 639-2/B codes.`);
+    console.log(`Fetched ${ALL_LANGUAGE_CODES.iso639_2_T.length} ISO 639-2/T codes.`);
+  } catch (error) {
+    console.error('Error fetching ISO 639-1/2 codes:', error.message);
+  }
+}
+
+/**
+ * Downloads the SIL ISO 639-3 tab-separated table, appends every `{ code, name }`
+ * to `ALL_LANGUAGE_CODES.iso639_3`, and adds any two-letter (Part1) code that is
+ * not yet present in `iso639_1`. Afterwards `iso639_1` is replaced by a
+ * de-duplicated copy sorted by code.
+ * Any failure is logged and swallowed; nothing is thrown to the caller.
+ */
+async function fetchAndParseISO639_3() {
+  try {
+    console.log('Fetching ISO 639-3 codes from SIL...');
+    const response = await fetch(SIL_ISO639_3_URL);
+    if (!response.ok) throw new Error(`Failed to fetch ISO 639-3: ${response.statusText}`);
+    const rows = (await response.text()).trim().split('\n');
+
+    // The first row names the columns; locate the ones we need by name.
+    const columns = rows.shift().split('\t');
+    const idCol = columns.indexOf('Id');
+    const nameCol = columns.indexOf('Ref_Name');
+    const part1Col = columns.indexOf('Part1');
+
+    if (idCol === -1 || nameCol === -1) {
+      throw new Error('ISO 639-3 header format mismatch. Expected "Id" and "Ref_Name" columns.');
+    }
+
+    for (const row of rows) {
+      const cells = row.split('\t');
+      const code = cells[idCol]?.trim();
+      const name = cells[nameCol]?.trim();
+      const part1Code = cells[part1Col]?.trim();
+      if (!code || !name) continue;
+
+      ALL_LANGUAGE_CODES.iso639_3.push({ code, name });
+
+      // Only add a two-letter code that no earlier entry already provides.
+      if (part1Code && !ALL_LANGUAGE_CODES.iso639_1.some(c => c.code === part1Code)) {
+        ALL_LANGUAGE_CODES.iso639_1.push({ code: part1Code, name });
+      }
+    }
+    console.log(`Fetched ${ALL_LANGUAGE_CODES.iso639_3.length} ISO 639-3 codes.`);
+
+    // Collapse duplicates (last entry per code wins), then order by code.
+    const byCode = {};
+    for (const item of ALL_LANGUAGE_CODES.iso639_1) {
+      byCode[item.code] = item;
+    }
+    const deduped = Object.values(byCode);
+    deduped.sort((a, b) => a.code.localeCompare(b.code));
+    ALL_LANGUAGE_CODES.iso639_1 = deduped;
+    console.log(`Final unique ISO 639-1 count: ${ALL_LANGUAGE_CODES.iso639_1.length}.`);
+
+  } catch (error) {
+    console.error('Error fetching ISO 639-3 codes:', error.message);
+    console.warn('Make sure the SIL_ISO639_3_URL is current or points to a valid .tab file.');
+  }
+}
+
+/**
+ * Downloads the Library of Congress ISO 639-5 TSV and appends every
+ * `{ code, name }` pair to `ALL_LANGUAGE_CODES.iso639_5`.
+ * Any failure is logged and swallowed; nothing is thrown to the caller.
+ */
+async function fetchAndParseISO639_5() {
+  try {
+    console.log('Fetching ISO 639-5 codes from LOC...');
+    const response = await fetch(LOC_ISO639_5_URL);
+    if (!response.ok) throw new Error(`Failed to fetch ISO 639-5: ${response.statusText}`);
+    const body = await response.text();
+
+    // Drop the header row (URI, Code, Label_en, ...) and keep the data rows.
+    const [, ...rows] = body.trim().split('\n');
+
+    for (const row of rows) {
+      const cells = row.split('\t');
+      if (cells.length < 3) continue;
+
+      const code = cells[1].trim();
+      const name = cells[2].trim();
+      if (code && name) {
+        ALL_LANGUAGE_CODES.iso639_5.push({ code, name });
+      }
+    }
+    console.log(`Fetched ${ALL_LANGUAGE_CODES.iso639_5.length} ISO 639-5 codes (language families/groups).`);
+  } catch (error) {
+    console.error('Error fetching ISO 639-5 codes:', error.message);
+  }
+}
+
+/**
+ * Downloads the IANA language subtag registry and appends every record of
+ * type `language` to `ALL_LANGUAGE_CODES.bcp47_language_subtags`, using the
+ * record's first `Description:` line as its name.
+ * Any failure is logged and swallowed; nothing is thrown to the caller.
+ */
+async function fetchAndParseIANALanguageSubtags() {
+  try {
+    console.log('Fetching IANA Language Subtag Registry...');
+    const response = await fetch(IANA_LANGUAGE_SUBTAG_REGISTRY_URL);
+    if (!response.ok) throw new Error(`Failed to fetch IANA registry: ${response.statusText}`);
+    const registry = await response.text();
+
+    // Records are separated by a line containing only "%%".
+    for (const record of registry.split('%%')) {
+      let type = '';
+      let subtag = '';
+      let description = '';
+
+      for (const line of record.trim().split('\n')) {
+        if (line.startsWith('Type:')) {
+          type = line.substring(5).trim();
+        } else if (line.startsWith('Subtag:')) {
+          subtag = line.substring(7).trim();
+        } else if (line.startsWith('Description:') && !description) {
+          // Only the first Description line is kept.
+          description = line.substring(12).trim();
+        }
+      }
+
+      const isLanguage = type === 'language';
+      if (isLanguage && subtag && description) {
+        ALL_LANGUAGE_CODES.bcp47_language_subtags.push({ code: subtag, name: description });
+      }
+    }
+    console.log(`Fetched ${ALL_LANGUAGE_CODES.bcp47_language_subtags.length} primary language subtags from IANA.`);
+  } catch (error) {
+    console.error('Error fetching IANA Language Subtag Registry:', error.message);
+  }
+}
+
+
+/**
+ * Script entry point: fills ALL_LANGUAGE_CODES from all four sources, writes the
+ * BCP 47 language subtags to `bcp.json`, and prints a preview of every list.
+ */
+async function main() {
+  console.log('Starting to fetch all language codes...\n');
+
+  // The ISO 639-3 pass reads ALL_LANGUAGE_CODES.iso639_1 and then replaces it with
+  // a de-duplicated, sorted copy, so it may only start once the 639-1/639-2 pass
+  // has finished filling that list. The remaining fetches are independent.
+  await fetchAndParseISO639_1_2();
+  await Promise.all([
+    fetchAndParseISO639_3(),
+    fetchAndParseISO639_5(),
+    fetchAndParseIANALanguageSubtags()
+  ]);
+  await Bun.write("bcp.json", JSON.stringify(ALL_LANGUAGE_CODES.bcp47_language_subtags))
+
+  console.log('\n\n--- All Fetched Language Codes ---');
+
+  // Example: Print counts and first few of each
+  for (const [key, codes] of Object.entries(ALL_LANGUAGE_CODES)) {
+    console.log(`\n--- ${key} (Total: ${codes.length}) ---`);
+    codes.slice(0, 50).forEach(c => console.log(`${c.code}: ${c.name}`));
+    if (codes.length > 50) console.log('... and more ...');
+  }
+
+  // You can now use ALL_LANGUAGE_CODES object for your needs
+  // e.g., save to a JSON file
+  // import fs from 'fs';
+  // fs.writeFileSync('all_language_codes.json', JSON.stringify(ALL_LANGUAGE_CODES, null, 2));
+  // console.log('\n\nSaved all codes to all_language_codes.json');
+
+  console.log('\nFetching complete.');
+}
+
+main().catch(console.error);
diff --git a/src/lib/db/index.ts b/src/lib/db/index.ts
index fcfab57..6bd417c 100644
--- a/src/lib/db/index.ts
+++ b/src/lib/db/index.ts
@@ -92,10 +92,16 @@ class DatabaseHandler {
const query = this.db.query(
`
SELECT * FROM expressions e
- WHERE e.syllables = 1 AND e.lang = ?
+ WHERE e.type = 'syllable' AND e.lang = ?
ORDER BY frequency DESC
LIMIT ${PAGE_SIZE} ${page ? "OFFSET " + getDBOffset(page, PAGE_SIZE) : ""}
`,
+ // `
+ // SELECT * FROM expressions e
+ // WHERE e.syllables = 1 AND e.lang = ?
+ // ORDER BY frequency DESC
+ // LIMIT ${PAGE_SIZE} ${page ? "OFFSET " + getDBOffset(page, PAGE_SIZE) : ""}
+ // `,
);
const results = query.all(lang);
console.log({ lang, page }, "results");
diff --git a/src/lib/db/prosodydb.ts b/src/lib/db/prosodydb.ts
new file mode 100644
index 0000000..b3b973b
--- /dev/null
+++ b/src/lib/db/prosodydb.ts
@@ -0,0 +1,238 @@
+import Database from "bun:sqlite";
+type Str = string | null;
+type ItemType = "word" | "syllable" | "idiom";
+
+/**
+ * Write-side wrapper around the prosody SQLite database (tables defined in
+ * prosodyschema.sql). All statements use bound parameters.
+ */
+class DatabaseHandler {
+  db: Database;
+  constructor() {
+    const dbPath = "/home/y/code/bun/ssr/waku/bulkdata/prosodynew.db";
+    const db = new Database(dbPath, { create: true });
+    db.exec("PRAGMA journal_mode = WAL"); // Enable Write-Ahead Logging for better performance
+    db.exec("PRAGMA foreign_keys = ON");
+    this.db = db;
+  }
+
+  /**
+   * Executes the schema file against the database.
+   * NOTE(review): the path is relative, so it resolves against the process
+   * working directory rather than this module's location.
+   */
+  async init() {
+    const file = Bun.file("./prosodyschema.sql");
+    const sql = await file.text();
+    this.db.exec(sql);
+  }
+
+  // selects
+
+  /**
+   * Looks up the ids of all `words` rows whose spelling is one of `words`.
+   * Spellings are bound as parameters, so quotes in a spelling are harmless.
+   *
+   * @returns one `{ id }` per matching row; empty when `words` is empty
+   */
+  fetchWords(words: string[]) {
+    if (words.length === 0) return [] as Array<{ id: number }>;
+    const placeholders = words.map(() => "?").join(", ");
+    const query = this.db.query(
+      `SELECT id FROM words where spelling IN (${placeholders})`,
+    );
+    return query.all(...words) as Array<{ id: number }>;
+  }
+
+  // inserts
+
+  /** Registers a language; an already-known code is left untouched. */
+  addLanguage(code: string, name: string) {
+    this.db
+      .query(`INSERT OR IGNORE INTO languages(iso6392, english) VALUES(?, ?)`)
+      .run(code, name);
+  }
+
+  /**
+   * Stores one pronunciation of a word, syllable or idiom. A pronunciation
+   * that violates a uniqueness constraint is skipped silently; any other
+   * failure is logged and swallowed so bulk imports keep going.
+   */
+  addPronunciation(
+    type: ItemType,
+    parentId: number | bigint,
+    ipa: string,
+    syllables: number,
+    tags: Str,
+    notes: Str,
+  ) {
+    try {
+      this.db
+        .query(
+          `INSERT INTO pronunciation(type, parent_id, ipa, syllables, tag, notes)
+           VALUES(?, ?, ?, ?, ?, ?)
+           ON CONFLICT DO NOTHING`,
+        )
+        .run(type, parentId, ipa, syllables, tags, notes);
+    } catch (e) {
+      console.error("addPronunciation failed", { type, parentId, ipa }, e);
+    }
+  }
+
+  /**
+   * Upserts a word-level rhyme and links it to `wordId` through `words_rhymes`.
+   * Both writes happen in one transaction; failures are logged and swallowed.
+   */
+  addWordRhyme(wordId: number | bigint, ipa: string, lang: string, notes: Str) {
+    console.log("wordrhyme", notes);
+    try {
+      const tx = this.db.transaction(() => {
+        // The no-op DO UPDATE makes RETURNING yield the row on conflict too.
+        const rhyme = this.db
+          .query(
+            `INSERT INTO word_rhymes(text, lang, notes) VALUES(?, ?, ?)
+             ON CONFLICT(text, lang) DO UPDATE SET text = excluded.text
+             RETURNING id`,
+          )
+          .get(ipa, lang, notes) as { id: number };
+        this.db
+          .query(`INSERT INTO words_rhymes(word_id, wrhyme_id) VALUES(?, ?)`)
+          .run(wordId, rhyme.id);
+      });
+      tx();
+    } catch (e) {
+      console.error("addWordRhyme failed", { wordId, ipa, lang }, e);
+    }
+  }
+
+  /** Inserts an idiom and returns the statement result; throws if it already exists. */
+  addIdiom(spelling: string, lang: string) {
+    const query = this.db.query(
+      `INSERT INTO idioms(spelling, lang) VALUES(?, ?)`,
+    );
+    const res = query.run(spelling, lang);
+    return res;
+  }
+
+  /**
+   * Links idiom `idId` to every known word that appears in its
+   * space-separated spelling.
+   */
+  findIdiomWords(spelling: string, idId: number | bigint) {
+    const split = spelling.split(" ");
+    const words = this.fetchWords(split);
+    console.log({ words });
+    const link = this.db.query(
+      `INSERT INTO words_idioms(word_id, idiom_id) VALUES(?, ?)`,
+    );
+    const tx = this.db.transaction(() => {
+      for (const w of words) {
+        link.run(w.id, idId);
+      }
+    });
+    tx();
+  }
+
+  /** Runs `findIdiomWords` for every stored idiom. */
+  findIdiomsWords() {
+    // Materialise the rows first: the loop below writes on the same connection.
+    const rows = this.db
+      .query(`SELECT id, spelling FROM idioms`)
+      .all() as Array<{ id: number; spelling: string }>;
+    for (const row of rows) {
+      this.findIdiomWords(row.spelling, row.id);
+    }
+  }
+
+  /** Inserts a word and returns its row id; throws if it already exists. */
+  addWord(spelling: string, lang: string) {
+    const query = this.db.query(
+      // `INSERT OR IGNORE INTO words(spelling, lang) VALUES(?, ?)`,
+      `INSERT INTO words(spelling, lang) VALUES(?, ?)`,
+    );
+    const res = query.run(spelling, lang);
+    const wordId = res.lastInsertRowid;
+    return wordId;
+  }
+
+  /**
+   * Stores a syllable of `wordId` together with its phonological parts.
+   *
+   * Everything runs in a single transaction. A syllable that already exists
+   * for `(text, lang)` is reused and only linked to the word; a new syllable
+   * is additionally linked to its onset, medial, nucleus, coda, rhyme and
+   * tone (optional parts are skipped when empty).
+   */
+  addSyllable(
+    wordId: number | bigint,
+    text: string,
+    lang: string,
+    long: boolean,
+    onset: Str,
+    medial: Str,
+    nucleus: string,
+    coda: Str,
+    rhyme: string,
+    tone: Str,
+    notes: Str,
+  ) {
+    const tx = this.db.transaction(() => {
+      // DO NOTHING returns no row when the syllable is already stored.
+      const inserted = this.db
+        .query(
+          `INSERT INTO syllables(text, lang, long, onset, medial, nucleus, coda, rhyme, tone, notes)
+           VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+           ON CONFLICT(text, lang) DO NOTHING
+           RETURNING id`,
+        )
+        .get(
+          text,
+          lang,
+          long,
+          onset,
+          medial,
+          nucleus,
+          coda,
+          rhyme,
+          tone,
+          notes,
+        ) as { id: number } | null;
+
+      const sylId = inserted
+        ? inserted.id
+        : (
+            this.db
+              .query(`SELECT id FROM syllables WHERE text = ? AND lang = ?`)
+              .get(text, lang) as { id: number }
+          ).id;
+
+      this.db
+        .query(`INSERT INTO syllables_words(syl_id, word_id) VALUES(?, ?)`)
+        .run(sylId, wordId);
+
+      // Parts of an existing syllable were linked when it was first stored.
+      if (!inserted) return;
+
+      // [lookup table, join table, join column, value]
+      const parts: Array<[string, string, string, string]> = [];
+      if (onset) parts.push(["onsets", "onsets_syllables", "onset_id", onset]);
+      if (medial)
+        parts.push(["medials", "medials_syllables", "medial_id", medial]);
+      parts.push(["nucleus", "nucleus_syllables", "nucleus_id", nucleus]);
+      if (coda) parts.push(["codas", "codas_syllables", "coda_id", coda]);
+      parts.push(["rhymes", "rhymes_syllables", "rhyme_id", rhyme]);
+      if (tone) parts.push(["tones", "tones_syllables", "tone_id", tone]);
+
+      for (const [table, joinTable, joinColumn, value] of parts) {
+        const partId = this.upsertPart(table, value, lang);
+        this.db
+          .query(
+            `INSERT INTO ${joinTable}(syl_id, ${joinColumn}) VALUES(?, ?)`,
+          )
+          .run(sylId, partId);
+      }
+    });
+    tx();
+  }
+
+  /**
+   * Inserts `(text, lang)` into one of the part lookup tables, or finds the
+   * existing row, and returns its id. `table` must be a trusted literal: it
+   * is interpolated into the SQL text.
+   */
+  private upsertPart(table: string, text: string, lang: string): number {
+    const row = this.db
+      .query(
+        `INSERT INTO ${table}(text, lang) VALUES(?, ?)
+         ON CONFLICT(text, lang) DO UPDATE SET text = excluded.text
+         RETURNING id`,
+      )
+      .get(text, lang) as { id: number };
+    return row.id;
+  }
+
+  // reads
+}
+const db = new DatabaseHandler();
+
+export default db;
diff --git a/src/lib/db/prosodyschema.sql b/src/lib/db/prosodyschema.sql
new file mode 100644
index 0000000..e70b005
--- /dev/null
+++ b/src/lib/db/prosodyschema.sql
@@ -0,0 +1,178 @@
+-- Enable foreign key support
+PRAGMA foreign_keys = ON;
+PRAGMA journal_mode = WAL;
+PRAGMA cache_size = -2000;
+PRAGMA mmap_size = 30000000000;
+
+
+-- proper prosody now
+--
+--
+--
+-- Languages, keyed by the code stored in iso6392; every `lang` foreign key below points here.
+CREATE TABLE IF NOT EXISTS languages(
+ iso6392 TEXT PRIMARY KEY,
+ -- bcp47 TEXT PRIMARY KEY,
+ -- iso6393 TEXT NOT NULL,
+ english TEXT NOT NULL
+ -- native TEXT,
+ -- iso6391 TEXT,
+ -- iso6395 TEXT,
+ -- glottolog TEXT
+);
+
+-- Multi-word expressions; one row per (spelling, lang).
+CREATE TABLE IF NOT EXISTS idioms(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ spelling TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ frequency INTEGER,
+ FOREIGN KEY (lang) REFERENCES languages(iso6392),
+ CONSTRAINT spell_unique UNIQUE (spelling, lang)
+);
+
+CREATE INDEX IF NOT EXISTS idx_idioms_spelling ON idioms(spelling);
+-- Single words; one row per (spelling, lang).
+CREATE TABLE IF NOT EXISTS words(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ spelling TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ frequency INTEGER,
+ FOREIGN KEY (lang) REFERENCES languages(iso6392),
+ CONSTRAINT spell_unique UNIQUE (spelling, lang)
+);
+
+CREATE INDEX IF NOT EXISTS idx_words_spelling ON words(spelling);
+
+-- Word-level rhyme strings; one row per (text, lang). `lang` has no foreign key here.
+CREATE TABLE IF NOT EXISTS word_rhymes(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ notes TEXT,
+ CONSTRAINT wrhyme_unique UNIQUE (text, lang)
+);
+-- Join table linking words to word_rhymes. No uniqueness constraint, so a pair may repeat.
+CREATE TABLE IF NOT EXISTS words_rhymes(
+ word_id INTEGER NOT NULL,
+ wrhyme_id INTEGER NOT NULL,
+ FOREIGN KEY (word_id) REFERENCES words(id),
+ FOREIGN KEY (wrhyme_id) REFERENCES word_rhymes(id)
+);
+
+-- break up syllables
+-- One row per (text, lang). The part columns (tone, onset, medial, nucleus, coda, rhyme)
+-- are stored inline as text and are also normalised into the lookup tables below.
+CREATE TABLE IF NOT EXISTS syllables(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ long INTEGER NOT NULL,
+ tone TEXT,
+ onset TEXT,
+ medial TEXT,
+ nucleus TEXT,
+ coda TEXT,
+ rhyme TEXT,
+ notes TEXT,
+ FOREIGN KEY (lang) REFERENCES languages(iso6392),
+ CONSTRAINT spell_unique UNIQUE (text, lang)
+);
+
+-- Lookup tables: one row per distinct (text, lang) value of each syllable part.
+-- None of them declares a foreign key on `lang`.
+CREATE TABLE IF NOT EXISTS tones(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ name TEXT,
+ num INTEGER,
+ CONSTRAINT tone_unique UNIQUE (text, lang)
+);
+CREATE TABLE IF NOT EXISTS onsets(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ CONSTRAINT onsets_unique UNIQUE (text, lang)
+);
+CREATE TABLE IF NOT EXISTS medials(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ CONSTRAINT medials_unique UNIQUE (text, lang)
+);
+CREATE TABLE IF NOT EXISTS nucleus(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ CONSTRAINT nucleus_unique UNIQUE (text, lang)
+);
+CREATE TABLE IF NOT EXISTS codas(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ CONSTRAINT coda_unique UNIQUE (text, lang)
+);
+CREATE TABLE IF NOT EXISTS rhymes(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ text TEXT NOT NULL,
+ lang TEXT NOT NULL,
+ CONSTRAINT rhyme_unique UNIQUE (text, lang)
+);
+
+-- join tables
+-- Each table pairs a syllable (or word) id with the id of a related row.
+-- None declares a primary key or uniqueness constraint, so the same pair can be stored twice.
+
+CREATE TABLE IF NOT EXISTS tones_syllables(
+ syl_id INTEGER NOT NULL,
+ tone_id INTEGER NOT NULL,
+ FOREIGN KEY (syl_id) REFERENCES syllables(id),
+ FOREIGN KEY (tone_id) REFERENCES tones(id)
+);
+CREATE TABLE IF NOT EXISTS onsets_syllables(
+ syl_id INTEGER NOT NULL,
+ onset_id INTEGER NOT NULL,
+ FOREIGN KEY (syl_id) REFERENCES syllables(id),
+ FOREIGN KEY (onset_id) REFERENCES onsets(id)
+);
+CREATE TABLE IF NOT EXISTS medials_syllables(
+ syl_id INTEGER NOT NULL,
+ medial_id INTEGER NOT NULL,
+ FOREIGN KEY (syl_id) REFERENCES syllables(id),
+ FOREIGN KEY (medial_id) REFERENCES medials(id)
+);
+CREATE TABLE IF NOT EXISTS nucleus_syllables(
+ syl_id INTEGER NOT NULL,
+ nucleus_id INTEGER NOT NULL,
+ FOREIGN KEY (syl_id) REFERENCES syllables(id),
+ FOREIGN KEY (nucleus_id) REFERENCES nucleus(id)
+);
+CREATE TABLE IF NOT EXISTS codas_syllables(
+ syl_id INTEGER NOT NULL,
+ coda_id INTEGER NOT NULL,
+ FOREIGN KEY (syl_id) REFERENCES syllables(id),
+ FOREIGN KEY (coda_id) REFERENCES codas(id)
+);
+CREATE TABLE IF NOT EXISTS rhymes_syllables(
+ syl_id INTEGER NOT NULL,
+ rhyme_id INTEGER NOT NULL,
+ FOREIGN KEY (syl_id) REFERENCES syllables(id),
+ FOREIGN KEY (rhyme_id) REFERENCES rhymes(id)
+);
+
+-- Which syllables make up which words.
+CREATE TABLE IF NOT EXISTS syllables_words(
+ syl_id INTEGER NOT NULL,
+ word_id INTEGER NOT NULL,
+ FOREIGN KEY (syl_id) REFERENCES syllables(id),
+ FOREIGN KEY (word_id) REFERENCES words(id)
+);
+-- Which words appear in which idioms.
+CREATE TABLE IF NOT EXISTS words_idioms(
+ word_id INTEGER NOT NULL,
+ idiom_id INTEGER NOT NULL,
+ FOREIGN KEY (idiom_id) REFERENCES idioms(id),
+ FOREIGN KEY (word_id) REFERENCES words(id)
+);
+
+
+--
+-- IPA pronunciations. `type` restricts the owner kind to word, syllable or idiom;
+-- `parent_id` carries no foreign key (presumably the id in the table named by `type` — confirm).
+-- NOTE(review): uniqueness is on (ipa, parent_id) only and does not include `type`.
+CREATE TABLE IF NOT EXISTS pronunciation(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ type TEXT CHECK(type IN ('word', 'syllable', 'idiom')) NOT NULL,
+ parent_id INTEGER NOT NULL,
+ ipa TEXT NOT NULL,
+ syllables INTEGER NOT NULL,
+ tag TEXT,
+ notes TEXT,
+ CONSTRAINT ipa_unique UNIQUE (ipa, parent_id)
+);
+-- Covers the same columns as the ipa_unique constraint above.
+CREATE INDEX IF NOT EXISTS idx_words_ipa ON pronunciation(ipa, parent_id);
diff --git a/src/lib/db/seed.ts b/src/lib/db/seed.ts
index c4094de..7f4352f 100644
--- a/src/lib/db/seed.ts
+++ b/src/lib/db/seed.ts
@@ -2,6 +2,8 @@ import { readWiktionaryDump } from "../services/wiki";
import { getStressedSyllable, getSyllableCount } from "../utils";
import useful from "@/lib/useful_thai.json";
import db from ".";
+import pdb from "./prosodydb";
+import * as Sorsyl from "sorsyl";
const SYMBOL_REGEX = new RegExp(/[\W\d]/);
@@ -483,7 +485,16 @@ function addThaiSyllablesLesson() {
// }
// }
// }
-addThaiUseful();
+/**
+ * Debug helper: loads `ipa` and `syllables` for every expression and prints
+ * the first ten result slots (a slot past the end prints as undefined).
+ */
+function fixSyllables() {
+  const rows = db.db.query(`SELECT ipa, syllables FROM expressions;`).all();
+  let index = 0;
+  while (index < 10) {
+    console.log({ row: rows[index] });
+    index += 1;
+  }
+}
+// fixSyllables();
+// addThaiUseful();
// addThaiSyllablesLesson();
// adjustFrequency("th");
@@ -492,3 +503,202 @@ addThaiUseful();
// fillFromDump();
// thaiSyllables();
// thaiFreq();
+//
+//
+const SORSYL_PATH =
+ "/nix/store/lkyi9rrjbr619w3ivpkm89ccf93bvxx5-sorsyl-0.1.0/bin/sorsyl";
+/**
+ * Rebuilds the prosody database from the Wiktionary dump: applies the schema,
+ * then for each dump line registers its language and stores the entry as a
+ * word (no space in the trimmed spelling) or as an idiom.
+ * A line that fails to parse or store is logged and skipped.
+ */
+async function redump() {
+  await pdb.init();
+  let processed = 0;
+
+  // Keys listed in the original note about `sounds` entries:
+  // "tags", "ipa", "audio", "ogg_url", "mp3_url", "enpr", "rhymes", "homophone",
+  // "note", "zh-pron", "other", "text", "hangeul", "topics", "form", "audio-ipa"
+  for await (const line of readWiktionaryDump()) {
+    try {
+      processed += 1;
+      const entry = JSON.parse(line);
+      console.log(Object.keys(entry), entry.word);
+      pdb.addLanguage(entry.lang_code, entry.lang);
+
+      const isMultiWord = entry.word.trim().split(" ").length !== 1;
+      if (isMultiWord) {
+        await handleIdiom(entry);
+      } else {
+        await handleWord(entry);
+      }
+    } catch (e) {
+      console.log("error parsing", e);
+    }
+  }
+}
+
+/**
+ * One syllable as read from the `syls` array of the syllabifier HTTP response
+ * in `handleWord`. NOTE(review): the response is not validated at runtime, so
+ * these field types are an assumption about that service.
+ */
+type SorSyl = {
+ stressed: boolean;
+ long: boolean;
+ spelling: string;
+ ipa: string;
+ nucleus: string;
+ onset: string;
+ medial: string;
+ coda: string;
+ rhyme: string;
+ tone: string;
+};
+/**
+ * Stores one single-word dump entry: the word itself, then for every sound
+ * entry that has an `ipa` key its pronunciation, word-level rhyme and syllables
+ * (as split by the local syllabifier service).
+ *
+ * An entry without a `sounds` array only gets its word row. A failure while
+ * processing one pronunciation is logged and the next one is tried.
+ *
+ * @param j parsed dump entry; reads `word`, `lang_code` and `sounds`
+ */
+async function handleWord(j: any) {
+  const wordId = pdb.addWord(j.word, j.lang_code);
+  let ts = Date.now();
+
+  // `sounds` may be absent; treat that as "no pronunciations" instead of throwing.
+  const sounds: any[] = Array.isArray(j.sounds) ? j.sounds : [];
+  const hwikiRhyme = sounds.find((s: any) => "rhymes" in s);
+  const wikiRhyme = hwikiRhyme ? hwikiRhyme.rhymes : null;
+  for (const snd of sounds) {
+    if (!("ipa" in snd)) continue;
+    const tags = JSON.stringify(snd.tags) || null;
+    const ipa = snd.ipa;
+    try {
+      const hres = await fetch("http://localhost:8104/syls", {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify({ string: j.word, lang: j.lang_code, ipa }),
+      });
+      if (!hres.ok) {
+        throw new Error(
+          `syllabifier responded ${hres.status} ${hres.statusText} for "${j.word}"`,
+        );
+      }
+      const hjon = await hres.json();
+      console.log(Date.now() - ts, "elapsed in http");
+      ts = Date.now();
+      pdb.addPronunciation(
+        "word",
+        wordId,
+        hjon.clean_ipa,
+        hjon.syls.length,
+        tags,
+        null,
+      );
+      // Word rhyme = rhyme of the first stressed syllable + full IPA of every
+      // syllable after it; stays empty when no syllable is stressed.
+      const wordRhyme = hjon.syls.reduce((acc: string, item: SorSyl) => {
+        if (!item.stressed && !acc) return acc;
+        if (item.stressed && !acc) return `${acc}${item.rhyme}`;
+        else return `${acc}${item.ipa}`;
+      }, "");
+      if (wordRhyme)
+        pdb.addWordRhyme(wordId, wordRhyme, j.lang_code, wikiRhyme);
+      else console.log("no rhyme?", hjon);
+      for (const syl of hjon.syls) {
+        // TODO ideally syllables would have spelling not IPA... harsh tho
+        pdb.addSyllable(
+          wordId,
+          syl.ipa,
+          j.lang_code,
+          syl.long,
+          syl.onset || null,
+          syl.medial || null,
+          syl.nucleus,
+          syl.coda || null,
+          syl.rhyme,
+          syl.tone || null,
+          null,
+        );
+      }
+      console.log(Date.now() - ts, "elapsed in db");
+      ts = Date.now();
+    } catch (e) {
+      console.error(e);
+      console.error(j);
+      // break;
+    }
+  }
+}
+/** Logs and stores a multi-word dump entry as an idiom. */
+async function handleIdiom(j: any) {
+  const spelling = j.word;
+  console.log(spelling, "idiom");
+  pdb.addIdiom(spelling, j.lang_code);
+  // TODO IPA of idioms...?
+}
+/**
+ * Debug helper: prints the etymology text, etymology templates and head
+ * templates of a dump entry. Nothing is stored.
+ */
+async function handleEtim(j: any) {
+  const etymologyFields = [j.etymology_text, j.etymology_templates];
+  for (const value of etymologyFields) {
+    console.log(value, "etym");
+  }
+  // Sample etymology template:
+  // {
+  //   name: "inh",
+  //   args: { "1": "en", "2": "ang", "3": "frēo", "4": "", "5": "free" },
+  //   expansion: "Old English frēo (“free”)",
+  // },
+
+  console.log(j.head_templates, "head");
+  // Sample head template:
+  // {
+  //   name: "en-verb",
+  //   args: {},
+  //   expansion: "free (third-person singular simple present frees, present participle freeing, simple past and past participle freed)",
+  // }
+}
+/**
+ * Debug helper: prints the related-term lists of a dump entry, one labelled
+ * line per list. Nothing is stored.
+ */
+async function handleDerived(j: any) {
+  // Shapes noted in the dump: forms {form: string; tags: string[]},
+  // derived {word: string}, the others {word: string, source?: string}.
+  const labelled: Array<[string, unknown]> = [
+    ["forms", j.forms],
+    ["derived", j.derived],
+    ["related", j.related],
+    ["ant", j.antonyms],
+    ["hypo", j.hyponyms],
+    ["syno", j.synonyms],
+    ["desc", j.descendants],
+  ];
+  for (const [label, value] of labelled) {
+    console.log(label, value);
+  }
+}
+/**
+ * Debug helper: prints the examples and info templates of the first sense.
+ * Throws when `senses` is empty. Nothing is stored.
+ */
+async function handleSenses(pos: string, senses: any[]) {
+  const firstSense = senses[0];
+  // examples: {text: string; ref: string; type: "quote"}
+  console.log("ex", firstSense.examples);
+  console.log("info", firstSense.info_templates);
+  for (const sense of senses) {
+    // sense.glosses[]
+    // sense.tags[]
+  }
+}
+
+redump();
+
+/**
+ * Scratch test: upserts the onset ("lll", "en") and prints the row that the
+ * statement returns.
+ */
+async function newtest() {
+  const upsertOnset = pdb.db.query(
+    `INSERT INTO onsets(text, lang) VALUES(?, ?)
+ ON CONFLICT(text, lang) DO UPDATE SET
+ text = excluded.text
+ RETURNING rowid
+ `,
+  );
+  const res1 = upsertOnset.get("lll", "en");
+  console.log({ res1 });
+}
+// newtest();
+// TIL calling shell commands is terribly slow wtf
+// Bun.$.env({ FOO: ipa });
+// const res = await Bun.$`${SORSYL_PATH} $FOO`;
+// const syllables = JSON.parse(res.stdout.toString());
+// console.log(Date.now() - ts, "elapsed in py");
+// ts = Date.now();
diff --git a/src/lib/db/senseschema.sql b/src/lib/db/senseschema.sql
new file mode 100644
index 0000000..f07a208
--- /dev/null
+++ b/src/lib/db/senseschema.sql
@@ -0,0 +1,54 @@
+-- Enable foreign key support
+PRAGMA foreign_keys = ON;
+PRAGMA journal_mode = WAL;
+PRAGMA cache_size = -2000;
+PRAGMA mmap_size = 30000000000;
+
+-- a semantic entity
+-- Each row belongs to one expressions row (parent_id); senses, forms and related hold JSONB payloads.
+CREATE TABLE IF NOT EXISTS senses(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ parent_id INTEGER NOT NULL,
+ spelling TEXT NOT NULL,
+ pos TEXT,
+ etymology TEXT,
+ senses JSONB,
+ forms JSONB,
+ related JSONB,
+ confidence INTEGER NOT NULL DEFAULT 0,
+ FOREIGN KEY (parent_id) REFERENCES expressions(id)
+);
+-- NOTE(review): named idx_words_pos but defined on senses(pos).
+CREATE INDEX IF NOT EXISTS idx_words_pos ON senses(pos);
+CREATE INDEX IF NOT EXISTS idx_senses_parent ON senses(parent_id);
+
+-- One gloss (with optional JSONB examples) belonging to the senses row `sid`.
+CREATE TABLE IF NOT EXISTS subsenses(
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  sid INTEGER NOT NULL,
+  gloss TEXT NOT NULL,
+  examples JSONB,
+  FOREIGN KEY (sid) REFERENCES senses(id)
+);
+
+-- One derived/related term (kind in `type`, term in `text`) belonging to the senses row `sid`.
+CREATE TABLE IF NOT EXISTS derivation(
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  sid INTEGER NOT NULL,
+  type TEXT NOT NULL,
+  text TEXT NOT NULL,
+  tags JSONB,
+  FOREIGN KEY (sid) REFERENCES senses(id)
+);
+
+-- Categories table (for noun and verb categories)
+CREATE TABLE IF NOT EXISTS categories (
+ name TEXT PRIMARY KEY
+);
+
+-- Word Categories junction table
+-- `category` holds a categories.name value, so it is declared TEXT to match that key.
+CREATE TABLE IF NOT EXISTS word_categories (
+  word_id INTEGER NOT NULL,
+  category TEXT NOT NULL,
+  PRIMARY KEY (word_id, category),
+  FOREIGN KEY (word_id) REFERENCES expressions(id),
+  FOREIGN KEY (category) REFERENCES categories(name)
+);
+CREATE INDEX IF NOT EXISTS idx_word_categories_category_id ON word_categories(category);
+
diff --git a/src/lib/lang/utils.ts b/src/lib/lang/utils.ts
new file mode 100644
index 0000000..b6c5bae
--- /dev/null
+++ b/src/lib/lang/utils.ts
@@ -0,0 +1,28 @@
+// Tone letters and tone-modifier characters in the listed Unicode ranges.
+const TONE_LETTER_REGEX =
+  /[\u{02E5}-\u{02EE}\u{A700}-\u{A71F}\u{A789}-\u{A78A}]/u;
+// Two or more digits directly after a word character or any Unicode letter
+// (so IPA letters such as "ŋ" count), followed by whitespace or end of input.
+const TONE_NUMBERS_REGEX = /[\w\p{L}](\d{2,})(?:\s|$)/u;
+
+/**
+ * Reports whether `text` carries tone marking, either as tone letters or as
+ * a numeric tone contour of at least two digits (e.g. "ma55").
+ *
+ * @param text transcription to inspect
+ * @returns true when a tone letter or a numeric tone contour is found
+ */
+export function isTonal(text: string): boolean {
+  return TONE_LETTER_REGEX.test(text) || TONE_NUMBERS_REGEX.test(text);
+}
+
+/** Reports whether `text` contains the IPA length mark "ː". */
+export function isLongVowel(text: string): boolean {
+  const lengthMarkAt = text.indexOf("ː");
+  return lengthMarkAt !== -1;
+}
+
+// In this order!
+// Maps a tone-letter sequence to a one-letter label.
+// NOTE(review): M/L/H/F/R presumably stand for mid/low/high/falling/rising — confirm.
+export const thaiTones = {
+ "˧": "M",
+ "˨˩": "L",
+ "˦˥": "H",
+ "˥˩": "F",
+ "˩˩˦": "R",
+};
+
+// Maps a tone-letter sequence to a tone number (1-4).
+export const mandarinTones = {
+ "˥˥": 1,
+ "˧˥": 2,
+ "˨˩˦": 3,
+ "˥˩": 4,
+};
diff --git a/src/lib/types/cards.ts b/src/lib/types/cards.ts
index cef02d2..1a62a44 100644
--- a/src/lib/types/cards.ts
+++ b/src/lib/types/cards.ts
@@ -210,3 +210,16 @@ export type ExpressionSearchParams = {
frequency?: { num: number; above: boolean };
type?: ExpressionType;
};
+
+/** Result type of a syllable deconstruction: the input string plus one token per segment. */
+export type SyllableRes = { input: string; result: SyllableToken[] };
+/** A segment paired with the syllable position it was assigned. */
+export type SyllableToken = [IPACharacter, SyllablePart];
+export type IPACharacter = string; // one char mostly
+/**
+ * Single-character codes for syllable positions.
+ * NOTE(review): the meaning of each code is taken from the member names only — confirm
+ * against the service that produces them.
+ */
+export enum SyllablePart {
+ INITIAL = "#",
+ OTHER_ONSET = "C",
+ VOWEL = "V",
+ OTHER_VOWEL = "v",
+ FINAL_VOWEL = ">",
+ OTHER_OFFSET = "c",
+ CODA = "$",
+}
diff --git a/src/pages.gen.ts b/src/pages.gen.ts
index 7d23584..b4a4bd7 100644
--- a/src/pages.gen.ts
+++ b/src/pages.gen.ts
@@ -18,6 +18,8 @@ import type { getConfig as Db_getConfig } from './pages/db';
// prettier-ignore
import type { getConfig as Form_getConfig } from './pages/form';
// prettier-ignore
+import type { getConfig as Tones_getConfig } from './pages/tones';
+// prettier-ignore
import type { getConfig as Picker_getConfig } from './pages/picker';
// prettier-ignore
import type { getConfig as About_getConfig } from './pages/about';
@@ -38,6 +40,7 @@ type Page =
| { path: '/test/trigger-modal-button'; render: 'dynamic' }
| { path: '/test'; render: 'dynamic' }
| ({ path: '/form' } & GetConfigResponse<typeof Form_getConfig>)
+| ({ path: '/tones' } & GetConfigResponse<typeof Tones_getConfig>)
| ({ path: '/picker' } & GetConfigResponse<typeof Picker_getConfig>)
| ({ path: '/about' } & GetConfigResponse<typeof About_getConfig>)
| { path: '/logintest/Form'; render: 'dynamic' }
diff --git a/src/pages/api/nlp.ts b/src/pages/api/nlp.ts
index 27c330d..0e5eacb 100644
--- a/src/pages/api/nlp.ts
+++ b/src/pages/api/nlp.ts
@@ -30,41 +30,3 @@ export const POST = async (request: Request): Promise<Response> => {
return Response.json({ message: "Failure" }, { status: 500 });
}
};
-
-type AnalyzeRes = {
- word: string;
- syllables: string[];
- ipa: string;
- pos: string;
-};
-
-export async function thaiData(word: string): Promise<AnalyzeRes[]> {
- const [head, tail] = await Promise.all([
- analyzeTHWord(word),
- segmentateThai(word),
- ]);
- return [head, ...tail];
-}
-
-export async function analyzeTHWord(word: string): Promise<AnalyzeRes> {
- const opts = {
- method: "POST",
- headers: { "Content-type": "application/json" },
- body: JSON.stringify({ word }),
- };
- const r1 = await fetch("http://localhost:8001" + "/analyze", opts);
- // const r2 = await fetch(`http://192.168.1.110:8000/analyze`, opts);
- const jj = await r1.json();
- return jj;
-}
-export async function segmentateThai(sentence: string): Promise<AnalyzeRes[]> {
- const opts = {
- method: "POST",
- headers: { "Content-type": "application/json" },
- body: JSON.stringify({ word: sentence }),
- };
- // const r1 = await fetch(`http://localhost:8000/segmentate`, opts);
- const r2 = await fetch("http://localhost:8001" + `/segmentate`, opts);
- const jj = await r2.json();
- return jj;
-}
diff --git a/src/pages/lesson/[slug].tsx b/src/pages/lesson/[slug].tsx
index 9078958..991859b 100644
--- a/src/pages/lesson/[slug].tsx
+++ b/src/pages/lesson/[slug].tsx
@@ -21,24 +21,23 @@ const flags: Record<string, string> = {
};
export default async function HomePage(props: PageProps<"/lesson/[slug]">) {
- const hctx: any = getHonoContext();
- console.log({ hctx });
- const ctx = getContext();
- console.log(ctx.req.headers, "heders");
- hctx.set("lol", "lmao");
- const cokis = useCookies();
- const coki = cokis.getCookie("sorlang");
- console.log({ coki });
- console.log({ props });
- // const { user } = getContextData() as any;
- // console.log({ user });
+ // const hctx: any = getHonoContext();
+ // console.log({ hctx });
+ // const ctx = getContext();
+ // console.log(ctx.req.headers, "heders");
+ // hctx.set("lol", "lmao");
+ // const cokis = useCookies();
+ // const coki = cokis.getCookie("sorlang");
+ // console.log({ coki });
+ // console.log({ props });
+ // // const { user } = getContextData() as any;
+ // // console.log({ user });
const user = { id: 2 };
const data = await getData(Number(props.slug), user.id);
if ("error" in data) return <p>Error</p>;
- // console.log({ data });
const cardComponents = data.ok.cards.map((card) => ({
id: card.id,
- front: <CardFront data={card} />,
+ front: <CardFront data={card} needFetch={false} />,
back: <CardBack data={card} />,
}));
diff --git a/src/pages/tones.tsx b/src/pages/tones.tsx
new file mode 100644
index 0000000..1a1e908
--- /dev/null
+++ b/src/pages/tones.tsx
@@ -0,0 +1,62 @@
+import { Suspense } from 'react';
+import { fetchWordsByToneAndSyllables } from '@/actions/tones';
+import ToneSelectorClient from '@/components/tones/ToneSelectorClient';
+import { Skeleton } from '@/components/ui/skeleton'; // For Suspense fallback
+
+export const getConfig = async () => {
+ return {
+ render: 'static', // Or 'dynamic' for SSR on every request. NOTE(review): with 'static' the page's data is presumably resolved once at build time — confirm that is intended
+ } as const; // keep `render` as the literal 'static' type instead of widening it to `string`
+};
+
+// Async server component: loads the starting word, then hands it to the client-side selector
+async function InitialWordLoader() {
+ // Request a 1-syllable word; the [null] tone list means "any tone" for that syllable
+ const firstWord = await fetchWordsByToneAndSyllables(1, [null]);
+ return <ToneSelectorClient initialWord={firstWord} />;
+}
+
+// Suspense fallback for the tones page: two bordered panels of Skeleton placeholders, no data or state
+function TonePageSkeleton() {
+ return (
+ <div className="container mx-auto p-4 max-w-2xl">
+ <div className="mb-6 p-6 border rounded-lg shadow">
+ <Skeleton className="h-8 w-1/2 mb-4" />
+ <Skeleton className="h-6 w-3/4 mb-6" />
+
+ <div className="space-y-6">
+ <div>
+ <Skeleton className="h-6 w-1/4 mb-2" />
+ <Skeleton className="h-10 w-full md:w-1/2" />
+ </div>
+ <div>
+ <Skeleton className="h-6 w-1/4 mb-2" />
+ <Skeleton className="h-10 w-full md:w-1/2" />
+ </div>
+ </div>
+ <Skeleton className="h-10 w-full md:w-1/4 mt-6" />
+ </div>
+ <div className="p-6 border rounded-lg shadow">
+ <Skeleton className="h-8 w-1/3 mx-auto mb-4" />
+ <Skeleton className="h-24 w-3/4 mx-auto mb-4" />
+ <Skeleton className="h-6 w-1/2 mx-auto" />
+ </div>
+ </div>
+ );
+}
+
+
+export default function TonesPage() { // Page entry: shows TonePageSkeleton until the async InitialWordLoader resolves
+ return (
+ <div className="py-8">
+ <Suspense fallback={<TonePageSkeleton />}>
+ <InitialWordLoader />
+ </Suspense>
+ </div>
+ );
+}
+
+export const metadata = { // NOTE(review): Next.js-style export — confirm the router actually consumes it; otherwise render <title>/<meta> in the page JSX
+ title: 'Thai Tone Explorer',
+ description: 'Explore Thai words by syllable count and tones.',
+};
diff --git a/src/zoom/ServerSyllable.tsx b/src/zoom/ServerSyllable.tsx
new file mode 100644
index 0000000..907b956
--- /dev/null
+++ b/src/zoom/ServerSyllable.tsx
@@ -0,0 +1,84 @@
+// This is a Server Component
+import React, { Suspense } from "react";
+import db from "@/lib/db";
+import {
+ Card,
+ CardHeader,
+ CardDescription,
+ CardContent,
+ CardFooter,
+ CardTitle,
+} from "@/components/ui/card";
+import { NLP } from "sortug-ai";
+import { Volume2, Link as LinkIcon } from "lucide-react";
+import { isTonal } from "@/lib/lang/utils";
+import { CardResponse, SyllableToken } from "@/lib/types/cards";
+import { deconstructSyllable } from "@/lib/calls/nlp";
+
+export default async function (props: { data: CardResponse }) { // Card face: the spelling plus the output of deconstructSyllable for it
+ const { expression } = props.data;
+ const { result } = await deconstructSyllable(expression.spelling); // NOTE(review): awaited before any JSX is returned, and Deconstructed is synchronous, so the Suspense fallback below presumably never shows — confirm
+
+ return (
+ <div className="absolute w-full h-full bg-white dark:bg-slate-800 rounded-xl backface-hidden flex flex-col justify-center gap-8 items-center p-6">
+ <p className="text-5xl cursor-pointer hover:text-blue-700 font-semibold text-slate-800 dark:text-slate-100 text-center">
+ {expression.spelling}
+ </p>
+ <Suspense fallback={<IpaDisplay ipaEntries={expression.ipa} />}>
+ <Deconstructed syl={result} />
+ </Suspense>
+ </div>
+ );
+}
+
+function Deconstructed({ syl }: { syl: SyllableToken[] }) { // Placeholder: emits one empty <span> per syllable token
+ return (
+ <div>
+ {syl.map((_tok, idx) => ( // index key: the list is rendered once and never reordered
+ <span key={idx}></span>
+ ))}
+ </div>
+ );
+}
+
+// Renders every IPA transcription (with its optional tags) followed by a speaker button
+const IpaDisplay = ({
+ ipaEntries,
+}: {
+ ipaEntries: Array<{ ipa: string; tags?: string[] }>;
+}) => {
+ if (!ipaEntries?.length) return null; // missing or empty list: render nothing
+ return (
+ <div className="flex items-center space-x-2 flex-wrap">
+ {ipaEntries.map(({ ipa, tags }, position) => {
+ const tagList = tags || [];
+ return (
+ <span key={position} className="text-lg text-blue-600 font-serif">
+ {ipa}{" "}
+ {tagList.length > 0 && (
+ <span className="text-xs text-gray-500">({tagList.join(", ")})</span>
+ )}
+ </span>
+ );
+ })}
+ <button
+ className="p-1 text-blue-500 hover:text-blue-700 transition-colors"
+ title="Pronounce"
+ // onClick={() => {
+ // /* Pronunciation logic would be client-side or a server roundtrip for audio file. */ alert(
+ // "Pronunciation feature not implemented for server component.",
+ // );
+ // }}
+ >
+ <Volume2 size={20} />
+ </button>
+ </div>
+ );
+};
+
+function Tones({ text, lang }: { text: string; lang: string }) { // Stub; props typed inline because `WordProps` is neither declared nor imported in this file
+ return <div></div>; // placeholder output: an empty container
+}
+function NotTones({ text, lang }: { text: string; lang: string }) { // Stub; props typed inline because `WordProps` is neither declared nor imported in this file
+ return <div></div>; // placeholder output: an empty container
+}
diff --git a/src/zoom/ServerWord.tsx b/src/zoom/ServerWord.tsx
index 75b631d..712efb6 100644
--- a/src/zoom/ServerWord.tsx
+++ b/src/zoom/ServerWord.tsx
@@ -38,7 +38,7 @@ export default async function Wordd({
word: string;
lang: string;
}) {
- const data = db.fetchWordBySpelling(word, "en");
+ const data = db.fetchWordBySpelling(word, lang);
if (!data) return <p>oh...</p>;
console.log(data.senses[0]);
diff --git a/src/zoom/logic/types.ts b/src/zoom/logic/types.ts
index 1342bc7..48c505e 100644
--- a/src/zoom/logic/types.ts
+++ b/src/zoom/logic/types.ts
@@ -52,10 +52,18 @@ export type WordData = {
type: ExpressionType;
syllables: number;
lang: string;
- prosody: Prosody;
+ prosody: Prosody; // This will be SyllableProsody[]
senses: Sense[];
};
-export type Prosody = { stressedSyllable: number; rhyme: string };
+
+export type SyllableProsody = {
+ tone: number | null; // Tone for the syllable; null presumably means no tone is recorded — TODO confirm
+ ipa?: string; // IPA for the syllable (optional)
+ // Add other syllable-specific prosodic features if needed
+};
+
+export type Prosody = SyllableProsody[]; // list of per-syllable entries; presumably one per syllable in word order — TODO confirm
+
export type ExpressionType = "word" | "expression" | "syllable";
export type Sense = {
etymology: string;