This commit is contained in:
polwex 2024-10-22 15:45:52 +07:00
parent c9406d40a8
commit 9bbb3b3cfa
5 changed files with 908 additions and 492 deletions

192
schema.sql Normal file
View File

@ -0,0 +1,192 @@
-- Connection/database settings, applied before any schema objects are created.
-- Enable foreign key support (SQLite does not enforce foreign keys unless this
-- is switched on, and the setting applies to the current connection only)
PRAGMA foreign_keys = ON;
-- Use write-ahead logging as the journal mode
PRAGMA journal_mode = WAL;
-- A negative value is a size in KiB rather than a page count: about 2 MB of page cache
PRAGMA cache_size = -2000;
-- Upper limit, in bytes (about 30 GB), on memory-mapped I/O for the database file
PRAGMA mmap_size = 30000000000;
-- Expressions table: one row per dictionary entry. The seeding code stores both
-- single words and multi-word expressions here, told apart by `type`.
CREATE TABLE expressions (
    id          INTEGER PRIMARY KEY AUTOINCREMENT,
    spelling    TEXT    NOT NULL,
    ipa         TEXT    NOT NULL,   -- pronunciation
    language_id INTEGER NOT NULL,   -- owning language, see languages(id)
    frequency   INTEGER,            -- optional; NULL until a frequency is recorded
    type        TEXT    NOT NULL,
    subtype     TEXT,
    FOREIGN KEY (language_id) REFERENCES languages (id)
);

-- Lookup indexes. The idx_words_* names predate the table's current name.
CREATE INDEX idx_words_spelling    ON expressions (spelling);
CREATE INDEX idx_words_type        ON expressions (type);
CREATE INDEX idx_words_subtype     ON expressions (subtype);
CREATE INDEX idx_words_language_id ON expressions (language_id);
-- Self-referencing junction table: both columns point at expressions(id).
-- Presumably links a multi-word expression to the words it contains; confirm
-- against the code that populates it.
CREATE TABLE expression_words (
    expression_id INTEGER NOT NULL,
    word_id       INTEGER NOT NULL,
    PRIMARY KEY (expression_id, word_id),
    FOREIGN KEY (word_id)       REFERENCES expressions (id),
    FOREIGN KEY (expression_id) REFERENCES expressions (id)
);
-- Languages table: one row per language tag (the seed rows use tags such as 'en-us').
CREATE TABLE languages (
    id   INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT    NOT NULL
);
-- Parts of Speech table: the top-level grammatical classes that categories hang off.
CREATE TABLE parts_of_speech (
    id   INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT    NOT NULL
);
-- Categories table: finer-grained labels within a part of speech
-- (for example noun and verb categories).
CREATE TABLE categories (
    id                INTEGER PRIMARY KEY AUTOINCREMENT,
    name              TEXT    NOT NULL,
    part_of_speech_id INTEGER NOT NULL,   -- parent class, see parts_of_speech(id)
    FOREIGN KEY (part_of_speech_id) REFERENCES parts_of_speech (id)
);

-- Support lookups by category name and by parent part of speech.
CREATE INDEX idx_categories_name              ON categories (name);
CREATE INDEX idx_categories_part_of_speech_id ON categories (part_of_speech_id);
-- Word Categories junction table: assigns categories to rows of `expressions`.
-- The word_id foreign key must target expressions(id). This schema has no
-- `words` table, and with foreign_keys = ON SQLite rejects every INSERT into a
-- child table whose parent table does not exist.
CREATE TABLE word_categories (
    word_id     INTEGER NOT NULL,
    category_id INTEGER NOT NULL,
    PRIMARY KEY (word_id, category_id),
    FOREIGN KEY (word_id)     REFERENCES expressions (id),
    FOREIGN KEY (category_id) REFERENCES categories (id)
);

-- The primary key already covers lookups by word_id; this index covers the
-- reverse direction (all words in a category).
CREATE INDEX idx_word_categories_category_id ON word_categories (category_id);
-- Seed data.
-- Row order matters: on a fresh database the AUTOINCREMENT ids are handed out
-- in insertion order starting at 1, and the category rows below refer to parts
-- of speech by those literal ids.
INSERT INTO languages (name) VALUES
    ('en-us'),
    ('th'),
    ('zh-cn'),
    ('zh-hk'),
    ('ja-jp');

INSERT INTO parts_of_speech (name) VALUES
    ('noun'),          -- id 1
    ('verb'),          -- id 2
    ('adjective'),     -- id 3
    ('adverb'),        -- id 4
    ('pronoun'),       -- id 5
    ('adposition'),    -- id 6
    ('conjunction'),   -- id 7
    ('unknown');       -- id 8

INSERT INTO categories (name, part_of_speech_id) VALUES
    -- nouns
    ('countable', 1),
    ('uncountable', 1),
    ('animate', 1),
    ('inanimate', 1),
    ('spatial', 1),
    ('temporal', 1),
    ('abstract', 1),
    ('noun', 1),
    -- verbs
    ('transitive', 2),
    ('intransitive', 2),
    ('action', 2),
    ('mental', 2),
    -- NOTE(review): 'auxiliar' looks like a misspelling of 'auxiliary'; left
    -- unchanged because other code may look the category up by this exact name.
    ('auxiliar', 2),
    ('verb', 2),
    -- adjectives
    ('adjective', 3),
    -- adverbs
    ('adverb', 4),
    -- pronouns
    ('nominative', 5),
    ('accusative', 5),
    ('genitive', 5),
    -- adpositions
    ('preposition', 6),
    ('postposition', 6),
    ('circumposition', 6),
    -- conjunctions
    ('conjunction', 7),
    -- fallback for anything unclassified
    ('unknown', 8);
-- -- Example word insertion
-- INSERT INTO expressions (spelling, ipa, language_id, type) VALUES ('book', 'bʊk', 1, 'word');
-- -- Categorize 'book' as a countable, inanimate noun
-- INSERT INTO word_categories (word_id, category_id)
-- SELECT
-- (SELECT id FROM expressions WHERE spelling = 'book'),
-- id
-- FROM categories
-- WHERE name IN ('countable', 'inanimate');
-- -- Example verb insertion
-- INSERT INTO expressions (spelling, ipa, language_id, type) VALUES ('think', 'θɪŋk', 1, 'word');
-- -- Categorize 'think' as an intransitive, mental verb
-- INSERT INTO word_categories (word_id, category_id)
-- SELECT
-- (SELECT id FROM expressions WHERE spelling = 'think'),
-- id
-- FROM categories
-- WHERE name IN ('intransitive', 'mental');
-- Progress tracking
-- Users: the accounts that attempts are recorded against.
CREATE TABLE users (
    id    INTEGER PRIMARY KEY AUTOINCREMENT,
    name  TEXT    NOT NULL,
    creds TEXT    NOT NULL   -- credential data; its format is not defined by this schema
);
-- Lessons: a lesson is a piece of text; its cards are attached through cards_lessons.
CREATE TABLE lessons (
    id   INTEGER PRIMARY KEY AUTOINCREMENT,
    text TEXT    NOT NULL
);
-- Cards: the individual study items, each with its text and an optional note.
CREATE TABLE cards (
    id   INTEGER PRIMARY KEY AUTOINCREMENT,
    text TEXT    NOT NULL,
    note TEXT
);
-- Junction table between cards and expressions (many-to-many).
CREATE TABLE cards_expressions (
    expression_id INTEGER NOT NULL,
    card_id       INTEGER NOT NULL,
    PRIMARY KEY (card_id, expression_id),
    FOREIGN KEY (card_id)       REFERENCES cards (id),
    FOREIGN KEY (expression_id) REFERENCES expressions (id)
);
-- Junction table between cards and lessons.
-- NOTE(review): lesson_id is nullable although it is part of the primary key.
-- SQLite allows NULLs in such a key column, so rows with a NULL lesson_id are
-- accepted; confirm whether cards without a lesson are intended.
CREATE TABLE cards_lessons (
    lesson_id INTEGER,
    card_id   INTEGER NOT NULL,
    PRIMARY KEY (card_id, lesson_id),
    FOREIGN KEY (card_id)   REFERENCES cards (id),
    FOREIGN KEY (lesson_id) REFERENCES lessons (id)
);
-- Attempts: one row per try a user makes at a card.
CREATE TABLE attempts (
    id        INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id   INTEGER NOT NULL,
    timestamp INTEGER NOT NULL,   -- time of the attempt; unit is not fixed by this schema
    card_id   INTEGER NOT NULL,
    good      INTEGER NOT NULL,   -- 0 or 1
    -- The comma after the first constraint was missing. SQLite's parser accepts
    -- that, but it is not valid standard SQL and trips other tools.
    FOREIGN KEY (user_id) REFERENCES users (id),
    FOREIGN KEY (card_id) REFERENCES cards (id)
);

-- Index to query attempts on a specific card
CREATE INDEX idx_attempts_card ON attempts (card_id);
-- Index to query attempts for a specific user
CREATE INDEX idx_attempts_user ON attempts (user_id);
-- (Optional) Index to query attempts by user and card together (useful if you
-- often filter on both)
CREATE INDEX idx_attempts_user_resource ON attempts (user_id, card_id);
-- Index led by expression_id, for finding the cards linked to a given expression
-- (the table's primary key leads with card_id and covers the other direction).
CREATE INDEX idx_cards_resources ON cards_expressions (expression_id, card_id);
-- CREATE TRIGGER IF NOT EXISTS populate_cards_resources
-- AFTER INSERT ON cards
-- FOR EACH ROW
-- BEGIN
-- -- Insert matching expressions into cards_expressions
-- INSERT INTO cards_expressions(card_id, expression_id)
-- SELECT NEW.id, w.id
-- FROM expressions w
-- WHERE NEW.text LIKE '%' || w.spelling || '%';
-- END;

View File

@ -1,5 +1,5 @@
import { Database } from 'bun:sqlite'; import { Database } from "bun:sqlite";
import { wordFactorial } from './utils'; import { wordFactorial } from "./utils";
// read // read
@ -42,13 +42,29 @@ export function fetchLessons(db: Database, count: number, page: number) {
const offset = (p - 1) * count; const offset = (p - 1) * count;
const queryString = ` const queryString = `
SELECT SELECT
l.id, l.text as ltext, cards.text as ctext, cards.note as cnote, cards.id as cid l.id, l.text as ltext, cards.text as ctext, cards.note as cnote, cards.id as cid,
FROM cards_lessons cl e.spelling, e.ipa, e.frequency, e.id as eid,
JOIN lessons l ON l.id = cl.lesson_id GROUP_CONCAT(cg.name, ',') AS category
FROM expressions e
JOIN cards_expressions ce ON e.id = ce.expression_id
JOIN cards ON cards.id = cl.card_id JOIN cards ON cards.id = cl.card_id
JOIN cards_lessons cl ON cl.card_id = cards.id
JOIN lessons l ON l.id = cl.lesson_id
JOIN expressions e ON e.id = ce.expression_id
JOIN word_categories wc ON wc.word_id = e.id
JOIN categories cg ON cg.id = wc.category_id
LIMIT $count LIMIT $count
OFFSET $offset OFFSET $offset
`; `;
// const queryString = `
// SELECT
// l.id, l.text as ltext, cards.text as ctext, cards.note as cnote, cards.id as cid
// FROM cards_lessons cl
// JOIN lessons l ON l.id = cl.lesson_id
// JOIN cards ON cards.id = cl.card_id
// LIMIT $count
// OFFSET $offset
// `;
const query = db.query(queryString); const query = db.query(queryString);
const res = query.all({ count, offset }); const res = query.all({ count, offset });
return res; return res;
@ -58,12 +74,19 @@ export function fetchLessons(db: Database, count: number, page: number) {
export function fetchLesson(db: Database, lesson: number) { export function fetchLesson(db: Database, lesson: number) {
const queryString = ` const queryString = `
SELECT SELECT
l.id, l.text, cards.text, cards.note, cards.id as cid l.id, l.text, cards.text, cards.note, cards.id as cid,
spelling, ipa, frequency, e.id as eid,
GROUP_CONCAT(cg.name, ',') AS category
FROM cards_lessons cl FROM cards_lessons cl
JOIN lessons l ON l.id = cl.lesson_id JOIN lessons l ON l.id = cl.lesson_id
JOIN cards ON cards.id = lc.card_id JOIN cards ON cards.id = lc.card_id
JOIN cards_expressions ce ON cards.id = ce.card_id
JOIN expressions e ON e.id = ce.expression_id
JOIN word_categories wc ON wc.word_id = e.id
JOIN categories cg ON cg.id = wc.category_id
WHERE l.id = $lesson WHERE l.id = $lesson
`; `;
console.log(queryString);
const query = db.query(queryString); const query = db.query(queryString);
return query.all({ lesson }); return query.all({ lesson });
} }
@ -102,7 +125,7 @@ export function addCard(
INTO cards(text, note) INTO cards(text, note)
VALUES($text, $note) VALUES($text, $note)
`); `);
const params = { text, note }; const params = { text, note, spel: text };
const res = query.run(params); const res = query.run(params);
const cid = res.lastInsertRowid; const cid = res.lastInsertRowid;
const wquery = db.query(` const wquery = db.query(`
@ -117,10 +140,10 @@ export function addCard(
for (const pair of pairs) wquery.run(pair); for (const pair of pairs) wquery.run(pair);
}); });
const words = text const words = text
.replace(/[^\w\s]/g, '') .replace(/[^\w\s]/g, "")
.replace(/\s+/g, ' ') .replace(/\s+/g, " ")
.trim() .trim()
.split(' '); .split(" ");
const combinations = wordFactorial(words); const combinations = wordFactorial(words);
const richWords = combinations.map((spelling) => { const richWords = combinations.map((spelling) => {
return { spelling, cid }; return { spelling, cid };
@ -173,58 +196,58 @@ export function addCat(db: Database, wordId: number | bigint, domain: string) {
WHERE name = $category WHERE name = $category
)) ))
`; `;
const category = domains[domain] || 'unknown'; const category = domains[domain] || "unknown";
const query = db.query(queryString); const query = db.query(queryString);
const res = query.run({ wordId, category }); const res = query.run({ wordId, category });
return res.lastInsertRowid; return res.lastInsertRowid;
} }
const domains: Record<string, string> = { const domains: Record<string, string> = {
'adj.all': 'adjective', "adj.all": "adjective",
'adj.pert': 'adjective', "adj.pert": "adjective",
'adj.ppl': 'adjective', "adj.ppl": "adjective",
'adv.all': 'adverb', "adv.all": "adverb",
'noun.Tops': '', "noun.Tops": "",
'noun.act': 'abstract', "noun.act": "abstract",
'noun.animal': 'animate', "noun.animal": "animate",
'noun.artifact': 'inanimate', "noun.artifact": "inanimate",
'noun.attribute': 'abstract', "noun.attribute": "abstract",
'noun.body': 'inanimate', "noun.body": "inanimate",
'noun.cognition': 'abstract', "noun.cognition": "abstract",
'noun.communication': 'abstract', "noun.communication": "abstract",
'noun.event': 'abstract', "noun.event": "abstract",
'noun.feeling': 'abstract', "noun.feeling": "abstract",
'noun.food': 'inanimate', "noun.food": "inanimate",
'noun.group': 'noun', "noun.group": "noun",
'noun.location': 'spatial', "noun.location": "spatial",
'noun.motive': 'abstract', "noun.motive": "abstract",
'noun.object': 'inanimate', "noun.object": "inanimate",
'noun.person': 'animate', "noun.person": "animate",
'noun.phenomenon': 'abstract', "noun.phenomenon": "abstract",
'noun.plant': 'noun', "noun.plant": "noun",
'noun.possession': 'noun', "noun.possession": "noun",
'noun.process': 'noun', "noun.process": "noun",
'noun.quantity': 'uncountable', "noun.quantity": "uncountable",
'noun.relation': 'noun', "noun.relation": "noun",
'noun.shape': 'noun', "noun.shape": "noun",
'noun.state': 'noun', "noun.state": "noun",
'noun.substance': 'uncountable', "noun.substance": "uncountable",
'noun.time': 'temporal', "noun.time": "temporal",
'verb.body': 'verb', "verb.body": "verb",
'verb.change': 'verb', "verb.change": "verb",
'verb.cognition': 'verb', "verb.cognition": "verb",
'verb.communication': 'verb', "verb.communication": "verb",
'verb.competition': 'verb', "verb.competition": "verb",
'verb.consumption': 'verb', "verb.consumption": "verb",
'verb.contact': 'verb', "verb.contact": "verb",
'verb.creation': 'verb', "verb.creation": "verb",
'verb.emotion': 'mental', "verb.emotion": "mental",
'verb.motion': 'verb', "verb.motion": "verb",
'verb.perception': 'mental', "verb.perception": "mental",
'verb.possession': 'verb', "verb.possession": "verb",
'verb.social': 'verb', "verb.social": "verb",
'verb.stative': 'verb', "verb.stative": "verb",
'verb.weather': 'verb', "verb.weather": "verb",
}; };
export function addFrequency( export function addFrequency(
db: Database, db: Database,
@ -238,5 +261,5 @@ export function addFrequency(
`; `;
const query = db.query(queryString); const query = db.query(queryString);
const res = query.run({ spelling, frequency }); const res = query.run({ spelling, frequency });
console.log(res, 'added frequency'); console.log(res, "added frequency");
} }

View File

@ -1,11 +1,10 @@
import { Database } from 'bun:sqlite'; import { Database } from "bun:sqlite";
import { addCard, addCat, addFrequency, addLesson, addWord } from './db'; import { addCard, addCat, addFrequency, addLesson, addWord } from "./db";
import Wordnet from 'en-wordnet';
// const db = new Database('../db/data.db'); // const db = new Database('../db/data.db');
const db = new Database('../db/data.db', { strict: true }); const db = new Database("../db/data.db", { strict: true });
const wndb = new Database('../datasets/en-wordnet/data.sqlite'); const wndb = new Database("../datasets/en-wordnet/data.sqlite");
db.exec('PRAGMA journal_mode = WAL;'); db.exec("PRAGMA journal_mode = WAL;");
const SYMBOL_REGEX = new RegExp(/[\W\d]/); const SYMBOL_REGEX = new RegExp(/[\W\d]/);
@ -33,22 +32,22 @@ const SYMBOL_REGEX = new RegExp(/[\W\d]/);
// } // }
async function englishFreq() { async function englishFreq() {
const file = Bun.file('../datasets/unigram_freq.csv'); const file = Bun.file("../datasets/unigram_freq.csv");
const s = file.stream(); const s = file.stream();
const reader = s.getReader(); const reader = s.getReader();
const decoder = new TextDecoder(); const decoder = new TextDecoder();
let leftover = ''; let leftover = "";
let lineCount = 0; let lineCount = 0;
while (true) { while (true) {
const { value, done } = await reader.read(); const { value, done } = await reader.read();
if (done) break; if (done) break;
const chunk = decoder.decode(value, { stream: true }); const chunk = decoder.decode(value, { stream: true });
const lines = (leftover + chunk).split('\n'); const lines = (leftover + chunk).split("\n");
// Process each line except the last (which might be incomplete) // Process each line except the last (which might be incomplete)
for (const line of lines.slice(0, -1)) { for (const line of lines.slice(0, -1)) {
lineCount++; lineCount++;
const [spelling, _frequency] = line.split(','); const [spelling, _frequency] = line.split(",");
addFrequency(db, spelling, lineCount); addFrequency(db, spelling, lineCount);
} }
@ -77,12 +76,12 @@ function englishIPA() {
domainname: string; domainname: string;
}> = query.all() as any; }> = query.all() as any;
for (const r of res) { for (const r of res) {
console.log('adding word', r); console.log("adding word", r);
// if (r.word === 'abrasive') throw new Error('stop right here'); // if (r.word === 'abrasive') throw new Error('stop right here');
const split = r.word.split(' '); const split = r.word.split(" ");
const type = split.length > 1 ? 'expression' : 'word'; const type = split.length > 1 ? "expression" : "word";
const subtype = null; const subtype = null;
const wordid = addWord(db, r.word, r.pronunciation, 'en-us', type, subtype); const wordid = addWord(db, r.word, r.pronunciation, "en-us", type, subtype);
addCat(db, wordid, r.domainname); addCat(db, wordid, r.domainname);
} }
} }
@ -107,37 +106,37 @@ function englishIPA() {
// } // }
function englishCards() { function englishCards() {
const lesson_id = addLesson(db, 'First Lesson, some easy stuff'); const lesson_id = addLesson(db, "First Lesson, some easy stuff");
const texts = [ const texts = [
'I', "I",
'friend', "friend",
'my friend', "my friend",
'you', "you",
'your friend', "your friend",
"my friends' friend", "my friends' friend",
'you are my friend', "you are my friend",
'I am your friend', "I am your friend",
'your friend is my friend', "your friend is my friend",
'my friend is your friend', "my friend is your friend",
'he is my friend', "he is my friend",
'this is mine', "this is mine",
'this is yours', "this is yours",
"this is my friends'", "this is my friends'",
'no', "no",
'you are not my friend', "you are not my friend",
'this is not yours', "this is not yours",
'your friend is not my friend', "your friend is not my friend",
'that is mine', "that is mine",
'this is mine, that is yours', "this is mine, that is yours",
'he is not your friend', "he is not your friend",
'no, I am not', "no, I am not",
'that is not me', "that is not me",
"that is not mine, that is my friends'", "that is not mine, that is my friends'",
]; ];
for (const text of texts) { for (const text of texts) {
addCard(db, lesson_id, text); addCard(db, lesson_id, text);
} }
} }
englishIPA(); // englishIPA();
englishFreq(); // englishFreq();
englishCards(); englishCards();

View File

@ -1,23 +1,23 @@
import { Database } from 'bun:sqlite'; import { Database } from "bun:sqlite";
import { import {
addUser, addUser,
fetchCard, fetchCard,
fetchLesson, fetchLesson,
fetchLessons, fetchLessons,
fetchResource, fetchResource,
} from './db'; } from "./db";
const db = new Database('../db/data.db', { strict: true }); const db = new Database("../db/data.db", { strict: true });
db.exec('PRAGMA journal_mode = WAL;'); db.exec("PRAGMA journal_mode = WAL;");
Bun.serve({ Bun.serve({
fetch(req) { fetch(req) {
const url = new URL(req.url); const url = new URL(req.url);
console.log(url.pathname, 'url'); console.log(url.pathname, "url");
const user = parseUser(req); const user = parseUser(req);
if (req.method === 'POST' && url.pathname === '/api') if (req.method === "POST" && url.pathname === "/api")
return handlePost(req, user, url); return handlePost(req, user, url);
if (req.method === 'GET' && url.pathname.startsWith('/api')) if (req.method === "GET" && url.pathname.startsWith("/api"))
return handleGet(req, user, url); return handleGet(req, user, url);
return serveStatic(url); return serveStatic(url);
}, },
@ -29,27 +29,27 @@ function parseUser(req: Request): number {
} }
async function serveStatic(url: URL) { async function serveStatic(url: URL) {
const filename = url.pathname === '/' ? '/index.html' : url.pathname; const filename = url.pathname === "/" ? "/index.html" : url.pathname;
const headers = { 'Content-type': 'text/html' }; const headers = { "Content-type": "text/html" };
const opts = { headers }; const opts = { headers };
try { try {
const file = await Bun.file(`../ui/${filename}`).bytes(); const file = await Bun.file(`../ui/${filename}`).bytes();
return new Response(file, opts); return new Response(file, opts);
} catch (_) { } catch (_) {
return new Response('404!'); return new Response("404!");
} }
} }
async function handleGet(_req: Request, user: number, url: URL) { async function handleGet(_req: Request, user: number, url: URL) {
if (url.pathname === '/api/resource') return handleGetExpresion(user, url); if (url.pathname === "/api/resource") return handleGetExpresion(user, url);
if (url.pathname === '/api/card') return handleGetCard(user, url); if (url.pathname === "/api/card") return handleGetCard(user, url);
if (url.pathname === '/api/lesson') return handleGetLesson(user, url); if (url.pathname === "/api/lesson") return handleGetLesson(user, url);
if (url.pathname === '/api/lessons') return handleGetLessons(user, url); if (url.pathname === "/api/lessons") return handleGetLessons(user, url);
else return new Response('huh'); else return new Response("huh");
} }
function handleGetExpresion(user: number, url: URL) { function handleGetExpresion(user: number, url: URL) {
const params = new URLSearchParams(url.search); const params = new URLSearchParams(url.search);
const expression = params.get('exp'); const expression = params.get("exp");
const data = fetchResource(db, expression!); const data = fetchResource(db, expression!);
return Response.json({ ok: data }); return Response.json({ ok: data });
} }
@ -70,20 +70,21 @@ type LessonsDBType = {
}; };
function handleGetLesson(user: number, url: URL) { function handleGetLesson(user: number, url: URL) {
const params = new URLSearchParams(url.search); const params = new URLSearchParams(url.search);
const lesson = params.get('lesson'); const lesson = params.get("lesson");
const data = fetchLesson(db, Number(lesson!)); const data = fetchLesson(db, Number(lesson!));
return Response.json({ ok: data }); return Response.json({ ok: data });
} }
function handleGetCard(user: number, url: URL) { function handleGetCard(user: number, url: URL) {
const params = new URLSearchParams(url.search); const params = new URLSearchParams(url.search);
const card = params.get('card'); const card = params.get("card");
const data = fetchCard(db, Number(card), user); const data = fetchCard(db, Number(card), user);
return Response.json({ ok: data }); return Response.json({ ok: data });
} }
function handleGetLessons(user: number, url: URL) { function handleGetLessons(user: number, url: URL) {
const params = new URLSearchParams(url.search); const params = new URLSearchParams(url.search);
const page = params.get('page') || '0'; const page = params.get("page") || "0";
const data: LessonsDBType[] = fetchLessons(db, 20, Number(page)) as any; const data: LessonsDBType[] = fetchLessons(db, 20, Number(page)) as any;
console.log(data, "fetchlessons");
const lessons = data.reduce((acc: LessonsType, item: LessonsDBType) => { const lessons = data.reduce((acc: LessonsType, item: LessonsDBType) => {
let cur = acc[item.id] || { id: item.id, text: item.ltext, cards: [] }; let cur = acc[item.id] || { id: item.id, text: item.ltext, cards: [] };
const cards = [ const cards = [
@ -93,18 +94,18 @@ function handleGetLessons(user: number, url: URL) {
const def = { ...cur, cards }; const def = { ...cur, cards };
return { ...acc, [item.id]: def }; return { ...acc, [item.id]: def };
}, {} as LessonsType); }, {} as LessonsType);
console.log(lessons, 'lesons'); console.log(lessons, "lesons");
return Response.json({ ok: lessons }); return Response.json({ ok: lessons });
} }
async function handlePost(req: Request, user: number, url: URL) { async function handlePost(req: Request, user: number, url: URL) {
const data = await req.json(); const data = await req.json();
if (url.pathname === '/api/user') return handlePostUser(data); if (url.pathname === "/api/user") return handlePostUser(data);
else return new Response('huh'); else return new Response("huh");
} }
// https://bun.sh/guides/http/server // https://bun.sh/guides/http/server
type PostUser = { name: string; creds: string }; type PostUser = { name: string; creds: string };
function handlePostUser(user: PostUser) { function handlePostUser(user: PostUser) {
addUser(db, user.name, user.creds); addUser(db, user.name, user.creds);
return new Response('ok'); return new Response("ok");
} }

949
ui/elm.js

File diff suppressed because it is too large Load Diff