import Database from "bun:sqlite";
import {
  analyzeTHWord,
  deconstructSyllable,
  segmentateThai,
  type SorSyl,
  type ThaiNLPRes,
  sorSyl,
  getThaiFreq,
} from "../calls/nlp";
import pdb from "./prosodydb";
import { cleanIpa } from "../utils";
import { handleFile } from "./utils";
import { Tone } from "../types/phonetics";

/**
 * Imports every entry of a Wiktionary SQLite dump into the prosody database.
 *
 * Reads the `data` column of each row in the `langs` table of
 * `resources/wiktionary/<lang>.db`, parses it as JSON and routes the entry to
 * `handleWord` or `handleIdiom`. Entries are processed one at a time, in the
 * order the query yields them.
 *
 * @param lang File stem of the dump to open (e.g. `"th"`).
 */
async function readDump(lang: string) {
  await pdb.init();
  // NOTE(review): the language is registered as Thai regardless of `lang`;
  // the handlers below are Thai-specific as well.
  pdb.addLanguage("th", "thai");
  const langdb = new Database(
    `/home/y/code/prosody/resources/wiktionary/${lang}.db`,
  );
  try {
    // The statement is iterated directly so rows are consumed one by one.
    const langrows: any = langdb.query("SELECT data FROM langs");
    for (const langrow of langrows) {
      const j = JSON.parse(langrow.data);
      const word = j.word.trim();
      if (!word) continue;
      // An entry containing spaces is treated as an idiom, unless it carries
      // the repetition mark "ๆ", in which case it is still handled as a word.
      const isIdiom = !word.includes("ๆ") && word.split(" ").length > 1;
      if (isIdiom) await handleIdiom(word);
      else await handleWord(word, j);
    }
  } finally {
    // Release the dump's file handle even when an entry fails to import.
    langdb.close();
  }
}

/**
 * Stores a single-word Wiktionary entry together with its IPA pronunciations.
 *
 * Entries without any sound record carrying an `ipa` field are skipped.
 * Otherwise the word is inserted with its frequency, analysed once, and each
 * IPA-bearing sound record is handed to `handleIpa`.
 *
 * @param word Trimmed headword.
 * @param j    Parsed Wiktionary entry; only `j.sounds` is read here.
 */
async function handleWord(word: string, j: any) {
  // TODO add categories but add a tag to see what classifying scheme we're using
  const sounds = j.sounds || [];
  const hasIpa = sounds.some((s: any) => "ipa" in s);
  if (!hasIpa) return;
  const freq = await getThaiFreq(word);
  const wordId = pdb.addWord(word, "th", freq, null);
  const analyzed = await analyzeTHWord(word);
  for (const snd of sounds) {
    // Await each pronunciation so its syllables are fully written before the
    // next one starts and so failures surface to the caller instead of
    // becoming unhandled rejections.
    if ("ipa" in snd) await handleIpa(wordId, j, snd, analyzed);
  }
}
/**
 * Stores one pronunciation of a word and the syllables it is made of.
 *
 * The Wiktionary IPA is split on "." and must have as many parts as the
 * analysed pronounced syllables (`analyzed.realSyls`); otherwise nothing is
 * written. Each syllable is then passed to `handleSyllable`; a failure on one
 * syllable is logged and does not stop the remaining ones.
 *
 * @param wordId   Id of the word row the pronunciation belongs to.
 * @param j        Parsed Wiktionary entry, used only for error logging.
 * @param snd      Sound record; `snd.ipa` and `snd.tags` are read.
 * @param analyzed NLP analysis of the word.
 */
async function handleIpa(
  wordId: number | bigint,
  j: any,
  snd: any,
  analyzed: ThaiNLPRes,
) {
  const tags = JSON.stringify(snd.tags) || null;
  const wikiIpa = cleanIpa(snd.ipa);
  const nlpIpa = cleanIpa(analyzed.ipa);
  const wikiIpaSplit = wikiIpa.split(".");

  const pronouncedSyls = analyzed.realSyls;
  // Without a one-to-one match between IPA parts and pronounced syllables
  // the per-syllable pairing below would be meaningless.
  if (pronouncedSyls.length !== wikiIpaSplit.length) return;

  const writtenSyls = analyzed.syllables;
  // The Wiktionary IPA wins; the NLP-derived IPA is only a fallback.
  pdb.addPronunciation(
    wordId,
    wikiIpa || nlpIpa,
    writtenSyls.length,
    tags,
    null,
  );

  // When written and pronounced syllable counts disagree, the written
  // segmentation cannot be aligned, so the pronounced form is stored instead.
  const badSyls = writtenSyls.length !== pronouncedSyls.length;

  for (const [i, rawSyl] of pronouncedSyls.entries()) {
    // Drop the first U+0E3A (THAI CHARACTER PHINTHU) from the pronounced form.
    const pronounced = rawSyl.replace(/\u{E3A}/u, "");
    const written = writtenSyls[i] || "";
    const syllable = badSyls ? pronounced : written;
    const sylIpa = wikiIpaSplit[i]!;
    // TODO insert both??
    const notes = pronounced === written ? null : `Pronounced ${pronounced}`;
    if (pronounced !== syllable) {
      console.log("diff");
      console.log(pronounced);
      console.log(written);
    }
    try {
      await handleSyllable(syllable, sylIpa, wordId, i, notes);
    } catch (e) {
      console.error("syl error", j.word, j.sounds);
      console.error({ analyzed, ipa: sylIpa, wikiIpaSplit });
      console.error(e);
    }
  }
}
/** Maps the IPA tone letters of a Thai syllable to the tone's English name. */
const thaiTones: Record<string, string> = {
  "˧": "mid",
  "˨˩": "low",
  "˥˩": "falling",
  "˦˥": "high",
  "˩˩˦": "rising",
};
/**
 * Maps the IPA tone letters of a Thai syllable to a numeric contour.
 * NOTE(review): the numbers are not a digit-for-digit reading of the letters
 * (e.g. "˥˩" maps to 41) — presumably a conventional notation; confirm.
 */
const thaiToneNums: Record<string, number> = {
  "˧": 33,
  "˨˩": 21,
  "˥˩": 41,
  "˦˥": 45,
  "˩˩˦": 214,
};
/**
 * Builds the `Tone` record for a syllable from its IPA tone letters.
 *
 * @param ipa      Tone letters, expected to be a key of `thaiTones`.
 * @param spelling Spelling of the syllable, used only for diagnostics.
 * @returns The tone letters together with their name and numeric contour.
 * @throws Error when `ipa` is not one of the known tone-letter sequences.
 */
function parseTone(ipa: string, spelling: string): Tone {
  const name = thaiTones[ipa];
  const numbers = thaiToneNums[ipa];
  // A missing key yields `undefined` rather than throwing, so the lookup has
  // to be validated explicitly. The `typeof` checks also reject inherited
  // object properties such as "constructor".
  if (typeof name !== "string" || typeof numbers !== "number") {
    console.error("wrong tones!!", { s: spelling, ipa });
    throw new Error(`Unknown Thai tone "${ipa}" in syllable "${spelling}"`);
  }
  return { letters: ipa, name, numbers };
}

/**
 * Analyses one syllable and stores it for the given word.
 *
 * The spelling and IPA are sent to `sorSyl`, which must return exactly one
 * syllable. Its onset, medial, nucleus, coda and rhyme are stored with the
 * same string for both spelling and IPA, together with the parsed tone.
 *
 * @param spelling Spelling of the syllable.
 * @param ipa      IPA of the syllable.
 * @param wordId   Id of the word the syllable belongs to.
 * @param idx      Zero-based position in the word; stored one-based.
 * @param notes    Optional note stored with the syllable.
 * @throws Error when `sorSyl` does not return exactly one syllable, or when
 *   `parseTone` rejects the syllable's tone.
 */
async function handleSyllable(
  spelling: string,
  ipa: string,
  wordId: number | bigint,
  idx: number,
  notes: string | null,
) {
  const sorsyl = await sorSyl(spelling, "th", ipa);
  // Codas previously flagged as unusual while debugging this import:
  // "s", "l", "f", "a̯s", "js", "t͡ɕʰ", "ks", "ns", "a̯l", "a̯f", "mk".
  if (sorsyl.syls.length !== 1) throw new Error("wtf sorsyl!");
  const syl = sorsyl.syls[0]!;
  const tone = parseTone(syl.tone, spelling);
  try {
    pdb.addSyllable(
      wordId,
      idx + 1,
      "th",
      syl.ipa,
      syl.long,
      spelling,
      { spelling: syl.onset, ipa: syl.onset },
      { spelling: syl.medial, ipa: syl.medial },
      { spelling: syl.nucleus, ipa: syl.nucleus },
      { spelling: syl.coda, ipa: syl.coda },
      { spelling: syl.rhyme, ipa: syl.rhyme },
      tone,
      notes,
    );
  } catch (e) {
    // Best effort: a failed insert must not abort the import, but it should
    // leave a trace of which syllable was lost and why.
    console.error("addSyllable failed", { spelling, ipa, wordId, idx });
    console.error(e);
  }
}
/**
 * Registers a multi-word entry as a Thai idiom.
 *
 * @param idiom Full text of the idiom.
 */
async function handleIdiom(idiom: string): Promise<void> {
  // TODO later set idiom_words once all words are populated
  pdb.addIdiom(idiom, "th");
}
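// Examples of entries containing the repetition mark "ๆ"; readDump passes
// these to handleWord even though they contain a space:
// ช้า ๆ
// งก ๆ
// หงก ๆ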

/**
 * Builds a spelling → frequency-rank map from the Thai frequency CSV files.
 *
 * The first column of each line is read as the spelling and the fifth as the
 * frequency. Lines whose frequency is missing or not a finite number (blank
 * lines, a header row) are skipped. Distinct frequencies are sorted
 * numerically in ascending order and ranked from 1; spellings that share a
 * frequency share a rank.
 * NOTE(review): rank 1 is the smallest frequency value, matching the original
 * ascending order — confirm this is the intended direction.
 *
 * @returns Map from spelling to its 1-based rank.
 */
async function getFrequency() {
  const files = [
    "/home/y/code/prosody/resources/langdata/thai/data/1yin_freq.csv",
    "/home/y/code/prosody/resources/langdata/thai/data/2yin_freq.csv",
    "/home/y/code/prosody/resources/langdata/thai/data/3yin_freq.csv",
    "/home/y/code/prosody/resources/langdata/thai/data/4yin_freq.csv",
    "/home/y/code/prosody/resources/langdata/thai/data/5yin_freq.csv",
    "/home/y/code/prosody/resources/langdata/thai/data/6yin_freq.csv",
  ];
  // Group spellings by frequency so that ties do not overwrite each other.
  const byFrequency = new Map<number, string[]>();
  for (const file of files) {
    await handleFile(file, (line) => {
      const fields = line.split(",");
      const spelling = fields[0];
      const rawFrequency = fields[4];
      if (!spelling || rawFrequency === undefined) return;
      if (rawFrequency.trim() === "") return;
      const frequency = Number(rawFrequency);
      if (!Number.isFinite(frequency)) return;
      const bucket = byFrequency.get(frequency);
      if (bucket) bucket.push(spelling);
      else byFrequency.set(frequency, [spelling]);
    });
  }
  // Explicit comparator: the default sort compares numbers as strings.
  const frequencies = Array.from(byFrequency.keys()).sort((a, b) => a - b);
  const orderedMap = new Map<string, number>();
  frequencies.forEach((frequency, i) => {
    for (const spelling of byFrequency.get(frequency) ?? []) {
      orderedMap.set(spelling, i + 1);
    }
  });
  return orderedMap;
}

// Entry point: import the Thai dump. Report a failure explicitly and signal
// it through the exit code instead of leaving the promise floating.
readDump("th").catch((err: unknown) => {
  console.error("readDump failed", err);
  process.exitCode = 1;
});