import Database from "bun:sqlite";
import { readWiktionaryDump } from "../services/wiki";
import { getStressedSyllable, getSyllableCount } from "../utils";
import useful from "@/lib/useful_thai.json";
import db from ".";
import pdb from "./prosodydb";
import { findLemma } from "../calls/nlp";

// Matches any single character that is not an ASCII letter or underscore:
// `\W` covers whitespace, punctuation and every non-ASCII character (Thai
// script included), and `\d` adds the ASCII digits.
const SYMBOL_REGEX = /[\W\d]/;

/**
 * Reports whether `pos` is one of the part-of-speech tags on the allow-list
 * below. The comparison is exact and case-sensitive.
 *
 * NOTE(review): the tags look like Penn Treebank function-word tags
 * (conjunctions, determiners, prepositions, pronouns, ...) — confirm against
 * the dataset this was written for.
 *
 * @param pos Part-of-speech tag to check.
 * @returns `true` when the tag is on the allow-list, `false` otherwise.
 */
function goodPos(pos: string): boolean {
  switch (pos) {
    case "CC":
    case "DT":
    case "EX":
    case "IN":
    case "LS":
    case "MD":
    case "PDT":
    case "POS":
    case "PRP":
    case "PRP$":
    case "RP":
    case "TO":
    case "WDT":
    case "WP":
    case "WP$":
      return true;
    default:
      return false;
  }
}
// function englishKaggle() {
//   handleFile("../datasets/words_pos.csv", (line, idx) => {
//     const [_, spelling, pos] = line.split(",");
//     if (!goodPos(pos)) return;
//     const rowid = addWord(db, spelling, "", "en-us", "word", null);
//     const category = poss[pos] || "unknown;";
//     addCat(db, rowid, category);
//   });
// }
// async function englishIPA() {
//   handleFile("ipa/en-us/ipadict.txt", (line, idx) => {
//     const [spelling, ipa] = line.split(/\s+/);
//     if (!spelling || !ipa) return;
//     const hasSymbols = spelling.match(SYMBOL_REGEX);
//     if (hasSymbols) return;
//     const split = spelling.split(" ");
//     const type = split.length > 1 ? "expression" : "word";
//     const subtype = null;
//     addWord(db, spelling, ipa, "en-us", type, subtype);
//   });
// }

/**
 * Feeds the English unigram frequency CSV to `handleFile` and records, for
 * every line, the first comma-separated column (the spelling) together with
 * the line index supplied by `handleFile`. The CSV's own frequency column is
 * ignored; the line index is what gets stored.
 *
 * NOTE(review): `handleFile` is not imported in the visible part of this file.
 */
async function englishFreq() {
  const csvPath = "/home/y/code/prosody/hanchu/datasets/unigram_freq.csv";
  handleFile(csvPath, (line, idx) => {
    const spelling = line.split(",")[0];
    db.addFrequency(spelling, idx);
  });
}
/**
 * Runs `handleFile` over the six Thai `*yin_freq.csv` files and stores, for
 * each line, the first column (spelling) with the fifth column converted to a
 * number (frequency).
 *
 * NOTE(review): `handleFile` is not imported in the visible part of this file.
 */
async function thaiFreq() {
  const files = [
    "/home/y/code/prosody/resources/langdata/thai/data/1yin_freq.csv",
    "/home/y/code/prosody/resources/langdata/thai/data/2yin_freq.csv",
    "/home/y/code/prosody/resources/langdata/thai/data/3yin_freq.csv",
    "/home/y/code/prosody/resources/langdata/thai/data/4yin_freq.csv",
    "/home/y/code/prosody/resources/langdata/thai/data/5yin_freq.csv",
    "/home/y/code/prosody/resources/langdata/thai/data/6yin_freq.csv",
  ];
  files.forEach((path) => {
    handleFile(path, (line, _idx) => {
      // Column layout used here: 0 = spelling, 4 = frequency.
      const columns = line.split(",");
      db.addFrequency(columns[0], Number(columns[4]));
    });
  });
}

// Numeric tone ids (1-5) keyed by the single-letter tone code read from the
// third column of 1yin_freq.csv in thaiSyllables().
// NOTE(review): presumably M/L/F/H/R = mid/low/falling/high/rising — confirm.
const thaiTones: Record<string, number> = {
  M: 1,
  L: 2,
  F: 3,
  H: 4,
  R: 5,
};
// The same numeric tone ids keyed by the tone-letter sequences that
// thaiSyllables() searches for inside an IPA string.
const thaiTones2: Record<string, number> = {
  "˧": 1,
  "˨˩": 2,
  "˥˩": 3,
  "˦˥": 4,
  "˩˩˦": 5,
};
/**
 * Upserts single-syllable Thai entries from two CSV files via `handleFile`.
 *
 * - `1yin_freq.csv`: columns are spelling, IPA, tone code, length marker and
 *   frequency. The tone id comes from `thaiTones`, and the syllable is long
 *   when the length column equals "長".
 * - `1yinjie.csv`: columns are spelling and IPA. Length and tone are derived
 *   from the IPA itself: "ː" marks a long syllable, and the tone is looked up
 *   in `thaiTones2`, staying 0 when no tone mark is found.
 *
 * Every row is stored as a one-syllable entry of type "syllable" with
 * confidence 10.
 *
 * NOTE(review): `handleFile` is not imported in the visible part of this file.
 */
async function thaiSyllables() {
  // Serialises one IPA string in the JSON array form stored in the `ipa` field.
  const asIpaJson = (ipa: unknown) =>
    JSON.stringify([{ ipa, tags: ["sortug"] }]);

  handleFile(
    "/home/y/code/prosody/prosody/langdata/thai/data/1yin_freq.csv",
    (line, _idx) => {
      const [spelling, IPA, toneS, length, frequency] = line.split(",");
      const prosody = JSON.stringify({
        isLong: length === "長",
        tone: thaiTones[toneS],
        lang: "th",
      });
      db.upsertWord({
        spelling,
        lang: "th",
        ipa: asIpaJson(IPA),
        prosody,
        syllables: 1,
        type: "syllable",
        frequency: Number(frequency),
        confidence: 10,
      });
    },
  );

  handleFile(
    "/home/y/code/prosody/prosody/langdata/thai/data/1yinjie.csv",
    (line, _idx) => {
      const [spelling, IPA] = line.split(",");
      const isLong = IPA.includes("ː");
      // The last matching mark in table order wins; 0 means "no mark found".
      let tone = 0;
      for (const [mark, toneId] of Object.entries(thaiTones2)) {
        if (IPA.includes(mark)) tone = toneId;
      }
      const prosody = JSON.stringify({ isLong, tone, lang: "th" });
      db.upsertWord({
        spelling,
        lang: "th",
        ipa: asIpaJson(IPA),
        prosody,
        syllables: 1,
        type: "syllable",
        confidence: 10,
      });
    },
  );
}

//   // Save the last incomplete line to process in the next iteration
// }
// TODO no conjunctions or adpositions in Wordnet!!
// function englishWordnet() {
//   // LEFT JOIN lexes_pronunciations ukpr ON ukpr.wordid = words.wordid AND uspr.variety = 'GB'
//   // LEFT JOIN pronunciations ukp ON ukp.pronunciationid = ukpr.pronunciationid
//   const queryString = `
//     WITH ranked_ipa  AS (
//       SELECT
//         lp.wordid,
//         pr.pronunciation,
//         lp.variety,
//         ROW_NUMBER() OVER (
//           PARTITION BY lp.wordid
//           ORDER BY
//               CASE
//                   WHEN lp.variety = 'US' THEN 1
//                   WHEN lp.variety IS NULL THEN 2
//                   WHEN lp.variety IS 'GB' THEN 3
//                   ELSE 4
//               END
//         ) AS rank
//         FROM lexes_pronunciations lp
//         JOIN pronunciations pr ON pr.pronunciationid = lp.pronunciationid
//     )
//     SELECT words.wordid, word, rp.pronunciation as ipa, domainname
//     FROM words
//     LEFT JOIN ranked_ipa rp ON rp.wordid = words.wordid AND rp.rank = 1
//     LEFT JOIN senses ON senses.wordid = words.wordid
//     LEFT JOIN synsets ON synsets.synsetid = senses.synsetid
//     LEFT JOIN domains ON domains.domainid = synsets.domainid
//     GROUP BY words.wordid
//   `;
//   const query = wndb.query(queryString);
//   const res: Array<{
//     word: string;
//     ipa: string;
//     domainname: string;
//   }> = query.all() as any;
//   console.log("res", res.length);
//   for (const r of res) {
//     console.log(r, "r");
//     // if (r.word === 'abrasive') throw new Error('stop right here');
//     const ok = filterWord(r.word);
//     if (!ok) continue;
//     const split = r.word.split(" ");
//     const type = split.length > 1 ? "expression" : "word";
//     const subtype = null;
//     const wordid = addWord(db, r.word, r.ipa, "en-us", type, subtype);
//     const category = domains[r.domainname] || "unknown;";
//     addCat(db, wordid, category);
//   }
// }
/**
 * Accepts a string only when nothing in it matches `SYMBOL_REGEX`.
 *
 * @param s Candidate spelling.
 * @returns `true` when `s` has no match for `SYMBOL_REGEX`, `false` otherwise.
 */
function filterWord(s: string) {
  return s.match(SYMBOL_REGEX) === null;
}

// function checkWordNet(word: string) {
//   const query = wndb.query(`SELECT * FROM words WHERE word = $word`);
//   const res = query.get({ $word: word });
//   return !!res;
// }

// function englishCards() {
//   const lesson_id = addLesson(db, "First Lesson, some easy stuff");
//   const texts = [
//     "I",
//     "friend",
//     "my friend",
//     "you",
//     "your friend",
//     "my friends' friend",
//     "you are my friend",
//     "I am your friend",
//     "your friend is my friend",
//     "my friend is your friend",
//     "he is my friend",
//     "this is mine",
//     "this is yours",
//     "this is my friends'",
//     "no",
//     "you are not my friend",
//     "this is not yours",
//     "your friend is not my friend",
//     "that is mine",
//     "this is mine, that is yours",
//     "he is not your friend",
//     "no, I am not",
//     "that is not me",
//     "that is not mine, that is my friends'",
//   ];
//   for (const text of texts) {
//     addCard(db, lesson_id, text);
//   }
// }
// englishWordnet();
// englishFreq();
// englishCards();
// englishKaggle();

/**
 * Imports the Wiktionary dump, one JSON document per line, into `db`.
 *
 * For every line the language and part of speech are registered. Only entries
 * whose `lang_code` is "en" or "th" are stored in full:
 *  - sounds carrying an `ipa` key are collected, and the first non-empty one
 *    drives the syllable count and the stressed-syllable lookup;
 *  - the last `rhymes` value seen is stored in the prosody blob;
 *  - the word is inserted, or looked up when the insert changed no row, and a
 *    sense row is attached to it, using the zero-based line index as its id.
 *
 * Failures are logged and the loop moves on to the next line: "error parsing"
 * for anything thrown while handling a line, "error inserting" for database
 * writes.
 */
async function fillFromDump() {
  await db.init();
  let count = 0;
  // Nothing adds to this set any more; it is only printed at the end.
  const fields = new Set<string>();

  for await (const line of readWiktionaryDump()) {
    try {
      count += 1;
      console.log({ count });

      const j = JSON.parse(line);
      db.addLanguage(j.lang_code, j.lang);
      db.addCat(j.pos);
      if (j.lang_code !== "en" && j.lang_code !== "th") continue;

      console.log("saving", j.word);

      // Gather pronunciation data from the entry's `sounds` list.
      const ipa: any[] = [];
      let ipaExample = "";
      let rhyme = "";
      for (const sound of j.sounds || []) {
        if ("ipa" in sound) {
          ipa.push(sound);
          if (!ipaExample) ipaExample = sound.ipa;
        }
        if ("rhymes" in sound) rhyme = sound.rhymes;
      }

      // A spelling containing a space is treated as a multi-word expression.
      const wordCount = j.word.trim().split(" ").length;
      const type: any = wordCount === 1 ? "word" : "expression";
      const syllables = ipaExample ? getSyllableCount(ipaExample) : 0;
      console.log({ ipaExample, syllables });

      const prosody: any = {};
      if (ipaExample) {
        const stress = getStressedSyllable(ipaExample);
        if ("ok" in stress) prosody.stressedSyllable = stress.ok;
      }
      if (rhyme) prosody.rhyme = rhyme;

      try {
        const inserted = db.addWord({
          spelling: j.word,
          lang: j.lang_code,
          ipa: JSON.stringify(ipa),
          prosody: JSON.stringify(prosody),
          syllables,
          type,
        });
        // When the insert changed no row, reuse the id of the existing entry.
        let parent_id: number | bigint = inserted.lastInsertRowid;
        if (inserted.changes !== 1) {
          const existing: any = db.fetchExpressionBySpelling(
            j.word,
            j.lang_code,
          );
          parent_id = existing.id;
        }
        db.addSense({
          id: count - 1,
          parent_id,
          spelling: j.word,
          etymology: j.etymology_text || "",
          pos: j.pos,
          ipa: JSON.stringify(ipa),
          prosody: JSON.stringify(prosody),
          senses: JSON.stringify(j.senses),
          forms: JSON.stringify(j.forms || []),
          related: JSON.stringify({
            derived: j.derived,
            antonyms: j.antonyms,
            synonyms: j.synonyms,
            related: j.related,
          }),
        });
      } catch (e) {
        console.log("error inserting", e);
      }
    } catch (e) {
      console.log("error parsing", e);
    }
  }
  console.log("fields", fields);
}

/**
 * Creates one "Syllable" card, in the hard-coded lesson 5, for every Thai
 * expression fetched with confidence "10" and syllables "1".
 */
function addDecks() {
  // The lesson itself used to be created here; its id is now hard-coded below.
  // const lesson_id = db.addLesson({
  //   name: "Thai Syllables",
  //   description: "All the syllables in the Thai language ordered by frequency",
  //   lang: "th",
  // });
  const syllables: any[] = db.fetchExpressionRaw({
    confidence: "10",
    syllables: "1",
    lang: "th",
  });
  syllables.forEach((expression) => {
    db.addCard({
      lesson_id: 5,
      eid: expression.id,
      text: "Syllable",
      mnote: "from Sortug Development",
    });
  });
}
/**
 * Replaces the raw frequency of every expression of `lang` with its dense
 * rank: the highest distinct frequency becomes 1, the next one 2, and so on.
 * Rows that share a frequency share a rank. Rows whose frequency is falsy
 * (missing or 0) are left untouched.
 *
 * @param lang Language code passed to `db.fetchExpressionRaw`.
 * @throws Error if a row's frequency has no rank (should be unreachable).
 */
function adjustFrequency(lang: string) {
  const all: any[] = db.fetchExpressionRaw({ lang });

  const frequencies: Set<number> = new Set();
  for (const row of all) {
    if (row.frequency) frequencies.add(row.frequency);
  }
  const freqArray = Array.from(frequencies).sort((a, b) => b - a);
  console.log(freqArray);

  // Precompute frequency -> rank once instead of scanning the sorted array
  // with indexOf for every row.
  const rankByFrequency = new Map<number, number>();
  freqArray.forEach((frequency, position) => {
    rankByFrequency.set(frequency, position + 1);
  });

  for (const row of all) {
    if (!row.frequency) continue;
    const rank = rankByFrequency.get(row.frequency);
    if (rank === undefined) throw new Error("wtf" + row.frequency);
    db.updateWord(row.id, { frequency: rank });
  }
}

// -- INSERT INTO lessons(name, description) values('8000 Super Useful Expressions', 'David Martins Facebook list of coloquial Thai expressions');
// --  INSERT INTO lang_lessons(lesson_id, lang) VALUES(1, 'th');
// -- INSERT INTO lessons(name, description) values('Thai Syllables', 'All syllables in Thai phonology');
// -- INSERT INTO lang_lessons(lesson_id, lang) VALUES(2, 'th');
/**
 * Imports the "useful Thai" expression list (`useful_thai.json`, keyed by
 * level) into the database.
 *
 * Each level becomes a category. For each expression, inside one transaction:
 * the word is inserted with its "/"-separated IPA variants, tagged with its
 * level and (if present) its register, given one sense whose glosses are the
 * English meaning plus the optional note, and added as a card to lesson 1.
 * A failing expression is logged and skipped. Its card number is still used
 * up, because the counter is incremented before the transaction runs.
 *
 * NOTE(review): the return value of `db.addWord` is used directly as an id
 * here, while `fillFromDump` treats it as a result object with `changes` and
 * `lastInsertRowid` — confirm which of the two matches the current db API.
 */
function addThaiUseful() {
  let idx = 0;
  for (const level in useful) {
    db.addCat(level);
    const exps = (useful as any)[level];
    console.log(level, exps.length);
    for (const exp of exps) {
      // `exp` is untyped, so the callback parameter must be annotated
      // explicitly to compile under `noImplicitAny`.
      const split = exp.ipa.split("/").filter((s: string) => s.trim());
      const ipa = split.map((ip: any) => ({ ipa: ip, tags: [] }));
      try {
        idx++;
        const tx = db.db.transaction(() => {
          const wid = db.addWord({
            spelling: exp.spelling,
            lang: "th",
            type: "expression",
            ipa: JSON.stringify(ipa),
          });
          console.log({ wid });
          db.addWCat(wid, level);
          if (exp.register) {
            db.addCat(exp.register);
            db.addWCat(wid, exp.register);
          }
          const glosses = [exp.english];
          if (exp.note) glosses.push(exp.note);
          db.addSense({
            parent_id: wid,
            spelling: exp.spelling,
            senses: JSON.stringify([{ glosses }]),
          });
          db.addCard({
            text: `Super Useful ${idx}`,
            eid: wid as any,
            lesson_id: 1,
          });
        });
        tx();
      } catch (e) {
        console.log({ exp });
        console.error(`${e}`);
        // break;
      }
    }
  }
}

/**
 * Adds a "Syllable" card to the hard-coded lesson 2 for every Thai expression
 * whose type is 'syllable'.
 */
function addThaiSyllablesLesson() {
  const selectThaiSyllables = db.db.query(
    "SELECT id FROM expressions e WHERE e.type = 'syllable' and e.lang = 'th'",
  );
  const rows = selectThaiSyllables.all() as any[];
  rows.forEach((row) => {
    db.addCard({ text: "Syllable", eid: row.id, lesson_id: 2 });
  });
}
// function fixIpa() {
//   const res = db.db.query(`SELECT id, ipa FROM expressions`).all() as any[];
//   for (const row of res) {
//     try {
//       const jon = JSON.parse(row.ipa);
//     } catch (_) {
//       const clean: string = row.ipa.replace("...", "").trim();
//       db.db.query(`UPDATE expressions SET ipa = ? WHERE `).run(JSON.stringify(ipa));
//     }
//   }
// }
/**
 * Debug helper: prints the `ipa` and `syllables` columns of up to the first
 * ten rows of `expressions`. Despite the name, it does not modify anything.
 */
function fixSyllables() {
  const res = db.db.query(`SELECT ipa, syllables FROM expressions;`).all();
  // slice() stops at the end of the result set, so a table with fewer than
  // ten rows no longer prints `{ row: undefined }` entries.
  for (const row of res.slice(0, 10)) {
    console.log({ row });
  }
}
// fixSyllables();
// addThaiUseful();
// addThaiSyllablesLesson();

// adjustFrequency("th");

// addDecks();
// fillFromDump();
// thaiSyllables();
// thaiFreq();
//
//
// Absolute path of the `sorsyl` executable in the local Nix store. In the
// visible part of this file it is referenced only from commented-out code.
const SORSYL_PATH =
  "/nix/store/lkyi9rrjbr619w3ivpkm89ccf93bvxx5-sorsyl-0.1.0/bin/sorsyl";

/**
 * Debug helper: opens the per-language Wiktionary SQLite file for `lang` and
 * prints the parsed `data` JSON of at most the first ten rows of its `langs`
 * table. The database handle is closed before returning, also on error.
 *
 * The commented-out block at the end is the earlier, full import pipeline
 * (dump -> handleWord / handleIdiom) and is kept for reference.
 *
 * @param lang Language code used to build the database file name.
 */
async function redump(lang: string) {
  // Upper bound on the number of rows parsed and printed.
  const previewRows = 10;
  const langdb = new Database(
    `/home/y/code/prosody/resources/wiktionary/${lang}.db`,
  );
  try {
    let count = 0;
    const langrows: any = langdb.query("SELECT data FROM langs");
    for (const langrow of langrows) {
      const j = JSON.parse(langrow.data);
      console.log({ j });
      // The counter was previously never advanced, so the limit never applied.
      count++;
      if (count >= previewRows) break;
    }
  } finally {
    langdb.close();
  }
  // await pdb.init();

  // // const soundTypes = new Set<string>();
  // // [
  // //   "tags", "ipa", "audio", "ogg_url", "mp3_url", "enpr", "rhymes", "homophone", "note", "zh-pron", "other",
  // //   "text", "hangeul", "topics", "form", "audio-ipa"
  // // ]
  // const langs = ["en", "th", "zh", "es", "ja", "vn"];

  // for await (const line of readWiktionaryDump()) {
  //   try {
  //     count++;
  //     console.log({ count });
  //     // if (count > 50) break;
  //     const j = JSON.parse(line);
  //     // console.log(Object.keys(j), j.word);
  //     // add language to db
  //     pdb.addLanguage(j.lang_code, j.lang);
  //     if (!langs.includes(j.lang_code)) continue;
  //     // handleEtim(j);
  //     // handleDerived(j);
  //     // handleSenses(j.pos, j.senses);
  //     // //
  //     const isWord = j.word.trim().split(" ").length === 1;
  //     if (isWord) await handleWord(j);
  //     else await handleIdiom(j);
  //   } catch (e) {
  //     // console.log("error parsing", e);
  //     // break;
  //   }
  // }
}

/**
 * Stores a single-word dump entry in the prosody database, but only when the
 * word is its own lemma.
 *
 * The word is analysed with `findLemma`. Entries that do not come back as
 * exactly one segment are reported and skipped, and entries whose analysed
 * input differs from the segment's root lemma are skipped silently. For the
 * rest, the word is inserted and every sound with an `ipa` key is handed to
 * `handleIpa`, together with the first `rhymes` value found in the entry.
 *
 * The `handleIpa` calls are awaited one after another, so the returned promise
 * resolves only once all pronunciations have been processed and at most one
 * syllabifier request is in flight per word.
 *
 * @param j Parsed dump entry; `word`, `lang_code` and optionally `sounds` are read.
 */
async function handleWord(j: any) {
  const analyzed = await findLemma(j.word, j.lang_code);
  if (analyzed.segments.length !== 1)
    return console.error("wtf bruh", analyzed);
  const seg = analyzed.segments[0];
  if (!seg) return console.log("no seg", analyzed);
  // Inflected forms are skipped on purpose; only lemmas are stored.
  const isLemma = analyzed.input === seg.root.lemma;
  if (!isLemma) return;
  const wordId = pdb.addWord(j.word, j.lang_code);

  const sounds = j.sounds || [];
  const hwikiRhyme = sounds.find((s: any) => "rhymes" in s);
  const wikiRhyme = hwikiRhyme ? hwikiRhyme.rhymes : null;
  for (const snd of sounds) {
    if ("ipa" in snd) await handleIpa(wordId, j, snd, wikiRhyme);
  }
}
/**
 * Syllabifies one IPA pronunciation of a word through the local HTTP service
 * at `http://localhost:8104/syls` and stores the result in the prosody
 * database: the pronunciation itself, the word-level rhyme (if any), and one
 * row per syllable.
 *
 * The word rhyme is built from the syllable list: everything before the first
 * stressed syllable is ignored, the first stressed syllable contributes its
 * `rhyme`, and every following syllable contributes its full `ipa`.
 *
 * Failures (network error, non-2xx response, unexpected payload, database
 * error) are logged with the word for context and never rethrown, so one bad
 * pronunciation does not abort the import.
 *
 * @param wordId Row id of the word the pronunciation belongs to.
 * @param j Parsed dump entry; `word` and `lang_code` are read.
 * @param snd Sound object from the entry; `ipa` and optional `tags` are read.
 * @param wikiRhyme Rhyme string taken from the dump entry, or `null`.
 */
async function handleIpa(
  wordId: number | bigint,
  j: any,
  snd: any,
  wikiRhyme: string | null,
) {
  // JSON.stringify(undefined) yields undefined, which is normalised to null.
  const tags = JSON.stringify(snd.tags) || null;
  const ipa = snd.ipa;
  try {
    const hres = await fetch("http://localhost:8104/syls", {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({ string: j.word, lang: j.lang_code, ipa }),
    });
    if (!hres.ok) {
      throw new Error(`syllabifier responded with HTTP ${hres.status}`);
    }
    const hjon: any = await hres.json();
    pdb.addPronunciation(
      "word",
      wordId,
      hjon.clean_ipa,
      hjon.syls.length,
      tags,
      null,
    );
    const wordRhyme = hjon.syls.reduce((acc: string, item: SorSyl) => {
      if (!item.stressed && !acc) return acc;
      if (item.stressed && !acc) return `${acc}${item.rhyme}`;
      else return `${acc}${item.ipa}`;
    }, "");
    if (wordRhyme) pdb.addWordRhyme(wordId, wordRhyme, j.lang_code, wikiRhyme);
    // `idx` is the 0-based position of the syllable within the word. It was
    // previously an undefined identifier, so the first addSyllable call threw
    // and no syllable was ever stored.
    // NOTE(review): confirm addSyllable expects a 0-based index.
    for (const [idx, syl] of hjon.syls.entries()) {
      // TODO ideally syllables would have spelling not IPA... harsh tho
      pdb.addSyllable(
        wordId,
        idx,
        j.lang_code,
        syl.ipa,
        syl.long,
        "",
        syl.onset || null,
        syl.medial || null,
        syl.nucleus,
        syl.coda || null,
        syl.rhyme,
        syl.tone || null,
        null,
      );
    }
  } catch (e) {
    // Best effort: report and carry on with the next pronunciation.
    console.error(
      "handleIpa failed",
      { word: j.word, lang: j.lang_code, ipa },
      e,
    );
  }
}
/**
 * Logs a multi-word dump entry and stores it as an idiom in the prosody
 * database. No pronunciation is recorded for idioms yet.
 *
 * @param j Parsed dump entry; `word` and `lang_code` are read.
 */
async function handleIdiom(j: any) {
  const { word, lang_code } = j;
  console.log(word, "idiom");
  pdb.addIdiom(word, lang_code);
  // TODO IPA of idioms...?
}
/**
 * Debug helper: prints the etymology text, the etymology templates and the
 * head templates of a dump entry. Nothing is stored.
 *
 * @param j Parsed dump entry.
 */
async function handleEtim(j: any) {
  const { etymology_text, etymology_templates, head_templates } = j;

  console.log(etymology_text, "etym");

  // Sample element of `etymology_templates`:
  // {
  //    name: "inh",
  //    args: {
  //      "1": "en",
  //      "2": "ang",
  //      "3": "frēo",
  //      "4": "",
  //      "5": "free",
  //    },
  //    expansion: "Old English frēo (“free”)",
  //  },
  console.log(etymology_templates, "etym");

  // Sample element of `head_templates`:
  // {
  //    name: "en-verb",
  //    args: {},
  //    expansion: "free (third-person singular simple present frees, present participle freeing, simple past and past participle freed)",
  //  }
  console.log(head_templates, "head");
}
/**
 * Debug helper: prints the word-relation lists of a dump entry, one labelled
 * line per list. Nothing is stored.
 *
 * Shapes noted while exploring the dump:
 *  - forms: {form: string; tags: string[]}
 *  - derived: {word: string}
 *  - related, antonyms, synonyms: {word: string, source?: string;}
 *
 * @param j Parsed dump entry.
 */
async function handleDerived(j: any) {
  const labelled: Array<[string, unknown]> = [
    ["forms", j.forms],
    ["derived", j.derived],
    ["related", j.related],
    ["ant", j.antonyms],
    ["hypo", j.hyponyms],
    ["syno", j.synonyms],
    ["desc", j.descendants],
  ];
  for (const [label, value] of labelled) {
    console.log(label, value);
  }
}
/**
 * Debug helper: prints the `examples` and `info_templates` of the first sense
 * of a dump entry. Does nothing when the entry has no senses, so an empty or
 * missing list no longer rejects the returned promise.
 *
 * Shapes noted while exploring the dump:
 *  - examples: {text: string; ref: string; type: "quote"}
 *  - each sense also carries `glosses[]` and `tags[]`, which are not used yet.
 *
 * @param pos Part of speech of the entry (currently unused).
 * @param senses Sense objects of the entry.
 */
async function handleSenses(pos: string, senses: any[]) {
  const first = senses?.[0];
  if (!first) return;
  console.log("ex", first.examples);
  console.log("info", first.info_templates);
}

// Script entry point: runs the Thai redump as soon as this module is loaded.
// The returned promise is neither awaited nor given a rejection handler here.
redump("th");

/**
 * Scratch test for the upsert-with-RETURNING pattern on the `onsets` table of
 * the prosody database: inserts ("lll", "en"), or rewrites `text` with the
 * same value on conflict, and logs the row returned by `RETURNING rowid`.
 */
async function newtest() {
  // Earlier experiment: plain insert into `syllables`.
  // const query = pdb.db.query(
  //   `INSERT INTO syllables(text, lang, long, onset, medial, nucleus, coda, rhyme, tone, notes) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
  // );
  // const res = query.run(
  //   "lol",
  //   "en",
  //   true,
  //   "l",
  //   "j",
  //   "o",
  //   "q",
  //   "joq",
  //   null,
  //   null,
  // );
  // const sylId = res.lastInsertRowid;
  const upsertOnset = pdb.db.query(
    `INSERT INTO onsets(text, lang) VALUES(?, ?)
         ON CONFLICT(text, lang) DO UPDATE SET
         text = excluded.text
         RETURNING rowid
      `,
  );
  const res1 = upsertOnset.get("lll", "en");
  console.log({ res1 });
}
// newtest();
// TIL calling shell commands is terribly slow wtf
// Bun.$.env({ FOO: ipa });
// const res = await Bun.$`${SORSYL_PATH} $FOO`;
// const syllables = JSON.parse(res.stdout.toString());
// console.log(Date.now() - ts, "elapsed in py");
// ts = Date.now();