import { SQL } from "bun";
import { SRSQueries } from "./srs";
// NEW API
//
// https://bun.com/docs/runtime/sql
// import { sql } from "bun";

// // Basic insert with direct values
// const [user] = await sql`
//   INSERT INTO users (name, email)
//   VALUES (${name}, ${email})
//   RETURNING *
// `;

// // Using object helper for cleaner syntax
// const userData = {
//   name: "Alice",
//   email: "alice@example.com",
// };

// const [newUser] = await sql`
//   INSERT INTO users ${sql(userData)}
//   RETURNING *
// `;
// // Expands to: INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com')

const DB_PATH = "/home/y/code/bun/sorlang/bulkdata/db.db";
import type { FullWordDataDB } from "@sortug/langlib";

export class Queries {
  db: SQL;
  ready = false;
  // srs: SRSQueries;
  constructor() {
    console.log("setting sorlang DB");
    const db = new SQL({
      adapter: "sqlite",
      filename: DB_PATH,
      create: false,
    });
    this.db = db;
    // this.srs = new SRSQueries(db);
  }
  async init() {
    try {
      await this.db`PRAGMA journal_mode = WAL`; // Enable Write-Ahead Logging for better performance
      await this.db`PRAGMA foreign_keys = ON`;
      await this.db`PRAGMA cache_size = -8000`; // Increase cache size to 8MB
      await this.db`PRAGMA mmap_size = 30000000000`;
      await this.db`PRAGMA temp_store = MEMORY`; // Store temp tables in memory
      await this.db`PRAGMA synchronous = NORMAL`; // Slightly less safe but faster
      this.ready = true;
    } catch (e) {
      console.error("error starting db", e);
    }
  }

  async fetchExpressionById(id: number) {
    const query = await this.db`
      SELECT * FROM expressions WHERE id = ${id}
      `;
    return query;
  }
  // TODO word_phonetics is MANY TO ONE vs expressions, so it's possible for word_phonetics to lack entries for syllables
  async fetchExpressionBySpelling(
    spelling: string,
    lang: string,
  ): Promise<FullWordDataDB | null> {
    const data = await this.db`
SELECT 
    json_object(
        'id', e.id,
        'spelling', e.spelling,
        'frequency', e.frequency,
        'phonetic', (
            SELECT json_object(
                'id', wp.id,
                'tone_sequence', wp.tone_sequence,
                'syl_seq', wp.syllable_sequence,
                'syllable_count', wp.syllable_count,
                'ipa', wp.ipa,
                'syllables', (
                    SELECT json_group_array(
                        json_object(
                            'stressed', CASE WHEN sw.stressed = 1 THEN json('true') ELSE json('false') END,
                            'long', CASE WHEN s.long = 1 THEN json('true') ELSE json('false') END,
                            'spelling', s.text,
                            'ipa', s.ipa,
                            'onset', (
                                SELECT json_object('ipa', o.ipa, 'spelling', o.text) 
                                FROM onsets o WHERE o.id = s.onset
                            ),
                            'nucleus', (
                                SELECT json_object('ipa', n.ipa, 'spelling', n.text) 
                                FROM nucleus n WHERE n.id = s.nucleus
                            ),
                            'medial', (
                                SELECT json_object('ipa', m.ipa, 'spelling', m.text) 
                                FROM medials m WHERE m.id = s.medial
                            ),
                            'coda', (
                                SELECT json_object('ipa', c.ipa, 'spelling', c.text) 
                                FROM codas c WHERE c.id = s.coda
                            ),
                            'rhyme', (
                                SELECT json_object('ipa', r.ipa, 'spelling', r.text) 
                                FROM rhymes r WHERE r.id = s.rhyme
                            ),
                            'tone', (
                                SELECT json_object('letters', t.ipa, 'numbers', t.nums, 'name', t.name) 
                                FROM tones t WHERE t.id = s.tone
                            )
                        )
                    )
                    FROM syllables_words sw
                    JOIN syllables s ON sw.syl_id = s.id
                    WHERE sw.word_id = wp.id
                    ORDER BY sw.idx ASC
                )
            )
            FROM word_phonetics wp
            WHERE wp.word_id = e.id
            -- Select the most general pronunciation (sense_id IS NULL) or the first available
            ORDER BY (wp.sense_id IS NULL) DESC, wp.id ASC
            LIMIT 1
        ),
        'senses', (
            SELECT json_group_array(
                json_object(
                    'id', sn.id,
                    'confidence', sn.confidence,
                    'etymology', sn.etymology,
                    'pos', sn.pos,
                    'glosses', (
                        SELECT json_group_array(sub.gloss)
                        FROM subsenses sub
                        WHERE sub.sid = sn.id
                    ),
                    'examples', (
                        SELECT json_group_array(
                            json_object(
                                'example', ex.example, 
                                'ref', ex.ref
                            )
                        )
                        FROM examples ex
                        WHERE ex.sid = sn.id
                    ),
                    'categories', (
                        SELECT json_group_array(wc.category)
                        FROM word_categories wc
                        WHERE wc.word_id = sn.id
                    ),
                    'derivation', (
                        SELECT json_group_array(
                            json_object(
                                'type', d.type, 
                                'text', d.text, 
                                'tags', json(d.tags)
                            )
                        )
                        FROM derivation d
                        WHERE d.sid = sn.id
                    )
                )
            )
            FROM senses sn
            WHERE sn.parent_id = e.id
        )
    ) AS full_word_data
FROM expressions e
WHERE e.spelling = ${spelling} AND e.lang = ${lang};
      `;

    if (data.length > 1) {
      console.log({ spelling, lang, data });
      throw new Error("more rows than 1 wtf");
    }
    if (data.length === 0) {
      console.log({ spelling, lang, data });
      return null;
    }
    const row = data[0];
    const json = JSON.parse(row.full_word_data);
    const phonetic = JSON.parse(json.phonetic);
    const obj = { ...json, phonetic };
    return obj;
  }

  // TODO combine with this old query to get both phonetic and semantic data
  // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table

  async fetchSenses(spelling: string, lang: string) {
    const rows = await this.db`
      WITH sense_data AS (
        SELECT
          s.*,
          GROUP_CONCAT(DISTINCT ss.id || ':' || ss.gloss, '|') as subsenses_data,
          GROUP_CONCAT(DISTINCT ex.id || ':' || ex.example || ':' || COALESCE(ex.ref, ''), '|') as examples_data,
          GROUP_CONCAT(DISTINCT d.id || ':' || d.type || ':' || d.text, '|') as derivation_data,
          GROUP_CONCAT(DISTINCT wc.category, '|') as categories_data
        FROM senses s
        LEFT JOIN subsenses ss ON ss.sid = s.id
        LEFT JOIN examples ex ON ex.sid = s.id
        LEFT JOIN derivation d ON d.sid = s.id
        LEFT JOIN word_categories wc ON wc.word_id = s.id
        GROUP BY s.id
      )
      SELECT e.*,
        (SELECT
          json_group_array(json_object(
            'id', sd.id,
            'pos', sd.pos,
            'etymology', sd.etymology,
            'confidence', sd.confidence,
            'subsenses_data', sd.subsenses_data,
            'examples_data', sd.examples_data,
            'derivation_data', sd.derivation_data,
            'categories_data', sd.categories_data
          ))
          FROM sense_data sd
          WHERE sd.parent_id = e.id
        ) as senses_array
      FROM expressions e
      WHERE e.spelling = ${spelling} AND e.lang = ${lang}
      ORDER BY e.frequency DESC`;

    return rows;
  }

  // Tones and syls
  async fetchWordsByToneAndSyls1(tones: Array<string | null>) {
    const toneString = tones
      .reduce((acc: string, item) => {
        if (!item) return `${acc},%`;
        else return `${acc},${item}`;
      }, "")
      .slice(1);
    const data = await this.db`
      WITH word_tone_sequences AS (
          SELECT 
              wp.ipa,
              GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq,
              GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
              COUNT(sw.syl_id) as syllable_count
          FROM word_phonetics wp
          JOIN syllables_words sw ON wp.id = sw.word_id
          JOIN syllables sy ON sw.syl_id = sy.id
          JOIN tones t ON sy.tone = t.id
          GROUP BY wp.ipa
      )        
        SELECT *
        FROM word_tone_sequences
        WHERE tone_sequence LIKE ${toneString}
        AND syllable_count = ${tones.length}
      `;
    return data;
  }
  async fetchWordsByToneAndSylsO(tones: Array<string | null>) {
    const toneString = tones
      .reduce((acc: string, item) => {
        if (!item) return `${acc},%`;
        else return `${acc},${item}`;
      }, "")
      .slice(1);
    const data = await this.db`
      WITH word_tone_sequences AS (
          SELECT 
              w.id as word_id,
              w.spelling,
              wp.ipa,
              w.frequency,
              GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq,
              GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
              COUNT(sw.syl_id) as syllable_count
          FROM expressions w
          JOIN word_phonetics wp ON w.id = wp.word_id
          JOIN syllables_words sw ON wp.id = sw.word_id
          JOIN syllables sy ON sw.syl_id = sy.id
          JOIN tones t ON sy.tone = t.id
          GROUP BY w.id, w.spelling, w.lang, w.frequency
      )        
        SELECT word_id,
               spelling,
               ipa,
               frequency,
               syl_seq,
               tone_sequence,
               syllable_count
        FROM word_tone_sequences
        WHERE tone_sequence LIKE ${toneString}
        AND syllable_count = ${tones.length}
        ORDER BY frequency ASC NULLS LAST;
      `;
    // TODO combine with this old query to get both phonetic and semantic data
    // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table

    return data;
  }
  async fetchWordsByToneAndSyls(
    tones: Array<string | null>,
  ): Promise<FullWordDataDB[]> {
    const toneString = tones
      .reduce((acc: string, item) => {
        if (!item) return `${acc},%`;
        else return `${acc},${item}`;
      }, "")
      .slice(1);
    const data = await this.db`
      WITH word_tone_sequences AS (
          SELECT 
              w.id as word_id,
              w.spelling,
              wp.ipa,
              w.frequency,
              GROUP_CONCAT(sy.text ORDER BY sw.idx) as syl_seq,
              GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
              COUNT(sw.syl_id) as syllable_count,
              (SELECT
                json_group_array(json_object(
                  'id', s.id,
                  'pos', s.pos,
                  'etymology', s.etymology,
                  'confidence', s.confidence,
                  'glosses', (
                    SELECT json_group_array(ss.gloss)
                    FROM subsenses ss
                    WHERE ss.sid = s.id
                  ),
                  'examples', (
                    SELECT json_group_array(json_object(
                      'example', ex.example,
                      'ref', ex.ref
                    ))
                    FROM examples ex
                    WHERE ex.sid = s.id
                  ),
                  'derivation', (
                    SELECT json_group_array(json_object(
                      'type', d.type,
                      'text', d.text,
                      'tags', d.tags
                    ))
                    FROM derivation d
                    WHERE d.sid = s.id
                  ),
                  'categories', (
                    SELECT json_group_array(wc.category)
                    FROM word_categories wc
                    WHERE wc.word_id = s.id
                  )
                ))
                FROM senses s
                WHERE s.parent_id = w.id
              ) as senses_array
          FROM expressions w
          JOIN word_phonetics wp ON w.id = wp.word_id
          JOIN syllables_words sw ON wp.id = sw.word_id
          JOIN syllables sy ON sw.syl_id = sy.id
          JOIN tones t ON sy.tone = t.id
          GROUP BY w.id, w.spelling, w.lang, w.frequency
      )        
        SELECT *
        FROM word_tone_sequences
        WHERE tone_sequence LIKE ${toneString}
        AND syllable_count = ${tones.length}
        ORDER BY frequency ASC NULLS LAST;
      `;
    // TODO combine with this old query to get both phonetic and semantic data
    // Thing is the tables have changed, used to be that the senses table had JSONB columns for senses, forms and related but those are now proper tables now with one-to-many relation to the senses table

    return data;
  }
}
export default Queries;
// `
//       WITH word_tone_sequences AS (
//           SELECT
//               w.id as word_id,
//               w.spelling,
//               wp.ipa,
//               w.frequency,
//               s.etymology,
//               s.pos,
//               GROUP_CONCAT(s.text ORDER BY sw.idx) as syl_seq,
//               GROUP_CONCAT(t.name ORDER BY sw.idx) as tone_sequence,
//               // GROUP_CONCAT(t.name ORDER BY s.id) as senses,
//               // GROUP_CONCAT(ex.example ORDER BY s.id) as examples,
//               // GROUP_CONCAT(cat.category) as tags,
//               COUNT(sw.syl_id) as syllable_count
//           FROM expressions w
//           JOIN word_phonetics wp ON w.id = wp.word_id
//           JOIN syllables_words sw ON wp.id = sw.word_id
//           JOIN syllables s ON sw.syl_id = s.id
//           JOIN tones t ON s.tone = t.id
//           JOIN senses s ON s.parent_id = w.id
//           JOIN word_categories cat ON s.id = cat.word_id
//           JOIN subsenses ss ON ss.sid = s.id
//           JOIN examples ex ON ex.sid = s.id
//           GROUP BY w.id, w.spelling, w.lang, w.frequency
//       )
//         SELECT
//             word_id,
//             spelling,
//             ipa,
//             frequency,
//             syl_seq,
//             tone_sequence,
//             syllable_count
//         FROM word_tone_sequences
//         WHERE tone_sequence LIKE ?
//         AND syllable_count = ?
//         ORDER BY frequency ASC NULLS LAST;
//       `