Diffstat (limited to 'packages/ai/src/gemini.ts')
| -rw-r--r-- | packages/ai/src/gemini.ts | 199 |
1 file changed, 199 insertions, 0 deletions
diff --git a/packages/ai/src/gemini.ts b/packages/ai/src/gemini.ts
new file mode 100644
index 0000000..d8010b0
--- /dev/null
+++ b/packages/ai/src/gemini.ts
@@ -0,0 +1,199 @@
+// import mime from "mime-types";
+import {
+  Chat,
+  createPartFromBase64,
+  createPartFromUri,
+  createUserContent,
+  GoogleGenAI,
+  type Content,
+  type ContentListUnion,
+  type GeneratedImage,
+  type GeneratedVideo,
+  type Part,
+} from "@google/genai";
+import type { AIModelAPI, InputToken } from "./types";
+import type { AsyncRes, Result } from "@sortug/lib";
+
+export default class GeminiAPI implements AIModelAPI {
+  tokenizer: (text: string) => number;
+  maxTokens: number;
+  private model: string;
+  api: GoogleGenAI;
+  chats: Map<string, Chat> = new Map<string, Chat>();
+
+  constructor(
+    model?: string,
+    maxTokens = 200_000,
+    tokenizer: (text: string) => number = (text) => text.length / 3,
+  ) {
+    this.maxTokens = maxTokens;
+    this.tokenizer = tokenizer;
+
+    const gem = new GoogleGenAI({ apiKey: Bun.env["GEMINI_API_KEY"]! });
+    this.api = gem;
+    this.model = model || "gemini-2.5-pro";
+  }
+
+  // input data in gemini gets pretty involved
+  //
+  // data
+  // Union type
+  // data can be only one of the following:
+  // text
+  // string
+  // Inline text.
+
+  // inlineData
+  // object (Blob)
+  // Inline media bytes.
+
+  // functionCall
+  // object (FunctionCall)
+  // A predicted FunctionCall returned from the model that contains a string representing the FunctionDeclaration.name with the arguments and their values.
+
+  // functionResponse
+  // object (FunctionResponse)
+  // The result output of a FunctionCall that contains a string representing the FunctionDeclaration.name and a structured JSON object containing any output from the function is used as context to the model.
+
+  // fileData
+  // object (FileData)
+  // URI based data.
+
+  // executableCode
+  // object (ExecutableCode)
+  // Code generated by the model that is meant to be executed.
+
+  // codeExecutionResult
+  // object (CodeExecutionResult)
+  // Result of executing the ExecutableCode.
+
+  // metadata
+  // Union type
+  public setModel(model: string) {
+    this.model = model;
+  }
+  private contentFromImage(imageString: string): Result<Part> {
+    // TODO
+    // const mimeType = mime.lookup(imageString);
+    const mimeType = "";
+    if (!mimeType) return { error: "no mimetype" };
+    const url = URL.parse(imageString);
+    if (url) {
+      const part = createPartFromUri(imageString, mimeType);
+      return { ok: part };
+    } else return { ok: createPartFromBase64(imageString, mimeType) };
+  }
+  async inlineImage(imageURI: URL): AsyncRes<Part> {
+    try {
+      const imgdata = await fetch(imageURI);
+      const imageArrayBuffer = await imgdata.arrayBuffer();
+      const base64ImageData = Buffer.from(imageArrayBuffer).toString("base64");
+      const mimeType = imgdata.headers.get("content-type") || "image/jpeg";
+      return { ok: { inlineData: { mimeType, data: base64ImageData } } };
+    } catch (e) {
+      return { error: `${e}` };
+    }
+  }
+  public buildInput(tokens: InputToken[]): Result<Content> {
+    try {
+      const input = createUserContent(
+        tokens.map((t) => {
+          if ("text" in t) return t.text;
+          if ("img" in t) {
+            const imagePart = this.contentFromImage(t.img);
+            if ("error" in imagePart) throw new Error("image failed");
+            else return imagePart.ok;
+          }
+          return "oy vey";
+        }),
+      );
+      return { ok: input };
+    } catch (e) {
+      return { error: `${e}` };
+    }
+  }
+
+  async send(
+    input: string | InputToken[],
+    systemPrompt?: string,
+  ): AsyncRes<string> {
+    let contents: ContentListUnion;
+    if (typeof input === "string") contents = input;
+    else {
+      const built = this.buildInput(input);
+      if ("error" in built) return built;
+      else contents = built.ok;
+    }
+    try {
+      const opts = {
+        model: this.model,
+        contents,
+      };
+      const fopts = systemPrompt
+        ? { ...opts, config: { systemInstruction: systemPrompt } }
+        : opts;
+      const response = await this.api.models.generateContent(fopts);
+      if (!response.text) return { error: "no text in response" };
+      return { ok: response.text };
+    } catch (e) {
+      return { error: `${e}` };
+    }
+  }
+  async stream(
+    input: string | InputToken[],
+    handler: (s: string) => void,
+    systemPrompt?: string,
+  ) {
+    let contents: ContentListUnion;
+    if (typeof input === "string") contents = input;
+    else {
+      const built = this.buildInput(input);
+      if ("error" in built) return built;
+      else contents = built.ok;
+    }
+    const opts = {
+      model: this.model,
+      contents,
+    };
+    const fopts = systemPrompt
+      ? { ...opts, config: { systemInstruction: systemPrompt } }
+      : opts;
+    const response = await this.api.models.generateContentStream(fopts);
+    for await (const chunk of response) {
+      handler(chunk.text || "");
+    }
+  }
+
+  async makeImage(prompt: string): AsyncRes<GeneratedImage[]> {
+    try {
+      const response = await this.api.models.generateImages({
+        model: this.model,
+        prompt,
+      });
+      // TODO if empty or undefined return error
+      return { ok: response.generatedImages || [] };
+    } catch (e) {
+      return { error: `${e}` };
+    }
+  }
+  async makeVideo({
+    prompt,
+    image,
+  }: {
+    prompt?: string;
+    image?: string;
+  }): AsyncRes<GeneratedVideo[]> {
+    try {
+      const response = await this.api.models.generateVideos({
+        model: this.model,
+        prompt,
+      });
+      // TODO if empty or undefined return error
+      return { ok: response.response?.generatedVideos || [] };
+    } catch (e) {
+      return { error: `${e}` };
+    }
+  }
+}
+// TODO how to use caches
+// https://ai.google.dev/api/caching
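For context, a minimal usage sketch of the `GeminiAPI` class added above. It is not part of the commit: the relative import path is hypothetical, and it assumes `InputToken` is the `{ text } | { img }` union that `buildInput` switches on, `AsyncRes` resolves to the `{ ok } | { error }` shape used throughout, and `GEMINI_API_KEY` is set in the environment for Bun.

```ts
// Hypothetical usage sketch of the class added in this diff.
// Import path and the InputToken/AsyncRes shapes are assumptions inferred from the code above.
import GeminiAPI from "./gemini";

const ai = new GeminiAPI("gemini-2.5-pro");

// One-shot request: send() resolves to { ok: string } or { error: string }.
const res = await ai.send("Summarize the Gemini Part union in one sentence.");
if ("ok" in res) console.log(res.ok);
else console.error(res.error);

// Streaming: each text chunk is handed to the callback as it arrives.
await ai.stream(
  [{ text: "Explain context caching briefly." }],
  (chunk) => process.stdout.write(chunk),
  "You are a terse assistant.",
);
```

Two things stand out when exercising the class this way: `{ img }` tokens currently always fail, because `contentFromImage` stubs the mime lookup to an empty string and returns `{ error: "no mimetype" }` before the URI/base64 branch is reached; and `makeVideo` accepts an `image` argument but never forwards it to `generateVideos`.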

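On the trailing caching TODO: the page linked in the comment describes explicit context caching. The sketch below shows roughly how that flow looks with @google/genai; it is an illustration only, not part of the commit, and the field names (`ttl`, `cachedContent`) and the minimum cacheable input size should be verified against the SDK version actually pinned in this package.

```ts
// Rough sketch of explicit context caching (see https://ai.google.dev/api/caching).
// Not part of this commit; verify field names against the installed @google/genai release.
import { GoogleGenAI } from "@google/genai";

const ai = new GoogleGenAI({ apiKey: Bun.env["GEMINI_API_KEY"]! });

// A large, reused prefix worth caching (hypothetical local file).
const report = await Bun.file("report.txt").text();

// Create the cache once; explicit caches require a model-specific minimum input size.
const cache = await ai.caches.create({
  model: "gemini-2.5-pro",
  config: {
    systemInstruction: "You answer questions about the attached report.",
    contents: [{ role: "user", parts: [{ text: report }] }],
    ttl: "3600s", // keep the cached prefix for one hour
  },
});

// Later requests reference the cache by name instead of resending the prefix.
const response = await ai.models.generateContent({
  model: "gemini-2.5-pro",
  contents: "What were the main findings?",
  config: { cachedContent: cache.name },
});
console.log(response.text);
```

If this is wired into `GeminiAPI`, the natural seam is the `fopts` config object that `send` and `stream` already build: an optional cache name could be merged in alongside `systemInstruction`.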