import mime from "mime-types";
import {
  Chat,
  createPartFromBase64,
  createPartFromUri,
  createUserContent,
  GoogleGenAI,
  type Content,
  type GeneratedImage,
  type GeneratedVideo,
  type Part,
} from "@google/genai";
import type { AIModelAPI, InputToken } from "./types";
import type { AsyncRes, Result } from "sortug";

/**
 * Matches base64 `data:` URIs (optionally with extra `;param` segments),
 * capturing the MIME type (group 1) and the base64 payload (group 2).
 */
const DATA_URI_PATTERN = /^data:([^;,]+)(?:;[^;,]+)*;base64,([\s\S]+)$/;

/** Delay between two status checks of a pending video operation. */
const VIDEO_POLL_INTERVAL_MS = 10_000;

/** Upper bound on status checks, so `makeVideo` cannot wait forever. */
const VIDEO_POLL_MAX_ATTEMPTS = 60;

/**
 * Adapter that exposes Google's Gemini models through the project's
 * `AIModelAPI` interface.
 *
 * Methods that return `Result` / `AsyncRes` report failures as
 * `{ error }` values instead of throwing; `stream` is the exception and
 * lets SDK errors reject its promise.
 */
export default class GeminiAPI implements AIModelAPI {
  tokenizer: (text: string) => number;
  maxTokens: number;
  private model: string;
  api: GoogleGenAI;
  // NOTE(review): nothing in this file reads or writes `chats`; the key type
  // is assumed to be a string id — confirm against callers.
  chats: Map<string, Chat> = new Map();

  /**
   * @param model Model name; falls back to "gemini-2.5-pro" when omitted or empty.
   * @param maxTokens Token budget stored on the instance.
   * @param tokenizer Token estimator; the default is `text.length / 3`,
   *   which may yield a fractional count.
   */
  constructor(
    model?: string,
    maxTokens = 200_000,
    tokenizer: (text: string) => number = (text) => text.length / 3,
  ) {
    this.maxTokens = maxTokens;
    this.tokenizer = tokenizer;
    // The key is read from the environment; the non-null assertion means a
    // missing key is not detected here.
    const gem = new GoogleGenAI({ apiKey: Bun.env["GEMINI_API_KEY"]! });
    this.api = gem;
    this.model = model || "gemini-2.5-pro";
  }

  // Input data in Gemini gets pretty involved.
  //
  // data
  //   Union type
  //   data can be only one of the following:
  //
  //   text
  //     string
  //     Inline text.
  //   inlineData
  //     object (Blob)
  //     Inline media bytes.
  //   functionCall
  //     object (FunctionCall)
  //     A predicted FunctionCall returned from the model that contains a
  //     string representing the FunctionDeclaration.name with the arguments
  //     and their values.
  //   functionResponse
  //     object (FunctionResponse)
  //     The result output of a FunctionCall that contains a string
  //     representing the FunctionDeclaration.name and a structured JSON
  //     object containing any output from the function is used as context to
  //     the model.
  //   fileData
  //     object (FileData)
  //     URI based data.
  //   executableCode
  //     object (ExecutableCode)
  //     Code generated by the model that is meant to be executed.
  //   codeExecutionResult
  //     object (CodeExecutionResult)
  //     Result of executing the ExecutableCode.
  //
  // metadata
  //   Union type

  /** Replaces the model name used by all subsequent requests. */
  public setModel(model: string) {
    this.model = model;
  }

  /**
   * Converts an image reference into a Gemini `Part`.
   *
   * Handled forms, in order:
   * 1. a base64 `data:` URI — MIME type and payload are taken from the URI;
   * 2. a string that parses as a URL — sent as a URI part;
   * 3. anything else — sent as inline base64 data.
   *
   * For forms 2 and 3 the MIME type is looked up from the string itself via
   * `mime.lookup`; when that fails the result is `{ error: "no mimetype" }`.
   *
   * @param imageString Data URI, URL, or base64 string.
   * @returns The part, or an error when no MIME type can be determined.
   */
  private contentFromImage(imageString: string): Result<Part> {
    // Data URIs carry their own MIME type; an extension-based lookup would
    // always fail on them, so handle them before anything else.
    const dataUri = DATA_URI_PATTERN.exec(imageString);
    if (dataUri) {
      const [, embeddedType, payload] = dataUri;
      if (embeddedType && payload) {
        return { ok: createPartFromBase64(payload, embeddedType) };
      }
    }

    const mimeType = mime.lookup(imageString);
    if (!mimeType) return { error: "no mimetype" };

    if (URL.parse(imageString)) {
      return { ok: createPartFromUri(imageString, mimeType) };
    }
    // TODO: a bare string that is not a URL is assumed to already be base64;
    // a relative file path with a known extension would also end up here.
    return { ok: createPartFromBase64(imageString, mimeType) };
  }

  /**
   * Builds a single user-role `Content` from a list of input tokens.
   *
   * Text tokens are passed through as strings and image tokens are converted
   * with `contentFromImage`. Any other token shape is rejected rather than
   * being replaced by placeholder text in the prompt.
   *
   * @param tokens Input tokens in prompt order.
   * @returns The content, or an error describing the first token that failed.
   */
  public buildInput(tokens: InputToken[]): Result<Content> {
    const parts: Array<string | Part> = [];
    for (const token of tokens) {
      if ("text" in token) {
        parts.push(token.text);
      } else if ("img" in token) {
        const image = this.contentFromImage(token.img);
        if ("error" in image) return { error: `image failed: ${image.error}` };
        parts.push(image.ok);
      } else {
        return { error: "unsupported input token" };
      }
    }
    try {
      return { ok: createUserContent(parts) };
    } catch (e) {
      return { error: `${e}` };
    }
  }

  /**
   * Assembles the request shared by `send` and `stream`. The `config` key is
   * only present when a non-empty system prompt is given.
   */
  private requestOptions(input: string | Content, systemPrompt?: string) {
    const base = { model: this.model, contents: input };
    return systemPrompt
      ? { ...base, config: { systemInstruction: systemPrompt } }
      : base;
  }

  /**
   * Sends one request and returns the complete text of the reply.
   *
   * @param input Prompt text or a prepared `Content`.
   * @param systemPrompt Optional system instruction.
   * @returns The reply text, or an error when the call throws or the reply
   *   has no text.
   */
  async send(input: string | Content, systemPrompt?: string): AsyncRes<string> {
    try {
      const response = await this.api.models.generateContent(
        this.requestOptions(input, systemPrompt),
      );
      if (!response.text) return { error: "no text in response" };
      return { ok: response.text };
    } catch (e) {
      return { error: `${e}` };
    }
  }

  /**
   * Sends one request and forwards the reply to `handler` chunk by chunk.
   * Chunks without text are forwarded as the empty string.
   *
   * Unlike the other methods, errors are not caught here: a failing request
   * rejects the returned promise.
   *
   * @param input Prompt text or a prepared `Content`.
   * @param handler Called once per streamed chunk.
   * @param systemPrompt Optional system instruction.
   */
  async stream(
    input: string | Content,
    handler: (s: string) => void,
    systemPrompt?: string,
  ): Promise<void> {
    const response = await this.api.models.generateContentStream(
      this.requestOptions(input, systemPrompt),
    );
    for await (const chunk of response) {
      handler(chunk.text || "");
    }
  }

  /**
   * Generates images for `prompt` using the currently selected model.
   *
   * @returns The generated images, or an error when the call throws or the
   *   response contains no images.
   */
  async makeImage(prompt: string): AsyncRes<GeneratedImage[]> {
    try {
      const response = await this.api.models.generateImages({
        model: this.model,
        prompt,
      });
      const images = response.generatedImages ?? [];
      if (images.length === 0) return { error: "no images in response" };
      return { ok: images };
    } catch (e) {
      return { error: `${e}` };
    }
  }

  /**
   * Generates videos for `prompt` using the currently selected model.
   *
   * `generateVideos` hands back an operation rather than finished videos, so
   * the operation is polled until it reports `done` or the attempt limit is
   * reached.
   *
   * NOTE(review): `image` is accepted but not forwarded to the API — its
   * expected encoding (base64, data URI, URL) is not visible here; confirm
   * before wiring it in.
   *
   * @returns The generated videos, or an error when the call throws, the
   *   operation fails or times out, or no videos are produced.
   */
  async makeVideo({
    prompt,
  }: {
    prompt?: string;
    image?: string;
  }): AsyncRes<GeneratedVideo[]> {
    try {
      let operation = await this.api.models.generateVideos({
        model: this.model,
        prompt,
      });
      for (
        let attempt = 0;
        !operation.done && attempt < VIDEO_POLL_MAX_ATTEMPTS;
        attempt++
      ) {
        await new Promise((resolve) =>
          setTimeout(resolve, VIDEO_POLL_INTERVAL_MS),
        );
        operation = await this.api.operations.getVideosOperation({ operation });
      }
      if (!operation.done) return { error: "video generation timed out" };
      if (operation.error) {
        return {
          error: `video generation failed: ${JSON.stringify(operation.error)}`,
        };
      }
      const videos = operation.response?.generatedVideos ?? [];
      if (videos.length === 0) return { error: "no videos in response" };
      return { ok: videos };
    } catch (e) {
      return { error: `${e}` };
    }
  }
}

// TODO how to use caches
// https://ai.google.dev/api/caching