Diffstat (limited to 'packages/ai/src/gemini.ts')
| -rw-r--r-- | packages/ai/src/gemini.ts | 199 |
1 file changed, 199 insertions, 0 deletions
diff --git a/packages/ai/src/gemini.ts b/packages/ai/src/gemini.ts
new file mode 100644
index 0000000..d8010b0
--- /dev/null
+++ b/packages/ai/src/gemini.ts
@@ -0,0 +1,199 @@
+// import mime from "mime-types";
+import {
+  Chat,
+  createPartFromBase64,
+  createPartFromUri,
+  createUserContent,
+  GoogleGenAI,
+  type Content,
+  type ContentListUnion,
+  type GeneratedImage,
+  type GeneratedVideo,
+  type Part,
+} from "@google/genai";
+import type { AIModelAPI, InputToken } from "./types";
+import type { AsyncRes, Result } from "@sortug/lib";
+
+export default class GeminiAPI implements AIModelAPI {
+  tokenizer: (text: string) => number;
+  maxTokens: number;
+  private model: string;
+  api: GoogleGenAI;
+  chats: Map<string, Chat> = new Map<string, Chat>();
+
+  constructor(
+    model?: string,
+    maxTokens = 200_000,
+    tokenizer: (text: string) => number = (text) => text.length / 3,
+  ) {
+    this.maxTokens = maxTokens;
+    this.tokenizer = tokenizer;
+
+    const gem = new GoogleGenAI({ apiKey: Bun.env["GEMINI_API_KEY"]! });
+    this.api = gem;
+    this.model = model || "gemini-2.5-pro";
+  }
+
+  // input data in gemini gets pretty involved
+  //
+  // data
+  // Union type
+  // data can be only one of the following:
+  // text
+  // string
+  // Inline text.
+
+  // inlineData
+  // object (Blob)
+  // Inline media bytes.
+
+  // functionCall
+  // object (FunctionCall)
+  // A predicted FunctionCall returned from the model that contains a string representing the FunctionDeclaration.name with the arguments and their values.
+
+  // functionResponse
+  // object (FunctionResponse)
+  // The result output of a FunctionCall that contains a string representing the FunctionDeclaration.name and a structured JSON object containing any output from the function is used as context to the model.
+
+  // fileData
+  // object (FileData)
+  // URI based data.
+
+  // executableCode
+  // object (ExecutableCode)
+  // Code generated by the model that is meant to be executed.
+
+  // codeExecutionResult
+  // object (CodeExecutionResult)
+  // Result of executing the ExecutableCode.
+
+  // metadata
+  // Union type
+  public setModel(model: string) {
+    this.model = model;
+  }
+  private contentFromImage(imageString: string): Result<Part> {
+    // TODO
+    // const mimeType = mime.lookup(imageString);
+    const mimeType = "";
+    if (!mimeType) return { error: "no mimetype" };
+    const url = URL.parse(imageString);
+    if (url) {
+      const part = createPartFromUri(imageString, mimeType);
+      return { ok: part };
+    } else return { ok: createPartFromBase64(imageString, mimeType) };
+  }
+  async inlineImage(imageURI: URL): AsyncRes<Part> {
+    try {
+      const imgdata = await fetch(imageURI);
+      const imageArrayBuffer = await imgdata.arrayBuffer();
+      const base64ImageData = Buffer.from(imageArrayBuffer).toString("base64");
+      const mimeType = imgdata.headers.get("content-type") || "image/jpeg";
+      return { ok: { inlineData: { mimeType, data: base64ImageData } } };
+    } catch (e) {
+      return { error: `${e}` };
+    }
+  }
+  public buildInput(tokens: InputToken[]): Result<Content> {
+    try {
+      const input = createUserContent(
+        tokens.map((t) => {
+          if ("text" in t) return t.text;
+          if ("img" in t) {
+            const imagePart = this.contentFromImage(t.img);
+            if ("error" in imagePart) throw new Error("image failed");
+            else return imagePart.ok;
+          }
+          return "oy vey";
+        }),
+      );
+      return { ok: input };
+    } catch (e) {
+      return { error: `${e}` };
+    }
+  }
+
+  async send(
+    input: string | InputToken[],
+    systemPrompt?: string,
+  ): AsyncRes<string> {
+    let contents: ContentListUnion;
+    if (typeof input === "string") contents = input;
+    else {
+      const built = this.buildInput(input);
+      if ("error" in built) return built;
+      else contents = built.ok;
+    }
+    try {
+      const opts = {
+        model: this.model,
+        contents,
+      };
+      const fopts = systemPrompt
+        ? { ...opts, config: { systemInstruction: systemPrompt } }
+        : opts;
+      const response = await this.api.models.generateContent(fopts);
+      if (!response.text) return { error: "no text in response" };
+      return { ok: response.text };
+    } catch (e) {
+      return { error: `${e}` };
+    }
+  }
+  async stream(
+    input: string | InputToken[],
+    handler: (s: string) => void,
+    systemPrompt?: string,
+  ) {
+    let contents: ContentListUnion;
+    if (typeof input === "string") contents = input;
+    else {
+      const built = this.buildInput(input);
+      if ("error" in built) return built;
+      else contents = built.ok;
+    }
+    const opts = {
+      model: this.model,
+      contents,
+    };
+    const fopts = systemPrompt
+      ? { ...opts, config: { systemInstruction: systemPrompt } }
+      : opts;
+    const response = await this.api.models.generateContentStream(fopts);
+    for await (const chunk of response) {
+      handler(chunk.text || "");
+    }
+  }
+
+  async makeImage(prompt: string): AsyncRes<GeneratedImage[]> {
+    try {
+      const response = await this.api.models.generateImages({
+        model: this.model,
+        prompt,
+      });
+      // TODO if empty or undefined return error
+      return { ok: response.generatedImages || [] };
+    } catch (e) {
+      return { error: `${e}` };
+    }
+  }
+  async makeVideo({
+    prompt,
+    image,
+  }: {
+    prompt?: string;
+    image?: string;
+  }): AsyncRes<GeneratedVideo[]> {
+    try {
+      const response = await this.api.models.generateVideos({
+        model: this.model,
+        prompt,
+      });
+      // TODO if empty or undefined return error
+      return { ok: response.response?.generatedVideos || [] };
+    } catch (e) {
+      return { error: `${e}` };
+    }
+  }
+}
+// TODO how to use caches
+// https://ai.google.dev/api/caching
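For context, a minimal usage sketch of the `GeminiAPI` class added above. It is not part of the commit: the relative import path is hypothetical, and it assumes `InputToken` is the `{ text } | { img }` union that `buildInput` switches on, `AsyncRes` resolves to the `{ ok } | { error }` shape used throughout, and `GEMINI_API_KEY` is set in the environment for Bun.

```ts
// Hypothetical usage sketch of the class added in this diff.
// Import path and the InputToken/AsyncRes shapes are assumptions inferred from the code above.
import GeminiAPI from "./gemini";

const ai = new GeminiAPI("gemini-2.5-pro");

// One-shot request: send() resolves to { ok: string } or { error: string }.
const res = await ai.send("Summarize the Gemini Part union in one sentence.");
if ("ok" in res) console.log(res.ok);
else console.error(res.error);

// Streaming: each text chunk is handed to the callback as it arrives.
await ai.stream(
  [{ text: "Explain context caching briefly." }],
  (chunk) => process.stdout.write(chunk),
  "You are a terse assistant.",
);
```

Two things stand out when exercising the class this way: `{ img }` tokens currently always fail, because `contentFromImage` stubs the mime lookup to an empty string and returns `{ error: "no mimetype" }` before the URI/base64 branch is reached; and `makeVideo` accepts an `image` argument but never forwards it to `generateVideos`.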

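On the trailing caching TODO: the page linked in the comment describes explicit context caching. The sketch below shows roughly how that flow looks with @google/genai; it is an illustration only, not part of the commit, and the field names (`ttl`, `cachedContent`) and the minimum cacheable input size should be verified against the SDK version actually pinned in this package.

```ts
// Rough sketch of explicit context caching (see https://ai.google.dev/api/caching).
// Not part of this commit; verify field names against the installed @google/genai release.
import { GoogleGenAI } from "@google/genai";

const ai = new GoogleGenAI({ apiKey: Bun.env["GEMINI_API_KEY"]! });

// A large, reused prefix worth caching (hypothetical local file).
const report = await Bun.file("report.txt").text();

// Create the cache once; explicit caches require a model-specific minimum input size.
const cache = await ai.caches.create({
  model: "gemini-2.5-pro",
  config: {
    systemInstruction: "You answer questions about the attached report.",
    contents: [{ role: "user", parts: [{ text: report }] }],
    ttl: "3600s", // keep the cached prefix for one hour
  },
});

// Later requests reference the cache by name instead of resending the prefix.
const response = await ai.models.generateContent({
  model: "gemini-2.5-pro",
  contents: "What were the main findings?",
  config: { cachedContent: cache.name },
});
console.log(response.text);
```

If this is wired into `GeminiAPI`, the natural seam is the `fopts` config object that `send` and `stream` already build: an optional cache name could be merged in alongside `systemInstruction`.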