Diffstat (limited to 'src')
-rw-r--r--  src/claude.ts                                 22
-rw-r--r--  src/gemini.ts                                207
-rw-r--r--  src/gemini2.ts                               149
-rw-r--r--  src/generic.ts (renamed from src/model.ts)    26
-rw-r--r--  src/nlp/index.ts                               7
-rw-r--r--  src/nlp/iso.ts                                10
-rw-r--r--  src/nlp/nlp.ts                               208
-rw-r--r--  src/nlp/ocr.ts                                18
-rw-r--r--  src/nlp/spacy.ts                              79
-rw-r--r--  src/nlp/stanza.ts                            210
-rw-r--r--  src/nlp/types.ts                              50
-rw-r--r--  src/openai.ts                                 18
-rw-r--r--  src/types/index.ts                            18
13 files changed, 885 insertions(+), 137 deletions(-)
diff --git a/src/claude.ts b/src/claude.ts
index 377316e..2a56bc1 100644
--- a/src/claude.ts
+++ b/src/claude.ts
@@ -1,20 +1,30 @@
import Claude from "@anthropic-ai/sdk";
import { RESPONSE_LENGTH } from "./logic/constants";
-import type { AResult, ChatMessage, OChoice, OChunk, OMessage } from "./types";
+import type {
+ AIModelAPI,
+ ChatMessage,
+ OChoice,
+ OChunk,
+ OMessage,
+} from "./types";
import { BOOKWORM_SYS } from "./prompts";
+import type { AsyncRes } from "sortug";
type Message = Claude.Messages.MessageParam;
-export default class Conversation {
- private tokenizer: (text: string) => number;
- private maxTokens: number;
- model: string = "claude-3-5-sonnet-20241022";
+export default class ClaudeAPI implements AIModelAPI {
+ private model: string = "claude-3-7-sonnet-20250219";
+ tokenizer: (text: string) => number;
+ maxTokens: number;
+ // model: string = "claude-3-5-sonnet-20241022";
constructor(
maxTokens = 200_000,
tokenizer: (text: string) => number = (text) => text.length / 3,
+ model?: string,
) {
this.maxTokens = maxTokens;
this.tokenizer = tokenizer;
+ if (model) this.model = model;
}
public setModel(model: string) {
this.model = model;
@@ -101,7 +111,7 @@ export default class Conversation {
system: string,
messages: Message[],
isR1: boolean = false,
- ): Promise<AResult<string[]>> {
+ ): Promise<AsyncRes<string[]>> {
try {
const claud = new Claude();
// const list = await claud.models.list();
diff --git a/src/gemini.ts b/src/gemini.ts
index 2f685a2..3e636c2 100644
--- a/src/gemini.ts
+++ b/src/gemini.ts
@@ -1,137 +1,132 @@
import {
- GenerativeModel,
- GoogleGenerativeAI,
+ Chat,
+ GoogleGenAI,
type Content,
- type GenerateContentResult,
-} from "@google/generative-ai";
+ type GeneratedImage,
+ type GeneratedVideo,
+} from "@google/genai";
import { RESPONSE_LENGTH } from "./logic/constants";
-import type { AResult, ChatMessage, OChoice, OChunk, OMessage } from "./types";
+import type {
+ AIModelAPI,
+ ChatMessage,
+ OChoice,
+ OChunk,
+ OMessage,
+} from "./types";
+import type { AsyncRes } from "sortug";
-export default class Conversation {
- private tokenizer: (text: string) => number;
- private maxTokens: number;
- private model: GenerativeModel;
+export default class GeminiAPI {
+ tokenizer: (text: string) => number;
+ maxTokens: number;
+ private model: string;
+ api: GoogleGenAI;
+ chats: Map<string, Chat> = new Map<string, Chat>();
constructor(
maxTokens = 200_000,
tokenizer: (text: string) => number = (text) => text.length / 3,
+ model?: string,
) {
this.maxTokens = maxTokens;
this.tokenizer = tokenizer;
- const gem = new GoogleGenerativeAI(Bun.env["GEMINI_API_KEY"]!);
- this.model = gem.getGenerativeModel({
- model: "gemini-2.0-flash-exp",
- generationConfig: { maxOutputTokens: RESPONSE_LENGTH },
- });
+ const gem = new GoogleGenAI({ apiKey: Bun.env["GEMINI_API_KEY"]! });
+ this.api = gem;
+ this.model = model || "gemini-2.5-pro-preview-05-06";
}
- public setModel(model: string) {
- const gem = new GoogleGenerativeAI(Bun.env["GEMINI_API_KEY"]!);
- this.model = gem.getGenerativeModel({
- model,
- generationConfig: { maxOutputTokens: RESPONSE_LENGTH },
- });
+ createChat({ name, history }: { name?: string; history?: Content[] }) {
+ const chat = this.api.chats.create({ model: this.model, history });
+ this.chats.set(name ? name : Date.now().toString(), chat);
}
- private mapMessages(input: ChatMessage[]): Content[] {
- return input.map((m) => ({
- role: m.author === "gemini" ? "model" : "user",
- parts: [{ text: m.text }],
- }));
+ async followChat(name: string, message: string): AsyncRes<string> {
+ const chat = this.chats.get(name);
+ if (!chat) return { error: "no chat with that name" };
+ else {
+ const response = await chat.sendMessage({ message });
+ const text = response.text;
+ return { ok: text || "" };
+ }
}
-
- private mapMessagesR1(input: ChatMessage[]): Content[] {
- return input.reduce((acc: Content[], m, i) => {
- const prev = acc[i - 1];
- const role = m.author === "gemini" ? "model" : "user";
- const msg = { role, parts: [{ text: m.text }] };
- if (prev?.role === role) acc[i - 1] = msg;
- else acc = [...acc, msg];
- return acc;
- }, []);
+ async followChatStream(
+ name: string,
+ message: string,
+ handler: (data: string) => void,
+ ) {
+ const chat = this.chats.get(name);
+ if (!chat) throw new Error("no chat!");
+ else {
+ const response = await chat.sendMessageStream({ message });
+ for await (const chunk of response) {
+ const text = chunk.text;
+ handler(text || "");
+ }
+ }
}
- private async apiCall(
- messages: Content[],
- isR1: boolean = false,
- ): Promise<AResult<string[]>> {
+ async send(message: string, systemPrompt?: string): AsyncRes<string> {
try {
- const chat = this.model.startChat({ history: messages });
- const res = await chat.sendMessage("");
- return { ok: [res.response.text()] };
+ const opts = {
+ model: this.model,
+ contents: message,
+ };
+ const fopts = systemPrompt
+ ? { ...opts, config: { systemInstruction: systemPrompt } }
+ : opts;
+ const response = await this.api.models.generateContent(fopts);
+ return { ok: response.text || "" };
} catch (e) {
- console.log(e, "error in gemini api");
return { error: `${e}` };
}
}
+ async sendStream(
+ handler: (s: string) => void,
+ message: string,
+ systemPrompt?: string,
+ ) {
+ const opts = {
+ model: this.model,
+ contents: message,
+ };
+ const fopts = systemPrompt
+ ? { ...opts, config: { systemInstruction: systemPrompt } }
+ : opts;
+ const response = await this.api.models.generateContentStream(fopts);
+ for await (const chunk of response) {
+ handler(chunk.text || "");
+ }
+ }
- private async apiCallStream(
- messages: Content[],
- handle: (c: any) => void,
- isR1: boolean = false,
- ): Promise<void> {
+ async makeImage(prompt: string): AsyncRes<GeneratedImage[]> {
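+ // NOTE: generateImages expects an image-generation model (e.g. an Imagen
+ // model id); a text chat model in this.model will likely be rejected.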
try {
- const chat = this.model.startChat({ history: messages });
- const res = await chat.sendMessage("");
- // for await (const chunk of res.stream()) {
- // handle(chunk.text());
- // }
+ const response = await this.api.models.generateImages({
+ model: this.model,
+ prompt,
+ });
+ // TODO if empty or undefined return error
+ return { ok: response.generatedImages || [] };
} catch (e) {
- console.log(e, "error in gemini api");
- handle(`Error streaming Gemini, ${e}`);
+ return { error: `${e}` };
}
}
-
- public async send(sys: string, input: ChatMessage[]) {
- const messages = this.mapMessages(input);
- const truncated = this.truncateHistory(messages);
- const res = await this.apiCall(truncated);
- return res;
- }
-
- public async sendR1(input: ChatMessage[]) {
- const messages = this.mapMessagesR1(input);
- const truncated = this.truncateHistory(messages);
- const res = await this.apiCall(truncated, true);
- return res;
- }
-
- public async stream(
- sys: string,
- input: ChatMessage[],
- handle: (c: any) => void,
- ) {
- const messages = this.mapMessages(input);
- const truncated = this.truncateHistory(messages);
- await this.apiCallStream(truncated, handle);
- }
-
- public async streamR1(input: ChatMessage[], handle: (c: any) => void) {
- const messages = this.mapMessagesR1(input);
- const truncated = this.truncateHistory(messages);
- await this.apiCallStream(truncated, handle, true);
- }
-
- public async sendDoc(data: ArrayBuffer, mimeType: string, prompt: string) {
- const res = await this.model.generateContent([
- {
- inlineData: {
- data: Buffer.from(data).toString("base64"),
- mimeType,
- },
- },
- prompt,
- ]);
- return res;
- }
-
- private truncateHistory(messages: Content[]): Content[] {
- const totalTokens = messages.reduce((total, message) => {
- return total + this.tokenizer(message.parts[0].text || "");
- }, 0);
- while (totalTokens > this.maxTokens && messages.length > 1) {
- messages.splice(0, 1);
+ async makeVideo({
+ prompt,
+ image,
+ }: {
+ prompt?: string;
+ image?: string;
+ }): AsyncRes<GeneratedVideo[]> {
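+ // TODO: `image` is accepted above but not forwarded; the SDK's generateVideos
+ // can also take an image input for image-to-video (not wired up here).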
+ try {
+ const response = await this.api.models.generateVideos({
+ model: this.model,
+ prompt,
+ });
+ // TODO if empty or undefined return error
+ return { ok: response.response?.generatedVideos || [] };
+ } catch (e) {
+ return { error: `${e}` };
}
- return messages;
}
}
+// TODO how to use caches
+// https://ai.google.dev/api/caching
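+// A possible shape, as a commented sketch only (assumes the SDK's `caches`
+// surface matches the docs above; field names and `ttl` format unverified):
+//   const cache = await this.api.caches.create({
+//     model: this.model,
+//     config: { contents, systemInstruction, ttl: "3600s" },
+//   });
+//   const res = await this.api.models.generateContent({
+//     model: this.model,
+//     contents: message,
+//     config: { cachedContent: cache.name },
+//   });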
diff --git a/src/gemini2.ts b/src/gemini2.ts
new file mode 100644
index 0000000..291553f
--- /dev/null
+++ b/src/gemini2.ts
@@ -0,0 +1,149 @@
+import {
+ GenerativeModel,
+ GoogleGenerativeAI,
+ type Content,
+ type GenerateContentResult,
+} from "@google/generative-ai";
+import { RESPONSE_LENGTH } from "./logic/constants";
+import type {
+ AIModelAPI,
+ ChatMessage,
+ OChoice,
+ OChunk,
+ OMessage,
+} from "./types";
+import type { AsyncRes } from "sortug";
+
+export default class GeminiAPI implements AIModelAPI {
+ tokenizer: (text: string) => number;
+ maxTokens: number;
+ private model: GenerativeModel;
+
+ constructor(
+ maxTokens = 200_000,
+ tokenizer: (text: string) => number = (text) => text.length / 3,
+ model?: string,
+ ) {
+ this.maxTokens = maxTokens;
+ this.tokenizer = tokenizer;
+
+ const gem = new GoogleGenerativeAI(Bun.env["GEMINI_API_KEY"]!);
+ this.model = gem.getGenerativeModel({
+ // model: model || "gemini-2.0-flash-exp",
+ model: model || "gemini-2.5-pro-preview-05-06",
+ generationConfig: { maxOutputTokens: RESPONSE_LENGTH },
+ });
+ }
+
+ public setModel(model: string) {
+ const gem = new GoogleGenerativeAI(Bun.env["GEMINI_API_KEY"]!);
+ this.model = gem.getGenerativeModel({
+ model,
+ generationConfig: { maxOutputTokens: RESPONSE_LENGTH },
+ });
+ }
+ private mapMessages(input: ChatMessage[]): Content[] {
+ return input.map((m) => ({
+ role: m.author === "gemini" ? "model" : "user",
+ parts: [{ text: m.text }],
+ }));
+ }
+
+ private mapMessagesR1(input: ChatMessage[]): Content[] {
+ // collapse consecutive same-role turns (Gemini requires alternating roles);
+ // index acc by its own length, not the input index, since the two diverge
+ return input.reduce((acc: Content[], m) => {
+ const prev = acc[acc.length - 1];
+ const role = m.author === "gemini" ? "model" : "user";
+ const msg = { role, parts: [{ text: m.text }] };
+ if (prev?.role === role) acc[acc.length - 1] = msg;
+ else acc = [...acc, msg];
+ return acc;
+ }, []);
+ }
+
+ private async apiCall(
+ messages: Content[],
+ isR1: boolean = false,
+ ): Promise<AsyncRes<string[]>> {
+ try {
+ const chat = this.model.startChat({ history: messages });
+ const res = await chat.sendMessage("");
+ return { ok: [res.response.text()] };
+ } catch (e) {
+ console.log(e, "error in gemini api");
+ return { error: `${e}` };
+ }
+ }
+
+ private async apiCallStream(
+ messages: Content[],
+ handle: (c: any) => void,
+ isR1: boolean = false,
+ ): Promise<void> {
+ try {
+ const chat = this.model.startChat({ history: messages });
+ const res = await chat.sendMessage("");
+ // for await (const chunk of res.stream()) {
+ // handle(chunk.text());
+ // }
+ } catch (e) {
+ console.log(e, "error in gemini api");
+ handle(`Error streaming Gemini, ${e}`);
+ }
+ }
+
+ public async send(sys: string, input: ChatMessage[]) {
+ console.log({ sys, input });
+ this.model.systemInstruction = { role: "system", parts: [{ text: sys }] };
+ const messages = this.mapMessages(input);
+ const truncated = this.truncateHistory(messages);
+ const res = await this.apiCall(truncated);
+ return res;
+ }
+
+ public async sendR1(input: ChatMessage[]) {
+ const messages = this.mapMessagesR1(input);
+ const truncated = this.truncateHistory(messages);
+ const res = await this.apiCall(truncated, true);
+ return res;
+ }
+
+ public async stream(
+ sys: string,
+ input: ChatMessage[],
+ handle: (c: any) => void,
+ ) {
+ this.model.systemInstruction = { role: "system", parts: [{ text: sys }] };
+ const messages = this.mapMessages(input);
+ const truncated = this.truncateHistory(messages);
+ await this.apiCallStream(truncated, handle);
+ }
+
+ public async streamR1(input: ChatMessage[], handle: (c: any) => void) {
+ const messages = this.mapMessagesR1(input);
+ const truncated = this.truncateHistory(messages);
+ await this.apiCallStream(truncated, handle, true);
+ }
+
+ public async sendDoc(data: ArrayBuffer, mimeType: string, prompt: string) {
+ const res = await this.model.generateContent([
+ {
+ inlineData: {
+ data: Buffer.from(data).toString("base64"),
+ mimeType,
+ },
+ },
+ prompt,
+ ]);
+ return res;
+ }
+
+ private truncateHistory(messages: Content[]): Content[] {
+ // keep a running total and update it as the oldest messages are dropped;
+ // otherwise the loop would discard everything down to a single message
+ let totalTokens = messages.reduce((total, message) => {
+ return total + this.tokenizer(message.parts[0].text || "");
+ }, 0);
+ while (totalTokens > this.maxTokens && messages.length > 1) {
+ const removed = messages.shift();
+ if (removed) totalTokens -= this.tokenizer(removed.parts[0].text || "");
+ }
+ return messages;
+ }
+}
diff --git a/src/model.ts b/src/generic.ts
index 39b42dc..50c4435 100644
--- a/src/model.ts
+++ b/src/generic.ts
@@ -1,29 +1,30 @@
import OpenAI from "openai";
import { MAX_TOKENS, RESPONSE_LENGTH } from "./logic/constants";
-import type { AResult, ChatMessage, OChoice } from "./types";
+import type { AIModelAPI, ChatMessage, OChoice } from "./types";
+import type { AsyncRes } from "sortug";
type Message = OpenAI.Chat.Completions.ChatCompletionMessageParam;
type Props = {
baseURL: string;
apiKey: string;
- model: string;
+ model?: string;
maxTokens?: number;
tokenizer?: (text: string) => number;
};
-export default class Conversation {
+export default class OpenAIAPI implements AIModelAPI {
private apiKey;
private baseURL;
- private maxTokens: number = MAX_TOKENS;
- private tokenizer: (text: string) => number = (text) => text.length / 3;
private api;
- private model;
+ maxTokens: number = MAX_TOKENS;
+ tokenizer: (text: string) => number = (text) => text.length / 3;
+ model;
constructor(props: Props) {
this.apiKey = props.apiKey;
this.baseURL = props.baseURL;
this.api = new OpenAI({ baseURL: this.baseURL, apiKey: this.apiKey });
- this.model = props.model;
+ this.model = props.model || "";
if (props.maxTokens) this.maxTokens = props.maxTokens;
if (props.tokenizer) this.tokenizer = props.tokenizer;
}
@@ -36,7 +37,7 @@ export default class Conversation {
});
}
- public async send(sys: string, input: ChatMessage[]): AResult<string[]> {
+ public async send(sys: string, input: ChatMessage[]): AsyncRes<string[]> {
const messages = this.mapMessages(input);
const sysMsg: Message = { role: "system", content: sys };
const allMessages = [sysMsg, ...messages];
@@ -44,12 +45,15 @@ export default class Conversation {
const truncated = this.truncateHistory(allMessages);
const res = await this.apiCall(truncated);
if ("error" in res) return res;
- else
+ else {
try {
- return { ok: res.ok.map((c) => c.message.content!) };
+ // TODO type this properly
+ const choices: OChoice[] = res.ok;
+ return { ok: choices.map((c) => c.message.content!) };
} catch (e) {
return { error: `${e}` };
}
+ }
}
public async stream(
@@ -77,7 +81,7 @@ export default class Conversation {
}
// TODO custom temperature?
- private async apiCall(messages: Message[]): AResult<OChoice[]> {
+ private async apiCall(messages: Message[]): AsyncRes<OChoice[]> {
console.log({ messages }, "at the very end");
try {
const completion = await this.api.chat.completions.create({
diff --git a/src/nlp/index.ts b/src/nlp/index.ts
new file mode 100644
index 0000000..ebed586
--- /dev/null
+++ b/src/nlp/index.ts
@@ -0,0 +1,7 @@
+import * as Spacy from "./spacy";
+import * as Stanza from "./stanza";
+import * as ISO from "./iso";
+import { ocr } from "./ocr";
+import type * as Types from "./types";
+export * from "./nlp";
+export { ISO, ocr, Stanza, Spacy, type Types };
diff --git a/src/nlp/iso.ts b/src/nlp/iso.ts
new file mode 100644
index 0000000..3e60850
--- /dev/null
+++ b/src/nlp/iso.ts
@@ -0,0 +1,10 @@
+import { franc, francAll } from "franc-all";
+import { iso6393To1 } from "iso-639-3";
+export { iso6393, iso6393To1, iso6393To2B, iso6393To2T } from "iso-639-3";
+export * as BCP47 from "bcp-47";
+
+export function detectLang(text: string) {
+ const iso3 = franc(text);
+ const iso1 = iso6393To1[iso3];
+ return iso1 ? iso1 : iso3;
+}
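+
+// Usage sketch: franc returns an ISO 639-3 code, mapped down to 639-1 when
+// possible, e.g. detectLang("Bonjour tout le monde, comment allez-vous?")
+// should yield "fr"; very short strings may come back as "und" (undetermined).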
diff --git a/src/nlp/nlp.ts b/src/nlp/nlp.ts
new file mode 100644
index 0000000..3b1e3a7
--- /dev/null
+++ b/src/nlp/nlp.ts
@@ -0,0 +1,208 @@
+export const isPunctuation = (text: string): boolean => {
+ // Common punctuation characters
+ const punctuationRegex = /^[.,;:!?()[\]{}'"«»""''…-]+$/;
+ return punctuationRegex.test(text);
+};
+
+// Get color for different syntactic categories
+export function getColorForType(type: string): string {
+ const colors: Record<string, string> = {
+ // Phrasal categories
+ S: "#6495ED", // Sentence - cornflower blue
+ NP: "#FF7F50", // Noun Phrase - coral
+ VP: "#32CD32", // Verb Phrase - lime green
+ PP: "#9370DB", // Prepositional Phrase - medium purple
+ ADJP: "#FFD700", // Adjective Phrase - gold
+ ADVP: "#FF69B4", // Adverb Phrase - hot pink
+
+ // Part-of-speech tags
+ NN: "#FFA07A", // Noun - light salmon
+ NNS: "#FFA07A", // Plural Noun - light salmon
+ NNP: "#FFA07A", // Proper Noun - light salmon
+ VB: "#90EE90", // Verb - light green
+ VBP: "#90EE90", // Present tense verb - light green
+ VBG: "#90EE90", // Gerund verb - light green
+ VBZ: "#90EE90", // 3rd person singular present verb - light green
+ VBD: "#90EE90", // Past tense verb - light green
+ VBN: "#90EE90", // Past participle verb - light green
+ JJ: "#F0E68C", // Adjective - khaki
+ RB: "#DDA0DD", // Adverb - plum
+ IN: "#87CEFA", // Preposition - light sky blue
+ DT: "#D3D3D3", // Determiner - light gray
+ PRP: "#D8BFD8", // Personal pronoun - thistle
+ CC: "#A9A9A9", // Coordinating conjunction - dark gray
+
+ // Default
+ ROOT: "#000000", // Root - black
+ LEAF: "#666666", // Leaf nodes - dark gray
+ };
+
+ return colors[type] || "#666666";
+}
+
+// Get a description for node types
+export function getDescription(type: string): string {
+ const descriptions: Record<string, string> = {
+ S: "Sentence",
+ SBAR: "Subordinating conjunction clause",
+ SBARQ: "Direct question",
+ SINV: "Declarative sentence with subject-aux inversion",
+ SQ: "Subconstituent of SBARQ excluding wh-word",
+ WHADVP: "wh-adverb phrase",
+ WHNP: "wh-nounphrase",
+ WHPP: "wh-prepositional phrase",
+ WDT: "wh-determiner",
+ WP: "wh-pronoun",
+ WRB: "wh-adverb",
+ WP$: "possesive wh-pronoun",
+ MD: "modal",
+ X: "Unknown",
+ NP: "Noun Phrase",
+ VP: "Verb Phrase",
+ PP: "Prepositional Phrase",
+ ADJP: "Adjective Phrase",
+ ADVP: "Adverb Phrase",
+ LS: "List item market",
+ SYM: "Symbol",
+ NN: "Noun",
+ NNS: "Plural Noun",
+ NNP: "Proper Noun",
+ NNPS: "Proper Noun, Plural",
+ VB: "Verb (base form)",
+ VBP: "Verb (present tense)",
+ VBG: "Verb (gerund/present participle)",
+ VBZ: "Verb (3rd person singular present)",
+ VBD: "Verb (past tense)",
+ VBN: "Verb (past participle)",
+ JJ: "Adjective",
+ JJR: "Adjective, comparative",
+ JJS: "Adjective, superlative",
+ EX: "Existential there",
+ RB: "Adverb",
+ RBR: "Adverb, comparative",
+ RBS: "Adverb, superlative",
+ RP: "Particle",
+ IN: "Preposition",
+ TO: "to",
+ DT: "Determiner",
+ PDT: "Predeterminer",
+ PRP: "Personal Pronoun",
+ PP$: "Possesive Pronoun",
+ PRP$: "Possesive Pronoun",
+ POS: "Possesive ending",
+ FW: "Foreign Word",
+ CC: "Coordinating Conjunction",
+ CD: "Cardinal number",
+ UH: "interjection",
+ ROOT: "Root Node",
+ CLR: "figurative motion",
+ FRAG: "fragment",
+ ":": "Colon/Semicolon",
+ ",": "Comma",
+ ".": "Period",
+ };
+
+ return descriptions[type] || type;
+}
+
+// https://universaldependencies.org/u/dep/xcomp.html
+
+export function unpackDeprel(type: string): string {
+ const descriptions: Record<string, string> = {
+ nsubj: "nominal subject",
+ obj: "object",
+ iobj: "indirect object",
+ csubj: "clausal subject",
+ ccomp: "clausal complement",
+ xcomp: "open clausal complement",
+ obl: "oblique nominal",
+ vocative: "vocative",
+ expl: "expletive",
+ dislocated: "dislocated",
+ nmod: "nominal modifier",
+ appos: "appositional modifier",
+ nummod: "numeric modifier",
+ advcl: "adverbial clause modifier",
+ acl: "admonimal clause",
+ advmod: "adverbial modifier",
+ discourse: "dicourse element",
+ aux: "auxiliary",
+ cop: "copula",
+ mark: "marker",
+ amod: "adjectival modifier",
+ det: "determiner",
+ clf: "classifier",
+ case: "case marker",
+ conj: "conjunction",
+ cc: "coordinating conjunction",
+ fixed: "fixed multiword expression",
+ flat: "flat expression",
+ list: "list",
+ parataxis: "parataxis",
+ compound: "compound",
+ orphan: "orphan",
+ goeswith: "goes with",
+ reparandum: "overriden disfluency",
+ punct: "punctuation",
+ root: "root",
+ dep: "unspecified dependency",
+ };
+ const res = descriptions[type];
+ if (!res) console.log("tag not found!!", type);
+
+ return res || type;
+}
+
+export function deprelColors(type: string): string {
+ const colors: Record<string, string> = {
+ // Dependency relations and phrasal fallbacks
+ s: "#6495ED", // sentence - cornflower blue
+ nsubj: "#6495ED", // nominal subject - cornflower blue
+ root: "#FFD700", // root - gold
+ p: "#FFD700", // predicate - gold
+ NP: "#FF7F50", // Noun Phrase - coral
+ VP: "#32CD32", // Verb Phrase - lime green
+ PP: "#9370DB", // Prepositional Phrase - medium purple
+ ADVP: "#FF69B4", // Adverb Phrase - hot pink
+
+ // Part-of-speech tags
+ NN: "#FFA07A", // Noun - light salmon
+ NNS: "#FFA07A", // Plural Noun - light salmon
+ NNP: "#FFA07A", // Proper Noun - light salmon
+ VB: "#90EE90", // Verb - light green
+ VBP: "#90EE90", // Present tense verb - light green
+ VBG: "#90EE90", // Gerund verb - light green
+ VBZ: "#90EE90", // 3rd person singular present verb - light green
+ VBD: "#90EE90", // Past tense verb - light green
+ VBN: "#90EE90", // Past participle verb - light green
+ JJ: "#F0E68C", // Adjective - khaki
+ RB: "#DDA0DD", // Adverb - plum
+ IN: "#87CEFA", // Preposition - light sky blue
+ DT: "#D3D3D3", // Determiner - light gray
+ PRP: "#D8BFD8", // Personal pronoun - thistle
+ CC: "#A9A9A9", // Coordinating conjunction - dark gray
+
+ // Default
+ ROOT: "#000000", // Root - black
+ LEAF: "#666666", // Leaf nodes - dark gray
+ };
+
+ return colors[type] || "#666666";
+}
+export function unpackPos(pos: string): string {
+ const map: Record<string, string> = {
+ adj: "adjective",
+ adv: "adverb",
+ adv_phrase: "adverbial phrase",
+ combining_form: "combining form",
+ conj: "conjunction",
+ det: "determinant",
+ intj: "interjection",
+ num: "number",
+ prep: "preposition",
+ prep_phrase: "prepositional phrase",
+ pron: "pronoun",
+ punct: "punctuation",
+ };
+ return map[pos] || pos;
+}
diff --git a/src/nlp/ocr.ts b/src/nlp/ocr.ts
new file mode 100644
index 0000000..1c40355
--- /dev/null
+++ b/src/nlp/ocr.ts
@@ -0,0 +1,18 @@
+import type { AsyncRes } from "sortug";
+
+export async function ocr(formData: FormData): AsyncRes<string[]> {
+ const endpoint = "http://localhost:8102/ocr";
+
+ const opts = {
+ method: "POST",
+ body: formData,
+ headers: { "X-API-KEY": Bun.env.SORTUG_NLP_API_KEY! },
+ };
+ try {
+ const res = await fetch(endpoint, opts);
+ const j = await res.json();
+ return { ok: j };
+ } catch (e) {
+ return { error: `${e}` };
+ }
+}
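+
+// Usage sketch (assumes the local OCR service above is running; the "file"
+// field name is an assumption about the service's contract):
+//   const fd = new FormData();
+//   fd.append("file", Bun.file("./page.png"));
+//   const res = await ocr(fd);
+//   if ("ok" in res) console.log(res.ok.join("\n"));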
diff --git a/src/nlp/spacy.ts b/src/nlp/spacy.ts
new file mode 100644
index 0000000..d79de55
--- /dev/null
+++ b/src/nlp/spacy.ts
@@ -0,0 +1,79 @@
+import type { AsyncRes, Result } from "sortug";
+import { detectLang } from "./iso";
+const ENDPOINT = "http://localhost:8102";
+
+export async function run(text: string, langg?: string): AsyncRes<SpacyRes> {
+ try {
+ const lang = langg ? langg : detectLang(text);
+ const body = JSON.stringify({ string: text, lang });
+ const opts = {
+ headers: {
+ "Content-type": "application/json",
+ "X-API-KEY": Bun.env.SORTUG_NLP_API_KEY!,
+ },
+ method: "POST",
+ body,
+ };
+ const res = await fetch(ENDPOINT + "/spacy", opts);
+ const j = await res.json();
+ console.log("spacy", j);
+ return { ok: j };
+ } catch (e) {
+ return { error: `${e}` };
+ }
+}
+
+export type SpacyResBig = {
+ doc: {
+ text: string;
+ ents: any[];
+ sents: Array<{ start: number; end: number }>;
+ tokens: Token[];
+ };
+ segs: Sentence[];
+};
+export type SpacyRes = {
+ input: string;
+ segments: Sentence[];
+};
+export type Sentence = {
+ text: string;
+ start: number;
+ end: number;
+ root: Token;
+ subj: Token;
+ arcs: Arc[];
+ words: Word[];
+};
+export type Arc = {
+ start: number;
+ end: number;
+ label: string; // deprel label
+ dir: string;
+};
+export type Token = {
+ id: number;
+ head: number;
+ start: number;
+ end: number;
+ dep: string;
+ lemma: string;
+ morph: string;
+ pos: string;
+ tag: string;
+ text: string;
+};
+
+export interface Word extends Token {
+ ancestors: number[];
+ children: number[]; // child token ids (mirrors ancestors)
+ n_lefts: number;
+ n_rights: number;
+ left_edge: number;
+ right_edge: number;
+ morph_map: Record<string, string>;
+}
+
+export function isChild(w: Word, topId: number): boolean {
+ return w.id === topId || w.ancestors.includes(topId);
+}
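+
+// Usage sketch: collect the words in a token's subtree, e.g. the subject phrase:
+//   const subjPhrase = sentence.words.filter((w) => isChild(w, sentence.subj.id));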
diff --git a/src/nlp/stanza.ts b/src/nlp/stanza.ts
new file mode 100644
index 0000000..5836b91
--- /dev/null
+++ b/src/nlp/stanza.ts
@@ -0,0 +1,210 @@
+import type { AsyncRes, Result } from "sortug";
+import { detectLang } from "./iso";
+
+const ENDPOINT = "http://localhost:8102";
+export async function segmenter(
+ text: string,
+ langg?: string,
+): AsyncRes<StanzaRes> {
+ try {
+ const lang = langg ? langg : detectLang(text);
+ const body = JSON.stringify({ lang, string: text });
+ const opts = {
+ headers: {
+ "Content-type": "application/json",
+ "X-API-KEY": Bun.env.SORTUG_NLP_API_KEY!,
+ },
+ method: "POST",
+ body,
+ };
+ const res = await fetch(ENDPOINT + "/stanza", opts);
+ const j = await res.json();
+ return { ok: j };
+ } catch (e) {
+ return { error: `${e}` };
+ }
+}
+export async function idLang(text: string) {
+ try {
+ const body = JSON.stringify({ string: text });
+ const opts = {
+ headers: {
+ "Content-type": "application/json",
+ "X-API-KEY": Bun.env.SORTUG_NLP_API_KEY!,
+ },
+ method: "POST",
+ body,
+ };
+ const res = await fetch(ENDPOINT + "/detect-lang", opts);
+ const j = await res.json();
+ return { ok: j };
+ } catch (e) {
+ return { error: `${e}` };
+ }
+}
+export type StanzaRes = { input: string; segments: Sentence[] };
+export type Sentence = {
+ text: string;
+ sentiment: number;
+ constituency: TreeNode;
+ constring: string;
+ dependencies: Dependency[];
+ entities: Entity[];
+ tokens: Token[];
+ words: Word[];
+};
+export type TreeNode = {
+ label: string;
+ children: TreeNode[];
+};
+export type Dependency = [Word, string, Word]; // one (head, deprel, dependent) triple
+export type Word = {
+ id: number;
+ text: string;
+ lemma: string;
+ upos: string;
+ xpos: string;
+ feats: string;
+ head: number;
+ deprel: string;
+ start_char: number;
+ end_char: number;
+};
+export type Token = {
+ id: [number, number];
+ text: string;
+ misc: string;
+ words: Word[];
+ start_char: number;
+ end_char: number;
+ ner: string;
+};
+export type Entity = {
+ text: string;
+ misc: string;
+ start_char: number;
+ end_char: number;
+ type: string;
+};
+
+// mine
+export type Clause = {
+ words: Word[];
+ dependency: Dependency;
+ text: string;
+};
+// "amod",
+// {
+// "id": 1,
+// "text": "Stony",
+// "lemma": "Stony",
+// "upos": "ADJ",
+// "xpos": "NNP",
+// "feats": "Degree=Pos",
+// "head": 3,
+// "deprel": "amod",
+// "start_char": 0,
+// "end_char": 5
+// }
+//
+//
+
+export interface ParsedGrammar {
+ predicateCore: number;
+ subjectCore: number | null;
+ tree: Record<number, number[]>;
+ wordMap: WordMap;
+ words: BigWord[];
+}
+export interface BigWord extends Word {
+ ancestry: number[];
+ component: "s" | "p" | "u";
+}
+export type ComputedDependency = {
+ word: BigWord;
+ children: ComputedDependency[];
+};
+export type WordMap = Record<number, Word>;
+
+export function buildTreeFromWords(words: Word[]): Result<ParsedGrammar> {
+ const roots = words.filter((w) => w.deprel === "root");
+ if (roots.length > 1) {
+ console.log("roots", roots);
+ return { error: "too many roots" };
+ } else if (roots.length === 0) {
+ return { error: "no roots" };
+ } else {
+ const root = roots[0];
+ const wordmap = words.reduce((acc: WordMap, item) => {
+ acc[item.id] = item;
+ return acc;
+ }, {});
+ return { ok: parseFurther(words, wordmap, root) };
+ }
+}
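+
+// Usage sketch (hypothetical wiring with the segmenter above):
+//   const res = await segmenter("Stony Brook is a university.");
+//   if ("ok" in res) {
+//     const parsed = buildTreeFromWords(res.ok.segments[0].words);
+//     if ("ok" in parsed) console.log(parsed.ok.subjectCore, parsed.ok.tree);
+//   }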
+function parseFurther(
+ words: Word[],
+ wordMap: WordMap,
+ root: Word,
+): ParsedGrammar {
+ const predicateCore = root.id;
+ let subjectCore: number | null = null;
+ const tree: Record<number, number[]> = {};
+ const bigwords: BigWord[] = [];
+ const getAncestry = (parent: Word): number[] => {
+ const kids = tree[parent.head] || [];
+ tree[parent.head] = [...kids, parent.id];
+ if (parent.deprel === "nsubj") subjectCore = parent.id;
+
+ console.log("getting ancestry " + parent.id, parent.text);
+ const grandpa = wordMap[parent.head];
+ if (!grandpa) return [parent.id];
+ else return [parent.id, ...getAncestry(grandpa)];
+ };
+ // indexed loop: the early `continue`s below must not desync idx from the
+ // position of the current word
+ for (let idx = 0; idx < words.length; idx++) {
+ const w = words[idx];
+ if (w.deprel === "punct") {
+ // fold punctuation into the preceding word
+ const prev = words[idx - 1];
+ if (prev) prev.text += w.text;
+ continue;
+ }
+ const parent = wordMap[w.head];
+ if (!parent) tree[w.id] = [];
+ const ancestry = !parent ? [] : getAncestry(parent);
+ const component =
+ subjectCore && (w.id === subjectCore || ancestry.includes(subjectCore))
+ ? "s"
+ : w.id === predicateCore || ancestry.includes(root.id)
+ ? "p"
+ : "u";
+ const bw: BigWord = { ...w, component, ancestry };
+ wordMap[w.id] = bw;
+ bigwords.push(bw);
+ }
+ const pg: ParsedGrammar = {
+ predicateCore,
+ subjectCore,
+ wordMap,
+ tree,
+ words: bigwords,
+ };
+ return pg;
+}
+
+export function oneDescendant(node: TreeNode): boolean {
+ if (node.children.length !== 1) return false;
+ else {
+ const child = node.children[0];
+ return child.children.length === 0;
+ }
+}
+
+// function findChildren(wordMap: WordMap, word: Word): ComputedDependency {
+// const children = Object.values(wordMap).filter((w) => w.head === word.id);
+// return {
+// word,
+// children: children.map((c) => findChildren(wordMap, c)),
+// };
+// }
diff --git a/src/nlp/types.ts b/src/nlp/types.ts
new file mode 100644
index 0000000..605a637
--- /dev/null
+++ b/src/nlp/types.ts
@@ -0,0 +1,50 @@
+export type ViewLevel =
+ | "text"
+ | "paragraph"
+ | "sentence"
+ | "clause"
+ | "word"
+ | "syllable"
+ | "phoneme";
+export interface ViewState {
+ level: ViewLevel;
+ pIndex: number | null;
+ sIndex: number | null;
+ cIndex: number | null;
+ wIndex: number | null;
+ yIndex: number | null;
+ fIndex: number | null;
+}
+
+export interface ViewProps {
+ idx: number;
+ rawText: string;
+ context: Context;
+}
+export type Context = {
+ parentText: string;
+ segmented: string[];
+ idx: number;
+};
+
+export type WordData = {
+ confidence: number;
+ frequency: number | null;
+ id: number;
+ ipa: Array<{ ipa: string; tags: string[] }>;
+ spelling: string;
+ type: ExpressionType;
+ syllables: number;
+ lang: string;
+ prosody: any;
+ senses: Sense[];
+};
+export type ExpressionType = "word" | "expression" | "syllable";
+export type Sense = {
+ etymology: string;
+ pos: string;
+ forms: Array<{ form: string; tags: string[] }>;
+ related: any;
+ senses: Array<{ glosses: string[]; links: Array<[string, string]> }>;
+};
+export type LoadingStatus = "pending" | "loading" | "success" | "error";
diff --git a/src/openai.ts b/src/openai.ts
index 2e15dcf..12939bc 100644
--- a/src/openai.ts
+++ b/src/openai.ts
@@ -1,14 +1,8 @@
import fs from "fs";
import OpenAI from "openai";
import { RESPONSE_LENGTH } from "./logic/constants";
-import type {
- AResult,
- ChatMessage,
- OChoice,
- OChunk,
- OMessage,
- Result,
-} from "./types";
+import type { ChatMessage, OChoice, OChunk, OMessage } from "./types";
+import type { AsyncRes, Result } from "sortug";
import OpenAIToolUse from "./openai_tools";
import type { FileObject } from "openai/src/resources/files.js";
@@ -26,7 +20,7 @@ export default class Conversation {
private baseURL: string = "https://api.openai.com/v1";
private tokenizer: (text: string) => number = (text) => text.length / 3;
openai;
- private model: string = "chatgpt-4o-latest";
+ private model: string = "gpt-4.1";
constructor(props: Props) {
if (props.apiKey) this.apiKey = props.apiKey;
@@ -56,7 +50,7 @@ export default class Conversation {
}, []);
}
- public async send(sys: string, input: ChatMessage[]): AResult<OChoice[]> {
+ public async send(sys: string, input: ChatMessage[]): AsyncRes<OChoice[]> {
const messages = this.mapMessages(input);
const sysMsg: Message = { role: "system", content: sys };
const allMessages = [sysMsg, ...messages];
@@ -65,7 +59,7 @@ export default class Conversation {
return res;
}
- public async sendR1(input: ChatMessage[]): AResult<OChoice[]> {
+ public async sendR1(input: ChatMessage[]): AsyncRes<OChoice[]> {
const messages = this.mapMessagesR1(input);
const truncated = this.truncateHistory(messages);
const res = await this.apiCall(truncated);
@@ -102,7 +96,7 @@ export default class Conversation {
return messages;
}
- private async apiCall(messages: Message[]): AResult<OChoice[]> {
+ private async apiCall(messages: Message[]): AsyncRes<OChoice[]> {
try {
const completion = await this.openai.chat.completions.create({
temperature: 1.3,
diff --git a/src/types/index.ts b/src/types/index.ts
index 97be443..b276457 100644
--- a/src/types/index.ts
+++ b/src/types/index.ts
@@ -1,15 +1,29 @@
import type OpenAI from "openai";
+import type { AsyncRes } from "sortug";
export type ChatMessage = {
author: string;
text: string;
sent: number;
reasoning?: string;
};
-export type Result<T> = { ok: T } | { error: string };
-export type AResult<T> = Promise<{ ok: T } | { error: string }>;
// openai
export type OChoice = OpenAI.Chat.Completions.ChatCompletion.Choice;
export type OChunk = OpenAI.Chat.Completions.ChatCompletionChunk.Choice;
export type OMessage = OpenAI.Chat.Completions.ChatCompletionMessageParam;
export type ContentType = { text: string } | { audio: Response };
+export type AIModelChoice =
+ | { name: "deepseek" | "chatgpt" | "claude" | "gemini" | "grok" }
+ | { other: { baseURL: string; apiKey: string } };
+export interface AIModelAPI {
+ setModel: (model: string) => void;
+ tokenizer: (text: string) => number;
+ maxTokens: number;
+
+ send: (systemPrompt: string, input: ChatMessage[]) => AsyncRes<string[]>;
+ stream: (
+ systemPrompt: string,
+ input: ChatMessage[],
+ handler: (data: any) => void,
+ ) => void;
+}
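+
+// Usage sketch: any provider class can sit behind this interface, e.g.
+// (hypothetical wiring with the ClaudeAPI class from this commit):
+//   import ClaudeAPI from "../claude";
+//   const api: AIModelAPI = new ClaudeAPI();
+//   const res = await api.send("You are terse.", [
+//     { author: "user", text: "hello", sent: Date.now() },
+//   ]);
+//   if ("ok" in res) console.log(res.ok[0]);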