diff options
| author | polwex <polwex@sortug.com> | 2025-07-23 02:37:15 +0700 |
|---|---|---|
| committer | polwex <polwex@sortug.com> | 2025-07-23 02:37:15 +0700 |
| commit | 42dd99bfac9777a4ecc6700b87edf26a5c984de6 (patch) | |
| tree | 031e45d187f45def4b58ad7590d39dec3924600d | |
| parent | 4c6913644b362b28f15b125c2fbe48165f1e048c (diff) | |
checkpoint
| -rw-r--r-- | bun.lock | 83 | ||||
| -rw-r--r-- | debug.ts | 30 | ||||
| -rw-r--r-- | index.ts | 47 | ||||
| -rw-r--r-- | package.json | 13 | ||||
| -rw-r--r-- | src/claude.ts | 22 | ||||
| -rw-r--r-- | src/gemini.ts | 207 | ||||
| -rw-r--r-- | src/gemini2.ts | 149 | ||||
| -rw-r--r-- | src/generic.ts (renamed from src/model.ts) | 26 | ||||
| -rw-r--r-- | src/nlp/index.ts | 7 | ||||
| -rw-r--r-- | src/nlp/iso.ts | 10 | ||||
| -rw-r--r-- | src/nlp/nlp.ts | 208 | ||||
| -rw-r--r-- | src/nlp/ocr.ts | 18 | ||||
| -rw-r--r-- | src/nlp/spacy.ts | 79 | ||||
| -rw-r--r-- | src/nlp/stanza.ts | 210 | ||||
| -rw-r--r-- | src/nlp/types.ts | 50 | ||||
| -rw-r--r-- | src/openai.ts | 18 | ||||
| -rw-r--r-- | src/types/index.ts | 18 |
17 files changed, 1038 insertions, 157 deletions
@@ -5,23 +5,30 @@ "name": "models", "dependencies": { "@anthropic-ai/sdk": "^0.36.3", + "@google/genai": "^0.13.0", "@google/generative-ai": "^0.21.0", + "bcp-47": "^2.1.0", + "franc-all": "^7.2.0", "groq-sdk": "^0.15.0", + "iso-639-3": "^3.0.1", "openai": "^4.84.0", "playht": "^0.16.0", "replicate": "^1.0.1", + "sortug": "file://home/y/code/npm/sortug", }, "devDependencies": { - "@types/bun": "latest", + "@types/bun": "^1.2.12", }, "peerDependencies": { - "typescript": "^5.0.0", + "typescript": "^5.7.3", }, }, }, "packages": { "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.36.3", "", { "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", "node-fetch": "^2.6.7" } }, "sha512-+c0mMLxL/17yFZ4P5+U6bTWiCSFZUKJddrv01ud2aFBWnTPLdRncYV76D3q1tqfnL7aCnhRtykFnoCFzvr4U3Q=="], + "@google/genai": ["@google/genai@0.13.0", "", { "dependencies": { "google-auth-library": "^9.14.2", "ws": "^8.18.0", "zod": "^3.22.4", "zod-to-json-schema": "^3.22.4" } }, "sha512-eaEncWt875H7046T04mOpxpHJUM+jLIljEf+5QctRyOeChylE/nhpwm1bZWTRWoOu/t46R9r+PmgsJFhTpE7tQ=="], + "@google/generative-ai": ["@google/generative-ai@0.21.0", "", {}, "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg=="], "@grpc/grpc-js": ["@grpc/grpc-js@1.12.6", "", { "dependencies": { "@grpc/proto-loader": "^0.7.13", "@js-sdsl/ordered-map": "^4.4.2" } }, "sha512-JXUj6PI0oqqzTGvKtzOkxtpsyPRNsrmhh41TtIz/zEB6J+AUiZZ0dxWzcMwO9Ns5rmSPuMdghlTbUuqIM48d3Q=="], @@ -52,7 +59,7 @@ "@tokenizer/token": ["@tokenizer/token@0.3.0", "", {}, "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A=="], - "@types/bun": ["@types/bun@1.2.2", "", { "dependencies": { "bun-types": "1.2.2" } }, "sha512-tr74gdku+AEDN5ergNiBnplr7hpDp3V1h7fqI2GcR/rsUaM39jpSeKH0TFibRvU0KwniRx5POgaYnaXbk0hU+w=="], + "@types/bun": ["@types/bun@1.2.12", "", { "dependencies": { "bun-types": "1.2.12" } }, "sha512-lY/GQTXDGsolT/TiH72p1tuyUORuRrdV7VwOTOjDOt8uTBJQOJc5zz3ufwwDl0VBaoxotSk4LdP0hhjLJ6ypIQ=="], "@types/node": ["@types/node@18.19.75", "", { "dependencies": { "undici-types": "~5.26.4" } }, "sha512-UIksWtThob6ZVSyxcOqCLOUNg/dyO1Qvx4McgeuhrEtHTLFTf7BBhEazaE4K806FGTPtzd/2sE90qn4fVr7cyw=="], @@ -62,6 +69,8 @@ "abort-controller": ["abort-controller@3.0.0", "", { "dependencies": { "event-target-shim": "^5.0.0" } }, "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg=="], + "agent-base": ["agent-base@7.1.3", "", {}, "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw=="], + "agentkeepalive": ["agentkeepalive@4.6.0", "", { "dependencies": { "humanize-ms": "^1.2.1" } }, "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ=="], "ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="], @@ -74,12 +83,20 @@ "base64-js": ["base64-js@1.5.1", "", {}, "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="], + "bcp-47": ["bcp-47@2.1.0", "", { "dependencies": { "is-alphabetical": "^2.0.0", "is-alphanumerical": "^2.0.0", "is-decimal": "^2.0.0" } }, "sha512-9IIS3UPrvIa1Ej+lVDdDwO7zLehjqsaByECw0bu2RRGP73jALm6FYbzI5gWbgHLvNdkvfXB5YrSbocZdOS0c0w=="], + + "bignumber.js": ["bignumber.js@9.3.0", "", {}, "sha512-EM7aMFTXbptt/wZdMlBv2t8IViwQL+h6SLHosp8Yf0dqJMTnY6iL32opnAB6kAdL0SZPuvcAzFr31o0c/R3/RA=="], + "buffer": ["buffer@6.0.3", "", { "dependencies": { "base64-js": "^1.3.1", "ieee754": "^1.2.1" } }, "sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA=="], - "bun-types": ["bun-types@1.2.2", "", { "dependencies": { "@types/node": "*", "@types/ws": "~8.5.10" } }, "sha512-RCbMH5elr9gjgDGDhkTTugA21XtJAy/9jkKe/G3WR2q17VPGhcquf9Sir6uay9iW+7P/BV0CAHA1XlHXMAVKHg=="], + "buffer-equal-constant-time": ["buffer-equal-constant-time@1.0.1", "", {}, "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA=="], + + "bun-types": ["bun-types@1.2.12", "", { "dependencies": { "@types/node": "*" } }, "sha512-tvWMx5vPqbRXgE8WUZI94iS1xAYs8bkqESR9cxBB1Wi+urvfTrF1uzuDgBHFAdO0+d2lmsbG3HmeKMvUyj6pWA=="], "cliui": ["cliui@8.0.1", "", { "dependencies": { "string-width": "^4.2.0", "strip-ansi": "^6.0.1", "wrap-ansi": "^7.0.0" } }, "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ=="], + "collapse-white-space": ["collapse-white-space@2.1.0", "", {}, "sha512-loKTxY1zCOuG4j9f6EPnuyyYkf58RnhhWTvRoZEokgB+WbdXehfjFviyOVYkqzEWz1Q5kRiZdBYS5SwxbQYwzw=="], + "color-convert": ["color-convert@2.0.1", "", { "dependencies": { "color-name": "~1.1.4" } }, "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ=="], "color-name": ["color-name@1.1.4", "", {}, "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA=="], @@ -88,8 +105,12 @@ "cross-fetch": ["cross-fetch@4.1.0", "", { "dependencies": { "node-fetch": "^2.7.0" } }, "sha512-uKm5PU+MHTootlWEY+mZ4vvXoCn4fLQxT9dSc1sXVMSFkINTJVN8cAQROpwcKm8bJ/c7rgZVIBWzH5T78sNZZw=="], + "debug": ["debug@4.4.0", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA=="], + "delayed-stream": ["delayed-stream@1.0.0", "", {}, "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ=="], + "ecdsa-sig-formatter": ["ecdsa-sig-formatter@1.0.11", "", { "dependencies": { "safe-buffer": "^5.0.1" } }, "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ=="], + "emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], "escalade": ["escalade@3.2.0", "", {}, "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA=="], @@ -98,6 +119,8 @@ "events": ["events@3.3.0", "", {}, "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q=="], + "extend": ["extend@3.0.2", "", {}, "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g=="], + "file-type": ["file-type@18.7.0", "", { "dependencies": { "readable-web-to-node-stream": "^3.0.2", "strtok3": "^7.0.0", "token-types": "^5.0.1" } }, "sha512-ihHtXRzXEziMrQ56VSgU7wkxh55iNchFkosu7Y9/S+tXHdKyrGjVK0ujbqNnsxzea+78MaLhN6PGmfYSAv1ACw=="], "follow-redirects": ["follow-redirects@1.15.9", "", {}, "sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ=="], @@ -108,16 +131,46 @@ "formdata-node": ["formdata-node@4.4.1", "", { "dependencies": { "node-domexception": "1.0.0", "web-streams-polyfill": "4.0.0-beta.3" } }, "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ=="], + "franc-all": ["franc-all@7.2.0", "", { "dependencies": { "trigram-utils": "^2.0.0" } }, "sha512-ZR6ciLQTDBaOvBdkOd8+vqDzaLtmIXRa9GCzcAlaBpqNAKg9QrtClPmqiKac5/xZXfCZGMo1d8dIu1T0BLhHEg=="], + + "gaxios": ["gaxios@6.7.1", "", { "dependencies": { "extend": "^3.0.2", "https-proxy-agent": "^7.0.1", "is-stream": "^2.0.0", "node-fetch": "^2.6.9", "uuid": "^9.0.1" } }, "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ=="], + + "gcp-metadata": ["gcp-metadata@6.1.1", "", { "dependencies": { "gaxios": "^6.1.1", "google-logging-utils": "^0.0.2", "json-bigint": "^1.0.0" } }, "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A=="], + "get-caller-file": ["get-caller-file@2.0.5", "", {}, "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg=="], + "google-auth-library": ["google-auth-library@9.15.1", "", { "dependencies": { "base64-js": "^1.3.0", "ecdsa-sig-formatter": "^1.0.11", "gaxios": "^6.1.1", "gcp-metadata": "^6.1.0", "gtoken": "^7.0.0", "jws": "^4.0.0" } }, "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng=="], + + "google-logging-utils": ["google-logging-utils@0.0.2", "", {}, "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ=="], + "groq-sdk": ["groq-sdk@0.15.0", "", { "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", "node-fetch": "^2.6.7" } }, "sha512-aYDEdr4qczx3cLCRRe+Beb37I7g/9bD5kHF+EEDxcrREWw1vKoRcfP3vHEkJB7Ud/8oOuF0scRwDpwWostTWuQ=="], + "gtoken": ["gtoken@7.1.0", "", { "dependencies": { "gaxios": "^6.0.0", "jws": "^4.0.0" } }, "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw=="], + + "https-proxy-agent": ["https-proxy-agent@7.0.6", "", { "dependencies": { "agent-base": "^7.1.2", "debug": "4" } }, "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw=="], + "humanize-ms": ["humanize-ms@1.2.1", "", { "dependencies": { "ms": "^2.0.0" } }, "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ=="], "ieee754": ["ieee754@1.2.1", "", {}, "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA=="], + "is-alphabetical": ["is-alphabetical@2.0.1", "", {}, "sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ=="], + + "is-alphanumerical": ["is-alphanumerical@2.0.1", "", { "dependencies": { "is-alphabetical": "^2.0.0", "is-decimal": "^2.0.0" } }, "sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw=="], + + "is-decimal": ["is-decimal@2.0.1", "", {}, "sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A=="], + "is-fullwidth-code-point": ["is-fullwidth-code-point@3.0.0", "", {}, "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg=="], + "is-stream": ["is-stream@2.0.1", "", {}, "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg=="], + + "iso-639-3": ["iso-639-3@3.0.1", "", {}, "sha512-SdljCYXOexv/JmbQ0tvigHN43yECoscVpe2y2hlEqy/CStXQlroPhZLj7zKLRiGqLJfw8k7B973UAMDoQczVgQ=="], + + "json-bigint": ["json-bigint@1.0.0", "", { "dependencies": { "bignumber.js": "^9.0.0" } }, "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ=="], + + "jwa": ["jwa@2.0.1", "", { "dependencies": { "buffer-equal-constant-time": "^1.0.1", "ecdsa-sig-formatter": "1.0.11", "safe-buffer": "^5.0.1" } }, "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg=="], + + "jws": ["jws@4.0.0", "", { "dependencies": { "jwa": "^2.0.0", "safe-buffer": "^5.0.1" } }, "sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg=="], + "lodash.camelcase": ["lodash.camelcase@4.3.0", "", {}, "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA=="], "long": ["long@5.3.0", "", {}, "sha512-5vvY5yF1zF/kXk+L94FRiTDa1Znom46UjPCH6/XbSvS8zBKMFBHTJk8KDMqJ+2J6QezQFi7k1k8v21ClJYHPaw=="], @@ -128,6 +181,8 @@ "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], + "n-gram": ["n-gram@2.0.2", "", {}, "sha512-S24aGsn+HLBxUGVAUFOwGpKs7LBcG4RudKU//eWzt/mQ97/NMKQxDWHyHx63UNWk/OOdihgmzoETn1tf5nQDzQ=="], + "node-domexception": ["node-domexception@1.0.0", "", {}, "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ=="], "node-fetch": ["node-fetch@2.7.0", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A=="], @@ -154,6 +209,8 @@ "safe-buffer": ["safe-buffer@5.2.1", "", {}, "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="], + "sortug": ["sortug@file:../../../npm/sortug", { "devDependencies": { "@types/bun": "latest" }, "peerDependencies": { "typescript": "^5" } }], + "string-width": ["string-width@4.2.3", "", { "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", "strip-ansi": "^6.0.1" } }, "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g=="], "string_decoder": ["string_decoder@1.3.0", "", { "dependencies": { "safe-buffer": "~5.2.0" } }, "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA=="], @@ -166,12 +223,16 @@ "tr46": ["tr46@0.0.3", "", {}, "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="], + "trigram-utils": ["trigram-utils@2.0.1", "", { "dependencies": { "collapse-white-space": "^2.0.0", "n-gram": "^2.0.0" } }, "sha512-nfWIXHEaB+HdyslAfMxSqWKDdmqY9I32jS7GnqpdWQnLH89r6A5sdk3fDVYqGAZ0CrT8ovAFSAo6HRiWcWNIGQ=="], + "tslib": ["tslib@2.8.1", "", {}, "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w=="], "typescript": ["typescript@5.7.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-84MVSjMEHP+FQRPy3pX9sTVV/INIex71s9TL2Gm5FG/WG1SqXeKyZ0k7/blY/4FdOzI12CBy1vGc4og/eus0fw=="], "undici-types": ["undici-types@5.26.5", "", {}, "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="], + "uuid": ["uuid@9.0.1", "", { "bin": { "uuid": "dist/bin/uuid" } }, "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA=="], + "web-streams-polyfill": ["web-streams-polyfill@4.0.0-beta.3", "", {}, "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug=="], "webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="], @@ -180,12 +241,18 @@ "wrap-ansi": ["wrap-ansi@7.0.0", "", { "dependencies": { "ansi-styles": "^4.0.0", "string-width": "^4.1.0", "strip-ansi": "^6.0.0" } }, "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q=="], + "ws": ["ws@8.18.2", "", { "peerDependencies": { "bufferutil": "^4.0.1", "utf-8-validate": ">=5.0.2" }, "optionalPeers": ["bufferutil", "utf-8-validate"] }, "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ=="], + "y18n": ["y18n@5.0.8", "", {}, "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA=="], "yargs": ["yargs@17.7.2", "", { "dependencies": { "cliui": "^8.0.1", "escalade": "^3.1.1", "get-caller-file": "^2.0.5", "require-directory": "^2.1.1", "string-width": "^4.2.3", "y18n": "^5.0.5", "yargs-parser": "^21.1.1" } }, "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w=="], "yargs-parser": ["yargs-parser@21.1.1", "", {}, "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw=="], + "zod": ["zod@3.24.4", "", {}, "sha512-OdqJE9UDRPwWsrHjLN2F8bPxvwJBK22EHLWtanu0LSYr5YqzsaaW3RMgmjwr8Rypg5k+meEJdSPXJZXE/yqOMg=="], + + "zod-to-json-schema": ["zod-to-json-schema@3.24.5", "", { "peerDependencies": { "zod": "^3.24.1" } }, "sha512-/AuWwMP+YqiPbsJx5D6TfgRTc4kTLjsh5SOcd4bLsfUg2RcEXrFMJl1DGgdHy2aCfsIA/cr/1JM0xcB2GZji8g=="], + "@types/node-fetch/@types/node": ["@types/node@22.13.1", "", { "dependencies": { "undici-types": "~6.20.0" } }, "sha512-jK8uzQlrvXqEU91UxiK5J7pKHyzgnI1Qnl0QDHIgVGuolJhRb9EEl28Cj9b3rGR8B2lhFCtvIm5os8lFnO/1Ew=="], "@types/ws/@types/node": ["@types/node@22.13.1", "", { "dependencies": { "undici-types": "~6.20.0" } }, "sha512-jK8uzQlrvXqEU91UxiK5J7pKHyzgnI1Qnl0QDHIgVGuolJhRb9EEl28Cj9b3rGR8B2lhFCtvIm5os8lFnO/1Ew=="], @@ -194,6 +261,8 @@ "protobufjs/@types/node": ["@types/node@22.13.1", "", { "dependencies": { "undici-types": "~6.20.0" } }, "sha512-jK8uzQlrvXqEU91UxiK5J7pKHyzgnI1Qnl0QDHIgVGuolJhRb9EEl28Cj9b3rGR8B2lhFCtvIm5os8lFnO/1Ew=="], + "sortug/@types/bun": ["@types/bun@1.2.4", "", { "dependencies": { "bun-types": "1.2.4" } }, "sha512-QtuV5OMR8/rdKJs213iwXDpfVvnskPXY/S0ZiFbsTjQZycuqPbMW8Gf/XhLfwE5njW8sxI2WjISURXPlHypMFA=="], + "@types/node-fetch/@types/node/undici-types": ["undici-types@6.20.0", "", {}, "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg=="], "@types/ws/@types/node/undici-types": ["undici-types@6.20.0", "", {}, "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg=="], @@ -201,5 +270,11 @@ "bun-types/@types/node/undici-types": ["undici-types@6.20.0", "", {}, "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg=="], "protobufjs/@types/node/undici-types": ["undici-types@6.20.0", "", {}, "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg=="], + + "sortug/@types/bun/bun-types": ["bun-types@1.2.4", "", { "dependencies": { "@types/node": "*", "@types/ws": "~8.5.10" } }, "sha512-nDPymR207ZZEoWD4AavvEaa/KZe/qlrbMSchqpQwovPZCKc7pwMoENjEtHgMKaAjJhy+x6vfqSBA1QU3bJgs0Q=="], + + "sortug/@types/bun/bun-types/@types/node": ["@types/node@22.13.1", "", { "dependencies": { "undici-types": "~6.20.0" } }, "sha512-jK8uzQlrvXqEU91UxiK5J7pKHyzgnI1Qnl0QDHIgVGuolJhRb9EEl28Cj9b3rGR8B2lhFCtvIm5os8lFnO/1Ew=="], + + "sortug/@types/bun/bun-types/@types/node/undici-types": ["undici-types@6.20.0", "", {}, "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg=="], } } diff --git a/debug.ts b/debug.ts new file mode 100644 index 0000000..1b1312d --- /dev/null +++ b/debug.ts @@ -0,0 +1,30 @@ +import OpenAI from "openai"; +import Claude from "@anthropic-ai/sdk"; +import { GoogleGenAI } from "@google/genai"; +async function oai() { + const openai = new OpenAI(); + + const list = await openai.models.list(); + for await (const model of list) { + console.log({ model }); + } +} + +async function cld() { + const claude = new Claude(); + const list = await claude.models.list(); + for await (const model of list) { + console.log({ model }); + } +} + +async function gem() { + const gemini = new GoogleGenAI({ apiKey: Bun.env["GEMINI_API_KEY"]! }); + const list = await gemini.models.list(); + for await (const model of list) { + console.log({ model }); + } +} +// oai(); +// cld(); +// gem(); @@ -1,13 +1,36 @@ -import openai from "./src/openai"; -import claude from "./src/claude"; -import gemini from "./src/gemini"; -import generic from "./src/model"; -import type { ChatMessage } from "./src/types"; +// import Openai from "./src/openai"; +import Claude from "./src/claude"; +import Gemini from "./src/gemini"; +import Generic from "./src/generic"; +import type { AIModelAPI, AIModelChoice } from "./src/types"; -export { - openai as OpenAI, - claude as Claude, - gemini as Gemini, - generic as Model, -}; -export type { ChatMessage }; +export type * from "./src/types"; +export * as NLP from "./src/nlp"; + +export default function (choice: AIModelChoice): AIModelAPI { + const api = + "other" in choice + ? new Generic(choice.other) + : choice.name === "deepseek" + ? new Generic({ + baseURL: "https://api.deepseek.com", + apiKey: Bun.env.DEEPSEEK_API_KEY!, + model: "deepseek-chat", + }) + : choice.name === "grok" + ? new Generic({ + baseURL: "https://api.x.ai/v1", + apiKey: Bun.env.GROK_API_KEY!, + model: "grok-2-latest", + }) + : choice.name === "chatgpt" + ? new Generic({ + baseURL: "https://api.openai.com/v1", + apiKey: Bun.env.OPENAI_API_KEY!, + model: "gpt-4o", + }) + : choice.name === "claude" + ? new Claude() + : new Gemini(); + return api; +} diff --git a/package.json b/package.json index fa8ad67..53e21ee 100644 --- a/package.json +++ b/package.json @@ -3,17 +3,22 @@ "module": "index.ts", "type": "module", "devDependencies": { - "@types/bun": "latest" + "@types/bun": "^1.2.12" }, "peerDependencies": { - "typescript": "^5.0.0" + "typescript": "^5.7.3" }, "dependencies": { "@anthropic-ai/sdk": "^0.36.3", + "@google/genai": "^0.13.0", "@google/generative-ai": "^0.21.0", + "bcp-47": "^2.1.0", + "franc-all": "^7.2.0", "groq-sdk": "^0.15.0", + "iso-639-3": "^3.0.1", "openai": "^4.84.0", "playht": "^0.16.0", - "replicate": "^1.0.1" + "replicate": "^1.0.1", + "sortug": "file://home/y/code/npm/sortug" } -}
\ No newline at end of file +} diff --git a/src/claude.ts b/src/claude.ts index 377316e..2a56bc1 100644 --- a/src/claude.ts +++ b/src/claude.ts @@ -1,20 +1,30 @@ import Claude from "@anthropic-ai/sdk"; import { RESPONSE_LENGTH } from "./logic/constants"; -import type { AResult, ChatMessage, OChoice, OChunk, OMessage } from "./types"; +import type { + AIModelAPI, + ChatMessage, + OChoice, + OChunk, + OMessage, +} from "./types"; import { BOOKWORM_SYS } from "./prompts"; +import type { AsyncRes } from "sortug"; type Message = Claude.Messages.MessageParam; -export default class Conversation { - private tokenizer: (text: string) => number; - private maxTokens: number; - model: string = "claude-3-5-sonnet-20241022"; +export default class ClaudeAPI implements AIModelAPI { + private model: string = "claude-3-7-sonnet-20250219"; + tokenizer: (text: string) => number; + maxTokens: number; + // model: string = "claude-3-5-sonnet-20241022"; constructor( maxTokens = 200_000, tokenizer: (text: string) => number = (text) => text.length / 3, + model?: string, ) { this.maxTokens = maxTokens; this.tokenizer = tokenizer; + if (model) this.model = model; } public setModel(model: string) { this.model = model; @@ -101,7 +111,7 @@ export default class Conversation { system: string, messages: Message[], isR1: boolean = false, - ): Promise<AResult<string[]>> { + ): Promise<AsyncRes<string[]>> { try { const claud = new Claude(); // const list = await claud.models.list(); diff --git a/src/gemini.ts b/src/gemini.ts index 2f685a2..3e636c2 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -1,137 +1,132 @@ import { - GenerativeModel, - GoogleGenerativeAI, + Chat, + GoogleGenAI, type Content, - type GenerateContentResult, -} from "@google/generative-ai"; + type GeneratedImage, + type GeneratedVideo, +} from "@google/genai"; import { RESPONSE_LENGTH } from "./logic/constants"; -import type { AResult, ChatMessage, OChoice, OChunk, OMessage } from "./types"; +import type { + AIModelAPI, + ChatMessage, + OChoice, + OChunk, + OMessage, +} from "./types"; +import type { AsyncRes } from "sortug"; -export default class Conversation { - private tokenizer: (text: string) => number; - private maxTokens: number; - private model: GenerativeModel; +export default class GeminiAPI { + tokenizer: (text: string) => number; + maxTokens: number; + private model: string; + api: GoogleGenAI; + chats: Map<string, Chat> = new Map<string, Chat>(); constructor( maxTokens = 200_000, tokenizer: (text: string) => number = (text) => text.length / 3, + model?: string, ) { this.maxTokens = maxTokens; this.tokenizer = tokenizer; - const gem = new GoogleGenerativeAI(Bun.env["GEMINI_API_KEY"]!); - this.model = gem.getGenerativeModel({ - model: "gemini-2.0-flash-exp", - generationConfig: { maxOutputTokens: RESPONSE_LENGTH }, - }); + const gem = new GoogleGenAI({ apiKey: Bun.env["GEMINI_API_KEY"]! }); + this.api = gem; + this.model = model || "gemini-2.5-pro-preview-05-06 "; } - public setModel(model: string) { - const gem = new GoogleGenerativeAI(Bun.env["GEMINI_API_KEY"]!); - this.model = gem.getGenerativeModel({ - model, - generationConfig: { maxOutputTokens: RESPONSE_LENGTH }, - }); + createChat({ name, history }: { name?: string; history?: Content[] }) { + const chat = this.api.chats.create({ model: this.model, history }); + this.chats.set(name ? name : Date.now().toString(), chat); } - private mapMessages(input: ChatMessage[]): Content[] { - return input.map((m) => ({ - role: m.author === "gemini" ? "model" : "user", - parts: [{ text: m.text }], - })); + async followChat(name: string, message: string): AsyncRes<string> { + const chat = this.chats.get(name); + if (!chat) return { error: "no chat with that name" }; + else { + const response = await chat.sendMessage({ message }); + const text = response.text; + return { ok: text || "" }; + } } - - private mapMessagesR1(input: ChatMessage[]): Content[] { - return input.reduce((acc: Content[], m, i) => { - const prev = acc[i - 1]; - const role = m.author === "gemini" ? "model" : "user"; - const msg = { role, parts: [{ text: m.text }] }; - if (prev?.role === role) acc[i - 1] = msg; - else acc = [...acc, msg]; - return acc; - }, []); + async followChatStream( + name: string, + message: string, + handler: (data: string) => void, + ) { + const chat = this.chats.get(name); + if (!chat) throw new Error("no chat!"); + else { + const response = await chat.sendMessageStream({ message }); + for await (const chunk of response) { + const text = chunk.text; + handler(text || ""); + } + } } - private async apiCall( - messages: Content[], - isR1: boolean = false, - ): Promise<AResult<string[]>> { + async send(message: string, systemPrompt?: string): AsyncRes<string> { try { - const chat = this.model.startChat({ history: messages }); - const res = await chat.sendMessage(""); - return { ok: [res.response.text()] }; + const opts = { + model: this.model, + contents: message, + }; + const fopts = systemPrompt + ? { ...opts, config: { systemInstruction: systemPrompt } } + : opts; + const response = await this.api.models.generateContent(fopts); + return { ok: response.text || "" }; } catch (e) { - console.log(e, "error in gemini api"); return { error: `${e}` }; } } + async sendStream( + handler: (s: string) => void, + message: string, + systemPrompt?: string, + ) { + const opts = { + model: this.model, + contents: message, + }; + const fopts = systemPrompt + ? { ...opts, config: { systemInstruction: systemPrompt } } + : opts; + const response = await this.api.models.generateContentStream(fopts); + for await (const chunk of response) { + handler(chunk.text || ""); + } + } - private async apiCallStream( - messages: Content[], - handle: (c: any) => void, - isR1: boolean = false, - ): Promise<void> { + async makeImage(prompt: string): AsyncRes<GeneratedImage[]> { try { - const chat = this.model.startChat({ history: messages }); - const res = await chat.sendMessage(""); - // for await (const chunk of res.stream()) { - // handle(chunk.text()); - // } + const response = await this.api.models.generateImages({ + model: this.model, + prompt, + }); + // TODO if empty or undefined return error + return { ok: response.generatedImages || [] }; } catch (e) { - console.log(e, "error in gemini api"); - handle(`Error streaming Gemini, ${e}`); + return { error: `${e}` }; } } - - public async send(sys: string, input: ChatMessage[]) { - const messages = this.mapMessages(input); - const truncated = this.truncateHistory(messages); - const res = await this.apiCall(truncated); - return res; - } - - public async sendR1(input: ChatMessage[]) { - const messages = this.mapMessagesR1(input); - const truncated = this.truncateHistory(messages); - const res = await this.apiCall(truncated, true); - return res; - } - - public async stream( - sys: string, - input: ChatMessage[], - handle: (c: any) => void, - ) { - const messages = this.mapMessages(input); - const truncated = this.truncateHistory(messages); - await this.apiCallStream(truncated, handle); - } - - public async streamR1(input: ChatMessage[], handle: (c: any) => void) { - const messages = this.mapMessagesR1(input); - const truncated = this.truncateHistory(messages); - await this.apiCallStream(truncated, handle, true); - } - - public async sendDoc(data: ArrayBuffer, mimeType: string, prompt: string) { - const res = await this.model.generateContent([ - { - inlineData: { - data: Buffer.from(data).toString("base64"), - mimeType, - }, - }, - prompt, - ]); - return res; - } - - private truncateHistory(messages: Content[]): Content[] { - const totalTokens = messages.reduce((total, message) => { - return total + this.tokenizer(message.parts[0].text || ""); - }, 0); - while (totalTokens > this.maxTokens && messages.length > 1) { - messages.splice(0, 1); + async makeVideo({ + prompt, + image, + }: { + prompt?: string; + image?: string; + }): AsyncRes<GeneratedVideo[]> { + try { + const response = await this.api.models.generateVideos({ + model: this.model, + prompt, + }); + // TODO if empty or undefined return error + return { ok: response.response?.generatedVideos || [] }; + } catch (e) { + return { error: `${e}` }; } - return messages; } } +// TODO how to use caches +// https://ai.google.dev/api/caching diff --git a/src/gemini2.ts b/src/gemini2.ts new file mode 100644 index 0000000..291553f --- /dev/null +++ b/src/gemini2.ts @@ -0,0 +1,149 @@ +import { + GenerativeModel, + GoogleGenerativeAI, + type Content, + type GenerateContentResult, +} from "@google/generative-ai"; +import { RESPONSE_LENGTH } from "./logic/constants"; +import type { + AIModelAPI, + ChatMessage, + OChoice, + OChunk, + OMessage, +} from "./types"; +import type { AsyncRes } from "sortug"; + +export default class GeminiAPI implements AIModelAPI { + tokenizer: (text: string) => number; + maxTokens: number; + private model: GenerativeModel; + + constructor( + maxTokens = 200_000, + tokenizer: (text: string) => number = (text) => text.length / 3, + model?: string, + ) { + this.maxTokens = maxTokens; + this.tokenizer = tokenizer; + + const gem = new GoogleGenerativeAI(Bun.env["GEMINI_API_KEY"]!); + this.model = gem.getGenerativeModel({ + // model: model || "gemini-2.0-flash-exp", + model: model || "gemini-2.5-pro-preview-05-06 ", + generationConfig: { maxOutputTokens: RESPONSE_LENGTH }, + }); + } + + public setModel(model: string) { + const gem = new GoogleGenerativeAI(Bun.env["GEMINI_API_KEY"]!); + this.model = gem.getGenerativeModel({ + model, + generationConfig: { maxOutputTokens: RESPONSE_LENGTH }, + }); + } + private mapMessages(input: ChatMessage[]): Content[] { + return input.map((m) => ({ + role: m.author === "gemini" ? "model" : "user", + parts: [{ text: m.text }], + })); + } + + private mapMessagesR1(input: ChatMessage[]): Content[] { + return input.reduce((acc: Content[], m, i) => { + const prev = acc[i - 1]; + const role = m.author === "gemini" ? "model" : "user"; + const msg = { role, parts: [{ text: m.text }] }; + if (prev?.role === role) acc[i - 1] = msg; + else acc = [...acc, msg]; + return acc; + }, []); + } + + private async apiCall( + messages: Content[], + isR1: boolean = false, + ): Promise<AsyncRes<string[]>> { + try { + const chat = this.model.startChat({ history: messages }); + const res = await chat.sendMessage(""); + return { ok: [res.response.text()] }; + } catch (e) { + console.log(e, "error in gemini api"); + return { error: `${e}` }; + } + } + + private async apiCallStream( + messages: Content[], + handle: (c: any) => void, + isR1: boolean = false, + ): Promise<void> { + try { + const chat = this.model.startChat({ history: messages }); + const res = await chat.sendMessage(""); + // for await (const chunk of res.stream()) { + // handle(chunk.text()); + // } + } catch (e) { + console.log(e, "error in gemini api"); + handle(`Error streaming Gemini, ${e}`); + } + } + + public async send(sys: string, input: ChatMessage[]) { + console.log({ sys, input }); + this.model.systemInstruction = { role: "system", parts: [{ text: sys }] }; + const messages = this.mapMessages(input); + const truncated = this.truncateHistory(messages); + const res = await this.apiCall(truncated); + return res; + } + + public async sendR1(input: ChatMessage[]) { + const messages = this.mapMessagesR1(input); + const truncated = this.truncateHistory(messages); + const res = await this.apiCall(truncated, true); + return res; + } + + public async stream( + sys: string, + input: ChatMessage[], + handle: (c: any) => void, + ) { + this.model.systemInstruction = { role: "system", parts: [{ text: sys }] }; + const messages = this.mapMessages(input); + const truncated = this.truncateHistory(messages); + await this.apiCallStream(truncated, handle); + } + + public async streamR1(input: ChatMessage[], handle: (c: any) => void) { + const messages = this.mapMessagesR1(input); + const truncated = this.truncateHistory(messages); + await this.apiCallStream(truncated, handle, true); + } + + public async sendDoc(data: ArrayBuffer, mimeType: string, prompt: string) { + const res = await this.model.generateContent([ + { + inlineData: { + data: Buffer.from(data).toString("base64"), + mimeType, + }, + }, + prompt, + ]); + return res; + } + + private truncateHistory(messages: Content[]): Content[] { + const totalTokens = messages.reduce((total, message) => { + return total + this.tokenizer(message.parts[0].text || ""); + }, 0); + while (totalTokens > this.maxTokens && messages.length > 1) { + messages.splice(0, 1); + } + return messages; + } +} diff --git a/src/model.ts b/src/generic.ts index 39b42dc..50c4435 100644 --- a/src/model.ts +++ b/src/generic.ts @@ -1,29 +1,30 @@ import OpenAI from "openai"; import { MAX_TOKENS, RESPONSE_LENGTH } from "./logic/constants"; -import type { AResult, ChatMessage, OChoice } from "./types"; +import type { AIModelAPI, ChatMessage, OChoice } from "./types"; +import type { AsyncRes } from "sortug"; type Message = OpenAI.Chat.Completions.ChatCompletionMessageParam; type Props = { baseURL: string; apiKey: string; - model: string; + model?: string; maxTokens?: number; tokenizer?: (text: string) => number; }; -export default class Conversation { +export default class OpenAIAPI implements AIModelAPI { private apiKey; private baseURL; - private maxTokens: number = MAX_TOKENS; - private tokenizer: (text: string) => number = (text) => text.length / 3; private api; - private model; + maxTokens: number = MAX_TOKENS; + tokenizer: (text: string) => number = (text) => text.length / 3; + model; constructor(props: Props) { this.apiKey = props.apiKey; this.baseURL = props.baseURL; this.api = new OpenAI({ baseURL: this.baseURL, apiKey: this.apiKey }); - this.model = props.model; + this.model = props.model || ""; if (props.maxTokens) this.maxTokens = props.maxTokens; if (props.tokenizer) this.tokenizer = props.tokenizer; } @@ -36,7 +37,7 @@ export default class Conversation { }); } - public async send(sys: string, input: ChatMessage[]): AResult<string[]> { + public async send(sys: string, input: ChatMessage[]): AsyncRes<string[]> { const messages = this.mapMessages(input); const sysMsg: Message = { role: "system", content: sys }; const allMessages = [sysMsg, ...messages]; @@ -44,12 +45,15 @@ export default class Conversation { const truncated = this.truncateHistory(allMessages); const res = await this.apiCall(truncated); if ("error" in res) return res; - else + else { try { - return { ok: res.ok.map((c) => c.message.content!) }; + // TODO type this properly + const choices: OChoice[] = res.ok; + return { ok: choices.map((c) => c.message.content!) }; } catch (e) { return { error: `${e}` }; } + } } public async stream( @@ -77,7 +81,7 @@ export default class Conversation { } // TODO custom temperature? - private async apiCall(messages: Message[]): AResult<OChoice[]> { + private async apiCall(messages: Message[]): AsyncRes<OChoice[]> { console.log({ messages }, "at the very end"); try { const completion = await this.api.chat.completions.create({ diff --git a/src/nlp/index.ts b/src/nlp/index.ts new file mode 100644 index 0000000..ebed586 --- /dev/null +++ b/src/nlp/index.ts @@ -0,0 +1,7 @@ +import * as Spacy from "./spacy"; +import * as Stanza from "./stanza"; +import * as ISO from "./iso"; +import { ocr } from "./ocr"; +import type * as Types from "./types"; +export * from "./nlp"; +export { ISO, ocr, Stanza, Spacy, type Types }; diff --git a/src/nlp/iso.ts b/src/nlp/iso.ts new file mode 100644 index 0000000..3e60850 --- /dev/null +++ b/src/nlp/iso.ts @@ -0,0 +1,10 @@ +import { franc, francAll } from "franc-all"; +import { iso6393To1 } from "iso-639-3"; +export { iso6393, iso6393To1, iso6393To2B, iso6393To2T } from "iso-639-3"; +export * as BCP47 from "bcp-47"; + +export function detectLang(text: string) { + const iso3 = franc(text); + const iso1 = iso6393To1[iso3]; + return iso1 ? iso1 : iso3; +} diff --git a/src/nlp/nlp.ts b/src/nlp/nlp.ts new file mode 100644 index 0000000..3b1e3a7 --- /dev/null +++ b/src/nlp/nlp.ts @@ -0,0 +1,208 @@ +export const isPunctuation = (text: string): boolean => { + // Common punctuation characters + const punctuationRegex = /^[.,;:!?()[\]{}'"«»""''…-]+$/; + return punctuationRegex.test(text); +}; + +// Get color for different syntactic categories +export function getColorForType(type: string): string { + const colors: Record<string, string> = { + // Phrasal categories + S: "#6495ED", // Sentence - cornflower blue + NP: "#FF7F50", // Noun Phrase - coral + VP: "#32CD32", // Verb Phrase - lime green + PP: "#9370DB", // Prepositional Phrase - medium purple + ADJP: "#FFD700", // Adjective Phrase - gold + ADVP: "#FF69B4", // Adverb Phrase - hot pink + + // Part-of-speech tags + NN: "#FFA07A", // Noun - light salmon + NNS: "#FFA07A", // Plural Noun - light salmon + NNP: "#FFA07A", // Proper Noun - light salmon + VB: "#90EE90", // Verb - light green + VBP: "#90EE90", // Present tense verb - light green + VBG: "#90EE90", // Gerund verb - light green + VBZ: "#90EE90", // 3rd person singular present verb - light green + VBD: "#90EE90", // Past tense verb - light green + VBN: "#90EE90", // Past participle verb - light green + JJ: "#F0E68C", // Adjective - khaki + RB: "#DDA0DD", // Adverb - plum + IN: "#87CEFA", // Preposition - light sky blue + DT: "#D3D3D3", // Determiner - light gray + PRP: "#D8BFD8", // Personal pronoun - thistle + CC: "#A9A9A9", // Coordinating conjunction - dark gray + + // Default + ROOT: "#000000", // Root - black + LEAF: "#666666", // Leaf nodes - dark gray + }; + + return colors[type] || "#666666"; +} + +// Get a description for node types +export function getDescription(type: string): string { + const descriptions: Record<string, string> = { + S: "Sentence", + SBAR: "Subordinating conjunction clause", + SBARQ: "Direct question", + SINV: "Declarative sentence with subject-aux inversion", + SQ: "Subconstituent of SBARQ excluding wh-word", + WHADVP: "wh-adverb phrase", + WHNP: "wh-nounphrase", + WHPP: "wh-prepositional phrase", + WDT: "wh-determiner", + WP: "wh-pronoun", + WRB: "wh-adverb", + WP$: "possesive wh-pronoun", + MD: "modal", + X: "Unknown", + NP: "Noun Phrase", + VP: "Verb Phrase", + PP: "Prepositional Phrase", + ADJP: "Adjective Phrase", + ADVP: "Adverb Phrase", + LS: "List item market", + SYM: "Symbol", + NN: "Noun", + NNS: "Plural Noun", + NNP: "Proper Noun", + NNPS: "Proper Noun, Plural", + VB: "Verb (base form)", + VBP: "Verb (present tense)", + VBG: "Verb (gerund/present participle)", + VBZ: "Verb (3rd person singular present)", + VBD: "Verb (past tense)", + VBN: "Verb (past participle)", + JJ: "Adjective", + JJR: "Adjective, comparative", + JJS: "Adjective, superlative", + EX: "Existential there", + RB: "Adverb", + RBR: "Adverb, comparative", + RBS: "Adverb, superlative", + RP: "Particle", + IN: "Preposition", + TO: "to", + DT: "Determiner", + PDT: "Predeterminer", + PRP: "Personal Pronoun", + PP$: "Possesive Pronoun", + PRP$: "Possesive Pronoun", + POS: "Possesive ending", + FW: "Foreign Word", + CC: "Coordinating Conjunction", + CD: "Cardinal number", + UH: "interjection", + ROOT: "Root Node", + CLR: "figurative motion", + FRAG: "fragment", + ":": "Colon/Semicolon", + ",": "Comma", + ".": "Period", + }; + + return descriptions[type] || type; +} + +// https://universaldependencies.org/u/dep/xcomp.htmlexport + +export function unpackDeprel(type: string): string { + const descriptions: Record<string, string> = { + nsubj: "nominal subject", + obj: "object", + iobj: "indirect object", + csubj: "clausal subject", + ccomp: "clausal complement", + xcomp: "open clausal complement", + obl: "oblique nominal", + vocative: "vocative", + expl: "expletive", + dislocated: "dislocated", + nmod: "nominal modifier", + appos: "appositional modifier", + nummod: "numeric modifier", + advcl: "adverbial clause modifier", + acl: "admonimal clause", + advmod: "adverbial modifier", + discourse: "dicourse element", + aux: "auxiliary", + cop: "copula", + mark: "marker", + amod: "adjectival modifier", + det: "determiner", + clf: "classifier", + case: "case marker", + conj: "conjunction", + cc: "coordinating conjunction", + fixed: "fixed multiword expression", + flat: "flat expression", + list: "list", + parataxis: "parataxis", + compound: "compound", + orphan: "orphan", + goeswith: "goes with", + reparandum: "overriden disfluency", + punct: "punctuation", + root: "root", + dep: "unspecified dependency", + }; + const res = descriptions[type]; + if (!res) console.log("tag not found!!", type); + + return res || type; +} + +export function deprelColors(type: string): string { + const colors: Record<string, string> = { + // Phrasal categories + s: "#6495ED", // Sentence - cornflower blue + nsubj: "#6495ED", // Sentence - cornflower blue + root: "#FFD700", // Adjective Phrase - gold + p: "#FFD700", // Adjective Phrase - gold + NP: "#FF7F50", // Noun Phrase - coral + VP: "#32CD32", // Verb Phrase - lime green + PP: "#9370DB", // Prepositional Phrase - medium purple + ADVP: "#FF69B4", // Adverb Phrase - hot pink + + // Part-of-speech tags + NN: "#FFA07A", // Noun - light salmon + NNS: "#FFA07A", // Plural Noun - light salmon + NNP: "#FFA07A", // Proper Noun - light salmon + VB: "#90EE90", // Verb - light green + VBP: "#90EE90", // Present tense verb - light green + VBG: "#90EE90", // Gerund verb - light green + VBZ: "#90EE90", // 3rd person singular present verb - light green + VBD: "#90EE90", // Past tense verb - light green + VBN: "#90EE90", // Past participle verb - light green + JJ: "#F0E68C", // Adjective - khaki + RB: "#DDA0DD", // Adverb - plum + IN: "#87CEFA", // Preposition - light sky blue + DT: "#D3D3D3", // Determiner - light gray + PRP: "#D8BFD8", // Personal pronoun - thistle + CC: "#A9A9A9", // Coordinating conjunction - dark gray + + // Default + ROOT: "#000000", // Root - black + LEAF: "#666666", // Leaf nodes - dark gray + }; + + return colors[type] || "#666666"; +} +export function unpackPos(pos: string): string { + const map: Record<string, string> = { + adj: "adjective", + adv: "adverb", + adv_phrase: "adverbial phrase", + combining_form: "combining form", + conj: "conjunction", + det: "determinant", + intj: "interjection", + num: "number", + prep: "preposition", + prep_phrase: "prepositional phrase", + pron: "pronoun", + punct: "punctuation", + }; + return map[pos] || pos; +} diff --git a/src/nlp/ocr.ts b/src/nlp/ocr.ts new file mode 100644 index 0000000..1c40355 --- /dev/null +++ b/src/nlp/ocr.ts @@ -0,0 +1,18 @@ +import type { AsyncRes } from "sortug"; + +export async function ocr(formData: FormData): AsyncRes<string[]> { + const endpoint = "http://localhost:8102/ocr"; + + const opts = { + method: "POST", + body: formData, + headers: { "X-API-KEY": Bun.env.SORTUG_NLP_API_KEY! }, + }; + try { + const res = await fetch(endpoint, opts); + const j = await res.json(); + return { ok: j }; + } catch (e) { + return { error: `${e}` }; + } +} diff --git a/src/nlp/spacy.ts b/src/nlp/spacy.ts new file mode 100644 index 0000000..d79de55 --- /dev/null +++ b/src/nlp/spacy.ts @@ -0,0 +1,79 @@ +import type { AsyncRes, Result } from "sortug"; +import { detectLang } from "./iso"; +const ENDPOINT = "http://localhost:8102"; + +export async function run(text: string, langg?: string): AsyncRes<SpacyRes> { + try { + const lang = langg ? langg : detectLang(text); + const body = JSON.stringify({ string: text, lang }); + const opts = { + headers: { + "Content-type": "application/json", + "X-API-KEY": Bun.env.SORTUG_NLP_API_KEY!, + }, + method: "POST", + body, + }; + const res = await fetch(ENDPOINT + "/spacy", opts); + const j = await res.json(); + console.log("spacy", j); + return { ok: j }; + } catch (e) { + return { error: `${e}` }; + } +} + +export type SpacyResBig = { + doc: { + text: string; + ents: any[]; + sents: Array<{ start: number; end: number }>; + tokens: Token[]; + }; + segs: Sentence[]; +}; +export type SpacyRes = { + input: string; + segments: Sentence[]; +}; +export type Sentence = { + text: string; + start: number; + end: number; + root: Token; + subj: Token; + arcs: Arc[]; + words: Word[]; +}; +export type Arc = { + start: number; + end: number; + label: string; // deprel label + dir: string; +}; +export type Token = { + id: number; + head: number; + start: number; + end: number; + dep: string; + lemma: string; + morph: string; + pos: string; + tag: string; + text: string; +}; + +export interface Word extends Token { + ancestors: number[]; + children: []; + n_lefts: number; + n_rights: number; + left_edge: number; + right_edge: number; + morph_map: Record<string, string>; +} + +export function isChild(w: Word, topId: number): boolean { + return w.id === topId || w.ancestors.includes(topId); +} diff --git a/src/nlp/stanza.ts b/src/nlp/stanza.ts new file mode 100644 index 0000000..5836b91 --- /dev/null +++ b/src/nlp/stanza.ts @@ -0,0 +1,210 @@ +import type { AsyncRes, Result } from "sortug"; +import { detectLang } from "./iso"; + +const ENDPOINT = "http://localhost:8102"; +export async function segmenter( + text: string, + langg?: string, +): AsyncRes<StanzaRes> { + try { + const lang = langg ? langg : detectLang(text); + const body = JSON.stringify({ lang, string: text }); + const opts = { + headers: { + "Content-type": "application/json", + "X-API-KEY": Bun.env.SORTUG_NLP_API_KEY!, + }, + method: "POST", + body, + }; + const res = await fetch(ENDPOINT + "/stanza", opts); + const j = await res.json(); + return { ok: j }; + } catch (e) { + return { error: `${e}` }; + } +} +export async function idLang(text: string) { + try { + const body = JSON.stringify({ string: text }); + const opts = { + headers: { + "Content-type": "application/json", + "X-API-KEY": Bun.env.SORTUG_NLP_API_KEY!, + }, + method: "POST", + body, + }; + const res = await fetch(ENDPOINT + "/detect-lang", opts); + const j = await res.json(); + return { ok: j }; + } catch (e) { + return { error: `${e}` }; + } +} +export type StanzaRes = { input: string; segments: Sentence[] }; +export type Sentence = { + text: string; + sentiment: number; + constituency: TreeNode; + constring: string; + dependencies: Dependency[]; + entities: Entity[]; + tokens: Token[]; + words: Word[]; +}; +export type TreeNode = { + label: string; + children: TreeNode[]; +}; +export type Dependency = Array<[Word, string, Word]>; +export type Word = { + id: number; + text: string; + lemma: string; + upos: string; + xpos: string; + feats: string; + head: number; + deprel: string; + start_char: number; + end_char: number; +}; +export type Token = { + id: [number, number]; + text: string; + misc: string; + words: Word[]; + start_char: number; + end_char: number; + ner: string; +}; +export type Entity = { + text: string; + misc: string; + start_char: number; + end_char: number; + type: string; +}; + +// mine +export type Clause = { + words: Word[]; + dependency: Dependency; + text: string; +}; +// "amod", +// { +// "id": 1, +// "text": "Stony", +// "lemma": "Stony", +// "upos": "ADJ", +// "xpos": "NNP", +// "feats": "Degree=Pos", +// "head": 3, +// "deprel": "amod", +// "start_char": 0, +// "end_char": 5 +// } +// +// + +export interface ParsedGrammar { + predicateCore: number; + subjectCore: number | null; + tree: Record<number, number[]>; + wordMap: WordMap; + words: BigWord[]; +} +export interface BigWord extends Word { + ancestry: number[]; + component: "s" | "p" | "u"; +} +export type ComputedDependency = { + word: BigWord; + children: ComputedDependency[]; +}; +export type WordMap = Record<number, Word>; + +export function buildTreeFromWords(words: Word[]): Result<ParsedGrammar> { + const roots = words.filter((w) => w.deprel === "root"); + if (roots.length > 1) { + console.log("roots", roots); + return { error: "too many roots" }; + } else if (roots.length === 0) { + return { error: "no roots" }; + } else { + const root = roots[0]; + const wordmap = words.reduce((acc: WordMap, item) => { + acc[item.id] = item; + return acc; + }, {}); + return { ok: parseFurther(words, wordmap, root) }; + } +} +function parseFurther( + words: Word[], + wordMap: WordMap, + root: Word, +): ParsedGrammar { + const predicateCore = root.id; + let subjectCore: number | null = null; + const tree: Record<number, number[]> = {}; + const bigwords: BigWord[] = []; + const getAncestry = (parent: Word): number[] => { + const kids = tree[parent.head] || []; + tree[parent.head] = [...kids, parent.id]; + if (parent.deprel === "nsubj") subjectCore = parent.id; + + console.log("getting ancestry " + parent.id, parent.text); + const grandpa = wordMap[parent.head]; + if (!grandpa) return [parent.id]; + else return [parent.id, ...getAncestry(grandpa)]; + }; + let idx = 0; + for (const w of words) { + if (w.deprel === "punct") { + const prev = words[idx - 1]; + if (!prev) continue; + prev.text += w.text; + continue; + } + const parent = wordMap[w.head]; + if (!parent) tree[w.id] = []; + const ancestry = !parent ? [] : getAncestry(parent); + const component = + subjectCore && (w.id === subjectCore || ancestry.includes(subjectCore)) + ? "s" + : w.id === predicateCore || ancestry.includes(root.id) + ? "p" + : "u"; + const bw: BigWord = { ...w, component, ancestry }; + wordMap[w.id] = bw; + bigwords.push(bw); + idx++; + } + const pg: ParsedGrammar = { + predicateCore, + subjectCore, + wordMap, + tree, + words: bigwords, + }; + return pg; +} + +export function oneDescendant(node: TreeNode): boolean { + if (node.children.length !== 1) return false; + else { + const child = node.children[0]; + return child.children.length === 0; + } +} + +// function findChildren(wordmap: WordMap, word: Word): ComputedDependency { +// const children = words.filter((w) => w.head === head.id); +// return { +// word: head, +// children: children.map((c) => findChildren(words, c)), +// }; +// } diff --git a/src/nlp/types.ts b/src/nlp/types.ts new file mode 100644 index 0000000..605a637 --- /dev/null +++ b/src/nlp/types.ts @@ -0,0 +1,50 @@ +export type ViewLevel = + | "text" + | "paragraph" + | "sentence" + | "clause" + | "word" + | "syllable" + | "phoneme"; +export interface ViewState { + level: ViewLevel; + pIndex: number | null; + sIndex: number | null; + cIndex: number | null; + wIndex: number | null; + yIndex: number | null; + fIndex: number | null; +} + +export interface ViewProps { + idx: number; + rawText: string; + context: Context; +} +export type Context = { + parentText: string; + segmented: string[]; + idx: number; +}; + +export type WordData = { + confidence: number; + frequency: number | null; + id: number; + ipa: Array<{ ipa: string; tags: string[] }>; + spelling: string; + type: ExpressionType; + syllables: number; + lang: string; + prosody: any; + senses: Sense[]; +}; +export type ExpressionType = "word" | "expression" | "syllable"; +export type Sense = { + etymology: string; + pos: string; + forms: Array<{ form: string; tags: string[] }>; + related: any; + senses: Array<{ glosses: string[]; links: Array<[string, string]> }>; +}; +export type LoadingStatus = "pending" | "loading" | "success" | "error"; diff --git a/src/openai.ts b/src/openai.ts index 2e15dcf..12939bc 100644 --- a/src/openai.ts +++ b/src/openai.ts @@ -1,14 +1,8 @@ import fs from "fs"; import OpenAI from "openai"; import { RESPONSE_LENGTH } from "./logic/constants"; -import type { - AResult, - ChatMessage, - OChoice, - OChunk, - OMessage, - Result, -} from "./types"; +import type { ChatMessage, OChoice, OChunk, OMessage } from "./types"; +import type { AsyncRes, Result } from "sortug"; import OpenAIToolUse from "./openai_tools"; import type { FileObject } from "openai/src/resources/files.js"; @@ -26,7 +20,7 @@ export default class Conversation { private baseURL: string = "https://api.openai.com/v1"; private tokenizer: (text: string) => number = (text) => text.length / 3; openai; - private model: string = "chatgpt-4o-latest"; + private model: string = "gpt-4.1"; constructor(props: Props) { if (props.apiKey) this.apiKey = props.apiKey; @@ -56,7 +50,7 @@ export default class Conversation { }, []); } - public async send(sys: string, input: ChatMessage[]): AResult<OChoice[]> { + public async send(sys: string, input: ChatMessage[]): AsyncRes<OChoice[]> { const messages = this.mapMessages(input); const sysMsg: Message = { role: "system", content: sys }; const allMessages = [sysMsg, ...messages]; @@ -65,7 +59,7 @@ export default class Conversation { return res; } - public async sendR1(input: ChatMessage[]): AResult<OChoice[]> { + public async sendR1(input: ChatMessage[]): AsyncRes<OChoice[]> { const messages = this.mapMessagesR1(input); const truncated = this.truncateHistory(messages); const res = await this.apiCall(truncated); @@ -102,7 +96,7 @@ export default class Conversation { return messages; } - private async apiCall(messages: Message[]): AResult<OChoice[]> { + private async apiCall(messages: Message[]): AsyncRes<OChoice[]> { try { const completion = await this.openai.chat.completions.create({ temperature: 1.3, diff --git a/src/types/index.ts b/src/types/index.ts index 97be443..b276457 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -1,15 +1,29 @@ import type OpenAI from "openai"; +import type { AsyncRes } from "sortug"; export type ChatMessage = { author: string; text: string; sent: number; reasoning?: string; }; -export type Result<T> = { ok: T } | { error: string }; -export type AResult<T> = Promise<{ ok: T } | { error: string }>; // openai export type OChoice = OpenAI.Chat.Completions.ChatCompletion.Choice; export type OChunk = OpenAI.Chat.Completions.ChatCompletionChunk.Choice; export type OMessage = OpenAI.Chat.Completions.ChatCompletionMessageParam; export type ContentType = { text: string } | { audio: Response }; +export type AIModelChoice = + | { name: "deepseek" | "chatgpt" | "claude" | "gemini" | "grok" } + | { other: { baseURL: string; apiKey: string } }; +export interface AIModelAPI { + setModel: (model: string) => void; + tokenizer: (text: string) => number; + maxTokens: number; + + send: (systemPrompt: string, input: ChatMessage[]) => AsyncRes<string[]>; + stream: ( + systemPrompt: string, + input: ChatMessage[], + handler: (data: any) => void, + ) => void; +} |
