init
This commit is contained in:
commit
623fe6ac7a
3
.envrc
Normal file
3
.envrc
Normal file
@ -0,0 +1,3 @@
|
||||
source_url "https://raw.githubusercontent.com/cachix/devenv/95f329d49a8a5289d31e0982652f7058a189bfca/direnvrc" "sha256-d+8cBpDfDBj41inrADaJt+bDWhOktwslgoP5YiGJ1v0="
|
||||
|
||||
use devenv
|
9
.gitignore
vendored
Normal file
9
.gitignore
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
# Devenv
|
||||
.devenv*
|
||||
devenv.local.nix
|
||||
|
||||
# direnv
|
||||
.direnv
|
||||
|
||||
# pre-commit
|
||||
.pre-commit-config.yaml
|
122
devenv.lock
Normal file
122
devenv.lock
Normal file
@ -0,0 +1,122 @@
|
||||
{
|
||||
"nodes": {
|
||||
"devenv": {
|
||||
"locked": {
|
||||
"dir": "src/modules",
|
||||
"lastModified": 1725964132,
|
||||
"owner": "cachix",
|
||||
"repo": "devenv",
|
||||
"rev": "98c7c131e3fa30eb00e9bfe44c1a180c7f94102f",
|
||||
"treeHash": "145543926bdaaeabf22cd0f42e2991d0e35131c4",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"dir": "src/modules",
|
||||
"owner": "cachix",
|
||||
"repo": "devenv",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"flake-compat": {
|
||||
"flake": false,
|
||||
"locked": {
|
||||
"lastModified": 1696426674,
|
||||
"owner": "edolstra",
|
||||
"repo": "flake-compat",
|
||||
"rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
|
||||
"treeHash": "2addb7b71a20a25ea74feeaf5c2f6a6b30898ecb",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "edolstra",
|
||||
"repo": "flake-compat",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"gitignore": {
|
||||
"inputs": {
|
||||
"nixpkgs": [
|
||||
"pre-commit-hooks",
|
||||
"nixpkgs"
|
||||
]
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1709087332,
|
||||
"owner": "hercules-ci",
|
||||
"repo": "gitignore.nix",
|
||||
"rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
|
||||
"treeHash": "ca14199cabdfe1a06a7b1654c76ed49100a689f9",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "hercules-ci",
|
||||
"repo": "gitignore.nix",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1716977621,
|
||||
"owner": "cachix",
|
||||
"repo": "devenv-nixpkgs",
|
||||
"rev": "4267e705586473d3e5c8d50299e71503f16a6fb6",
|
||||
"treeHash": "6d9f1f7ca0faf1bc2eeb397c78a49623260d3412",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "cachix",
|
||||
"ref": "rolling",
|
||||
"repo": "devenv-nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"nixpkgs-stable": {
|
||||
"locked": {
|
||||
"lastModified": 1725826545,
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "f4c846aee8e1e29062aa8514d5e0ab270f4ec2f9",
|
||||
"treeHash": "8fc49deaed3f2728a7147c38163cc468a117570a",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-24.05",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"pre-commit-hooks": {
|
||||
"inputs": {
|
||||
"flake-compat": "flake-compat",
|
||||
"gitignore": "gitignore",
|
||||
"nixpkgs": [
|
||||
"nixpkgs"
|
||||
],
|
||||
"nixpkgs-stable": "nixpkgs-stable"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1725513492,
|
||||
"owner": "cachix",
|
||||
"repo": "pre-commit-hooks.nix",
|
||||
"rev": "7570de7b9b504cfe92025dd1be797bf546f66528",
|
||||
"treeHash": "4b46d77870afecd8f642541cb4f4927326343b59",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "cachix",
|
||||
"repo": "pre-commit-hooks.nix",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"devenv": "devenv",
|
||||
"nixpkgs": "nixpkgs",
|
||||
"pre-commit-hooks": "pre-commit-hooks"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
64
devenv.nix
Normal file
64
devenv.nix
Normal file
@ -0,0 +1,64 @@
|
||||
# devenv shell definition for the embedding service: a Python virtualenv plus
# the CUDA toolkit and the environment variables the GPU stack reads.
{ pkgs, lib, config, inputs, ... }:

{
  # https://devenv.sh/basics/
  env.GREET = "devenv";
  # Select the keyring "fail" backend for Python tooling run in this shell.
  env.PYTHON_KEYRING_BACKEND = "keyring.backends.fail.Keyring";
  env.CUDA_HOME = pkgs.cudaPackages.cudatoolkit;
  env.CUDA_PATH = pkgs.cudaPackages.cudatoolkit;
  # NOTE(review): this value may be exported literally, in which case
  # `$LD_LIBRARY_PATH` is never expanded and any pre-existing value is lost.
  # Confirm, and move the export into enterShell if that is the case.
  env.LD_LIBRARY_PATH = "/run/opengl-driver/lib:$LD_LIBRARY_PATH";
  env.CUDA_VISIBLE_DEVICES = "0";
  env.PYTORCH_CUDA_ALLOC_CONF = "max_split_size_mb:256";
  env.EXTRA_LDFLAGS = "-L/lib -L${pkgs.linuxPackages.nvidia_x11}/lib";
  env.EXTRA_CCFLAGS = "-I/usr/include";
  # env.LD_LIBRARY_PATH = ":${pkgs.cudatoolkit}/lib:${pkgs.cudatoolkit}/lib64:${pkgs.cudatoolkit}/host-linux-x64/Mesa";


  # https://devenv.sh/packages/
  packages = with pkgs; [
    git
    python311Packages.python-lsp-server
    cudaPackages.cudatoolkit
    # gitRepo gnupg autoconf curl
    # procps gnumake utillinux m4 gperf unzip
    # linuxPackages.nvidia_x11
    # libGLU
    # xorg.libXi xorg.libXmu freeglut
    # xorg.libXext xorg.libX11 xorg.libXv xorg.libXrandr zlib
    # ncurses5 stdenv.cc binutils
    # cudaPackages.libcublas
    # cudaPackages.cudnn
  ];

  # https://devenv.sh/scripts/
  scripts.hello.exec = "echo hello from $GREET";

  enterShell = ''
    hello
    git --version
  '';

  # https://devenv.sh/tests/
  enterTest = ''
    echo "Running tests"
    # Compare against the git version actually provided by the pinned nixpkgs
    # instead of a hard-coded release, so the test survives input updates.
    git --version | grep "${pkgs.git.version}"
  '';

  # https://devenv.sh/services/
  # services.postgres.enable = true;

  # https://devenv.sh/languages/
  # languages.nix.enable = true;
  languages.python = {
    enable = true;
    venv.enable = true;
  };

  # https://devenv.sh/pre-commit-hooks/
  # pre-commit.hooks.shellcheck.enable = true;

  # https://devenv.sh/processes/
  # processes.ping.exec = "ping example.com";

  # See full reference at https://devenv.sh/reference/options/
}
|
15
devenv.yaml
Normal file
15
devenv.yaml
Normal file
@ -0,0 +1,15 @@
|
||||
# yaml-language-server: $schema=https://devenv.sh/devenv.schema.json
|
||||
inputs:
|
||||
nixpkgs:
|
||||
url: github:cachix/devenv-nixpkgs/rolling
|
||||
|
||||
# If you're using non-OSS software, you can set allowUnfree to true.
|
||||
allowUnfree: true
|
||||
|
||||
# If you're willing to use a package that's vulnerable
|
||||
# permittedInsecurePackages:
|
||||
# - "openssl-1.1.1w"
|
||||
|
||||
# If you have more than one devenv you can merge them
|
||||
#imports:
|
||||
# - ./backend
|
78
pkg/main.py
Normal file
78
pkg/main.py
Normal file
@ -0,0 +1,78 @@
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from transformers import AutoTokenizer, AutoModel
|
||||
import time
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
import uvicorn
|
||||
import gc
|
||||
import os
|
||||
|
||||
app = FastAPI()

# Restrict the process to GPU 0 and configure the CUDA caching allocator.
# NOTE(review): these are assigned after `import torch`; confirm that the
# allocator setting is still picked up at this point, otherwise export both
# variables from the environment before the interpreter starts.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'

# Load model (do this only once at startup)
print("Loading model...")
model = AutoModel.from_pretrained('nvidia/NV-Embed-v2', trust_remote_code=True, torch_dtype="auto")
model.eval()
# model.half()

# Prefer CUDA, then Apple MPS, and fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(device.type)
model.to(device)
if device.type == 'cuda':
    # gc.collect()
    # A single call is enough: torch.cuda.memory.empty_cache is the same
    # function, and the previously discarded max_memory_allocated() query
    # had no effect.
    torch.cuda.empty_cache()

print(f"Model loaded and moved to {device}")
|
||||
|
||||
# Define request and response models
|
||||
class EmbeddingRequest(BaseModel):
    """Body accepted by the embedding endpoint."""

    # Strings to embed; the handler returns one embedding per entry.
    texts: List[str]
    # True when the texts are queries, in which case the handler applies the
    # query instruction prefix; False for plain passages.
    is_query: bool
|
||||
|
||||
class EmbeddingResponse(BaseModel):
    """Body returned by the embedding endpoint."""

    # One vector of floats per input text.
    embeddings: List[List[float]]
    # Wall-clock duration of the request in seconds (time.time() difference).
    time_taken: float
|
||||
|
||||
# Each query needs to be accompanied by a corresponding instruction describing the task.
|
||||
task_name_to_instruct = {
    "example": "Given a question, retrieve passages that answer the question",
}
# Instruction text placed in front of every query.
query_prefix = f"Instruct: {task_name_to_instruct['example']}\nQuery: "

# Value handed to model.encode as its max_length argument.
max_length = 32768
|
||||
|
||||
@app.post("/embed", response_model=EmbeddingResponse)
async def embed_texts(request: EmbeddingRequest):
    """Embed the request texts and return L2-normalised vectors.

    Args:
        request: texts to embed and a flag saying whether they are queries.

    Returns:
        EmbeddingResponse with one embedding per input text and the elapsed
        wall-clock time in seconds.

    Raises:
        HTTPException: status 500 carrying the message of any error raised
            while encoding.
    """
    start_time = time.time()

    # Nothing to encode: answer directly instead of sending an empty batch
    # to the model.
    if not request.texts:
        return EmbeddingResponse(embeddings=[], time_taken=time.time() - start_time)

    try:
        with torch.no_grad():
            if device.type == 'cuda':
                # gc.collect()
                torch.cuda.empty_cache()

            # Hand the instruction to the model through `instruction=`, the
            # same way the reference script calls encode, rather than gluing
            # it onto each text. Presumably the model treats the instruction
            # separately from the query text; see the model card to confirm.
            instruction = query_prefix if request.is_query else ""
            embeddings = model.encode(request.texts, instruction=instruction, max_length=max_length)
            embeddings = F.normalize(embeddings, p=2, dim=1)

            print(f"Embedding size: {embeddings.size()}")

            # Move to CPU and widen to float32 before converting to lists.
            embeddings_list = embeddings.cpu().float().tolist()

        time_taken = time.time() - start_time
        return EmbeddingResponse(embeddings=embeddings_list, time_taken=time_taken)

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
|
||||
if __name__ == "__main__":
    # Running this file directly serves the app on every interface, port 8000.
    uvicorn.run(app, port=8000, host="0.0.0.0")
|
1
pkg/run.sh
Normal file
1
pkg/run.sh
Normal file
@ -0,0 +1 @@
|
||||
#!/bin/sh
# Start the embedding API with uvicorn, reloading on source changes.
# The FastAPI instance is `app` in main.py next to this script, so run from
# the script's own directory to make `main:app` importable.
# NOTE(review): `poetry run` was dropped because no pyproject.toml is part of
# this project and the devenv shell provides the Python venv; restore it if
# Poetry is managed elsewhere.
cd "$(dirname "$0")" || exit 1
python -m uvicorn main:app --host 0.0.0.0 --reload
|
44
pkg/test.py
Normal file
44
pkg/test.py
Normal file
@ -0,0 +1,44 @@
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from transformers import AutoTokenizer, AutoModel
|
||||
|
||||
# Each query needs to be accompanied by a corresponding instruction describing the task.
task_name_to_instruct = {"example": "Given a question, retrieve passages that answer the question",}

query_prefix = "Instruct: "+task_name_to_instruct["example"]+"\nQuery: "
queries = [
    'are judo throws allowed in wrestling?',
    'how to become a radiology technician in michigan?'
    ]

# No instruction needed for retrieval passages
passage_prefix = ""
passages = [
    "Since you're reading this, you are probably someone from a judo background or someone who is just wondering how judo techniques can be applied under wrestling rules. So without further ado, let's get to the question. Are Judo throws allowed in wrestling? Yes, judo throws are allowed in freestyle and folkstyle wrestling. You only need to be careful to follow the slam rules when executing judo throws. In wrestling, a slam is lifting and returning an opponent to the mat with unnecessary force.",
    "Below are the basic steps to becoming a radiologic technologist in Michigan:Earn a high school diploma. As with most careers in health care, a high school education is the first step to finding entry-level employment. Taking classes in math and science, such as anatomy, biology, chemistry, physiology, and physics, can help prepare students for their college studies and future careers.Earn an associate degree. Entry-level radiologic positions typically require at least an Associate of Applied Science. Before enrolling in one of these degree programs, students should make sure it has been properly accredited by the Joint Review Committee on Education in Radiologic Technology (JRCERT).Get licensed or certified in the state of Michigan."
]

# load model with tokenizer
model = AutoModel.from_pretrained('nvidia/NV-Embed-v2', trust_remote_code=True)
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
print(device.type)
# Actually run on the detected device; previously it was only printed and the
# model stayed wherever from_pretrained placed it.
# NOTE(review): the model is loaded without torch_dtype here, so confirm it
# fits in accelerator memory at this precision.
model.to(device)


# get the embeddings
max_length = 32768
query_embeddings = model.encode(queries, instruction=query_prefix, max_length=max_length)
passage_embeddings = model.encode(passages, instruction=passage_prefix, max_length=max_length)

# normalize embeddings
query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
passage_embeddings = F.normalize(passage_embeddings, p=2, dim=1)

# get the embeddings with DataLoader (splitting the datasets into multiple mini-batches)
# batch_size=2
# query_embeddings = model._do_encode(queries, batch_size=batch_size, instruction=query_prefix, max_length=max_length, num_workers=32, return_numpy=True)
# passage_embeddings = model._do_encode(passages, batch_size=batch_size, instruction=passage_prefix, max_length=max_length, num_workers=32, return_numpy=True)

# Query-by-passage similarity matrix, scaled by 100.
scores = (query_embeddings @ passage_embeddings.T) * 100
print(scores.tolist())
# [[87.42693328857422, 0.46283677220344543], [0.965264618396759, 86.03721618652344]]
|
||||
|
1
pkg/tunnel.sh
Normal file
1
pkg/tunnel.sh
Normal file
@ -0,0 +1 @@
|
||||
# Keep a reverse SSH tunnel open so port 8000 on the remote host forwards to
# localhost:8000 on this machine. autossh's own monitor port is turned off
# (-M 0); the SSH keep-alive options below are set instead.
autossh -M 0 \
  -o "ServerAliveInterval 30" \
  -o "ServerAliveCountMax 3" \
  -R 8000:localhost:8000 \
  y@sortug
|
Loading…
x
Reference in New Issue
Block a user