init

2024-11-27 22:29:30 +07:00 · 2024-11-27 22:29:30 +07:00 · 623fe6ac7a
commit 623fe6ac7a
10 changed files with 343 additions and 0 deletions
--- a/.envrc
+++ b/.envrc
@ -0,0 +1,3 @@
+source_url "https://raw.githubusercontent.com/cachix/devenv/95f329d49a8a5289d31e0982652f7058a189bfca/direnvrc" "sha256-d+8cBpDfDBj41inrADaJt+bDWhOktwslgoP5YiGJ1v0="
+
+use devenv
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,9 @@
+# Devenv
+.devenv*
+devenv.local.nix
+
+# direnv
+.direnv
+
+# pre-commit
+.pre-commit-config.yaml
--- a/devenv.lock
+++ b/devenv.lock
@ -0,0 +1,122 @@
+{
+  "nodes": {
+    "devenv": {
+      "locked": {
+        "dir": "src/modules",
+        "lastModified": 1725964132,
+        "owner": "cachix",
+        "repo": "devenv",
+        "rev": "98c7c131e3fa30eb00e9bfe44c1a180c7f94102f",
+        "treeHash": "145543926bdaaeabf22cd0f42e2991d0e35131c4",
+        "type": "github"
+      },
+      "original": {
+        "dir": "src/modules",
+        "owner": "cachix",
+        "repo": "devenv",
+        "type": "github"
+      }
+    },
+    "flake-compat": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1696426674,
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
+        "treeHash": "2addb7b71a20a25ea74feeaf5c2f6a6b30898ecb",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "gitignore": {
+      "inputs": {
+        "nixpkgs": [
+          "pre-commit-hooks",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1709087332,
+        "owner": "hercules-ci",
+        "repo": "gitignore.nix",
+        "rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
+        "treeHash": "ca14199cabdfe1a06a7b1654c76ed49100a689f9",
+        "type": "github"
+      },
+      "original": {
+        "owner": "hercules-ci",
+        "repo": "gitignore.nix",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1716977621,
+        "owner": "cachix",
+        "repo": "devenv-nixpkgs",
+        "rev": "4267e705586473d3e5c8d50299e71503f16a6fb6",
+        "treeHash": "6d9f1f7ca0faf1bc2eeb397c78a49623260d3412",
+        "type": "github"
+      },
+      "original": {
+        "owner": "cachix",
+        "ref": "rolling",
+        "repo": "devenv-nixpkgs",
+        "type": "github"
+      }
+    },
+    "nixpkgs-stable": {
+      "locked": {
+        "lastModified": 1725826545,
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "f4c846aee8e1e29062aa8514d5e0ab270f4ec2f9",
+        "treeHash": "8fc49deaed3f2728a7147c38163cc468a117570a",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-24.05",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "pre-commit-hooks": {
+      "inputs": {
+        "flake-compat": "flake-compat",
+        "gitignore": "gitignore",
+        "nixpkgs": [
+          "nixpkgs"
+        ],
+        "nixpkgs-stable": "nixpkgs-stable"
+      },
+      "locked": {
+        "lastModified": 1725513492,
+        "owner": "cachix",
+        "repo": "pre-commit-hooks.nix",
+        "rev": "7570de7b9b504cfe92025dd1be797bf546f66528",
+        "treeHash": "4b46d77870afecd8f642541cb4f4927326343b59",
+        "type": "github"
+      },
+      "original": {
+        "owner": "cachix",
+        "repo": "pre-commit-hooks.nix",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "devenv": "devenv",
+        "nixpkgs": "nixpkgs",
+        "pre-commit-hooks": "pre-commit-hooks"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
--- a/devenv.nix
+++ b/devenv.nix
@ -0,0 +1,64 @@
+{ pkgs, lib, config, inputs, ... }:
+
+{
+  # https://devenv.sh/basics/ — environment variables exported into the devenv shell
+  env.GREET                   = "devenv";
+  env.PYTHON_KEYRING_BACKEND  = "keyring.backends.fail.Keyring";
+  env.CUDA_HOME               = pkgs.cudaPackages.cudatoolkit;
+  env.CUDA_PATH               = pkgs.cudaPackages.cudatoolkit;
+  env.LD_LIBRARY_PATH         = "/run/opengl-driver/lib:$LD_LIBRARY_PATH";
+  env.CUDA_VISIBLE_DEVICES    = "0";
+  env.PYTORCH_CUDA_ALLOC_CONF = "max_split_size_mb:256"; # NOTE(review): pkg/main.py sets 512 at import — confirm which is intended
+  env.EXTRA_LDFLAGS           = "-L/lib -L${pkgs.linuxPackages.nvidia_x11}/lib";
+  env.EXTRA_CCFLAGS           = "-I/usr/include";
+  # env.LD_LIBRARY_PATH = ":${pkgs.cudatoolkit}/lib:${pkgs.cudatoolkit}/lib64:${pkgs.cudatoolkit}/host-linux-x64/Mesa";
+
+
+  # https://devenv.sh/packages/
+  packages = with pkgs;[ 
+    git 
+    python311Packages.python-lsp-server
+    cudaPackages.cudatoolkit
+    # gitRepo gnupg autoconf curl
+    # procps gnumake utillinux m4 gperf unzip
+    # linuxPackages.nvidia_x11
+    # libGLU
+    # xorg.libXi xorg.libXmu freeglut
+    # xorg.libXext xorg.libX11 xorg.libXv xorg.libXrandr zlib 
+    # ncurses5 stdenv.cc binutils
+    # cudaPackages.libcublas
+    # cudaPackages.cudnn
+  ];
+
+  # https://devenv.sh/scripts/
+  scripts.hello.exec = "echo hello from $GREET";
+
+  enterShell = ''
+    hello
+    git --version
+  '';
+
+  # https://devenv.sh/tests/ — compare against the git version from the pinned nixpkgs, not a literal
+  enterTest = ''
+    echo "Running tests"
+    git --version | grep -F "${pkgs.git.version}"
+  '';
+
+  # https://devenv.sh/services/
+  # services.postgres.enable = true;
+
+  # https://devenv.sh/languages/
+  # languages.nix.enable = true;
+  languages.python = {
+    enable = true;
+    venv.enable = true;
+  };
+
+  # https://devenv.sh/pre-commit-hooks/
+  # pre-commit.hooks.shellcheck.enable = true;
+
+  # https://devenv.sh/processes/
+  # processes.ping.exec = "ping example.com";
+
+  # See full reference at https://devenv.sh/reference/options/
+}
--- a/devenv.yaml
+++ b/devenv.yaml
@ -0,0 +1,15 @@
+# yaml-language-server: $schema=https://devenv.sh/devenv.schema.json
+inputs:
+  nixpkgs:
+    url: github:cachix/devenv-nixpkgs/rolling
+
+# If you're using non-OSS software, you can set allowUnfree to true.
+allowUnfree: true
+
+# If you're willing to use a package that's vulnerable
+# permittedInsecurePackages:
+#  - "openssl-1.1.1w"
+
+# If you have more than one devenv you can merge them
+#imports:
+# - ./backend
--- a/pkg/main.py
+++ b/pkg/main.py
@ -0,0 +1,78 @@
+import torch
+import torch.nn.functional as F
+from transformers import AutoTokenizer, AutoModel
+import time
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List
+import uvicorn
+import gc
+import os
+
+app = FastAPI()
+os.environ['CUDA_VISIBLE_DEVICES'] = '0' 
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
+
+# Load model (do this only once at startup)
+print("Loading model...")
+model = AutoModel.from_pretrained('nvidia/NV-Embed-v2', trust_remote_code=True, torch_dtype="auto")
+model.eval()
+# model.half()
+
+device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
+print(device.type)
+model.to(device)
+if device.type == 'cuda':
+    # gc.collect()
+    torch.cuda.empty_cache()
+    torch.cuda.memory.empty_cache()
+    torch.cuda.max_memory_allocated()
+
+
+print(f"Model loaded and moved to {device}")
+
+# Define request and response models
+class EmbeddingRequest(BaseModel):
+    texts: List[str]
+    is_query: bool
+
+class EmbeddingResponse(BaseModel):
+    embeddings: List[List[float]]
+    time_taken: float
+
+# Each query needs to be accompanied by a corresponding instruction describing the task.
+task_name_to_instruct = {"example": "Given a question, retrieve passages that answer the question",}
+query_prefix = "Instruct: " + task_name_to_instruct["example"] + "\nQuery: "
+
+max_length = 32768
+
+@app.post("/embed", response_model=EmbeddingResponse)
+async def embed_texts(request: EmbeddingRequest):
+    start_time = time.time()
+    
+    try:
+        with torch.no_grad():
+            if device.type == 'cuda':
+                # gc.collect()
+                torch.cuda.empty_cache()
+                torch.cuda.memory.empty_cache()
+            if request.is_query:
+                texts = [query_prefix + text for text in request.texts]
+            else:
+                texts = request.texts
+            
+            embeddings = model.encode(texts, max_length=max_length)
+            embeddings = F.normalize(embeddings, p=2, dim=1)
+
+            print(f"Embedding size: {embeddings.size()}")
+            
+            embeddings_list = embeddings.cpu().float().tolist()
+            
+        time_taken = time.time() - start_time
+        return EmbeddingResponse(embeddings=embeddings_list, time_taken=time_taken)
+    
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
--- a/pkg/run.sh
+++ b/pkg/run.sh
@ -0,0 +1 @@
+poetry run python -m uvicorn main:app --app-dir "$(dirname "$0")" --host 0.0.0.0 --reload
--- a/pkg/test.py
+++ b/pkg/test.py
@ -0,0 +1,44 @@
+import torch
+import torch.nn.functional as F
+from transformers import AutoTokenizer, AutoModel
+
+# Each query needs to be accompanied by a corresponding instruction describing the task.
+task_name_to_instruct = {"example": "Given a question, retrieve passages that answer the question",}
+
+query_prefix = "Instruct: "+task_name_to_instruct["example"]+"\nQuery: "
+queries = [
+    'are judo throws allowed in wrestling?', 
+    'how to become a radiology technician in michigan?'
+    ]
+
+# No instruction needed for retrieval passages
+passage_prefix = ""
+passages = [
+    "Since you're reading this, you are probably someone from a judo background or someone who is just wondering how judo techniques can be applied under wrestling rules. So without further ado, let's get to the question. Are Judo throws allowed in wrestling? Yes, judo throws are allowed in freestyle and folkstyle wrestling. You only need to be careful to follow the slam rules when executing judo throws. In wrestling, a slam is lifting and returning an opponent to the mat with unnecessary force.",
+    "Below are the basic steps to becoming a radiologic technologist in Michigan:Earn a high school diploma. As with most careers in health care, a high school education is the first step to finding entry-level employment. Taking classes in math and science, such as anatomy, biology, chemistry, physiology, and physics, can help prepare students for their college studies and future careers.Earn an associate degree. Entry-level radiologic positions typically require at least an Associate of Applied Science. Before enrolling in one of these degree programs, students should make sure it has been properly accredited by the Joint Review Committee on Education in Radiologic Technology (JRCERT).Get licensed or certified in the state of Michigan."
+]
+
+# load model with tokenizer
+model = AutoModel.from_pretrained('nvidia/NV-Embed-v2', trust_remote_code=True)
+device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
+print(device.type)
+
+
+# get the embeddings
+max_length = 32768
+query_embeddings = model.encode(queries, instruction=query_prefix, max_length=max_length)
+passage_embeddings = model.encode(passages, instruction=passage_prefix, max_length=max_length)
+
+# normalize embeddings
+query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
+passage_embeddings = F.normalize(passage_embeddings, p=2, dim=1)
+
+# get the embeddings with DataLoader (splitting the datasets into multiple mini-batches)
+# batch_size=2
+# query_embeddings = model._do_encode(queries, batch_size=batch_size, instruction=query_prefix, max_length=max_length, num_workers=32, return_numpy=True)
+# passage_embeddings = model._do_encode(passages, batch_size=batch_size, instruction=passage_prefix, max_length=max_length, num_workers=32, return_numpy=True)
+
+scores = (query_embeddings @ passage_embeddings.T) * 100
+print(scores.tolist())
+# [[87.42693328857422, 0.46283677220344543], [0.965264618396759, 86.03721618652344]]
+
--- a/pkg/tunnel.sh
+++ b/pkg/tunnel.sh
@ -0,0 +1 @@
+autossh -M 0 -o "ServerAliveInterval 30" -o "ServerAliveCountMax 3" -R 8000:localhost:8000 y@sortug
--- a/test.sh
+++ b/test.sh
@ -0,0 +1,6 @@
+curl -X POST "http://localhost:8000/embed" \
+     -H "Content-Type: application/json" \
+     -d '{
+       "texts": ["What is the capital of France?", "Who wrote Romeo and Juliet?"],
+       "is_query": true
+     }'
				`@ -0,0 +1 @@`
				`poetry run python -m uvicorn prosody.server:app --host 0.0.0.0 --reload`
				`@ -0,0 +1 @@`
				`autossh -M 0 -o "ServerAliveInterval 30" -o "ServerAliveCountMax 3" -R 8000:localhost:8000 y@sortug`