init

2024-11-27 22:29:30 +07:00 · 2024-11-27 22:29:30 +07:00 · 623fe6ac7a
commit 623fe6ac7a
10 changed files with 343 additions and 0 deletions
--- a/.envrc
+++ b/.envrc
@ -0,0 +1,3 @@
 # Load devenv's direnv integration from a URL pinned to a specific devenv commit, together with its expected sha256 hash.
 source_url "https://raw.githubusercontent.com/cachix/devenv/95f329d49a8a5289d31e0982652f7058a189bfca/direnvrc" "sha256-d+8cBpDfDBj41inrADaJt+bDWhOktwslgoP5YiGJ1v0="
 # Activate the devenv environment for this directory.
 use devenv
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,9 @@
 # Devenv
 .devenv*
 devenv.local.nix
 # direnv
 .direnv
 # pre-commit
 .pre-commit-config.yaml
--- a/devenv.lock
+++ b/devenv.lock
@ -0,0 +1,122 @@
 {
  "nodes": {
    "devenv": {
      "locked": {
        "dir": "src/modules",
        "lastModified": 1725964132,
        "owner": "cachix",
        "repo": "devenv",
        "rev": "98c7c131e3fa30eb00e9bfe44c1a180c7f94102f",
        "treeHash": "145543926bdaaeabf22cd0f42e2991d0e35131c4",
        "type": "github"
      },
      "original": {
        "dir": "src/modules",
        "owner": "cachix",
        "repo": "devenv",
        "type": "github"
      }
    },
    "flake-compat": {
      "flake": false,
      "locked": {
        "lastModified": 1696426674,
        "owner": "edolstra",
        "repo": "flake-compat",
        "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33",
        "treeHash": "2addb7b71a20a25ea74feeaf5c2f6a6b30898ecb",
        "type": "github"
      },
      "original": {
        "owner": "edolstra",
        "repo": "flake-compat",
        "type": "github"
      }
    },
    "gitignore": {
      "inputs": {
        "nixpkgs": [
          "pre-commit-hooks",
          "nixpkgs"
        ]
      },
      "locked": {
        "lastModified": 1709087332,
        "owner": "hercules-ci",
        "repo": "gitignore.nix",
        "rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
        "treeHash": "ca14199cabdfe1a06a7b1654c76ed49100a689f9",
        "type": "github"
      },
      "original": {
        "owner": "hercules-ci",
        "repo": "gitignore.nix",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1716977621,
        "owner": "cachix",
        "repo": "devenv-nixpkgs",
        "rev": "4267e705586473d3e5c8d50299e71503f16a6fb6",
        "treeHash": "6d9f1f7ca0faf1bc2eeb397c78a49623260d3412",
        "type": "github"
      },
      "original": {
        "owner": "cachix",
        "ref": "rolling",
        "repo": "devenv-nixpkgs",
        "type": "github"
      }
    },
    "nixpkgs-stable": {
      "locked": {
        "lastModified": 1725826545,
        "owner": "NixOS",
        "repo": "nixpkgs",
        "rev": "f4c846aee8e1e29062aa8514d5e0ab270f4ec2f9",
        "treeHash": "8fc49deaed3f2728a7147c38163cc468a117570a",
        "type": "github"
      },
      "original": {
        "owner": "NixOS",
        "ref": "nixos-24.05",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "pre-commit-hooks": {
      "inputs": {
        "flake-compat": "flake-compat",
        "gitignore": "gitignore",
        "nixpkgs": [
          "nixpkgs"
        ],
        "nixpkgs-stable": "nixpkgs-stable"
      },
      "locked": {
        "lastModified": 1725513492,
        "owner": "cachix",
        "repo": "pre-commit-hooks.nix",
        "rev": "7570de7b9b504cfe92025dd1be797bf546f66528",
        "treeHash": "4b46d77870afecd8f642541cb4f4927326343b59",
        "type": "github"
      },
      "original": {
        "owner": "cachix",
        "repo": "pre-commit-hooks.nix",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "devenv": "devenv",
        "nixpkgs": "nixpkgs",
        "pre-commit-hooks": "pre-commit-hooks"
      }
    }
  },
  "root": "root",
  "version": 7
 }
--- a/devenv.nix
+++ b/devenv.nix
@ -0,0 +1,64 @@
 # devenv configuration for a CUDA-enabled Python development shell.
 # Only `pkgs` is referenced below; the other arguments are accepted but unused.
 { pkgs, lib, config, inputs, ... }:
 {
  # https://devenv.sh/basics/
  env.GREET = "devenv";
  env.PYTHON_KEYRING_BACKEND = "keyring.backends.fail.Keyring";
  # Both CUDA location variables point at the Nix-provided toolkit.
  env.CUDA_HOME = pkgs.cudaPackages.cudatoolkit;
  env.CUDA_PATH = pkgs.cudaPackages.cudatoolkit;
  # NOTE(review): `$LD_LIBRARY_PATH` is a literal part of this Nix string; confirm
  # it is expanded by the shell rather than exported verbatim.
  env.LD_LIBRARY_PATH = "/run/opengl-driver/lib:$LD_LIBRARY_PATH";
  env.CUDA_VISIBLE_DEVICES = "0";
  # NOTE(review): pkg/main.py assigns max_split_size_mb:512 to this variable at
  # startup, which replaces this value for the server process.
  env.PYTORCH_CUDA_ALLOC_CONF = "max_split_size_mb:256";
  env.EXTRA_LDFLAGS = "-L/lib -L${pkgs.linuxPackages.nvidia_x11}/lib";
  env.EXTRA_CCFLAGS = "-I/usr/include";
  # env.LD_LIBRARY_PATH = ":${pkgs.cudatoolkit}/lib:${pkgs.cudatoolkit}/lib64:${pkgs.cudatoolkit}/host-linux-x64/Mesa";
  # https://devenv.sh/packages/
  packages = with pkgs; [
    git
    python311Packages.python-lsp-server
    cudaPackages.cudatoolkit
    # gitRepo gnupg autoconf curl
    # procps gnumake utillinux m4 gperf unzip
    # linuxPackages.nvidia_x11
    # libGLU
    # xorg.libXi xorg.libXmu freeglut
    # xorg.libXext xorg.libX11 xorg.libXv xorg.libXrandr zlib
    # ncurses5 stdenv.cc binutils
    # cudaPackages.libcublas
    # cudaPackages.cudnn
  ];
  # https://devenv.sh/scripts/
  scripts.hello.exec = "echo hello from $GREET";
  enterShell = ''
    hello
    git --version
  '';
  # https://devenv.sh/tests/
  # Check against the git version of the pinned nixpkgs instead of a hard-coded
  # release number, so the test keeps passing when the lock file is updated.
  enterTest = ''
    echo "Running tests"
    git --version | grep --color=auto "${pkgs.git.version}"
  '';
  # https://devenv.sh/services/
  # services.postgres.enable = true;
  # https://devenv.sh/languages/
  # languages.nix.enable = true;
  # Python toolchain with a devenv-managed virtualenv.
  languages.python = {
    enable = true;
    venv.enable = true;
  };
  # https://devenv.sh/pre-commit-hooks/
  # pre-commit.hooks.shellcheck.enable = true;
  # https://devenv.sh/processes/
  # processes.ping.exec = "ping example.com";
  # See full reference at https://devenv.sh/reference/options/
 }
--- a/devenv.yaml
+++ b/devenv.yaml
@ -0,0 +1,15 @@
 # yaml-language-server: $schema=https://devenv.sh/devenv.schema.json
 inputs:
  nixpkgs:
    url: github:cachix/devenv-nixpkgs/rolling
 # If you're using non-OSS software, you can set allowUnfree to true.
 allowUnfree: true
 # If you're willing to use a package that's vulnerable
 # permittedInsecurePackages:
 #  - "openssl-1.1.1w"
 # If you have more than one devenv you can merge them
 #imports:
 # - ./backend
--- a/pkg/main.py
+++ b/pkg/main.py
@ -0,0 +1,78 @@
 import torch
 import torch.nn.functional as F
 from transformers import AutoTokenizer, AutoModel
 import time
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from typing import List
 import uvicorn
 import gc
 import os
 # FastAPI application that serves the embedding endpoint.
 app = FastAPI()
 # Restrict the process to GPU 0 and configure the CUDA caching allocator.
 # Both are assigned before the first torch.cuda call in this module.
 os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
 # Load model (do this only once at startup)
 print("Loading model...")
 # NOTE(review): trust_remote_code=True runs Python code shipped in the model repository.
 model = AutoModel.from_pretrained('nvidia/NV-Embed-v2', trust_remote_code=True, torch_dtype="auto")
 model.eval()
 # model.half()
 # Device preference: CUDA, then Apple MPS, then CPU.
 device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
 print(device.type)
 model.to(device)
 if device.type == 'cuda':
    # gc.collect()
    # One flush is enough: torch.cuda.memory.empty_cache is the same function,
    # and the discarded torch.cuda.max_memory_allocated() query had no effect.
    torch.cuda.empty_cache()
 print(f"Model loaded and moved to {device}")
 # Define request and response models
 # Request body accepted by POST /embed.
 class EmbeddingRequest(BaseModel):
    # Texts to embed.
    texts: List[str]
    # When true, the handler applies the retrieval instruction (query_prefix) to every text.
    is_query: bool
 # Response body returned by POST /embed.
 class EmbeddingResponse(BaseModel):
    # Embedding vectors produced for the request's texts, as nested lists of floats.
    embeddings: List[List[float]]
    # Elapsed handling time in seconds, as measured inside the handler.
    time_taken: float
 # Each query needs to be accompanied by a corresponding instruction describing the task.
 task_name_to_instruct = {"example": "Given a question, retrieve passages that answer the question",}
 # Instruction prefix used for query texts: "Instruct: <task instruction>\nQuery: ".
 query_prefix = "Instruct: " + task_name_to_instruct["example"] + "\nQuery: "
 # Passed as `max_length` to model.encode.
 max_length = 32768
@app.post("/embed", response_model=EmbeddingResponse)
 async def embed_texts(request: EmbeddingRequest):
    """Embed the submitted texts and return L2-normalized vectors.

    Args:
        request: Texts to embed. When ``is_query`` is true, ``query_prefix``
            is passed to the model as the instruction for every text.

    Returns:
        EmbeddingResponse holding the embeddings and the elapsed time in seconds.

    Raises:
        HTTPException: status 500 carrying the error message if encoding fails.
    """
    # Monotonic clock: the value is only used to compute an elapsed duration.
    start_time = time.perf_counter()
    # Nothing to encode, so answer without touching the model.
    if not request.texts:
        return EmbeddingResponse(embeddings=[], time_taken=time.perf_counter() - start_time)
    try:
        with torch.no_grad():
            if device.type == 'cuda':
                # gc.collect()
                torch.cuda.empty_cache()
            # Hand the instruction to encode() through `instruction=`, the same way
            # pkg/test.py does, instead of concatenating it into each text.
            instruction = query_prefix if request.is_query else ""
            # NOTE(review): encode() is called synchronously (no await), so this
            # handler occupies the event loop for the duration of the call.
            embeddings = model.encode(request.texts, instruction=instruction, max_length=max_length)
            embeddings = F.normalize(embeddings, p=2, dim=1)
            print(f"Embedding size: {embeddings.size()}")
            embeddings_list = embeddings.cpu().float().tolist()
        time_taken = time.perf_counter() - start_time
        return EmbeddingResponse(embeddings=embeddings_list, time_taken=time_taken)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
 # When this file is executed as a script, serve `app` on all interfaces, port 8000.
 if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
--- a/pkg/run.sh
+++ b/pkg/run.sh
@ -0,0 +1 @@
 # Start the embedding API with auto-reload.
 # The target is the FastAPI `app` object defined in pkg/main.py; the previous
 # target `prosody.server:app` is not part of this repository.
 # Run from this script's directory so that the `main` module is importable.
 cd "$(dirname "$0")" || exit 1
 poetry run python -m uvicorn main:app --host 0.0.0.0 --port 8000 --reload
--- a/pkg/test.py
+++ b/pkg/test.py
@ -0,0 +1,44 @@
 import torch
 import torch.nn.functional as F
 from transformers import AutoTokenizer, AutoModel
 # Retrieval queries are paired with an instruction that describes the task.
 task_name_to_instruct = {"example": "Given a question, retrieve passages that answer the question"}
 query_prefix = f"Instruct: {task_name_to_instruct['example']}\nQuery: "
 queries = [
    'are judo throws allowed in wrestling?',
    'how to become a radiology technician in michigan?',
 ]
 # Passages are encoded with an empty instruction.
 passage_prefix = ""
 passages = [
    "Since you're reading this, you are probably someone from a judo background or someone who is just wondering how judo techniques can be applied under wrestling rules. So without further ado, let's get to the question. Are Judo throws allowed in wrestling? Yes, judo throws are allowed in freestyle and folkstyle wrestling. You only need to be careful to follow the slam rules when executing judo throws. In wrestling, a slam is lifting and returning an opponent to the mat with unnecessary force.",
    "Below are the basic steps to becoming a radiologic technologist in Michigan:Earn a high school diploma. As with most careers in health care, a high school education is the first step to finding entry-level employment. Taking classes in math and science, such as anatomy, biology, chemistry, physiology, and physics, can help prepare students for their college studies and future careers.Earn an associate degree. Entry-level radiologic positions typically require at least an Associate of Applied Science. Before enrolling in one of these degree programs, students should make sure it has been properly accredited by the Joint Review Committee on Education in Radiologic Technology (JRCERT).Get licensed or certified in the state of Michigan.",
 ]
 # Load the model.
 model = AutoModel.from_pretrained('nvidia/NV-Embed-v2', trust_remote_code=True)
 # Pick a device name in order of preference: CUDA, then MPS, then CPU.
 # NOTE(review): `device` is only printed; this script never moves the model to it.
 if torch.cuda.is_available():
    backend = "cuda"
 elif torch.backends.mps.is_available():
    backend = "mps"
 else:
    backend = "cpu"
 device = torch.device(backend)
 print(device.type)
 # Encode each set with its instruction, then L2-normalize along dim 1.
 max_length = 32768
 query_embeddings = F.normalize(
    model.encode(queries, instruction=query_prefix, max_length=max_length), p=2, dim=1
 )
 passage_embeddings = F.normalize(
    model.encode(passages, instruction=passage_prefix, max_length=max_length), p=2, dim=1
 )
 # Alternative kept for reference: encode through a DataLoader in mini-batches.
 # batch_size=2
 # query_embeddings = model._do_encode(queries, batch_size=batch_size, instruction=query_prefix, max_length=max_length, num_workers=32, return_numpy=True)
 # passage_embeddings = model._do_encode(passages, batch_size=batch_size, instruction=passage_prefix, max_length=max_length, num_workers=32, return_numpy=True)
 # Query-by-passage similarity matrix, scaled by 100.
 scores = (query_embeddings @ passage_embeddings.T) * 100
 print(scores.tolist())
 # [[87.42693328857422, 0.46283677220344543], [0.965264618396759, 86.03721618652344]]
--- a/pkg/tunnel.sh
+++ b/pkg/tunnel.sh
@ -0,0 +1 @@
 # Keep a reverse SSH tunnel open to y@sortug: port 8000 on the remote side forwards to localhost:8000 here.
 # -M 0 turns off autossh's monitoring port; the ServerAlive options (30 s interval, 3 missed replies) are passed through to ssh.
 autossh -M 0 -o "ServerAliveInterval 30" -o "ServerAliveCountMax 3" -R 8000:localhost:8000 y@sortug
--- a/test.sh
+++ b/test.sh
@ -0,0 +1,6 @@
 # Smoke test: POST two sample questions to the local /embed endpoint with is_query set to true.
 curl -X POST "http://localhost:8000/embed" \
     -H "Content-Type: application/json" \
     -d '{
       "texts": ["What is the capital of France?", "Who wrote Romeo and Juliet?"],
       "is_query": true
     }'
		`@ -0,0 +1,3 @@`
							`source_url "https://raw.githubusercontent.com/cachix/devenv/95f329d49a8a5289d31e0982652f7058a189bfca/direnvrc" "sha256-d+8cBpDfDBj41inrADaJt+bDWhOktwslgoP5YiGJ1v0="`

							`use devenv`
		`@ -0,0 +1 @@`
							`poetry run python -m uvicorn prosody.server:app --host 0.0.0.0 --reload`
		`@ -0,0 +1 @@`
							`autossh -M 0 -o "ServerAliveInterval 30" -o "ServerAliveCountMax 3" -R 8000:localhost:8000 y@sortug`