gperdrizet committed on
Commit
a5548e7
·
verified ·
1 Parent(s): 2db495f

Reorganized inference endpoints

Browse files
inference_endpoints/{deepseekR1-qwen-32B.py → modal/deepseekR1-qwen-32B.py} RENAMED
File without changes
inference_endpoints/{llama3-1-8B-instruct.py → modal/llama3-1-8B-instruct.py} RENAMED
File without changes
inference_endpoints/{qwen2-5-coder-14B-instruct.py → modal/qwen2-5-coder-14B-instruct.py} RENAMED
@@ -63,12 +63,11 @@ def serve():
63
  "--uvicorn-log-level=info",
64
  MODEL_NAME,
65
  "--served-model-name", MODEL_NAME,
66
- "--tensor-parallel-size", "2",
67
- "--max-model-len", "16000",
68
  "--host", "0.0.0.0",
69
  "--port", str(VLLM_PORT),
70
  "--api-key",os.environ["MODAL_TOKEN_SECRET"],
71
- "--enforce-eager"
72
  ]
73
 
74
  subprocess.Popen(" ".join(cmd), shell=True)
 
63
  "--uvicorn-log-level=info",
64
  MODEL_NAME,
65
  "--served-model-name", MODEL_NAME,
66
+ "--max-model-len", "32000",
 
67
  "--host", "0.0.0.0",
68
  "--port", str(VLLM_PORT),
69
  "--api-key",os.environ["MODAL_TOKEN_SECRET"],
70
+ "--enforce-eager" # Don't compile CUDA graph, saves memory and cold start time
71
  ]
72
 
73
  subprocess.Popen(" ".join(cmd), shell=True)