server on CUDA
app.py CHANGED
@@ -41,14 +41,9 @@ except subprocess.CalledProcessError:
 except FileNotFoundError:
     platform = "CPU"
 
-
+platform = "CUDA"  # override for ZERO, because the GPU is not available at the time the download decision is made
 
-
-#hf_token = os.getenv("HF_TOKEN") # Set this in your environment before running
-#if hf_token:
-#    login(token=hf_token)
-#else:
-#    raise ValueError("Hugging Face token not found. Please set HF_TOKEN environment variable.")
+print(f"Detected platform {platform}")
 
 gguf_name = "granite-4.0-tiny-preview-Q4_K_M.gguf"
 gguf_path = hf_hub_download(
@@ -64,11 +59,16 @@ exe_path = hf_hub_download(
     filename=exe_name,
     local_dir="."
 )
+server_env = os.environ.copy()
 
 # start llama-server
 subprocess.run(["chmod", "+x", exe_name])
 command = ["./" + exe_name, "-m", gguf_name, "--temp", "0.0", "-c", "2048", "-t", "8", "--port", "8081"]
-process = subprocess.Popen(command)
+if platform == "CUDA":
+    # -ngl 999 tells the server to attempt to offload 999 layers (essentially all of them)
+    # to the GPU for execution. Without this, it runs on the CPU, even with the CUDA binary.
+    command.extend(["-ngl", "999"])
+process = subprocess.Popen(command, env=server_env)
 print(f"Llama-server process started with PID {process.pid}")
 
 custom_theme = ResearchMonochrome()
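
Why the override: the hunk's context lines (except subprocess.CalledProcessError: and except FileNotFoundError:) imply that app.py shells out to a probe such as nvidia-smi to decide which llama-server build to download. A minimal sketch of that pattern, assuming nvidia-smi is the probe (the actual command sits above the diff and is not shown):

import subprocess

# Sketch only: "nvidia-smi" as the probe command is an assumption;
# app.py's real probe is outside this diff. The except branches
# match the hunk context shown above.
try:
    subprocess.run(["nvidia-smi"], check=True, capture_output=True)
    platform = "CUDA"
except subprocess.CalledProcessError:
    platform = "CPU"  # probe ran but found no usable GPU
except FileNotFoundError:
    platform = "CPU"  # probe binary is not installed

# On ZeroGPU ("ZERO") Spaces the GPU is attached only while a request is
# being served, so the probe runs before any GPU exists; the commit
# therefore forces the CUDA build to be downloaded anyway:
platform = "CUDA"
print(f"Detected platform {platform}")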
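subprocess.Popen returns as soon as the process is spawned, usually well before the model finishes loading, so anything that talks to port 8081 right away can hit connection errors. llama-server exposes a /health endpoint (503 while the model loads, 200 once ready), so a readiness gate could be added after the Popen call; the helper below is illustrative, not part of the commit:

import time
import urllib.error
import urllib.request

def wait_for_server(url="http://127.0.0.1:8081/health", timeout_s=120):
    """Poll llama-server until the model is loaded or the timeout expires."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            with urllib.request.urlopen(url) as resp:
                if resp.status == 200:  # 200 = model loaded and serving
                    return True
        except (urllib.error.URLError, ConnectionResetError):
            pass  # not accepting connections yet, or still loading (503)
        time.sleep(1)
    return False

The server_env = os.environ.copy() line added at new line 62 presumably serves a related robustness goal: the child process gets an explicit copy of the parent environment, so CUDA-related variables set by the Space reach llama-server.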
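Once the server reports ready, the -ngl 999 path can be smoke-tested end to end: llama-server speaks an OpenAI-compatible API, so a single chat request against the port chosen above is enough to confirm the stack works. The prompt and response handling below are illustrative:

import json
import urllib.request

# Minimal chat request to the server started above (port 8081).
payload = {
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "temperature": 0.0,
}
req = urllib.request.Request(
    "http://127.0.0.1:8081/v1/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    reply = json.loads(resp.read())
print(reply["choices"][0]["message"]["content"])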