Tim Luka Horstmann
committed on
Commit
·
46825d7
1
Parent(s):
ba32cef
Smaller model and parameters changed
Browse files
- llm_server.py +4 -4
llm_server.py
CHANGED
|
@@ -25,8 +25,8 @@ if not hf_token:
|
|
| 25 |
login(token=hf_token)
|
| 26 |
|
| 27 |
# Models Configuration
|
| 28 |
-
repo_id = "unsloth/Qwen3-
|
| 29 |
-
filename = "Qwen3-
|
| 30 |
|
| 31 |
|
| 32 |
try:
|
|
@@ -40,9 +40,9 @@ try:
|
|
| 40 |
)
|
| 41 |
llm = Llama(
|
| 42 |
model_path=model_path,
|
| 43 |
-
n_ctx=
|
| 44 |
n_threads=2,
|
| 45 |
-
n_batch=
|
| 46 |
n_gpu_layers=0,
|
| 47 |
use_mlock=True,
|
| 48 |
f16_kv=True,
|
|
|
|
| 25 |
login(token=hf_token)
|
| 26 |
|
| 27 |
# Models Configuration
|
| 28 |
+
repo_id = "unsloth/Qwen3-0.6B-GGUF" # "bartowski/deepcogito_cogito-v1-preview-llama-3B-GGUF" # "bartowski/deepcogito_cogito-v1-preview-llama-8B-GGUF"
|
| 29 |
+
filename = "Qwen3-0.6B-IQ4_XS.gguf" # "deepcogito_cogito-v1-preview-llama-3B-Q4_K_M.gguf"
|
| 30 |
|
| 31 |
|
| 32 |
try:
|
|
|
|
| 40 |
)
|
| 41 |
llm = Llama(
|
| 42 |
model_path=model_path,
|
| 43 |
+
n_ctx=1024,
|
| 44 |
n_threads=2,
|
| 45 |
+
n_batch=16,
|
| 46 |
n_gpu_layers=0,
|
| 47 |
use_mlock=True,
|
| 48 |
f16_kv=True,
|