Tim Luka Horstmann committed on
Commit 46825d7 · 1 Parent(s): ba32cef

Smaller model and parameters changed

Files changed (1):
  1. llm_server.py (+4 -4)
llm_server.py CHANGED

@@ -25,8 +25,8 @@ if not hf_token:
 login(token=hf_token)
 
 # Models Configuration
-repo_id = "unsloth/Qwen3-1.7B-GGUF" # "bartowski/deepcogito_cogito-v1-preview-llama-3B-GGUF" # "bartowski/deepcogito_cogito-v1-preview-llama-8B-GGUF"
-filename = "Qwen3-1.7B-Q4_K_M.gguf" # "deepcogito_cogito-v1-preview-llama-3B-Q4_K_M.gguf"
+repo_id = "unsloth/Qwen3-0.6B-GGUF" # "bartowski/deepcogito_cogito-v1-preview-llama-3B-GGUF" # "bartowski/deepcogito_cogito-v1-preview-llama-8B-GGUF"
+filename = "Qwen3-0.6B-IQ4_XS.gguf" # "deepcogito_cogito-v1-preview-llama-3B-Q4_K_M.gguf"
 
 
 try:
@@ -40,9 +40,9 @@ try:
     )
     llm = Llama(
         model_path=model_path,
-        n_ctx=3072,
+        n_ctx=1024,
         n_threads=2,
-        n_batch=64,
+        n_batch=16,
         n_gpu_layers=0,
         use_mlock=True,
         f16_kv=True,
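
For reference, a minimal sketch of what the model-loading section of llm_server.py presumably looks like after this commit. Only the repo_id/filename values, the login(token=hf_token) call, and the Llama(...) parameters come from the diff itself; the hf_hub_download call, the error handling, and the surrounding layout are assumptions reconstructed from the hunk context.

    import os

    from huggingface_hub import hf_hub_download, login
    from llama_cpp import Llama

    # Authenticate against the Hugging Face Hub; the token is read from the environment.
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise RuntimeError("HF_TOKEN is not set")  # assumed error handling, not shown in the diff
    login(token=hf_token)

    # Models Configuration: the smaller Qwen3-0.6B GGUF checkpoint introduced by this commit.
    repo_id = "unsloth/Qwen3-0.6B-GGUF"
    filename = "Qwen3-0.6B-IQ4_XS.gguf"

    try:
        # Download the quantized GGUF file from the Hub (cached locally after the first run);
        # this call is assumed from the hunk context, the diff only shows its closing parenthesis.
        model_path = hf_hub_download(repo_id=repo_id, filename=filename)
        llm = Llama(
            model_path=model_path,
            n_ctx=1024,       # reduced context window (was 3072)
            n_threads=2,      # CPU threads
            n_batch=16,       # reduced prompt batch size (was 64)
            n_gpu_layers=0,   # CPU-only inference
            use_mlock=True,   # lock model memory to avoid swapping
            f16_kv=True,      # half-precision KV cache
        )
    except Exception as exc:
        raise RuntimeError(f"Failed to load model: {exc}")  # assumed error handling

As a quick sanity check of the smaller model, one could run a one-off request against the loaded instance, e.g. llm.create_chat_completion(messages=[{"role": "user", "content": "Hello"}], max_tokens=32).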