Update README: set max_model_len to 8192 for optimal performance
README.md
```diff
@@ -104,7 +104,7 @@ llm = LLM(
     tensor_parallel_size=2,  # Adjust based on your GPU setup
     dtype="bfloat16",
     gpu_memory_utilization=0.95,  # Use 95% of GPU memory
-    max_model_len=
+    max_model_len=8192  # Large context length for extended conversations
 )
 
 # Configure sampling
```
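For context, here is a minimal sketch of how the surrounding README snippet might read after this change. It assumes vLLM's `LLM` and `SamplingParams` APIs; the model name, sampling values, and prompt are placeholders not shown in this diff:

```python
from vllm import LLM, SamplingParams

llm = LLM(
    model="meta-llama/Llama-2-13b-chat-hf",  # Placeholder; the model is not shown in this diff
    tensor_parallel_size=2,        # Adjust based on your GPU setup
    dtype="bfloat16",
    gpu_memory_utilization=0.95,   # Use 95% of GPU memory
    max_model_len=8192,            # Large context length for extended conversations
)

# Configure sampling (illustrative values; the diff cuts off before this section)
sampling_params = SamplingParams(temperature=0.7, top_p=0.9, max_tokens=512)

outputs = llm.generate(["Hello, how are you?"], sampling_params)
print(outputs[0].outputs[0].text)
```

Note that `max_model_len=8192` caps the combined prompt and generation length; the KV cache must fit this context within the 95% of GPU memory reserved by `gpu_memory_utilization`.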