pentagoniac committed
Commit c99e12f · verified · 1 Parent(s): c964c9e

Update README: set max_model_len to 8192 for optimal performance

Files changed (1):
  README.md +1 -1
README.md CHANGED
@@ -104,7 +104,7 @@ llm = LLM(
     tensor_parallel_size=2,        # Adjust based on your GPU setup
     dtype="bfloat16",
     gpu_memory_utilization=0.95,   # Use 95% of GPU memory
-    max_model_len=120000           # Large context length for extended conversations
+    max_model_len=8192             # Large context length for extended conversations
 )
 
 # Configure sampling
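For context, here is a minimal sketch of how the updated configuration fits into the README's vLLM snippet. Only the keyword arguments shown in the diff hunk come from this commit; the model name, the sampling values, and the prompt are placeholders, since they are not part of the diff excerpt.

```python
from vllm import LLM, SamplingParams

# Engine setup matching the diff context; max_model_len reflects this commit.
llm = LLM(
    model="your-org/your-model",      # placeholder, not taken from the diff
    tensor_parallel_size=2,           # Adjust based on your GPU setup
    dtype="bfloat16",
    gpu_memory_utilization=0.95,      # Use 95% of GPU memory
    max_model_len=8192                # Context length after this change
)

# Configure sampling (illustrative values; the diff only shows the comment line)
sampling_params = SamplingParams(
    temperature=0.7,
    top_p=0.9,
    max_tokens=512,
)

outputs = llm.generate(["Hello, how are you?"], sampling_params)
print(outputs[0].outputs[0].text)
```

A smaller max_model_len reduces the KV-cache memory reserved per sequence, which is why lowering it from 120000 to 8192 can improve throughput and avoid out-of-memory errors at gpu_memory_utilization=0.95.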