JunHowie committed on
Commit
76f6dd3
·
verified ·
1 Parent(s): e20c7d1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -2
README.md CHANGED
@@ -21,8 +21,23 @@ base_model_relation: quantized
21
  ```
22
  CONTEXT_LENGTH=32768
23
 
24
- vllm serve QuantTrio/GLM-4.5-Air-GPTQ-Int4-Int8Mix --served-model-name GLM-4.5-Air-GPTQ-Int4-Int8Mix --enable-expert-parallel --swap-space 16 --max-num-seqs 512 --max-model-len $CONTEXT_LENGTH --max-seq-len-to-capture $CONTEXT_LENGTH --gpu-memory-utilization 0.9 --tensor-parallel-size 8 --trust-remote-code --disable-log-requests --host 0.0.0.0 --port 8000
25
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  ### 【Dependencies】
28
  ```
 
21
  ```
22
  CONTEXT_LENGTH=32768
23
 
24
+ CONTEXT_LENGTH=32768
25
+
26
+ VLLM_USE_MODELSCOPE=true vllm serve \
27
+ QuantTrio/GLM-4.5-Air-GPTQ-Int4-Int8Mix \
28
+ --served-model-name GLM-4.5-Air-GPTQ-Int4-Int8Mix \
29
+ --enable-expert-parallel \
30
+ --swap-space 16 \
31
+ --max-num-seqs 512 \
32
+ --max-model-len $CONTEXT_LENGTH \
33
+ --max-seq-len-to-capture $CONTEXT_LENGTH \
34
+ --gpu-memory-utilization 0.9 \
35
+ --tensor-parallel-size 8 \
36
+ --trust-remote-code \
37
+ --disable-log-requests \
38
+ --host 0.0.0.0 \
39
+ --port 8000
40
+ ```
41
 
42
  ### 【Dependencies】
43
  ```