dacorvo HF staff committed on
Commit
84982b8
·
verified ·
1 Parent(s): 0a5ad93

Update inference-cache-config/qwen2.5-large.json

Browse files
inference-cache-config/qwen2.5-large.json CHANGED
@@ -1,5 +1,19 @@
1
  {
2
- "Qwen/Qwen2.5-32B-Instruct": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  {
4
  "batch_size": 1,
5
  "sequence_length": 4096,
 
1
  {
2
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": [
3
+ {
4
+ "batch_size": 1,
5
+ "sequence_length": 4096,
6
+ "num_cores": 8,
7
+ "auto_cast_type": "bf16"
8
+ },
9
+ {
10
+ "batch_size": 8,
11
+ "sequence_length": 4096,
12
+ "num_cores": 8,
13
+ "auto_cast_type": "bf16"
14
+ }
15
+ ],
16
+ "Qwen/Qwen2.5-32B-Instruct": [
17
  {
18
  "batch_size": 1,
19
  "sequence_length": 4096,