wenhuach committed on
Commit 62df7c3 · 1 Parent(s): fe1db26

change to autoround format

Signed-off-by: wenhuach <[email protected]>

config.json CHANGED
@@ -26,11 +26,11 @@
   "quantization_config": {
     "amp": true,
     "autoround_version": "0.4.2.dev",
+    "backend": "auto_round:gptq:exllamav2",
     "batch_size": 8,
     "bits": 4,
-    "damp_percent": 0.01,
     "data_type": "int",
-    "desc_act": false,
+    "dataset": "NeelNanda/pile-10k",
     "enable_minmax_tuning": true,
     "enable_norm_bias_tuning": false,
     "enable_quanted_input": true,
@@ -41,7 +41,7 @@
     "lr": 0.001,
     "minmax_lr": 0.001,
     "nsamples": 512,
-    "quant_method": "gptq",
+    "quant_method": "intel/auto-round",
     "scale_dtype": "torch.float16",
     "seqlen": 2048,
     "sym": true,
@@ -80,8 +80,7 @@
         "model.layers.30",
         "model.layers.31"
       ]
-    ],
-    "true_sequential": false
+    ]
   },
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
quantize_config.json → quantization_config.json RENAMED
@@ -52,9 +52,8 @@
     ]
   ],
   "enable_norm_bias_tuning": false,
+  "dataset": "NeelNanda/pile-10k",
  "autoround_version": "0.4.2.dev",
-  "quant_method": "gptq",
-  "desc_act": false,
-  "true_sequential": false,
-  "damp_percent": 0.01
+  "quant_method": "intel/auto-round",
+  "backend": "auto_round:gptq:exllamav2"
 }
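For context, a checkpoint saved in this auto-round format (config carrying `quant_method: "intel/auto-round"` with the `auto_round:gptq:exllamav2` backend, as in this commit) is normally loaded through transformers with the auto-round package installed. The sketch below is a minimal, hedged example of that loading flow: the model path and prompt are placeholders, and it assumes the loading pattern documented by the auto-round project rather than anything specific to this repository.

```python
# Minimal loading sketch, assuming the `auto-round` package is installed and
# `quantized_model_path` (a placeholder, not taken from this commit) points at
# a checkpoint whose config uses quant_method "intel/auto-round".
from auto_round import AutoRoundConfig  # noqa: F401  # import registers the auto-round format with transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

quantized_model_path = "path/to/quantized-model"  # placeholder

model = AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)

inputs = tokenizer("There is a girl who likes adventure,", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```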