OPEA
/

Safetensors
llama
4-bit precision
auto-round
weiweiz1 committed on
Commit
1135608
·
verified ·
1 Parent(s): 423db67

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +1 -2
config.json CHANGED
@@ -26,7 +26,6 @@
26
  "quantization_config": {
27
  "amp": true,
28
  "autoround_version": "0.4.2.dev",
29
- "backend": "auto_round:gptq:exllamav2",
30
  "batch_size": 8,
31
  "bits": 4,
32
  "data_type": "int",
@@ -41,7 +40,7 @@
41
  "lr": 0.005,
42
  "minmax_lr": 0.005,
43
  "nsamples": 128,
44
- "quant_method": "intel/auto-round",
45
  "scale_dtype": "torch.float16",
46
  "seqlen": 2048,
47
  "sym": true,
 
26
  "quantization_config": {
27
  "amp": true,
28
  "autoround_version": "0.4.2.dev",
 
29
  "batch_size": 8,
30
  "bits": 4,
31
  "data_type": "int",
 
40
  "lr": 0.005,
41
  "minmax_lr": 0.005,
42
  "nsamples": 128,
43
+ "quant_method": "auto-round",
44
  "scale_dtype": "torch.float16",
45
  "seqlen": 2048,
46
  "sym": true,