---
# Two-stage compression recipe: a sparsity stage followed by a
# quantization stage. Each stage maps a modifier-group key to one or more
# modifier names, and each modifier name to its settings.

# Stage 1: sparsification.
sparsity_stage:
  sparsity_modifiers:
    SparseGPTModifier:
      sparsity: 0.5
      # Quoted on purpose: an unquoted digits-and-colon value such as 2:4
      # can be read as a sexagesimal integer by YAML 1.1 parsers.
      mask_structure: '2:4'
      targets: [Linear]
      # NOTE(review): this entry uses a 're:'-prefixed pattern while the
      # quantization stage below ignores the bare name lm_head; confirm
      # that both forms select the intended module(s) in the consumer.
      ignore: ['re:.*lm_head']

# Stage 2: quantization.
quantization_stage:
  quantization_modifiers:
    QuantizationModifier:
      targets: [Linear]
      ignore: [lm_head]
      scheme: FP8_DYNAMIC