fix: torch_dtype mistral default to fp32 (#1050)
src/axolotl/utils/models.py
CHANGED
```diff
@@ -599,7 +599,10 @@ def load_model(
 
     # LlamaRMSNorm layers are in fp32 after kbit_training or full finetune, so we need to
     # convert them back to fp16/bf16 for flash-attn compatibility.
-    if needs_fa2_dtype or (cfg.flash_attention and cfg.is_llama_derived_model):
+    if needs_fa2_dtype or (
+        cfg.flash_attention
+        and (cfg.is_llama_derived_model or cfg.is_mistral_derived_model)
+    ):
         LOG.info("converting modules to %s for flash attention", cfg.torch_dtype)
         for name, module in model.named_modules():
             if "norm" in name:
```
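The block gated by this condition walks `model.named_modules()` and casts any module whose name contains "norm" back to `cfg.torch_dtype`; before this change, Mistral-derived models were not matched by the condition, so their norm layers could remain in fp32, which is what the commit title refers to. Below is a minimal, self-contained sketch of that cast pattern using plain PyTorch rather than axolotl's `cfg` object; `TinyBlock` and `torch_dtype` are illustrative stand-ins, not code from this repository.

```python
# Sketch of the norm-layer dtype cast gated by the condition in the diff above.
# Plain PyTorch toy model; not axolotl code.
import torch
import torch.nn as nn


class TinyBlock(nn.Module):
    def __init__(self, hidden: int = 16):
        super().__init__()
        self.proj = nn.Linear(hidden, hidden)
        # Norm layers often end up in fp32 after k-bit training prep.
        self.input_layernorm = nn.LayerNorm(hidden)


model = TinyBlock()
torch_dtype = torch.bfloat16  # stand-in for what cfg.torch_dtype resolves to

# Same loop shape as the context lines in the diff: cast only the *norm*
# modules so their weights match the dtype flash-attn expects.
for name, module in model.named_modules():
    if "norm" in name:
        module.to(torch_dtype)

print(model.input_layernorm.weight.dtype)  # torch.bfloat16
print(model.proj.weight.dtype)             # torch.float32 (untouched)
```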