Correct name of MixtralBlockSparseTop2MLP (L -> l) (#1667)
Browse files
src/axolotl/monkeypatch/mixtral/__init__.py
CHANGED
|
@@ -42,9 +42,9 @@ def patch_mixtral_moe_forward_zero3() -> None:
|
|
| 42 |
return final_hidden_states, router_logits
|
| 43 |
|
| 44 |
from transformers.models.mixtral.modeling_mixtral import (
|
| 45 |
-
MixtralBLockSparseTop2MLP,
|
| 46 |
MixtralSparseMoeBlock,
|
| 47 |
)
|
| 48 |
|
| 49 |
-
MixtralBLockSparseTop2MLP.forward = mlp_forward
|
| 50 |
MixtralSparseMoeBlock.forward = moe_forward
|
|
|
|
| 42 |
return final_hidden_states, router_logits
|
| 43 |
|
| 44 |
from transformers.models.mixtral.modeling_mixtral import (
|
| 45 |
+
MixtralBlockSparseTop2MLP,
|
| 46 |
MixtralSparseMoeBlock,
|
| 47 |
)
|
| 48 |
|
| 49 |
+
MixtralBlockSparseTop2MLP.forward = mlp_forward
|
| 50 |
MixtralSparseMoeBlock.forward = moe_forward
|