base_model: Pinkstack/llama-3.2-superthoughtslite-expert-chat
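# In mergekit-moe, the base model typically supplies the shared weights
# (embeddings, attention, norms), while the experts below contribute the
# routed MLP blocks.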
gate_mode: hidden # Route using hidden-state embeddings of the positive prompts. Alternatives: 'cheap_embed', 'random'
dtype: float16 # float16 halves memory/disk use versus float32; common for inference
experts:
  - source_model: Pinkstack/llama-3.2-superthoughtslite-expert-chat
    positive_prompts:
      - "General use"
      - "Conversational"
      - "Question answering"
      - "Multilingual"
      - "Translation"
      - "Roleplay"
  - source_model: Pinkstack/llama-3.2-superthoughts-expert-math
    positive_prompts:
      - "Mathematical"
      - "Algebra"
      - "Shape understanding"
      - "Counting problems"
      - "Explain math"
      - "Placing objects"
  - source_model: Pinkstack/llama-3.2-superthoughtslite-expert-medical
    positive_prompts:
      - "Medical"
      - "Biology"
      - "Science"
      - "Sickness"
      - "Illness"
      - "Emotional reasoning" # Note: may overlap with the general chat expert; choose prompts carefully
  - source_model: Pinkstack/llama-3.2-superthoughts-lite-expert-code
    positive_prompts:
      - "Code generation"
      - "Debugging"
      - "Finish code"
      - "Explain code"
      - "Refine code"
      - "Coding assistance"
# --- MoE Specific Parameters ---
# num_experts_per_tok: how many experts to activate per token at inference.
# Common values are 1 or 2; 2 often gives better quality at the cost of
# extra compute per token.
num_experts_per_tok: 2
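# --- Usage (illustrative sketch) ---
# Assuming the mergekit-moe CLI from https://github.com/arcee-ai/mergekit;
# the config filename and output path below are placeholders:
#
#   pip install mergekit
#   mergekit-moe moe-config.yaml ./superthoughts-lite-moe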