base_model: Pinkstack/llama-3.2-superthoughtslite-expert-chat # Shared base; the same model also serves as the general chat expert below
gate_mode: hidden # Router gates initialized from hidden-state representations of the positive prompts. Alternatives: 'cheap_embed', 'random'
dtype: float16 # Use float16 to save memory/disk space, common for inference
experts:
  - source_model: Pinkstack/llama-3.2-superthoughtslite-expert-chat
    positive_prompts:
      - "General use"
      - "Conversational"
      - "Question answering"
      - "Multilingual"
      - "Translation"
      - "Roleplay"
  - source_model: Pinkstack/llama-3.2-superthoughts-expert-math
    positive_prompts:
      - "Mathematical"
      - "Algebra"
      - "Shape understanding"
      - "Counting problem"
      - "Explain math"
      - "Placing objects"
  - source_model: Pinkstack/llama-3.2-superthoughtslite-expert-medical
    positive_prompts:
      - "Medical"
      - "Biology"
      - "Science"
      - "Sickness"
      - "Illness"
      - "Emotional reasoning" # Note: may overlap with the general chat expert; choose prompts carefully
  - source_model: Pinkstack/llama-3.2-superthoughts-lite-expert-code
    positive_prompts:
      - "Code generation"
      - "Debugging"
      - "Finish code"
      - "Explain code"
      - "Refine code"
      - "Coding assistance"
# --- MoE-Specific Parameters ---
# num_experts_per_tok: how many experts to activate per token during inference.
# Common values are 1 or 2; using 2 generally improves quality at the cost of extra compute per token.
num_experts_per_tok: 2
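
# --- Usage (illustrative sketch; the exact command line is an assumption, verify against the mergekit docs) ---
# This config is intended for mergekit's MoE merge script. Assuming the file is saved as config.yaml,
# a typical invocation would look like:
#   mergekit-moe config.yaml ./merged_model_moe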