---
# mergekit MoE merge config: routes four Qwen2-1.5b instruction-tuned experts
# behind a learned gate, with the chat model also acting as a shared expert.
base_model: ehristoforu/Qwen2-1.5b-it-chat
architecture: qwen
# "hidden" gate mode derives routing weights from hidden-state activations on
# the positive_prompts below (rather than cheap/random gating).
gate_mode: hidden
dtype: bfloat16

experts:
  - source_model: ehristoforu/Qwen2-1.5b-it-chat
    positive_prompts:
      - "chat"
      - "assistant"
      - "chat history"
      - "chat context"
      - "writing"
      - "text writing"
      - "editing"
      - "text editing"
      - "multilingual"
  - source_model: ehristoforu/Qwen2-1.5b-it-bioinstruct
    positive_prompts:
      - "bio"
      - "science"
      - "biology"
      - "natural sciences"
      - "scientist"
  - source_model: ehristoforu/Qwen2-1.5b-it-codealpaca
    positive_prompts:
      - "code"
      - "coding"
      - "coder"
      - "programming"
      - "programmer"
      - "code analysis"
      - "code review"
      - "code fix"
      - "code improvement"
  - source_model: ehristoforu/Qwen2-1.5b-it-math
    positive_prompts:
      - "math"
      - "mathematician"
      - "problem solving"
      - "calculating"
      - "logics"

shared_experts:
  - source_model: ehristoforu/Qwen2-1.5b-it-chat
    # required by Qwen MoE for "hidden" gate mode, otherwise not allowed
    positive_prompts:
      - "chat assistant"
    # (optional, but recommended:)
    # downweight output from shared expert to prevent overcooking the model
    residual_scale: 0.1