# newmoe-medium / mergekit_moe_config.yml
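#
# Mergekit mixture-of-experts config: combines four Qwen2-1.5b instruct
# fine-tunes (chat, bio, code, math) into a single Qwen-architecture MoE.
# A minimal build sketch, assuming mergekit is installed; the output path
# is illustrative, not from this repo:
#
#   mergekit-moe mergekit_moe_config.yml ./newmoe-medium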
base_model: ehristoforu/Qwen2-1.5b-it-chat
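# "qwen" outputs a Qwen2-MoE-style model; mergekit requires this
# architecture to define a single shared expert (see shared_experts below).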
architecture: qwen
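# "hidden" initializes each expert's router from hidden-state
# representations of its positive prompts; mergekit also offers
# "cheap_embed" and "random" gate modes.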
gate_mode: hidden
dtype: bfloat16
experts:
  - source_model: ehristoforu/Qwen2-1.5b-it-chat
    positive_prompts: ["chat", "assistant", "chat history", "chat context", "writing", "text writing", "editing", "text editing", "multilingual"]
  - source_model: ehristoforu/Qwen2-1.5b-it-bioinstruct
    positive_prompts: ["bio", "science", "biology", "natural sciences", "scientist"]
  - source_model: ehristoforu/Qwen2-1.5b-it-codealpaca
    positive_prompts: ["code", "coding", "coder", "programming", "programmer", "code analysis", "code review", "code fix", "code improvement"]
  - source_model: ehristoforu/Qwen2-1.5b-it-math
    positive_prompts: ["math", "mathematician", "problem solving", "calculating", "logics"]
shared_experts:
  - source_model: ehristoforu/Qwen2-1.5b-it-chat
    positive_prompts: # required by Qwen MoE for "hidden" gate mode, otherwise not allowed
      - "chat assistant"
    # (optional, but recommended:)
    residual_scale: 0.1 # downweight output from shared expert to prevent overcooking the model
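#
# The merged checkpoint should load as a standard Qwen2-MoE causal LM, e.g.
# via transformers' AutoModelForCausalLM.from_pretrained("./newmoe-medium")
# (path illustrative, matching the build sketch above).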