---
license: apache-2.0
language:
  - en
  - ru
tags:
  - moe
---

```yaml
base_model: Qwen/Qwen2.5-1.5B-Instruct
gate_mode: random
architecture: qwen
experts_per_token: 3
dtype: bfloat16
experts:
  - source_model: Qwen/Qwen2.5-1.5B-Instruct
  - source_model: Qwen/Qwen2.5-Coder-1.5B-Instruct
  - source_model: Qwen/Qwen2.5-Math-1.5B-Instruct
  - source_model: huihui-ai/Qwen2.5-1.5B-Instruct-abliterated
  - source_model: Rombo-Org/Rombo-LLM-V2.5-Qwen-1.5b
  - source_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
  - source_model: Vikhrmodels/Vikhr-Qwen-2.5-1.5B-Instruct
  - source_model: RefalMachine/RuadaptQwen2.5-1.5B-instruct
shared_experts:
  - source_model: Qwen/Qwen2.5-1.5B-Instruct
    positive_prompts: [""]
    residual_scale: 0.1
```