# Model-merge configuration: combines the models listed under `models`
# using the method named in `merge_method`, relative to `base_model`.

models:
  # First entry is the same checkpoint as `base_model`; it has no
  # per-model parameters.
  - model: allura-org/GLM4-9B-Neon-v2
  - model: THUDM/LongReward-glm4-9b-DPO
    # NOTE(review): nesting restored from line order -- `weight` is assumed
    # to belong to this model entry; confirm against the original recipe.
    parameters:
      # Eight-value list; presumably interpreted by the merge tool as a
      # per-layer gradient -- TODO confirm.
      weight: [0.496, 0.166, 0.166, 0.496, 0.496, 0.166, 0.166, 0.496]

base_model: allura-org/GLM4-9B-Neon-v2
merge_method: sce

# Global parameters for the merge method.
# NOTE(review): verify that the `sce` method actually consumes `lambda`
# (and the per-model `weight` above) in the tool version being used.
parameters:
  select_topk: 0.06
  lambda: 0.66

tokenizer_source: base

# `dtype` and `out_dtype` differ; presumably the merge is computed in
# float32 and the result is saved as bfloat16 -- TODO confirm.
dtype: float32
out_dtype: bfloat16