---
# mergekit recipe: SLERP merge of two 70B checkpoints, using the DeepSeek
# distill as the base model.
models:
  - model: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
  - model: unreleased-novatempus-70b-v0.1.1
merge_method: slerp
base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
parameters:
  # Interpolation factor `t`, given per tensor group. Entries are listed from
  # most specific intent to the unfiltered fallback at the end.
  # Presumably each 5-element list is a gradient spread across layer depth,
  # and t=0 keeps the base model -- confirm against the mergekit docs.
  t:
    - filter: self_attn
      value: [0.2, 0.25, 0.3, 0.25, 0.2]
    # NOTE(review): mergekit appears to treat `filter` as a plain substring
    # of the tensor name, not a regex, so the `|` alternation here would
    # never match. Attention projections would already be caught by the
    # `self_attn` entry above (same values) -- TODO confirm.
    - filter: "q_proj|k_proj|v_proj"
      value: [0.2, 0.25, 0.3, 0.25, 0.2]
    # NOTE(review): same `|` concern as above. If this entry never matches,
    # up_proj/down_proj fall through to the `mlp` entry below and get its
    # values instead of these. Splitting into separate `up_proj` and
    # `down_proj` entries would change the merge result -- TODO confirm
    # intent before editing.
    - filter: "up_proj|down_proj"
      value: [0.2, 0.3, 0.4, 0.3, 0.2]
    - filter: mlp
      value: [0.25, 0.35, 0.55, 0.35, 0.25]
    - value: 0.45  # default for other components
dtype: bfloat16
tokenizer:
  # necessary to fix tokenizer
  source: deepseek-ai/DeepSeek-R1-Distill-Llama-70B