models:
  - model: TareksLab/L2-MERGE4
    parameters:
      weight:
        - filter: self_attn
          value: [0.3, 0.1, 0.2]
        - filter: mlp
          value: [0.4, 0.2, 0.1]
        - value: 0.2
      density: 0.7
      lambda: 1.05
  - model: TareksLab/L2-MERGE2a
    parameters:
      weight:
        - filter: self_attn
          value: [0.2, 0.1, 0.3]
        - filter: mlp
          value: [0.3, 0.1, 0.2]
        - value: 0.2
      density: 0.65
      lambda: 1.05
  - model: TareksLab/L2-MERGE3
    parameters:
      weight:
        - filter: self_attn
          value: [0.1, 0.3, 0.1]
        - filter: mlp
          value: [0.2, 0.3, 0.1]
        - value: 0.2
      density: 0.6
      lambda: 1.05
  - model: TareksLab/L2-MERGE1
    parameters:
      weight:
        - filter: self_attn
          value: [0.2, 0.2, 0.1]
        - filter: mlp
          value: [0.1, 0.2, 0.2]
        - value: 0.2
      density: 0.6
      lambda: 1
  - model: TareksLab/L-BASE-V1
    parameters:
      weight:
        - filter: self_attn
          value: [0.1, 0.3, 0.3]
        - filter: mlp
          value: [0.1, 0.2, 0.4]
        - value: 0.2
      density: 0.55
      lambda: 1
base_model: TareksLab/L-BASE-V1
merge_method: dare_ties
parameters:
  normalize: false
  pad_to_multiple_of: 4
tokenizer:
  source: base
chat_template: llama3
dtype: bfloat16
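
The configuration above performs a dare_ties merge of the four TareksLab/L2-MERGE* models onto TareksLab/L-BASE-V1, giving the self_attn and mlp tensors per-layer gradient weights (mergekit interpolates list-valued parameters across layer depth) and a flat 0.2 weight everywhere else. As a minimal sketch of how such a recipe could be executed, assuming the YAML is saved to disk and that mergekit's Python API (MergeConfiguration, MergeOptions, run_merge) is available, something like the following could be used; the file paths and option values are illustrative assumptions, not part of the original recipe:

```python
# Sketch: run the dare_ties merge above via mergekit's Python API.
# Paths and options below are assumptions for illustration only.
import yaml
import torch

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

CONFIG_YML = "./dare-ties-merge.yml"  # assumed: the YAML config above saved to this file
OUTPUT_PATH = "./merged-model"        # assumed: output directory for the merged weights

# Parse the YAML recipe into mergekit's configuration object.
with open(CONFIG_YML, "r", encoding="utf-8") as fp:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))

# Execute the merge; use a GPU if one is available and copy the base tokenizer
# into the output, matching `tokenizer: source: base` in the config.
run_merge(
    merge_config,
    out_path=OUTPUT_PATH,
    options=MergeOptions(
        cuda=torch.cuda.is_available(),
        copy_tokenizer=True,
        lazy_unpickle=False,
        low_cpu_memory=False,
    ),
)
```

Equivalently, the mergekit CLI (`mergekit-yaml <config> <output-dir>`) can run the same recipe without any Python code.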