# New-Dawn-Llama-3-70B-32K-v1.0 / mergekit_config.yml
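# Composite merge recipe for sophosympatheia/New-Dawn-Llama-3-70B-32K-v1.0.
# Each YAML document below is one merge stage; later stages reference earlier
# stages by their `name`. Assuming a mergekit build that supports
# multi-document configs like this one (e.g. via the mergekit-mega script),
# a plausible invocation would be something like:
#
#   mergekit-mega mergekit_config.yml ./output --cuda
#
# Stage 1: SLERP Higgs with Cat-Llama-3-instruct. In mergekit's slerp, t=0
# returns the base model and t=1 the other model, so t=0.2 stays close to
# the Higgs base.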
name: new-dawn-llama3-70b-v0.13.2
models:
  - model: bosonai/Higgs-Llama-3-70B
  - model: turboderp/Cat-Llama-3-70B-instruct
merge_method: slerp
base_model: bosonai/Higgs-Llama-3-70B
parameters:
  t:
    - value: 0.2
dtype: float16
---
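# Stage 2: SLERP Higgs with Smaug-Instruct at the midpoint (t=0.5).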
name: new-dawn-llama3-70b-v0.14
models:
  - model: bosonai/Higgs-Llama-3-70B
  - model: abacusai/Smaug-Llama-3-70B-Instruct
merge_method: slerp
base_model: bosonai/Higgs-Llama-3-70B
parameters:
  t:
    - value: 0.5
dtype: float16
---
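# Stage 3: SLERP the stage 1 and stage 2 intermediates, weighted equally.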
name: new-dawn-llama3-70b-v0.15
models:
  - model: new-dawn-llama3-70b-v0.13.2
  - model: new-dawn-llama3-70b-v0.14
merge_method: slerp
base_model: new-dawn-llama3-70b-v0.13.2
parameters:
  t:
    - value: 0.5
dtype: float16
---
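# Stage 4: fold in Euryale; t=0.4 keeps the result weighted toward the
# v0.15 base.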
name: new-dawn-llama3-70b-v0.16
models:
  - model: Sao10K/L3-70B-Euryale-v2.1
  - model: new-dawn-llama3-70b-v0.15
merge_method: slerp
base_model: new-dawn-llama3-70b-v0.15
parameters:
  t:
    - value: 0.4
dtype: float16
---
# See https://huggingface.co/jukofyork/Dark-Miqu-70B/discussions/3
# Credit for merge recipe belongs to jukofyork
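# Stage 5: linear layer-swap to graft v0.16 onto the 32K-context Smaug base.
# Each weight list is a mergekit gradient: its 11 values are interpolated
# across the layer stack, so [1, 1, 0, ..., 0, 1, 1] takes the outermost
# layers' projections from Smaug-32K and the middle layers from v0.16. The
# bare `value: 1` / `value: 0` entries are defaults for every unfiltered
# tensor (q_proj, k_proj, embeddings, norms), which therefore come entirely
# from the 32K base; per the linked discussion, that is what carries the
# long-context behavior over.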
name: new-dawn-llama3-70b-v0.16-32K
merge_method: linear
models:
  - model: abacusai/Smaug-Llama-3-70B-Instruct-32K
    parameters:
      weight:
        - filter: v_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: o_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: up_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: gate_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: down_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - value: 1
  - model: new-dawn-llama3-70b-v0.16
    parameters:
      weight:
        - filter: v_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: o_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: up_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: gate_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: down_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - value: 0
base_model: abacusai/Smaug-Llama-3-70B-Instruct-32K
tokenizer_source: base
dtype: float16
---
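# Stages 6-8 rebuild the blend directly on the 32K Smaug base for a second
# branch. Stage 6: SLERP toward Higgs ("bonsai" appears to abbreviate
# bosonai) at t=0.6.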
name: _1-Smaug-bonsai-slerp
models:
  - model: abacusai/Smaug-Llama-3-70B-Instruct-32K
  - model: bosonai/Higgs-Llama-3-70B
merge_method: slerp
base_model: abacusai/Smaug-Llama-3-70B-Instruct-32K
parameters:
  t:
    - value: 0.6
dtype: float16
---
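# Stage 7: SLERP Smaug-32K toward Euryale at t=0.65.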
name: _2-Smaug-euryale-slerp
models:
  - model: abacusai/Smaug-Llama-3-70B-Instruct-32K
  - model: Sao10K/L3-70B-Euryale-v2.1
merge_method: slerp
base_model: abacusai/Smaug-Llama-3-70B-Instruct-32K
parameters:
  t:
    - value: 0.65
dtype: float16
---
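# Stage 8: SLERP the two 32K intermediates from stages 6 and 7, weighted
# equally (t=0.5).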
name: _3-Smaug-bonsai_Smaug-euryale-slerp
models:
  - model: _1-Smaug-bonsai-slerp
  - model: _2-Smaug-euryale-slerp
merge_method: slerp
base_model: _1-Smaug-bonsai-slerp
parameters:
  t:
    - value: 0.5
dtype: float16
---
# See https://huggingface.co/jukofyork/Dark-Miqu-70B/discussions/3
# Credit for merge recipe belongs to jukofyork
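# Stage 9: same layer-swap gradients as stage 5, applied to the stage 8
# blend to produce the v0.18 branch at 32K context.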
name: new-dawn-llama3-70b-v0.18-32K
merge_method: linear
models:
  - model: abacusai/Smaug-Llama-3-70B-Instruct-32K
    parameters:
      weight:
        - filter: v_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: o_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: up_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: gate_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - filter: down_proj
          value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
        - value: 1
  - model: _3-Smaug-bonsai_Smaug-euryale-slerp
    parameters:
      weight:
        - filter: v_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: o_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: up_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: gate_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - filter: down_proj
          value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
        - value: 0
base_model: abacusai/Smaug-Llama-3-70B-Instruct-32K
tokenizer_source: base
dtype: float16
---
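# Stage 10: final merge. SLERP the v0.16-32K and v0.18-32K builds at the
# midpoint (t=0.5) to produce new-dawn-llama3-70b-32K-v1.0.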
name: new-dawn-llama3-70b-32K-v1.0
models:
  - model: /home/llm/mergequant/models/new-dawn-llama3-70b-v0.16-32K
  - model: /home/llm/mergequant/models/new-dawn-llama3-70b-v0.18-32K
merge_method: slerp
base_model: /home/llm/mergequant/models/new-dawn-llama3-70b-v0.16-32K
parameters:
  t:
    - value: 0.5
dtype: float16