library_name: transformers
tags:
  - mergekit
  - merge
base_model:  # models involved in the merge (instruct, math-instruct, and the base)
  - Qwen/Qwen2.5-14B-Instruct
  - qingy2019/Qwen2.5-Math-14B-Instruct
  - Qwen/Qwen2.5-14B
language: null  # NOTE(review): no language codes are listed in the source; add them or drop the key
model-index:  # one entry per model; each result is task + dataset + metrics + source
  - name: Qwen2.5-Ultimate-14B-Instruct
    results:
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: IFEval (0-Shot)
          type: HuggingFaceH4/ifeval
          args:
            num_few_shot: 0
        metrics:
          - type: inst_level_strict_acc and prompt_level_strict_acc
            value: 39.38
            name: strict accuracy
        source:
          url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=qingy2019/Qwen2.5-Ultimate-14B-Instruct
          name: Open LLM Leaderboard
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: BBH (3-Shot)
          type: BBH
          args:
            num_few_shot: 3
        metrics:
          - type: acc_norm
            value: 40.58
            name: normalized accuracy
        source:
          url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=qingy2019/Qwen2.5-Ultimate-14B-Instruct
          name: Open LLM Leaderboard
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: MATH Lvl 5 (4-Shot)
          type: hendrycks/competition_math
          args:
            num_few_shot: 4
        metrics:
          - type: exact_match
            value: 28.02
            name: exact match
        source:
          url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=qingy2019/Qwen2.5-Ultimate-14B-Instruct
          name: Open LLM Leaderboard
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: GPQA (0-shot)
          type: Idavidrein/gpqa
          args:
            num_few_shot: 0
        metrics:
          - type: acc_norm
            value: 14.21
            name: acc_norm
        source:
          url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=qingy2019/Qwen2.5-Ultimate-14B-Instruct
          name: Open LLM Leaderboard
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: MuSR (0-shot)
          type: TAUR-Lab/MuSR
          args:
            num_few_shot: 0
        metrics:
          - type: acc_norm
            value: 9.89
            name: acc_norm
        source:
          url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=qingy2019/Qwen2.5-Ultimate-14B-Instruct
          name: Open LLM Leaderboard
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: MMLU-PRO (5-shot)
          type: TIGER-Lab/MMLU-Pro
          config: main
          split: test
          args:
            num_few_shot: 5
        metrics:
          - type: acc
            value: 43.66
            name: accuracy
        source:
          url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=qingy2019/Qwen2.5-Ultimate-14B-Instruct
          name: Open LLM Leaderboard

# Qwen2.5 Ultimate 14B Instruct

This model was merged with rombodawg's method, using the first iteration of my Qwen2.5 Math 14B Instruct.

## Merge Details

### Merge Method

This model was merged with the TIES merge method, using Qwen/Qwen2.5-14B as the base model.

### Models Merged

The following models were included in the merge:

* qingy2019/Qwen2.5-Math-14B-Instruct
* Qwen/Qwen2.5-14B-Instruct

### Configuration

The following YAML configuration was used to produce this model:


merge_method: ties  # TIES merge of the models listed under `models`
base_model: Qwen/Qwen2.5-14B  # base model the merge is performed against
dtype: bfloat16
tokenizer_source: qingy2019/Qwen2.5-Math-14B-Instruct  # tokenizer is taken from this model
models:  # both inputs carry the same per-model settings
  - model: qingy2019/Qwen2.5-Math-14B-Instruct
    parameters:
      weight: 1
      density: 1
  - model: Qwen/Qwen2.5-14B-Instruct
    parameters:
      weight: 1
      density: 1
parameters:  # top-level settings for the merge
  weight: 1
  density: 1
  normalize: true
  int8_mask: true

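# Open LLM Leaderboard Evaluation Results

Detailed results can be found on the [Open LLM Leaderboard](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=qingy2019/Qwen2.5-Ultimate-14B-Instruct).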

| Metric              | Value |
|---------------------|------:|
| Avg.                | 29.29 |
| IFEval (0-Shot)     | 39.38 |
| BBH (3-Shot)        | 40.58 |
| MATH Lvl 5 (4-Shot) | 28.02 |
| GPQA (0-shot)       | 14.21 |
| MuSR (0-shot)       |  9.89 |
| MMLU-PRO (5-shot)   | 43.66 |