Spaces:
Running
Running
mehran
commited on
Commit
·
16ff4b4
1
Parent(s):
40447c8
add thinking columns
Browse files- leaderboard/__pycache__/leaderboard.cpython-310.pyc +0 -0
- leaderboard/boards_data/MMLU.jsonl +35 -35
- leaderboard/boards_data/all.jsonl +35 -35
- leaderboard/boards_data/extractive-qa_PQuAD.jsonl +35 -35
- leaderboard/boards_data/ifeval.jsonl +35 -35
- leaderboard/boards_data/keyword-extraction_SynKeywords.jsonl +35 -35
- leaderboard/boards_data/mt_bench.jsonl +35 -35
- leaderboard/boards_data/ner_arman.jsonl +35 -35
- leaderboard/boards_data/nli_farstail.jsonl +35 -35
- leaderboard/boards_data/paraphrase-detection_FarsiParaphraseDetection.jsonl +35 -35
- leaderboard/boards_data/paraphrase-detection_parsinlu.jsonl +35 -35
- leaderboard/boards_data/persian_csr.jsonl +35 -35
- leaderboard/boards_data/persian_nlg.jsonl +0 -0
- leaderboard/boards_data/persian_nlu.jsonl +0 -0
- leaderboard/boards_data/question-generation_PersianQA.jsonl +35 -35
- leaderboard/boards_data/sentiment-analysis_deepsentipers.jsonl +35 -35
- leaderboard/boards_data/sts_FarSICK.jsonl +35 -35
- leaderboard/boards_data/sts_SynPerSTS.jsonl +35 -35
- leaderboard/boards_data/summarization_PnSummary.jsonl +35 -35
- leaderboard/boards_data/summarization_SamSUM-fa.jsonl +35 -35
- leaderboard/boards_data/tone-classification_SynTone.jsonl +35 -35
- leaderboard/boards_data/topic-classification_sid.jsonl +35 -35
- leaderboard/boards_data/translation-ar2fa_ar2fa.jsonl +35 -35
- leaderboard/boards_data/translation-en2fa_en2fa.jsonl +35 -35
- leaderboard/boards_data/translation-fa2ar_fa2ar.jsonl +35 -35
- leaderboard/boards_data/translation-fa2en_fa2en.jsonl +35 -35
- leaderboard/leaderboard.py +20 -4
- leaderboard/leaderboard_config.yaml +36 -0
- leaderboard/refresh.py +28 -19
leaderboard/__pycache__/leaderboard.cpython-310.pyc
CHANGED
|
Binary files a/leaderboard/__pycache__/leaderboard.cpython-310.pyc and b/leaderboard/__pycache__/leaderboard.cpython-310.pyc differ
|
|
|
leaderboard/boards_data/MMLU.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8401114206,"cinema_acc":0.9375,"emergency_number_acc":0.8,"foods_acc":0.8,"games_acc":0.55,"herbal_drugs_acc":0.75,"places_acc":0.8857142857,"poetry_acc":0.9,"politicians_acc":0.95,"popular_people_acc":0.9282051282,"Government_law_acc":0.9782608696,"proverbs_acc":0.9,"religous_acc":0.9777777778,"social_manners_acc":0.9438202247,"souvenirs_acc":0.78,"sports_acc":0.6507936508,"GPK_acc":0.8733798604,"SPK_acc":0.856476498,"UPK_acc":0.824103816}
|
| 2 |
-
{"Model Name":"o3","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8217442751,"cinema_acc":0.8125,"emergency_number_acc":1.0,"foods_acc":0.77,"games_acc":0.75,"herbal_drugs_acc":0.65,"places_acc":0.8952380952,"poetry_acc":0.875,"politicians_acc":1.0,"popular_people_acc":0.9179487179,"Government_law_acc":0.8913043478,"proverbs_acc":0.89,"religous_acc":0.9555555556,"social_manners_acc":0.8876404494,"souvenirs_acc":0.78,"sports_acc":0.5714285714,"GPK_acc":0.8454636092,"SPK_acc":0.833781603,"UPK_acc":0.8100680622}
|
| 3 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7274624374,"cinema_acc":0.8875,"emergency_number_acc":0.8,"foods_acc":0.82,"games_acc":0.4,"herbal_drugs_acc":0.75,"places_acc":0.8952380952,"poetry_acc":0.9,"politicians_acc":0.9,"popular_people_acc":0.8564102564,"Government_law_acc":0.9565217391,"proverbs_acc":0.87,"religous_acc":0.9555555556,"social_manners_acc":0.9101123596,"souvenirs_acc":0.72,"sports_acc":0.5873015873,"GPK_acc":0.8394815553,"SPK_acc":0.7645583229,"UPK_acc":0.686473306}
|
| 4 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7214996174,"cinema_acc":0.6875,"emergency_number_acc":0.7,"foods_acc":0.74,"games_acc":0.5,"herbal_drugs_acc":0.675,"places_acc":0.8476190476,"poetry_acc":0.8,"politicians_acc":0.95,"popular_people_acc":0.8615384615,"Government_law_acc":0.8913043478,"proverbs_acc":0.77,"religous_acc":0.9333333333,"social_manners_acc":0.9101123596,"souvenirs_acc":0.72,"sports_acc":0.6031746032,"GPK_acc":0.7936191426,"SPK_acc":0.7588245834,"UPK_acc":0.6854869755}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7040411769,"cinema_acc":0.7625,"emergency_number_acc":0.9,"foods_acc":0.78,"games_acc":0.7,"herbal_drugs_acc":0.625,"places_acc":0.8666666667,"poetry_acc":0.875,"politicians_acc":0.85,"popular_people_acc":0.8461538462,"Government_law_acc":0.8913043478,"proverbs_acc":0.86,"religous_acc":0.8888888889,"social_manners_acc":0.8651685393,"souvenirs_acc":0.68,"sports_acc":0.4761904762,"GPK_acc":0.8005982054,"SPK_acc":0.7258555814,"UPK_acc":0.6759912742}
|
| 6 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6957640676,"cinema_acc":0.725,"emergency_number_acc":0.6,"foods_acc":0.79,"games_acc":0.5,"herbal_drugs_acc":0.75,"places_acc":0.8380952381,"poetry_acc":0.825,"politicians_acc":0.75,"popular_people_acc":0.7846153846,"Government_law_acc":0.9565217391,"proverbs_acc":0.78,"religous_acc":0.9111111111,"social_manners_acc":0.8539325843,"souvenirs_acc":0.74,"sports_acc":0.4761904762,"GPK_acc":0.7756729811,"SPK_acc":0.7263931195,"UPK_acc":0.6635442063}
|
| 7 |
-
{"Model Name":"gpt-4o","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6884607359,"cinema_acc":0.75,"emergency_number_acc":0.7,"foods_acc":0.78,"games_acc":0.6,"herbal_drugs_acc":0.7,"places_acc":0.8380952381,"poetry_acc":0.9,"politicians_acc":0.95,"popular_people_acc":0.8615384615,"Government_law_acc":0.9347826087,"proverbs_acc":0.8,"religous_acc":0.9333333333,"social_manners_acc":0.8426966292,"souvenirs_acc":0.66,"sports_acc":0.5555555556,"GPK_acc":0.8015952144,"SPK_acc":0.720121842,"UPK_acc":0.6512254587}
|
| 8 |
-
{"Model Name":"deepseek-reasoner","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.6810513107,"cinema_acc":0.5125,"emergency_number_acc":0.5,"foods_acc":0.63,"games_acc":0.55,"herbal_drugs_acc":0.65,"places_acc":0.8666666667,"poetry_acc":0.55,"politicians_acc":0.8,"popular_people_acc":0.7435897436,"Government_law_acc":0.9347826087,"proverbs_acc":0.81,"religous_acc":0.9111111111,"social_manners_acc":0.8764044944,"souvenirs_acc":0.72,"sports_acc":0.5079365079,"GPK_acc":0.7288135593,"SPK_acc":0.7400865177,"UPK_acc":0.6328756576}
|
| 9 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6466578563,"cinema_acc":0.7125,"emergency_number_acc":0.6,"foods_acc":0.72,"games_acc":0.5,"herbal_drugs_acc":0.7,"places_acc":0.8666666667,"poetry_acc":0.8,"politicians_acc":0.8,"popular_people_acc":0.7743589744,"Government_law_acc":0.9347826087,"proverbs_acc":0.77,"religous_acc":0.9111111111,"social_manners_acc":0.8539325843,"souvenirs_acc":0.68,"sports_acc":0.5873015873,"GPK_acc":0.7686939182,"SPK_acc":0.6764020785,"UPK_acc":0.6096496856}
|
| 10 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6128538638,"cinema_acc":0.525,"emergency_number_acc":0.7,"foods_acc":0.73,"games_acc":0.55,"herbal_drugs_acc":0.625,"places_acc":0.8380952381,"poetry_acc":0.575,"politicians_acc":0.6,"popular_people_acc":0.7076923077,"Government_law_acc":0.847826087,"proverbs_acc":0.71,"religous_acc":0.6666666667,"social_manners_acc":0.8202247191,"souvenirs_acc":0.68,"sports_acc":0.4920634921,"GPK_acc":0.6949152542,"SPK_acc":0.6265902168,"UPK_acc":0.5924547671}
|
| 11 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","acc":0.5980651448,"cinema_acc":0.6,"emergency_number_acc":0.5,"foods_acc":0.67,"games_acc":0.65,"herbal_drugs_acc":0.675,"places_acc":0.8476190476,"poetry_acc":0.775,"politicians_acc":0.95,"popular_people_acc":0.8092783505,"Government_law_acc":0.8913043478,"proverbs_acc":0.78,"religous_acc":0.8666666667,"social_manners_acc":0.8988764045,"souvenirs_acc":0.68,"sports_acc":0.5396825397,"GPK_acc":0.7604790419,"SPK_acc":0.6417428725,"UPK_acc":0.5458980614}
|
| 12 |
-
{"Model Name":"deepseek-chat","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.5908047576,"cinema_acc":0.5875,"emergency_number_acc":0.4,"foods_acc":0.56,"games_acc":0.55,"herbal_drugs_acc":0.75,"places_acc":0.8285714286,"poetry_acc":0.75,"politicians_acc":0.7,"popular_people_acc":0.7794871795,"Government_law_acc":0.8695652174,"proverbs_acc":0.78,"religous_acc":0.8444444444,"social_manners_acc":0.808988764,"souvenirs_acc":0.74,"sports_acc":0.5555555556,"GPK_acc":0.7288135593,"SPK_acc":0.6348324673,"UPK_acc":0.541511613}
|
| 13 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","acc":0.5714086374,"cinema_acc":0.5625,"emergency_number_acc":0.3,"foods_acc":0.56,"games_acc":0.6,"herbal_drugs_acc":0.575,"places_acc":0.8095238095,"poetry_acc":0.6,"politicians_acc":0.85,"popular_people_acc":0.7282051282,"Government_law_acc":0.8913043478,"proverbs_acc":0.7,"religous_acc":0.8222222222,"social_manners_acc":0.8539325843,"souvenirs_acc":0.6,"sports_acc":0.5555555556,"GPK_acc":0.6939182453,"SPK_acc":0.605489774,"UPK_acc":0.5310727179}
|
| 14 |
-
{"Model Name":"gpt-4o-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.56986854,"cinema_acc":0.6625,"emergency_number_acc":0.4,"foods_acc":0.71,"games_acc":0.45,"herbal_drugs_acc":0.675,"places_acc":0.7714285714,"poetry_acc":0.675,"politicians_acc":0.75,"popular_people_acc":0.6820512821,"Government_law_acc":0.8913043478,"proverbs_acc":0.75,"religous_acc":0.7777777778,"social_manners_acc":0.7865168539,"souvenirs_acc":0.68,"sports_acc":0.5555555556,"GPK_acc":0.7078763709,"SPK_acc":0.6075972048,"UPK_acc":0.5250866162}
|
| 15 |
-
{"Model Name":"Qwen3-32B","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","acc":0.5635086255,"cinema_acc":0.45,"emergency_number_acc":0.2,"foods_acc":0.49,"games_acc":0.45,"herbal_drugs_acc":0.45,"places_acc":0.6285714286,"poetry_acc":0.35,"politicians_acc":0.3,"popular_people_acc":0.4974358974,"Government_law_acc":0.7608695652,"proverbs_acc":0.64,"religous_acc":0.6888888889,"social_manners_acc":0.8202247191,"souvenirs_acc":0.56,"sports_acc":0.3968253968,"GPK_acc":0.5513459621,"SPK_acc":0.5967741935,"UPK_acc":0.5412549724}
|
| 16 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","acc":0.5633303193,"cinema_acc":0.625,"emergency_number_acc":0.4,"foods_acc":0.68,"games_acc":0.35,"herbal_drugs_acc":0.6,"places_acc":0.7904761905,"poetry_acc":0.7,"politicians_acc":0.75,"popular_people_acc":0.641025641,"Government_law_acc":0.8913043478,"proverbs_acc":0.74,"religous_acc":0.7777777778,"social_manners_acc":0.8764044944,"souvenirs_acc":0.62,"sports_acc":0.6031746032,"GPK_acc":0.6989032901,"SPK_acc":0.5977423401,"UPK_acc":0.5212370076}
|
| 17 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.5440356745,"cinema_acc":0.5263157895,"emergency_number_acc":0.4,"foods_acc":0.72,"games_acc":0.55,"herbal_drugs_acc":0.575,"places_acc":0.8095238095,"poetry_acc":0.625,"politicians_acc":0.75,"popular_people_acc":0.6717948718,"Government_law_acc":0.8043478261,"proverbs_acc":0.72,"religous_acc":0.8444444444,"social_manners_acc":0.8539325843,"souvenirs_acc":0.6,"sports_acc":0.4920634921,"GPK_acc":0.6906906907,"SPK_acc":0.5934420355,"UPK_acc":0.4897066392}
|
| 18 |
-
{"Model Name":"Qwen3-14B","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","acc":0.5139458858,"cinema_acc":0.55,"emergency_number_acc":0.4,"foods_acc":0.52,"games_acc":0.5,"herbal_drugs_acc":0.5,"places_acc":0.6285714286,"poetry_acc":0.5,"politicians_acc":0.6,"popular_people_acc":0.5641025641,"Government_law_acc":0.7173913043,"proverbs_acc":0.5,"religous_acc":0.6444444444,"social_manners_acc":0.7415730337,"souvenirs_acc":0.28,"sports_acc":0.5396825397,"GPK_acc":0.5623130608,"SPK_acc":0.5513348862,"UPK_acc":0.4809444373}
|
| 19 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","acc":0.5105376643,"cinema_acc":0.5,"emergency_number_acc":0.5,"foods_acc":0.53,"games_acc":0.4,"herbal_drugs_acc":0.625,"places_acc":0.6857142857,"poetry_acc":0.575,"politicians_acc":0.65,"popular_people_acc":0.6205128205,"Government_law_acc":0.8043478261,"proverbs_acc":0.61,"religous_acc":0.6,"social_manners_acc":0.7078651685,"souvenirs_acc":0.54,"sports_acc":0.5714285714,"GPK_acc":0.6091724826,"SPK_acc":0.556710267,"UPK_acc":0.4647760811}
|
| 20 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","acc":0.5097725534,"cinema_acc":0.4875,"emergency_number_acc":0.3,"foods_acc":0.5,"games_acc":0.5,"herbal_drugs_acc":0.4,"places_acc":0.5428571429,"poetry_acc":0.45,"politicians_acc":0.4,"popular_people_acc":0.6051282051,"Government_law_acc":0.7826086957,"proverbs_acc":0.63,"religous_acc":0.7333333333,"social_manners_acc":0.6853932584,"souvenirs_acc":0.34,"sports_acc":0.5079365079,"GPK_acc":0.5593220339,"SPK_acc":0.5384339724,"UPK_acc":0.4828692416}
|
| 21 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","acc":0.4979475405,"cinema_acc":0.425,"emergency_number_acc":0.3,"foods_acc":0.37,"games_acc":0.35,"herbal_drugs_acc":0.375,"places_acc":0.4666666667,"poetry_acc":0.425,"politicians_acc":0.45,"popular_people_acc":0.4564102564,"Government_law_acc":0.6739130435,"proverbs_acc":0.36,"religous_acc":0.5333333333,"social_manners_acc":0.6292134831,"souvenirs_acc":0.32,"sports_acc":0.3174603175,"GPK_acc":0.4416749751,"SPK_acc":0.5344086022,"UPK_acc":0.4790757381}
|
| 22 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","acc":0.4800723378,"cinema_acc":0.675,"emergency_number_acc":0.5,"foods_acc":0.69,"games_acc":0.5,"herbal_drugs_acc":0.625,"places_acc":0.8,"poetry_acc":0.775,"politicians_acc":0.75,"popular_people_acc":0.7487179487,"Government_law_acc":0.847826087,"proverbs_acc":0.66,"religous_acc":0.5555555556,"social_manners_acc":0.7865168539,"souvenirs_acc":0.62,"sports_acc":0.5396825397,"GPK_acc":0.701894317,"SPK_acc":0.5158573732,"UPK_acc":0.425895034}
|
| 23 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.4784030048,"cinema_acc":0.4875,"emergency_number_acc":0.5,"foods_acc":0.49,"games_acc":0.6,"herbal_drugs_acc":0.6,"places_acc":0.6857142857,"poetry_acc":0.5,"politicians_acc":0.4,"popular_people_acc":0.6,"Government_law_acc":0.8260869565,"proverbs_acc":0.62,"religous_acc":0.6888888889,"social_manners_acc":0.7528089888,"souvenirs_acc":0.68,"sports_acc":0.3333333333,"GPK_acc":0.5972083749,"SPK_acc":0.5077943021,"UPK_acc":0.4420633902}
|
| 24 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","acc":0.4763231198,"cinema_acc":0.45,"emergency_number_acc":0.2,"foods_acc":0.41,"games_acc":0.45,"herbal_drugs_acc":0.475,"places_acc":0.6952380952,"poetry_acc":0.325,"politicians_acc":0.45,"popular_people_acc":0.4974358974,"Government_law_acc":0.847826087,"proverbs_acc":0.55,"religous_acc":0.6666666667,"social_manners_acc":0.7078651685,"souvenirs_acc":0.42,"sports_acc":0.4603174603,"GPK_acc":0.5343968096,"SPK_acc":0.5164066703,"UPK_acc":0.4401028278}
|
| 25 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","acc":0.4682478959,"cinema_acc":0.6375,"emergency_number_acc":0.5,"foods_acc":0.58,"games_acc":0.65,"herbal_drugs_acc":0.575,"places_acc":0.7238095238,"poetry_acc":0.5,"politicians_acc":0.45,"popular_people_acc":0.5692307692,"Government_law_acc":0.7826086957,"proverbs_acc":0.62,"religous_acc":0.5555555556,"social_manners_acc":0.7752808989,"souvenirs_acc":0.48,"sports_acc":0.4444444444,"GPK_acc":0.6081754736,"SPK_acc":0.5244579824,"UPK_acc":0.4099833184}
|
| 26 |
-
{"Model Name":"Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","acc":0.4467552341,"cinema_acc":0.375,"emergency_number_acc":0.4,"foods_acc":0.29,"games_acc":0.35,"herbal_drugs_acc":0.4,"places_acc":0.5047619048,"poetry_acc":0.2,"politicians_acc":0.25,"popular_people_acc":0.3692307692,"Government_law_acc":0.7608695652,"proverbs_acc":0.44,"religous_acc":0.5555555556,"social_manners_acc":0.6741573034,"souvenirs_acc":0.18,"sports_acc":0.3968253968,"GPK_acc":0.4207377866,"SPK_acc":0.4819924745,"UPK_acc":0.4248684717}
|
| 27 |
-
{"Model Name":"aya-expanse-32b","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","acc":0.4140641302,"cinema_acc":0.4875,"emergency_number_acc":0.4,"foods_acc":0.6,"games_acc":0.5,"herbal_drugs_acc":0.675,"places_acc":0.7904761905,"poetry_acc":0.55,"politicians_acc":0.6,"popular_people_acc":0.6820512821,"Government_law_acc":0.847826087,"proverbs_acc":0.66,"religous_acc":0.6222222222,"social_manners_acc":0.7640449438,"souvenirs_acc":0.64,"sports_acc":0.4126984127,"GPK_acc":0.6470588235,"SPK_acc":0.4373768142,"UPK_acc":0.3673809829}
|
| 28 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","acc":0.4046741323,"cinema_acc":0.425,"emergency_number_acc":0.3,"foods_acc":0.52,"games_acc":0.5,"herbal_drugs_acc":0.5,"places_acc":0.7523809524,"poetry_acc":0.325,"politicians_acc":0.5,"popular_people_acc":0.6307692308,"Government_law_acc":0.8043478261,"proverbs_acc":0.59,"religous_acc":0.5111111111,"social_manners_acc":0.595505618,"souvenirs_acc":0.34,"sports_acc":0.4285714286,"GPK_acc":0.5583250249,"SPK_acc":0.4311055366,"UPK_acc":0.3659694598}
|
| 29 |
-
{"Model Name":"Qwen3-4B","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","acc":0.4025179106,"cinema_acc":0.425,"emergency_number_acc":0.2,"foods_acc":0.49,"games_acc":0.4,"herbal_drugs_acc":0.425,"places_acc":0.4095238095,"poetry_acc":0.4,"politicians_acc":0.5,"popular_people_acc":0.4974358974,"Government_law_acc":0.6739130435,"proverbs_acc":0.48,"religous_acc":0.5111111111,"social_manners_acc":0.5617977528,"souvenirs_acc":0.26,"sports_acc":0.4126984127,"GPK_acc":0.4656031904,"SPK_acc":0.4341515857,"UPK_acc":0.3717438727}
|
| 30 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","acc":0.3825554705,"cinema_acc":0.4875,"emergency_number_acc":0.2,"foods_acc":0.46,"games_acc":0.7,"herbal_drugs_acc":0.475,"places_acc":0.5523809524,"poetry_acc":0.525,"politicians_acc":0.5,"popular_people_acc":0.5076923077,"Government_law_acc":0.7608695652,"proverbs_acc":0.54,"religous_acc":0.4444444444,"social_manners_acc":0.6292134831,"souvenirs_acc":0.48,"sports_acc":0.4285714286,"GPK_acc":0.5224327019,"SPK_acc":0.4135459595,"UPK_acc":0.3423585269}
|
| 31 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","acc":0.2830214927,"cinema_acc":0.5125,"emergency_number_acc":0.2,"foods_acc":0.51,"games_acc":0.25,"herbal_drugs_acc":0.5,"places_acc":0.3904761905,"poetry_acc":0.525,"politicians_acc":0.8,"popular_people_acc":0.5179487179,"Government_law_acc":0.347826087,"proverbs_acc":0.34,"religous_acc":0.3333333333,"social_manners_acc":0.2696629213,"souvenirs_acc":0.24,"sports_acc":0.4603174603,"GPK_acc":0.4267198405,"SPK_acc":0.2777280057,"UPK_acc":0.268317721}
|
| 32 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","acc":0.278430827,"cinema_acc":0.25,"emergency_number_acc":0.0,"foods_acc":0.16,"games_acc":0.3,"herbal_drugs_acc":0.125,"places_acc":0.4380952381,"poetry_acc":0.075,"politicians_acc":0.0,"popular_people_acc":0.1948717949,"Government_law_acc":0.5652173913,"proverbs_acc":0.2,"religous_acc":0.2444444444,"social_manners_acc":0.3146067416,"souvenirs_acc":0.28,"sports_acc":0.2063492063,"GPK_acc":0.2452642074,"SPK_acc":0.2999462462,"UPK_acc":0.2672911587}
|
| 33 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","acc":0.2553383877,"cinema_acc":0.375,"emergency_number_acc":0.1,"foods_acc":0.47,"games_acc":0.15,"herbal_drugs_acc":0.425,"places_acc":0.4285714286,"poetry_acc":0.425,"politicians_acc":0.45,"popular_people_acc":0.4051282051,"Government_law_acc":0.4782608696,"proverbs_acc":0.13,"religous_acc":0.4,"social_manners_acc":0.3707865169,"souvenirs_acc":0.12,"sports_acc":0.3333333333,"GPK_acc":0.3599202393,"SPK_acc":0.2727109837,"UPK_acc":0.2294366739}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","acc":0.1987201781,"cinema_acc":0.3375,"emergency_number_acc":0.0,"foods_acc":0.18,"games_acc":0.55,"herbal_drugs_acc":0.175,"places_acc":0.4,"poetry_acc":0.15,"politicians_acc":0.25,"popular_people_acc":0.2615384615,"Government_law_acc":0.2608695652,"proverbs_acc":0.31,"religous_acc":0.1555555556,"social_manners_acc":0.393258427,"souvenirs_acc":0.26,"sports_acc":0.2380952381,"GPK_acc":0.2791625125,"SPK_acc":0.2065938004,"UPK_acc":0.1827280893}
|
| 35 |
-
{"Model Name":"o4-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":null,"cinema_acc":null,"emergency_number_acc":null,"foods_acc":null,"games_acc":null,"herbal_drugs_acc":null,"places_acc":null,"poetry_acc":null,"politicians_acc":null,"popular_people_acc":null,"Government_law_acc":null,"proverbs_acc":null,"religous_acc":null,"social_manners_acc":null,"souvenirs_acc":null,"sports_acc":null,"GPK_acc":null,"SPK_acc":null,"UPK_acc":null}
|
|
|
|
| 1 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8401114206,"cinema_acc":0.9375,"emergency_number_acc":0.8,"foods_acc":0.8,"games_acc":0.55,"herbal_drugs_acc":0.75,"places_acc":0.8857142857,"poetry_acc":0.9,"politicians_acc":0.95,"popular_people_acc":0.9282051282,"Government_law_acc":0.9782608696,"proverbs_acc":0.9,"religous_acc":0.9777777778,"social_manners_acc":0.9438202247,"souvenirs_acc":0.78,"sports_acc":0.6507936508,"GPK_acc":0.8733798604,"SPK_acc":0.856476498,"UPK_acc":0.824103816}
|
| 2 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8217442751,"cinema_acc":0.8125,"emergency_number_acc":1.0,"foods_acc":0.77,"games_acc":0.75,"herbal_drugs_acc":0.65,"places_acc":0.8952380952,"poetry_acc":0.875,"politicians_acc":1.0,"popular_people_acc":0.9179487179,"Government_law_acc":0.8913043478,"proverbs_acc":0.89,"religous_acc":0.9555555556,"social_manners_acc":0.8876404494,"souvenirs_acc":0.78,"sports_acc":0.5714285714,"GPK_acc":0.8454636092,"SPK_acc":0.833781603,"UPK_acc":0.8100680622}
|
| 3 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7274624374,"cinema_acc":0.8875,"emergency_number_acc":0.8,"foods_acc":0.82,"games_acc":0.4,"herbal_drugs_acc":0.75,"places_acc":0.8952380952,"poetry_acc":0.9,"politicians_acc":0.9,"popular_people_acc":0.8564102564,"Government_law_acc":0.9565217391,"proverbs_acc":0.87,"religous_acc":0.9555555556,"social_manners_acc":0.9101123596,"souvenirs_acc":0.72,"sports_acc":0.5873015873,"GPK_acc":0.8394815553,"SPK_acc":0.7645583229,"UPK_acc":0.686473306}
|
| 4 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7214996174,"cinema_acc":0.6875,"emergency_number_acc":0.7,"foods_acc":0.74,"games_acc":0.5,"herbal_drugs_acc":0.675,"places_acc":0.8476190476,"poetry_acc":0.8,"politicians_acc":0.95,"popular_people_acc":0.8615384615,"Government_law_acc":0.8913043478,"proverbs_acc":0.77,"religous_acc":0.9333333333,"social_manners_acc":0.9101123596,"souvenirs_acc":0.72,"sports_acc":0.6031746032,"GPK_acc":0.7936191426,"SPK_acc":0.7588245834,"UPK_acc":0.6854869755}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7040411769,"cinema_acc":0.7625,"emergency_number_acc":0.9,"foods_acc":0.78,"games_acc":0.7,"herbal_drugs_acc":0.625,"places_acc":0.8666666667,"poetry_acc":0.875,"politicians_acc":0.85,"popular_people_acc":0.8461538462,"Government_law_acc":0.8913043478,"proverbs_acc":0.86,"religous_acc":0.8888888889,"social_manners_acc":0.8651685393,"souvenirs_acc":0.68,"sports_acc":0.4761904762,"GPK_acc":0.8005982054,"SPK_acc":0.7258555814,"UPK_acc":0.6759912742}
|
| 6 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6957640676,"cinema_acc":0.725,"emergency_number_acc":0.6,"foods_acc":0.79,"games_acc":0.5,"herbal_drugs_acc":0.75,"places_acc":0.8380952381,"poetry_acc":0.825,"politicians_acc":0.75,"popular_people_acc":0.7846153846,"Government_law_acc":0.9565217391,"proverbs_acc":0.78,"religous_acc":0.9111111111,"social_manners_acc":0.8539325843,"souvenirs_acc":0.74,"sports_acc":0.4761904762,"GPK_acc":0.7756729811,"SPK_acc":0.7263931195,"UPK_acc":0.6635442063}
|
| 7 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6884607359,"cinema_acc":0.75,"emergency_number_acc":0.7,"foods_acc":0.78,"games_acc":0.6,"herbal_drugs_acc":0.7,"places_acc":0.8380952381,"poetry_acc":0.9,"politicians_acc":0.95,"popular_people_acc":0.8615384615,"Government_law_acc":0.9347826087,"proverbs_acc":0.8,"religous_acc":0.9333333333,"social_manners_acc":0.8426966292,"souvenirs_acc":0.66,"sports_acc":0.5555555556,"GPK_acc":0.8015952144,"SPK_acc":0.720121842,"UPK_acc":0.6512254587}
|
| 8 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.6810513107,"cinema_acc":0.5125,"emergency_number_acc":0.5,"foods_acc":0.63,"games_acc":0.55,"herbal_drugs_acc":0.65,"places_acc":0.8666666667,"poetry_acc":0.55,"politicians_acc":0.8,"popular_people_acc":0.7435897436,"Government_law_acc":0.9347826087,"proverbs_acc":0.81,"religous_acc":0.9111111111,"social_manners_acc":0.8764044944,"souvenirs_acc":0.72,"sports_acc":0.5079365079,"GPK_acc":0.7288135593,"SPK_acc":0.7400865177,"UPK_acc":0.6328756576}
|
| 9 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6466578563,"cinema_acc":0.7125,"emergency_number_acc":0.6,"foods_acc":0.72,"games_acc":0.5,"herbal_drugs_acc":0.7,"places_acc":0.8666666667,"poetry_acc":0.8,"politicians_acc":0.8,"popular_people_acc":0.7743589744,"Government_law_acc":0.9347826087,"proverbs_acc":0.77,"religous_acc":0.9111111111,"social_manners_acc":0.8539325843,"souvenirs_acc":0.68,"sports_acc":0.5873015873,"GPK_acc":0.7686939182,"SPK_acc":0.6764020785,"UPK_acc":0.6096496856}
|
| 10 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.6128538638,"cinema_acc":0.525,"emergency_number_acc":0.7,"foods_acc":0.73,"games_acc":0.55,"herbal_drugs_acc":0.625,"places_acc":0.8380952381,"poetry_acc":0.575,"politicians_acc":0.6,"popular_people_acc":0.7076923077,"Government_law_acc":0.847826087,"proverbs_acc":0.71,"religous_acc":0.6666666667,"social_manners_acc":0.8202247191,"souvenirs_acc":0.68,"sports_acc":0.4920634921,"GPK_acc":0.6949152542,"SPK_acc":0.6265902168,"UPK_acc":0.5924547671}
|
| 11 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","acc":0.5980651448,"cinema_acc":0.6,"emergency_number_acc":0.5,"foods_acc":0.67,"games_acc":0.65,"herbal_drugs_acc":0.675,"places_acc":0.8476190476,"poetry_acc":0.775,"politicians_acc":0.95,"popular_people_acc":0.8092783505,"Government_law_acc":0.8913043478,"proverbs_acc":0.78,"religous_acc":0.8666666667,"social_manners_acc":0.8988764045,"souvenirs_acc":0.68,"sports_acc":0.5396825397,"GPK_acc":0.7604790419,"SPK_acc":0.6417428725,"UPK_acc":0.5458980614}
|
| 12 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.5908047576,"cinema_acc":0.5875,"emergency_number_acc":0.4,"foods_acc":0.56,"games_acc":0.55,"herbal_drugs_acc":0.75,"places_acc":0.8285714286,"poetry_acc":0.75,"politicians_acc":0.7,"popular_people_acc":0.7794871795,"Government_law_acc":0.8695652174,"proverbs_acc":0.78,"religous_acc":0.8444444444,"social_manners_acc":0.808988764,"souvenirs_acc":0.74,"sports_acc":0.5555555556,"GPK_acc":0.7288135593,"SPK_acc":0.6348324673,"UPK_acc":0.541511613}
|
| 13 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","acc":0.5714086374,"cinema_acc":0.5625,"emergency_number_acc":0.3,"foods_acc":0.56,"games_acc":0.6,"herbal_drugs_acc":0.575,"places_acc":0.8095238095,"poetry_acc":0.6,"politicians_acc":0.85,"popular_people_acc":0.7282051282,"Government_law_acc":0.8913043478,"proverbs_acc":0.7,"religous_acc":0.8222222222,"social_manners_acc":0.8539325843,"souvenirs_acc":0.6,"sports_acc":0.5555555556,"GPK_acc":0.6939182453,"SPK_acc":0.605489774,"UPK_acc":0.5310727179}
|
| 14 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.56986854,"cinema_acc":0.6625,"emergency_number_acc":0.4,"foods_acc":0.71,"games_acc":0.45,"herbal_drugs_acc":0.675,"places_acc":0.7714285714,"poetry_acc":0.675,"politicians_acc":0.75,"popular_people_acc":0.6820512821,"Government_law_acc":0.8913043478,"proverbs_acc":0.75,"religous_acc":0.7777777778,"social_manners_acc":0.7865168539,"souvenirs_acc":0.68,"sports_acc":0.5555555556,"GPK_acc":0.7078763709,"SPK_acc":0.6075972048,"UPK_acc":0.5250866162}
|
| 15 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","acc":0.5635086255,"cinema_acc":0.45,"emergency_number_acc":0.2,"foods_acc":0.49,"games_acc":0.45,"herbal_drugs_acc":0.45,"places_acc":0.6285714286,"poetry_acc":0.35,"politicians_acc":0.3,"popular_people_acc":0.4974358974,"Government_law_acc":0.7608695652,"proverbs_acc":0.64,"religous_acc":0.6888888889,"social_manners_acc":0.8202247191,"souvenirs_acc":0.56,"sports_acc":0.3968253968,"GPK_acc":0.5513459621,"SPK_acc":0.5967741935,"UPK_acc":0.5412549724}
|
| 16 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","acc":0.5633303193,"cinema_acc":0.625,"emergency_number_acc":0.4,"foods_acc":0.68,"games_acc":0.35,"herbal_drugs_acc":0.6,"places_acc":0.7904761905,"poetry_acc":0.7,"politicians_acc":0.75,"popular_people_acc":0.641025641,"Government_law_acc":0.8913043478,"proverbs_acc":0.74,"religous_acc":0.7777777778,"social_manners_acc":0.8764044944,"souvenirs_acc":0.62,"sports_acc":0.6031746032,"GPK_acc":0.6989032901,"SPK_acc":0.5977423401,"UPK_acc":0.5212370076}
|
| 17 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.5440356745,"cinema_acc":0.5263157895,"emergency_number_acc":0.4,"foods_acc":0.72,"games_acc":0.55,"herbal_drugs_acc":0.575,"places_acc":0.8095238095,"poetry_acc":0.625,"politicians_acc":0.75,"popular_people_acc":0.6717948718,"Government_law_acc":0.8043478261,"proverbs_acc":0.72,"religous_acc":0.8444444444,"social_manners_acc":0.8539325843,"souvenirs_acc":0.6,"sports_acc":0.4920634921,"GPK_acc":0.6906906907,"SPK_acc":0.5934420355,"UPK_acc":0.4897066392}
|
| 18 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","acc":0.5139458858,"cinema_acc":0.55,"emergency_number_acc":0.4,"foods_acc":0.52,"games_acc":0.5,"herbal_drugs_acc":0.5,"places_acc":0.6285714286,"poetry_acc":0.5,"politicians_acc":0.6,"popular_people_acc":0.5641025641,"Government_law_acc":0.7173913043,"proverbs_acc":0.5,"religous_acc":0.6444444444,"social_manners_acc":0.7415730337,"souvenirs_acc":0.28,"sports_acc":0.5396825397,"GPK_acc":0.5623130608,"SPK_acc":0.5513348862,"UPK_acc":0.4809444373}
|
| 19 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","acc":0.5105376643,"cinema_acc":0.5,"emergency_number_acc":0.5,"foods_acc":0.53,"games_acc":0.4,"herbal_drugs_acc":0.625,"places_acc":0.6857142857,"poetry_acc":0.575,"politicians_acc":0.65,"popular_people_acc":0.6205128205,"Government_law_acc":0.8043478261,"proverbs_acc":0.61,"religous_acc":0.6,"social_manners_acc":0.7078651685,"souvenirs_acc":0.54,"sports_acc":0.5714285714,"GPK_acc":0.6091724826,"SPK_acc":0.556710267,"UPK_acc":0.4647760811}
|
| 20 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","acc":0.5097725534,"cinema_acc":0.4875,"emergency_number_acc":0.3,"foods_acc":0.5,"games_acc":0.5,"herbal_drugs_acc":0.4,"places_acc":0.5428571429,"poetry_acc":0.45,"politicians_acc":0.4,"popular_people_acc":0.6051282051,"Government_law_acc":0.7826086957,"proverbs_acc":0.63,"religous_acc":0.7333333333,"social_manners_acc":0.6853932584,"souvenirs_acc":0.34,"sports_acc":0.5079365079,"GPK_acc":0.5593220339,"SPK_acc":0.5384339724,"UPK_acc":0.4828692416}
|
| 21 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","acc":0.4979475405,"cinema_acc":0.425,"emergency_number_acc":0.3,"foods_acc":0.37,"games_acc":0.35,"herbal_drugs_acc":0.375,"places_acc":0.4666666667,"poetry_acc":0.425,"politicians_acc":0.45,"popular_people_acc":0.4564102564,"Government_law_acc":0.6739130435,"proverbs_acc":0.36,"religous_acc":0.5333333333,"social_manners_acc":0.6292134831,"souvenirs_acc":0.32,"sports_acc":0.3174603175,"GPK_acc":0.4416749751,"SPK_acc":0.5344086022,"UPK_acc":0.4790757381}
|
| 22 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","acc":0.4800723378,"cinema_acc":0.675,"emergency_number_acc":0.5,"foods_acc":0.69,"games_acc":0.5,"herbal_drugs_acc":0.625,"places_acc":0.8,"poetry_acc":0.775,"politicians_acc":0.75,"popular_people_acc":0.7487179487,"Government_law_acc":0.847826087,"proverbs_acc":0.66,"religous_acc":0.5555555556,"social_manners_acc":0.7865168539,"souvenirs_acc":0.62,"sports_acc":0.5396825397,"GPK_acc":0.701894317,"SPK_acc":0.5158573732,"UPK_acc":0.425895034}
|
| 23 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.4784030048,"cinema_acc":0.4875,"emergency_number_acc":0.5,"foods_acc":0.49,"games_acc":0.6,"herbal_drugs_acc":0.6,"places_acc":0.6857142857,"poetry_acc":0.5,"politicians_acc":0.4,"popular_people_acc":0.6,"Government_law_acc":0.8260869565,"proverbs_acc":0.62,"religous_acc":0.6888888889,"social_manners_acc":0.7528089888,"souvenirs_acc":0.68,"sports_acc":0.3333333333,"GPK_acc":0.5972083749,"SPK_acc":0.5077943021,"UPK_acc":0.4420633902}
|
| 24 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","acc":0.4763231198,"cinema_acc":0.45,"emergency_number_acc":0.2,"foods_acc":0.41,"games_acc":0.45,"herbal_drugs_acc":0.475,"places_acc":0.6952380952,"poetry_acc":0.325,"politicians_acc":0.45,"popular_people_acc":0.4974358974,"Government_law_acc":0.847826087,"proverbs_acc":0.55,"religous_acc":0.6666666667,"social_manners_acc":0.7078651685,"souvenirs_acc":0.42,"sports_acc":0.4603174603,"GPK_acc":0.5343968096,"SPK_acc":0.5164066703,"UPK_acc":0.4401028278}
|
| 25 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","acc":0.4682478959,"cinema_acc":0.6375,"emergency_number_acc":0.5,"foods_acc":0.58,"games_acc":0.65,"herbal_drugs_acc":0.575,"places_acc":0.7238095238,"poetry_acc":0.5,"politicians_acc":0.45,"popular_people_acc":0.5692307692,"Government_law_acc":0.7826086957,"proverbs_acc":0.62,"religous_acc":0.5555555556,"social_manners_acc":0.7752808989,"souvenirs_acc":0.48,"sports_acc":0.4444444444,"GPK_acc":0.6081754736,"SPK_acc":0.5244579824,"UPK_acc":0.4099833184}
|
| 26 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","acc":0.4467552341,"cinema_acc":0.375,"emergency_number_acc":0.4,"foods_acc":0.29,"games_acc":0.35,"herbal_drugs_acc":0.4,"places_acc":0.5047619048,"poetry_acc":0.2,"politicians_acc":0.25,"popular_people_acc":0.3692307692,"Government_law_acc":0.7608695652,"proverbs_acc":0.44,"religous_acc":0.5555555556,"social_manners_acc":0.6741573034,"souvenirs_acc":0.18,"sports_acc":0.3968253968,"GPK_acc":0.4207377866,"SPK_acc":0.4819924745,"UPK_acc":0.4248684717}
|
| 27 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","acc":0.4140641302,"cinema_acc":0.4875,"emergency_number_acc":0.4,"foods_acc":0.6,"games_acc":0.5,"herbal_drugs_acc":0.675,"places_acc":0.7904761905,"poetry_acc":0.55,"politicians_acc":0.6,"popular_people_acc":0.6820512821,"Government_law_acc":0.847826087,"proverbs_acc":0.66,"religous_acc":0.6222222222,"social_manners_acc":0.7640449438,"souvenirs_acc":0.64,"sports_acc":0.4126984127,"GPK_acc":0.6470588235,"SPK_acc":0.4373768142,"UPK_acc":0.3673809829}
|
| 28 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","acc":0.4046741323,"cinema_acc":0.425,"emergency_number_acc":0.3,"foods_acc":0.52,"games_acc":0.5,"herbal_drugs_acc":0.5,"places_acc":0.7523809524,"poetry_acc":0.325,"politicians_acc":0.5,"popular_people_acc":0.6307692308,"Government_law_acc":0.8043478261,"proverbs_acc":0.59,"religous_acc":0.5111111111,"social_manners_acc":0.595505618,"souvenirs_acc":0.34,"sports_acc":0.4285714286,"GPK_acc":0.5583250249,"SPK_acc":0.4311055366,"UPK_acc":0.3659694598}
|
| 29 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","acc":0.4025179106,"cinema_acc":0.425,"emergency_number_acc":0.2,"foods_acc":0.49,"games_acc":0.4,"herbal_drugs_acc":0.425,"places_acc":0.4095238095,"poetry_acc":0.4,"politicians_acc":0.5,"popular_people_acc":0.4974358974,"Government_law_acc":0.6739130435,"proverbs_acc":0.48,"religous_acc":0.5111111111,"social_manners_acc":0.5617977528,"souvenirs_acc":0.26,"sports_acc":0.4126984127,"GPK_acc":0.4656031904,"SPK_acc":0.4341515857,"UPK_acc":0.3717438727}
|
| 30 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","acc":0.3825554705,"cinema_acc":0.4875,"emergency_number_acc":0.2,"foods_acc":0.46,"games_acc":0.7,"herbal_drugs_acc":0.475,"places_acc":0.5523809524,"poetry_acc":0.525,"politicians_acc":0.5,"popular_people_acc":0.5076923077,"Government_law_acc":0.7608695652,"proverbs_acc":0.54,"religous_acc":0.4444444444,"social_manners_acc":0.6292134831,"souvenirs_acc":0.48,"sports_acc":0.4285714286,"GPK_acc":0.5224327019,"SPK_acc":0.4135459595,"UPK_acc":0.3423585269}
|
| 31 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","acc":0.2830214927,"cinema_acc":0.5125,"emergency_number_acc":0.2,"foods_acc":0.51,"games_acc":0.25,"herbal_drugs_acc":0.5,"places_acc":0.3904761905,"poetry_acc":0.525,"politicians_acc":0.8,"popular_people_acc":0.5179487179,"Government_law_acc":0.347826087,"proverbs_acc":0.34,"religous_acc":0.3333333333,"social_manners_acc":0.2696629213,"souvenirs_acc":0.24,"sports_acc":0.4603174603,"GPK_acc":0.4267198405,"SPK_acc":0.2777280057,"UPK_acc":0.268317721}
|
| 32 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","acc":0.278430827,"cinema_acc":0.25,"emergency_number_acc":0.0,"foods_acc":0.16,"games_acc":0.3,"herbal_drugs_acc":0.125,"places_acc":0.4380952381,"poetry_acc":0.075,"politicians_acc":0.0,"popular_people_acc":0.1948717949,"Government_law_acc":0.5652173913,"proverbs_acc":0.2,"religous_acc":0.2444444444,"social_manners_acc":0.3146067416,"souvenirs_acc":0.28,"sports_acc":0.2063492063,"GPK_acc":0.2452642074,"SPK_acc":0.2999462462,"UPK_acc":0.2672911587}
|
| 33 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","acc":0.2553383877,"cinema_acc":0.375,"emergency_number_acc":0.1,"foods_acc":0.47,"games_acc":0.15,"herbal_drugs_acc":0.425,"places_acc":0.4285714286,"poetry_acc":0.425,"politicians_acc":0.45,"popular_people_acc":0.4051282051,"Government_law_acc":0.4782608696,"proverbs_acc":0.13,"religous_acc":0.4,"social_manners_acc":0.3707865169,"souvenirs_acc":0.12,"sports_acc":0.3333333333,"GPK_acc":0.3599202393,"SPK_acc":0.2727109837,"UPK_acc":0.2294366739}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","acc":0.1987201781,"cinema_acc":0.3375,"emergency_number_acc":0.0,"foods_acc":0.18,"games_acc":0.55,"herbal_drugs_acc":0.175,"places_acc":0.4,"poetry_acc":0.15,"politicians_acc":0.25,"popular_people_acc":0.2615384615,"Government_law_acc":0.2608695652,"proverbs_acc":0.31,"religous_acc":0.1555555556,"social_manners_acc":0.393258427,"souvenirs_acc":0.26,"sports_acc":0.2380952381,"GPK_acc":0.2791625125,"SPK_acc":0.2065938004,"UPK_acc":0.1827280893}
|
| 35 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":null,"cinema_acc":null,"emergency_number_acc":null,"foods_acc":null,"games_acc":null,"herbal_drugs_acc":null,"places_acc":null,"poetry_acc":null,"politicians_acc":null,"popular_people_acc":null,"Government_law_acc":null,"proverbs_acc":null,"religous_acc":null,"social_manners_acc":null,"souvenirs_acc":null,"sports_acc":null,"GPK_acc":null,"SPK_acc":null,"UPK_acc":null}
|
leaderboard/boards_data/all.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"o3","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.7468,"Persian IFEval":0.926035503,"Persian MT-Bench":0.91375,"PerMMLU":0.8217442751,"PerCoR":0.9218,"Persian NLU":0.7207167537,"Persian NLG":0.1764906292}
|
| 2 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.7332,"Persian IFEval":0.8911764706,"Persian MT-Bench":0.91075,"PerMMLU":0.8401114206,"PerCoR":0.9427585507,"Persian NLU":0.6992555201,"Persian NLG":0.1151518212}
|
| 3 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.7127,"Persian IFEval":0.8810572687,"Persian MT-Bench":0.8695,"PerMMLU":0.7214996174,"PerCoR":0.9117647059,"Persian NLU":0.7143086066,"Persian NLG":0.1779340777}
|
| 4 |
-
{"Model Name":"gpt-4.1","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6992,"Persian IFEval":0.8634361233,"Persian MT-Bench":0.87325,"PerMMLU":0.7040411769,"PerCoR":0.8839,"Persian NLU":0.6758278127,"Persian NLG":0.194675133}
|
| 5 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6986,"Persian IFEval":0.8796992481,"Persian MT-Bench":0.8812289562,"PerMMLU":0.7274624374,"PerCoR":0.8717,"Persian NLU":0.6944128198,"Persian NLG":0.1368740087}
|
| 6 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6886,"Persian IFEval":0.8497790869,"Persian MT-Bench":0.838973064,"PerMMLU":0.6957640676,"PerCoR":0.8637863786,"Persian NLU":0.7050532433,"Persian NLG":0.178231145}
|
| 7 |
-
{"Model Name":"gpt-4o","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6877,"Persian IFEval":0.8296622614,"Persian MT-Bench":0.8371666667,"PerMMLU":0.6884607359,"PerCoR":0.8665,"Persian NLU":0.7146808531,"Persian NLG":0.18964968}
|
| 8 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.664,"Persian IFEval":0.8414096916,"Persian MT-Bench":0.8244166667,"PerMMLU":0.6466578563,"PerCoR":0.8143,"Persian NLU":0.6914202844,"Persian NLG":0.1659339021}
|
| 9 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6556,"Persian IFEval":0.8340675477,"Persian MT-Bench":0.8418333333,"PerMMLU":0.6128538638,"PerCoR":0.7712,"Persian NLU":0.6833497104,"Persian NLG":0.1901206806}
|
| 10 |
-
{"Model Name":"deepseek-reasoner","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","Average":0.6549,"Persian IFEval":0.8370044053,"Persian MT-Bench":0.86175,"PerMMLU":0.6810513107,"PerCoR":0.825165033,"Persian NLU":0.6361186163,"Persian NLG":0.0880621978}
|
| 11 |
-
{"Model Name":"deepseek-chat","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","Average":0.6458,"Persian IFEval":0.8311306902,"Persian MT-Bench":0.8600833333,"PerMMLU":0.5908047576,"PerCoR":0.8241,"Persian NLU":0.6752949557,"Persian NLG":0.0934094344}
|
| 12 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","Average":0.6247,"Persian IFEval":0.8296622614,"Persian MT-Bench":0.796,"PerMMLU":0.5633303193,"PerCoR":0.7628,"Persian NLU":0.6898261633,"Persian NLG":0.1067134448}
|
| 13 |
-
{"Model Name":"gpt-4o-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6246,"Persian IFEval":0.8017621145,"Persian MT-Bench":0.7891666667,"PerMMLU":0.56986854,"PerCoR":0.7598,"Persian NLU":0.6459120734,"Persian NLG":0.1810678527}
|
| 14 |
-
{"Model Name":"Qwen3-32B","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","Average":0.6224,"Persian IFEval":0.803030303,"Persian MT-Bench":0.7632996633,"PerMMLU":0.5635086255,"PerCoR":0.7654,"Persian NLU":0.6714091535,"Persian NLG":0.1679338638}
|
| 15 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","Average":0.613,"Persian IFEval":0.7125925926,"Persian MT-Bench":0.7172558923,"PerMMLU":0.5714086374,"PerCoR":0.7956,"Persian NLU":0.6800109206,"Persian NLG":0.2010896964}
|
| 16 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","Average":0.6098,"Persian IFEval":0.8438880707,"Persian MT-Bench":0.8219166667,"PerMMLU":0.5980651448,"PerCoR":0.798859772,"Persian NLU":0.4824528512,"Persian NLG":0.1137933652}
|
| 17 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","Average":0.6008,"Persian IFEval":0.8149779736,"Persian MT-Bench":0.75125,"PerMMLU":0.5105376643,"PerCoR":0.7094,"Persian NLU":0.699116864,"Persian NLG":0.1196804312}
|
| 18 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","Average":0.5939,"Persian IFEval":0.8325508607,"Persian MT-Bench":0.7431271478,"PerMMLU":0.5097725534,"PerCoR":0.688,"Persian NLU":0.6255818412,"Persian NLG":0.164118288}
|
| 19 |
-
{"Model Name":"Qwen3-14B","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","Average":0.5912,"Persian IFEval":0.8105726872,"Persian MT-Bench":0.7204545455,"PerMMLU":0.5139458858,"PerCoR":0.6958,"Persian NLU":0.6460328733,"Persian NLG":0.16056333}
|
| 20 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","Average":0.5705,"Persian IFEval":0.7007407407,"Persian MT-Bench":0.688,"PerMMLU":0.4800723378,"PerCoR":0.7364,"Persian NLU":0.6297634971,"Persian NLG":0.1880477876}
|
| 21 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","Average":0.5576,"Persian IFEval":0.7526555387,"Persian MT-Bench":0.7290833333,"PerMMLU":0.4763231198,"PerCoR":0.6894,"Persian NLU":0.5661558794,"Persian NLG":0.1319091735}
|
| 22 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.5546,"Persian IFEval":0.825256975,"Persian MT-Bench":0.7585,"PerMMLU":0.5440356745,"PerCoR":0.7160432086,"Persian NLU":0.3749414991,"Persian NLG":0.1089333827}
|
| 23 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.5524,"Persian IFEval":0.7577092511,"Persian MT-Bench":0.7363333333,"PerMMLU":0.4784030048,"PerCoR":0.5494,"Persian NLU":0.6262096694,"Persian NLG":0.1665903777}
|
| 24 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","Average":0.541,"Persian IFEval":0.7503924647,"Persian MT-Bench":0.6863924051,"PerMMLU":0.4682478959,"PerCoR":0.5915,"Persian NLU":0.6552152029,"Persian NLG":0.0940241349}
|
| 25 |
-
{"Model Name":"Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","Average":0.5252,"Persian IFEval":0.7474302496,"Persian MT-Bench":0.6607526882,"PerMMLU":0.4467552341,"PerCoR":0.5437,"Persian NLU":0.5968415875,"Persian NLG":0.1557270864}
|
| 26 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","Average":0.4996,"Persian IFEval":0.7444933921,"Persian MT-Bench":0.66825,"PerMMLU":0.3825554705,"PerCoR":0.4832,"Persian NLU":0.6241793507,"Persian NLG":0.0949943578}
|
| 27 |
-
{"Model Name":"aya-expanse-32b","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","Average":0.4945,"Persian IFEval":0.6989720999,"Persian MT-Bench":0.7085833333,"PerMMLU":0.4140641302,"PerCoR":0.6327,"Persian NLU":0.3928685253,"Persian NLG":0.1196400535}
|
| 28 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","Average":0.4914,"Persian IFEval":0.6801242236,"Persian MT-Bench":0.6741666667,"PerMMLU":0.4979475405,"PerCoR":0.5272636318,"Persian NLU":0.456845738,"Persian NLG":0.112015688}
|
| 29 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","Average":0.4813,"Persian IFEval":0.5790251108,"Persian MT-Bench":0.6090833333,"PerMMLU":0.4046741323,"PerCoR":0.6,"Persian NLU":0.531045981,"Persian NLG":0.1641995602}
|
| 30 |
-
{"Model Name":"Qwen3-4B","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","Average":0.4791,"Persian IFEval":0.7577092511,"Persian MT-Bench":0.5599462366,"PerMMLU":0.4025179106,"PerCoR":0.5033,"Persian NLU":0.5121418762,"Persian NLG":0.1389297212}
|
| 31 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","Average":0.3252,"Persian IFEval":0.5447870778,"Persian MT-Bench":0.4333333333,"PerMMLU":0.2830214927,"PerCoR":0.2599,"Persian NLU":0.3619547874,"Persian NLG":0.0682994522}
|
| 32 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","Average":0.3039,"Persian IFEval":0.4405286344,"Persian MT-Bench":0.3398268398,"PerMMLU":0.2553383877,"PerCoR":0.3015,"Persian NLU":0.3916645306,"Persian NLG":0.0944140383}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","Average":0.2815,"Persian IFEval":0.5330396476,"Persian MT-Bench":0.3756410256,"PerMMLU":0.278430827,"PerCoR":0.2521,"Persian NLU":0.1368924446,"Persian NLG":0.1129755187}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","Average":0.205,"Persian IFEval":0.3656387665,"Persian MT-Bench":0.2952160494,"PerMMLU":0.1987201781,"PerCoR":0.2412,"Persian NLU":0.046805056,"Persian NLG":0.0823387318}
|
| 35 |
-
{"Model Name":"o4-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":null,"Persian IFEval":null,"Persian MT-Bench":null,"PerMMLU":null,"PerCoR":0.8551,"Persian NLU":null,"Persian NLG":null}
|
|
|
|
| 1 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.7468,"Persian IFEval":0.926035503,"Persian MT-Bench":0.91375,"PerMMLU":0.8217442751,"PerCoR":0.9218,"Persian NLU":0.7207167537,"Persian NLG":0.1764906292}
|
| 2 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.7332,"Persian IFEval":0.8911764706,"Persian MT-Bench":0.91075,"PerMMLU":0.8401114206,"PerCoR":0.9427585507,"Persian NLU":0.6992555201,"Persian NLG":0.1151518212}
|
| 3 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.7127,"Persian IFEval":0.8810572687,"Persian MT-Bench":0.8695,"PerMMLU":0.7214996174,"PerCoR":0.9117647059,"Persian NLU":0.7143086066,"Persian NLG":0.1779340777}
|
| 4 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6992,"Persian IFEval":0.8634361233,"Persian MT-Bench":0.87325,"PerMMLU":0.7040411769,"PerCoR":0.8839,"Persian NLU":0.6758278127,"Persian NLG":0.194675133}
|
| 5 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6986,"Persian IFEval":0.8796992481,"Persian MT-Bench":0.8812289562,"PerMMLU":0.7274624374,"PerCoR":0.8717,"Persian NLU":0.6944128198,"Persian NLG":0.1368740087}
|
| 6 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6886,"Persian IFEval":0.8497790869,"Persian MT-Bench":0.838973064,"PerMMLU":0.6957640676,"PerCoR":0.8637863786,"Persian NLU":0.7050532433,"Persian NLG":0.178231145}
|
| 7 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6877,"Persian IFEval":0.8296622614,"Persian MT-Bench":0.8371666667,"PerMMLU":0.6884607359,"PerCoR":0.8665,"Persian NLU":0.7146808531,"Persian NLG":0.18964968}
|
| 8 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.664,"Persian IFEval":0.8414096916,"Persian MT-Bench":0.8244166667,"PerMMLU":0.6466578563,"PerCoR":0.8143,"Persian NLU":0.6914202844,"Persian NLG":0.1659339021}
|
| 9 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6556,"Persian IFEval":0.8340675477,"Persian MT-Bench":0.8418333333,"PerMMLU":0.6128538638,"PerCoR":0.7712,"Persian NLU":0.6833497104,"Persian NLG":0.1901206806}
|
| 10 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","Average":0.6549,"Persian IFEval":0.8370044053,"Persian MT-Bench":0.86175,"PerMMLU":0.6810513107,"PerCoR":0.825165033,"Persian NLU":0.6361186163,"Persian NLG":0.0880621978}
|
| 11 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","Average":0.6458,"Persian IFEval":0.8311306902,"Persian MT-Bench":0.8600833333,"PerMMLU":0.5908047576,"PerCoR":0.8241,"Persian NLU":0.6752949557,"Persian NLG":0.0934094344}
|
| 12 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","Average":0.6247,"Persian IFEval":0.8296622614,"Persian MT-Bench":0.796,"PerMMLU":0.5633303193,"PerCoR":0.7628,"Persian NLU":0.6898261633,"Persian NLG":0.1067134448}
|
| 13 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.6246,"Persian IFEval":0.8017621145,"Persian MT-Bench":0.7891666667,"PerMMLU":0.56986854,"PerCoR":0.7598,"Persian NLU":0.6459120734,"Persian NLG":0.1810678527}
|
| 14 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","Average":0.6224,"Persian IFEval":0.803030303,"Persian MT-Bench":0.7632996633,"PerMMLU":0.5635086255,"PerCoR":0.7654,"Persian NLU":0.6714091535,"Persian NLG":0.1679338638}
|
| 15 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","Average":0.613,"Persian IFEval":0.7125925926,"Persian MT-Bench":0.7172558923,"PerMMLU":0.5714086374,"PerCoR":0.7956,"Persian NLU":0.6800109206,"Persian NLG":0.2010896964}
|
| 16 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","Average":0.6098,"Persian IFEval":0.8438880707,"Persian MT-Bench":0.8219166667,"PerMMLU":0.5980651448,"PerCoR":0.798859772,"Persian NLU":0.4824528512,"Persian NLG":0.1137933652}
|
| 17 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","Average":0.6008,"Persian IFEval":0.8149779736,"Persian MT-Bench":0.75125,"PerMMLU":0.5105376643,"PerCoR":0.7094,"Persian NLU":0.699116864,"Persian NLG":0.1196804312}
|
| 18 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","Average":0.5939,"Persian IFEval":0.8325508607,"Persian MT-Bench":0.7431271478,"PerMMLU":0.5097725534,"PerCoR":0.688,"Persian NLU":0.6255818412,"Persian NLG":0.164118288}
|
| 19 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","Average":0.5912,"Persian IFEval":0.8105726872,"Persian MT-Bench":0.7204545455,"PerMMLU":0.5139458858,"PerCoR":0.6958,"Persian NLU":0.6460328733,"Persian NLG":0.16056333}
|
| 20 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","Average":0.5705,"Persian IFEval":0.7007407407,"Persian MT-Bench":0.688,"PerMMLU":0.4800723378,"PerCoR":0.7364,"Persian NLU":0.6297634971,"Persian NLG":0.1880477876}
|
| 21 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","Average":0.5576,"Persian IFEval":0.7526555387,"Persian MT-Bench":0.7290833333,"PerMMLU":0.4763231198,"PerCoR":0.6894,"Persian NLU":0.5661558794,"Persian NLG":0.1319091735}
|
| 22 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.5546,"Persian IFEval":0.825256975,"Persian MT-Bench":0.7585,"PerMMLU":0.5440356745,"PerCoR":0.7160432086,"Persian NLU":0.3749414991,"Persian NLG":0.1089333827}
|
| 23 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":0.5524,"Persian IFEval":0.7577092511,"Persian MT-Bench":0.7363333333,"PerMMLU":0.4784030048,"PerCoR":0.5494,"Persian NLU":0.6262096694,"Persian NLG":0.1665903777}
|
| 24 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","Average":0.541,"Persian IFEval":0.7503924647,"Persian MT-Bench":0.6863924051,"PerMMLU":0.4682478959,"PerCoR":0.5915,"Persian NLU":0.6552152029,"Persian NLG":0.0940241349}
|
| 25 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","Average":0.5252,"Persian IFEval":0.7474302496,"Persian MT-Bench":0.6607526882,"PerMMLU":0.4467552341,"PerCoR":0.5437,"Persian NLU":0.5968415875,"Persian NLG":0.1557270864}
|
| 26 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","Average":0.4996,"Persian IFEval":0.7444933921,"Persian MT-Bench":0.66825,"PerMMLU":0.3825554705,"PerCoR":0.4832,"Persian NLU":0.6241793507,"Persian NLG":0.0949943578}
|
| 27 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","Average":0.4945,"Persian IFEval":0.6989720999,"Persian MT-Bench":0.7085833333,"PerMMLU":0.4140641302,"PerCoR":0.6327,"Persian NLU":0.3928685253,"Persian NLG":0.1196400535}
|
| 28 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","Average":0.4914,"Persian IFEval":0.6801242236,"Persian MT-Bench":0.6741666667,"PerMMLU":0.4979475405,"PerCoR":0.5272636318,"Persian NLU":0.456845738,"Persian NLG":0.112015688}
|
| 29 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","Average":0.4813,"Persian IFEval":0.5790251108,"Persian MT-Bench":0.6090833333,"PerMMLU":0.4046741323,"PerCoR":0.6,"Persian NLU":0.531045981,"Persian NLG":0.1641995602}
|
| 30 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","Average":0.4791,"Persian IFEval":0.7577092511,"Persian MT-Bench":0.5599462366,"PerMMLU":0.4025179106,"PerCoR":0.5033,"Persian NLU":0.5121418762,"Persian NLG":0.1389297212}
|
| 31 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","Average":0.3252,"Persian IFEval":0.5447870778,"Persian MT-Bench":0.4333333333,"PerMMLU":0.2830214927,"PerCoR":0.2599,"Persian NLU":0.3619547874,"Persian NLG":0.0682994522}
|
| 32 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","Average":0.3039,"Persian IFEval":0.4405286344,"Persian MT-Bench":0.3398268398,"PerMMLU":0.2553383877,"PerCoR":0.3015,"Persian NLU":0.3916645306,"Persian NLG":0.0944140383}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","Average":0.2815,"Persian IFEval":0.5330396476,"Persian MT-Bench":0.3756410256,"PerMMLU":0.278430827,"PerCoR":0.2521,"Persian NLU":0.1368924446,"Persian NLG":0.1129755187}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","Average":0.205,"Persian IFEval":0.3656387665,"Persian MT-Bench":0.2952160494,"PerMMLU":0.1987201781,"PerCoR":0.2412,"Persian NLU":0.046805056,"Persian NLG":0.0823387318}
|
| 35 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","Average":null,"Persian IFEval":null,"Persian MT-Bench":null,"PerMMLU":null,"PerCoR":0.8551,"Persian NLU":null,"Persian NLG":null}
|
leaderboard/boards_data/extractive-qa_PQuAD.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.8957345972,"extractive-qa_PQuAD_f1":0.5899280585,"nlu_score":0.7143086066}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":29.8578199052,"extractive-qa_PQuAD_f1":0.6483891649,"nlu_score":0.6241793507}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":51.6587677725,"extractive-qa_PQuAD_f1":0.7997294818,"nlu_score":0.6297634971}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":0.6552152029}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":16.2085308057,"extractive-qa_PQuAD_f1":0.5540542726,"nlu_score":0.6758278127}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":37.4407582938,"extractive-qa_PQuAD_f1":0.7121215175,"nlu_score":0.699116864}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":24.9289099526,"extractive-qa_PQuAD_f1":0.5952537387,"nlu_score":0.6898261633}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":11.9431279621,"extractive-qa_PQuAD_f1":0.5054306037,"nlu_score":0.6460328733}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":12.0379146919,"extractive-qa_PQuAD_f1":0.5152644082,"nlu_score":0.6714091535}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":1.5165876777,"extractive-qa_PQuAD_f1":0.3221621809,"nlu_score":0.3749414991}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":13.0805687204,"extractive-qa_PQuAD_f1":0.5111951184,"nlu_score":0.5661558794}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":0.456845738}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":7.0142180095,"extractive-qa_PQuAD_f1":0.4986764425,"nlu_score":0.6752949557}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":20.4739336493,"extractive-qa_PQuAD_f1":0.5660677645,"nlu_score":0.5121418762}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":15.6398104265,"extractive-qa_PQuAD_f1":0.4797901431,"nlu_score":0.3619547874}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.8530805687,"extractive-qa_PQuAD_f1":0.3570972648,"nlu_score":0.3928685253}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":38.2938388626,"extractive-qa_PQuAD_f1":0.7091014157,"nlu_score":0.6800109206}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":29.0995260664,"extractive-qa_PQuAD_f1":0.6500014945,"nlu_score":0.6833497104}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":45.5924170616,"extractive-qa_PQuAD_f1":0.7918102773,"nlu_score":0.7207167537}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":7.2037914692,"extractive-qa_PQuAD_f1":0.4722142546,"nlu_score":0.6459120734}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":5.8767772512,"extractive-qa_PQuAD_f1":0.4459269248,"nlu_score":0.4824528512}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":37.4407582938,"extractive-qa_PQuAD_f1":0.6861140935,"nlu_score":0.7050532433}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":45.1184834123,"extractive-qa_PQuAD_f1":0.7795163265,"nlu_score":0.6944128198}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":33.2701421801,"extractive-qa_PQuAD_f1":0.6885320288,"nlu_score":0.6914202844}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":41.990521327,"extractive-qa_PQuAD_f1":0.7401025641,"nlu_score":0.531045981}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.5165876777,"extractive-qa_PQuAD_f1":0.6052090568,"nlu_score":0.6262096694}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":1.8957345972,"extractive-qa_PQuAD_f1":0.4954484984,"nlu_score":0.5968415875}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.4739336493,"extractive-qa_PQuAD_f1":0.3440209421,"nlu_score":0.3916645306}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":17.5355450237,"extractive-qa_PQuAD_f1":0.5641459437,"nlu_score":0.7146808531}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":36.7772511848,"extractive-qa_PQuAD_f1":0.7059801524,"nlu_score":0.6361186163}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":1.4218009479,"extractive-qa_PQuAD_f1":0.6109462131,"nlu_score":0.6255818412}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":2.3696682464,"extractive-qa_PQuAD_f1":0.4003473594,"nlu_score":0.1368924446}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.663507109,"extractive-qa_PQuAD_f1":0.3378125221,"nlu_score":0.046805056}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":49.5734597156,"extractive-qa_PQuAD_f1":0.7803597788,"nlu_score":0.6992555201}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.8957345972,"extractive-qa_PQuAD_f1":0.5899280585,"nlu_score":0.7143086066}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":29.8578199052,"extractive-qa_PQuAD_f1":0.6483891649,"nlu_score":0.6241793507}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":51.6587677725,"extractive-qa_PQuAD_f1":0.7997294818,"nlu_score":0.6297634971}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":0.6552152029}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":16.2085308057,"extractive-qa_PQuAD_f1":0.5540542726,"nlu_score":0.6758278127}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":37.4407582938,"extractive-qa_PQuAD_f1":0.7121215175,"nlu_score":0.699116864}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":24.9289099526,"extractive-qa_PQuAD_f1":0.5952537387,"nlu_score":0.6898261633}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":11.9431279621,"extractive-qa_PQuAD_f1":0.5054306037,"nlu_score":0.6460328733}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":12.0379146919,"extractive-qa_PQuAD_f1":0.5152644082,"nlu_score":0.6714091535}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":1.5165876777,"extractive-qa_PQuAD_f1":0.3221621809,"nlu_score":0.3749414991}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":13.0805687204,"extractive-qa_PQuAD_f1":0.5111951184,"nlu_score":0.5661558794}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":null,"extractive-qa_PQuAD_f1":null,"nlu_score":0.456845738}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":7.0142180095,"extractive-qa_PQuAD_f1":0.4986764425,"nlu_score":0.6752949557}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":20.4739336493,"extractive-qa_PQuAD_f1":0.5660677645,"nlu_score":0.5121418762}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":15.6398104265,"extractive-qa_PQuAD_f1":0.4797901431,"nlu_score":0.3619547874}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.8530805687,"extractive-qa_PQuAD_f1":0.3570972648,"nlu_score":0.3928685253}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":38.2938388626,"extractive-qa_PQuAD_f1":0.7091014157,"nlu_score":0.6800109206}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":29.0995260664,"extractive-qa_PQuAD_f1":0.6500014945,"nlu_score":0.6833497104}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":45.5924170616,"extractive-qa_PQuAD_f1":0.7918102773,"nlu_score":0.7207167537}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":7.2037914692,"extractive-qa_PQuAD_f1":0.4722142546,"nlu_score":0.6459120734}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":5.8767772512,"extractive-qa_PQuAD_f1":0.4459269248,"nlu_score":0.4824528512}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":37.4407582938,"extractive-qa_PQuAD_f1":0.6861140935,"nlu_score":0.7050532433}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":45.1184834123,"extractive-qa_PQuAD_f1":0.7795163265,"nlu_score":0.6944128198}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":33.2701421801,"extractive-qa_PQuAD_f1":0.6885320288,"nlu_score":0.6914202844}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":41.990521327,"extractive-qa_PQuAD_f1":0.7401025641,"nlu_score":0.531045981}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":21.5165876777,"extractive-qa_PQuAD_f1":0.6052090568,"nlu_score":0.6262096694}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":1.8957345972,"extractive-qa_PQuAD_f1":0.4954484984,"nlu_score":0.5968415875}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.4739336493,"extractive-qa_PQuAD_f1":0.3440209421,"nlu_score":0.3916645306}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":17.5355450237,"extractive-qa_PQuAD_f1":0.5641459437,"nlu_score":0.7146808531}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":36.7772511848,"extractive-qa_PQuAD_f1":0.7059801524,"nlu_score":0.6361186163}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":1.4218009479,"extractive-qa_PQuAD_f1":0.6109462131,"nlu_score":0.6255818412}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":2.3696682464,"extractive-qa_PQuAD_f1":0.4003473594,"nlu_score":0.1368924446}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","extractive-qa_PQuAD_exact_match":0.663507109,"extractive-qa_PQuAD_f1":0.3378125221,"nlu_score":0.046805056}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","extractive-qa_PQuAD_exact_match":49.5734597156,"extractive-qa_PQuAD_f1":0.7803597788,"nlu_score":0.6992555201}
|
leaderboard/boards_data/ifeval.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"o3","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8967032967,"strict_instruction_accuracy":0.926035503,"loose_prompt_accuracy":0.9076923077,"loose_instruction_accuracy":0.9378698225,"strict_combination_category":0.859375,"strict_detectable_content_category":0.9565217391,"strict_detectable_format_category":0.8979591837,"strict_keywords_category":0.965034965,"strict_language_category":1.0,"strict_length_constraints_category":0.8852459016,"strict_punctuation_category":0.95,"strict_startend_category":0.9682539683,"loose_combination_category":0.875,"loose_detectable_content_category":0.9565217391,"loose_detectable_format_category":0.8979591837,"loose_keywords_category":0.979020979,"loose_language_category":1.0,"loose_length_constraints_category":0.9016393443,"loose_punctuation_category":1.0,"loose_startend_category":0.9682539683}
|
| 2 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8464912281,"strict_instruction_accuracy":0.8911764706,"loose_prompt_accuracy":0.8815789474,"loose_instruction_accuracy":0.9191176471,"strict_combination_category":0.8461538462,"strict_detectable_content_category":0.9777777778,"strict_detectable_format_category":0.8707482993,"strict_keywords_category":0.875862069,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.8211382114,"strict_punctuation_category":1.0,"strict_startend_category":0.9523809524,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9777777778,"loose_detectable_format_category":0.8707482993,"loose_keywords_category":0.9172413793,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.9268292683,"loose_punctuation_category":1.0,"loose_startend_category":0.9523809524}
|
| 3 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8336980306,"strict_instruction_accuracy":0.8810572687,"loose_prompt_accuracy":0.8774617068,"loose_instruction_accuracy":0.9148311307,"strict_combination_category":0.8307692308,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.8965517241,"strict_language_category":1.0,"strict_length_constraints_category":0.756097561,"strict_punctuation_category":0.9508196721,"strict_startend_category":0.9523809524,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.9448275862,"loose_language_category":1.0,"loose_length_constraints_category":0.8536585366,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.9682539683}
|
| 4 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.836689038,"strict_instruction_accuracy":0.8796992481,"loose_prompt_accuracy":0.8680089485,"loose_instruction_accuracy":0.9067669173,"strict_combination_category":0.8,"strict_detectable_content_category":0.9772727273,"strict_detectable_format_category":0.8689655172,"strict_keywords_category":0.8623188406,"strict_language_category":0.935483871,"strict_length_constraints_category":0.8166666667,"strict_punctuation_category":0.9666666667,"strict_startend_category":0.9677419355,"loose_combination_category":0.8307692308,"loose_detectable_content_category":0.9772727273,"loose_detectable_format_category":0.875862069,"loose_keywords_category":0.9130434783,"loose_language_category":0.935483871,"loose_length_constraints_category":0.8666666667,"loose_punctuation_category":1.0,"loose_startend_category":0.9677419355}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8140043764,"strict_instruction_accuracy":0.8634361233,"loose_prompt_accuracy":0.8512035011,"loose_instruction_accuracy":0.8942731278,"strict_combination_category":0.7846153846,"strict_detectable_content_category":0.9347826087,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.8482758621,"strict_language_category":1.0,"strict_length_constraints_category":0.7804878049,"strict_punctuation_category":0.868852459,"strict_startend_category":0.9523809524,"loose_combination_category":0.8153846154,"loose_detectable_content_category":0.9347826087,"loose_detectable_format_category":0.9047619048,"loose_keywords_category":0.9103448276,"loose_language_category":1.0,"loose_length_constraints_category":0.837398374,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.9523809524}
|
| 6 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.7916666667,"strict_instruction_accuracy":0.8497790869,"loose_prompt_accuracy":0.8245614035,"loose_instruction_accuracy":0.8777614138,"strict_combination_category":0.6875,"strict_detectable_content_category":0.9130434783,"strict_detectable_format_category":0.9047619048,"strict_keywords_category":0.7916666667,"strict_language_category":1.0,"strict_length_constraints_category":0.7642276423,"strict_punctuation_category":0.9672131148,"strict_startend_category":0.9523809524,"loose_combination_category":0.703125,"loose_detectable_content_category":0.9130434783,"loose_detectable_format_category":0.9047619048,"loose_keywords_category":0.8541666667,"loose_language_category":1.0,"loose_length_constraints_category":0.837398374,"loose_punctuation_category":0.9672131148,"loose_startend_category":0.9523809524}
|
| 7 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7802197802,"strict_instruction_accuracy":0.8438880707,"loose_prompt_accuracy":0.832967033,"loose_instruction_accuracy":0.88365243,"strict_combination_category":0.8461538462,"strict_detectable_content_category":0.9347826087,"strict_detectable_format_category":0.8979591837,"strict_keywords_category":0.7793103448,"strict_language_category":1.0,"strict_length_constraints_category":0.7685950413,"strict_punctuation_category":0.8032786885,"strict_startend_category":0.9047619048,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9347826087,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.8482758621,"loose_language_category":1.0,"loose_length_constraints_category":0.8429752066,"loose_punctuation_category":0.868852459,"loose_startend_category":0.9365079365}
|
| 8 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.7921225383,"strict_instruction_accuracy":0.8414096916,"loose_prompt_accuracy":0.8161925602,"loose_instruction_accuracy":0.8649045521,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.9130434783,"strict_detectable_format_category":0.8367346939,"strict_keywords_category":0.8137931034,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7642276423,"strict_punctuation_category":0.9672131148,"strict_startend_category":0.9523809524,"loose_combination_category":0.7384615385,"loose_detectable_content_category":0.9130434783,"loose_detectable_format_category":0.8367346939,"loose_keywords_category":0.875862069,"loose_language_category":1.0,"loose_length_constraints_category":0.8048780488,"loose_punctuation_category":0.9672131148,"loose_startend_category":0.9523809524}
|
| 9 |
-
{"Model Name":"deepseek-reasoner","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7702407002,"strict_instruction_accuracy":0.8370044053,"loose_prompt_accuracy":0.8140043764,"loose_instruction_accuracy":0.8707782673,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8503401361,"strict_keywords_category":0.7862068966,"strict_language_category":1.0,"strict_length_constraints_category":0.756097561,"strict_punctuation_category":0.9836065574,"strict_startend_category":0.8571428571,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.8571428571,"loose_keywords_category":0.8551724138,"loose_language_category":1.0,"loose_length_constraints_category":0.8048780488,"loose_punctuation_category":1.0,"loose_startend_category":0.9206349206}
|
| 10 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.772428884,"strict_instruction_accuracy":0.8340675477,"loose_prompt_accuracy":0.7986870897,"loose_instruction_accuracy":0.8575624082,"strict_combination_category":0.6923076923,"strict_detectable_content_category":0.847826087,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.8344827586,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7154471545,"strict_punctuation_category":0.9180327869,"strict_startend_category":0.9523809524,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.847826087,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.8827586207,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.756097561,"loose_punctuation_category":0.9344262295,"loose_startend_category":0.9523809524}
|
| 11 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","strict_prompt_accuracy":0.7662037037,"strict_instruction_accuracy":0.8325508607,"loose_prompt_accuracy":0.8078703704,"loose_instruction_accuracy":0.8638497653,"strict_combination_category":0.6349206349,"strict_detectable_content_category":0.8837209302,"strict_detectable_format_category":0.9136690647,"strict_keywords_category":0.7954545455,"strict_language_category":0.9655172414,"strict_length_constraints_category":0.7192982456,"strict_punctuation_category":0.9655172414,"strict_startend_category":0.9180327869,"loose_combination_category":0.7936507937,"loose_detectable_content_category":0.8837209302,"loose_detectable_format_category":0.928057554,"loose_keywords_category":0.8257575758,"loose_language_category":0.9655172414,"loose_length_constraints_category":0.7543859649,"loose_punctuation_category":0.9655172414,"loose_startend_category":0.9180327869}
|
| 12 |
-
{"Model Name":"deepseek-chat","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7702407002,"strict_instruction_accuracy":0.8311306902,"loose_prompt_accuracy":0.8205689278,"loose_instruction_accuracy":0.8693098385,"strict_combination_category":0.7846153846,"strict_detectable_content_category":1.0,"strict_detectable_format_category":0.8503401361,"strict_keywords_category":0.8,"strict_language_category":1.0,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.9672131148,"strict_startend_category":0.8888888889,"loose_combination_category":0.8153846154,"loose_detectable_content_category":1.0,"loose_detectable_format_category":0.8639455782,"loose_keywords_category":0.8551724138,"loose_language_category":1.0,"loose_length_constraints_category":0.7479674797,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.9365079365}
|
| 13 |
-
{"Model Name":"gpt-4o","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.772428884,"strict_instruction_accuracy":0.8296622614,"loose_prompt_accuracy":0.8140043764,"loose_instruction_accuracy":0.8649045521,"strict_combination_category":0.8,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.7793103448,"strict_language_category":1.0,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.8360655738,"strict_startend_category":0.9206349206,"loose_combination_category":0.8153846154,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.8344827586,"loose_language_category":1.0,"loose_length_constraints_category":0.7479674797,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.9365079365}
|
| 14 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","strict_prompt_accuracy":0.7636761488,"strict_instruction_accuracy":0.8296622614,"loose_prompt_accuracy":0.8052516411,"loose_instruction_accuracy":0.8634361233,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8095238095,"strict_keywords_category":0.7931034483,"strict_language_category":1.0,"strict_length_constraints_category":0.7886178862,"strict_punctuation_category":0.9836065574,"strict_startend_category":0.873015873,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8095238095,"loose_keywords_category":0.875862069,"loose_language_category":1.0,"loose_length_constraints_category":0.8780487805,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.873015873}
|
| 15 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.761487965,"strict_instruction_accuracy":0.825256975,"loose_prompt_accuracy":0.7877461707,"loose_instruction_accuracy":0.8516886931,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.9565217391,"strict_detectable_format_category":0.8571428571,"strict_keywords_category":0.8413793103,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6097560976,"strict_punctuation_category":1.0,"strict_startend_category":0.8888888889,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.9565217391,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.875862069,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.6666666667,"loose_punctuation_category":1.0,"loose_startend_category":0.9206349206}
|
| 16 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","strict_prompt_accuracy":0.7396061269,"strict_instruction_accuracy":0.8149779736,"loose_prompt_accuracy":0.7877461707,"loose_instruction_accuracy":0.8516886931,"strict_combination_category":0.6923076923,"strict_detectable_content_category":0.9130434783,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.7586206897,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7479674797,"strict_punctuation_category":0.8032786885,"strict_startend_category":0.8888888889,"loose_combination_category":0.6923076923,"loose_detectable_content_category":0.9130434783,"loose_detectable_format_category":0.8911564626,"loose_keywords_category":0.8344827586,"loose_language_category":1.0,"loose_length_constraints_category":0.8292682927,"loose_punctuation_category":0.8360655738,"loose_startend_category":0.9047619048}
|
| 17 |
-
{"Model Name":"Qwen3-14B","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","strict_prompt_accuracy":0.7483588621,"strict_instruction_accuracy":0.8105726872,"loose_prompt_accuracy":0.7899343545,"loose_instruction_accuracy":0.845814978,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8979591837,"strict_keywords_category":0.7724137931,"strict_language_category":0.935483871,"strict_length_constraints_category":0.7479674797,"strict_punctuation_category":0.7049180328,"strict_startend_category":0.8888888889,"loose_combination_category":0.7692307692,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.8413793103,"loose_language_category":0.935483871,"loose_length_constraints_category":0.7642276423,"loose_punctuation_category":0.7540983607,"loose_startend_category":0.9523809524}
|
| 18 |
-
{"Model Name":"Qwen3-32B","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","strict_prompt_accuracy":0.7342342342,"strict_instruction_accuracy":0.803030303,"loose_prompt_accuracy":0.786036036,"loose_instruction_accuracy":0.846969697,"strict_combination_category":0.625,"strict_detectable_content_category":0.9111111111,"strict_detectable_format_category":0.8951048951,"strict_keywords_category":0.7785714286,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6140350877,"strict_punctuation_category":0.9333333333,"strict_startend_category":0.8888888889,"loose_combination_category":0.6875,"loose_detectable_content_category":0.9111111111,"loose_detectable_format_category":0.9090909091,"loose_keywords_category":0.85,"loose_language_category":1.0,"loose_length_constraints_category":0.6666666667,"loose_punctuation_category":0.95,"loose_startend_category":0.9682539683}
|
| 19 |
-
{"Model Name":"gpt-4o-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.7308533917,"strict_instruction_accuracy":0.8017621145,"loose_prompt_accuracy":0.772428884,"loose_instruction_accuracy":0.8355359765,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.843537415,"strict_keywords_category":0.7379310345,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.9016393443,"strict_startend_category":0.9365079365,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.7793103448,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7398373984,"loose_punctuation_category":0.9344262295,"loose_startend_category":0.9365079365}
|
| 20 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.6827133479,"strict_instruction_accuracy":0.7577092511,"loose_prompt_accuracy":0.7199124726,"loose_instruction_accuracy":0.7885462555,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8027210884,"strict_keywords_category":0.7793103448,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.7704918033,"strict_startend_category":0.8095238095,"loose_combination_category":0.6153846154,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8095238095,"loose_keywords_category":0.8344827586,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7154471545,"loose_punctuation_category":0.8360655738,"loose_startend_category":0.8412698413}
|
| 21 |
-
{"Model Name":"Qwen3-4B","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","strict_prompt_accuracy":0.6717724289,"strict_instruction_accuracy":0.7577092511,"loose_prompt_accuracy":0.7264770241,"loose_instruction_accuracy":0.798825257,"strict_combination_category":0.5384615385,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.9047619048,"strict_keywords_category":0.7172413793,"strict_language_category":0.8387096774,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.8196721311,"strict_startend_category":0.7142857143,"loose_combination_category":0.5692307692,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.7931034483,"loose_language_category":0.8709677419,"loose_length_constraints_category":0.7235772358,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.746031746}
|
| 22 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6644144144,"strict_instruction_accuracy":0.7526555387,"loose_prompt_accuracy":0.7274774775,"loose_instruction_accuracy":0.7996965099,"strict_combination_category":0.6153846154,"strict_detectable_content_category":0.8444444444,"strict_detectable_format_category":0.8014184397,"strict_keywords_category":0.7642857143,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.641025641,"strict_punctuation_category":0.7333333333,"strict_startend_category":0.8166666667,"loose_combination_category":0.6615384615,"loose_detectable_content_category":0.8444444444,"loose_detectable_format_category":0.8156028369,"loose_keywords_category":0.8071428571,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7606837607,"loose_punctuation_category":0.8,"loose_startend_category":0.85}
|
| 23 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","strict_prompt_accuracy":0.6674418605,"strict_instruction_accuracy":0.7503924647,"loose_prompt_accuracy":0.7139534884,"loose_instruction_accuracy":0.7912087912,"strict_combination_category":0.5396825397,"strict_detectable_content_category":0.7435897436,"strict_detectable_format_category":0.8581560284,"strict_keywords_category":0.7067669173,"strict_language_category":1.0,"strict_length_constraints_category":0.6754385965,"strict_punctuation_category":0.8644067797,"strict_startend_category":0.7192982456,"loose_combination_category":0.5873015873,"loose_detectable_content_category":0.7435897436,"loose_detectable_format_category":0.8723404255,"loose_keywords_category":0.7518796992,"loose_language_category":1.0,"loose_length_constraints_category":0.7719298246,"loose_punctuation_category":0.8813559322,"loose_startend_category":0.7719298246}
|
| 24 |
-
{"Model Name":"Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","strict_prompt_accuracy":0.6542669584,"strict_instruction_accuracy":0.7474302496,"loose_prompt_accuracy":0.7089715536,"loose_instruction_accuracy":0.7900146843,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.8260869565,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.6689655172,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.7213114754,"strict_startend_category":0.8412698413,"loose_combination_category":0.6461538462,"loose_detectable_content_category":0.8260869565,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.7655172414,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6991869919,"loose_punctuation_category":0.7868852459,"loose_startend_category":0.873015873}
|
| 25 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","strict_prompt_accuracy":0.6652078775,"strict_instruction_accuracy":0.7444933921,"loose_prompt_accuracy":0.7177242888,"loose_instruction_accuracy":0.7914831131,"strict_combination_category":0.5076923077,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8299319728,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.7540983607,"strict_startend_category":0.7301587302,"loose_combination_category":0.5538461538,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8367346939,"loose_keywords_category":0.7931034483,"loose_language_category":1.0,"loose_length_constraints_category":0.7804878049,"loose_punctuation_category":0.8032786885,"loose_startend_category":0.7619047619}
|
| 26 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","strict_prompt_accuracy":0.6208425721,"strict_instruction_accuracy":0.7125925926,"loose_prompt_accuracy":0.6518847007,"loose_instruction_accuracy":0.7392592593,"strict_combination_category":0.65625,"strict_detectable_content_category":0.8043478261,"strict_detectable_format_category":0.8333333333,"strict_keywords_category":0.7342657343,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.5365853659,"strict_punctuation_category":0.7213114754,"strict_startend_category":0.5873015873,"loose_combination_category":0.65625,"loose_detectable_content_category":0.8043478261,"loose_detectable_format_category":0.8402777778,"loose_keywords_category":0.7692307692,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.593495935,"loose_punctuation_category":0.7540983607,"loose_startend_category":0.6349206349}
|
| 27 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6114790287,"strict_instruction_accuracy":0.7007407407,"loose_prompt_accuracy":0.6379690949,"loose_instruction_accuracy":0.7274074074,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.8,"strict_detectable_format_category":0.801369863,"strict_keywords_category":0.7872340426,"strict_language_category":0.6774193548,"strict_length_constraints_category":0.6422764228,"strict_punctuation_category":0.6229508197,"strict_startend_category":0.5238095238,"loose_combination_category":0.5846153846,"loose_detectable_content_category":0.8,"loose_detectable_format_category":0.8287671233,"loose_keywords_category":0.8156028369,"loose_language_category":0.6774193548,"loose_length_constraints_category":0.7154471545,"loose_punctuation_category":0.6393442623,"loose_startend_category":0.5238095238}
|
| 28 |
-
{"Model Name":"aya-expanse-32b","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","strict_prompt_accuracy":0.6126914661,"strict_instruction_accuracy":0.6989720999,"loose_prompt_accuracy":0.6805251641,"loose_instruction_accuracy":0.7547723935,"strict_combination_category":0.5230769231,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8571428571,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.593495935,"strict_punctuation_category":0.2950819672,"strict_startend_category":0.873015873,"loose_combination_category":0.6307692308,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8979591837,"loose_keywords_category":0.7379310345,"loose_language_category":1.0,"loose_length_constraints_category":0.7317073171,"loose_punctuation_category":0.3606557377,"loose_startend_category":0.8888888889}
|
| 29 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","strict_prompt_accuracy":0.5898617512,"strict_instruction_accuracy":0.6801242236,"loose_prompt_accuracy":0.6520737327,"loose_instruction_accuracy":0.7313664596,"strict_combination_category":0.606557377,"strict_detectable_content_category":0.9285714286,"strict_detectable_format_category":0.8028169014,"strict_keywords_category":0.7338129496,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.5840707965,"strict_punctuation_category":0.1964285714,"strict_startend_category":0.6833333333,"loose_combination_category":0.6393442623,"loose_detectable_content_category":0.9285714286,"loose_detectable_format_category":0.8169014085,"loose_keywords_category":0.7625899281,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6814159292,"loose_punctuation_category":0.3392857143,"loose_startend_category":0.7833333333}
|
| 30 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","strict_prompt_accuracy":0.4835164835,"strict_instruction_accuracy":0.5790251108,"loose_prompt_accuracy":0.5384615385,"loose_instruction_accuracy":0.6203840473,"strict_combination_category":0.3384615385,"strict_detectable_content_category":0.6444444444,"strict_detectable_format_category":0.7414965986,"strict_keywords_category":0.7062937063,"strict_language_category":0.6451612903,"strict_length_constraints_category":0.6260162602,"strict_punctuation_category":0.1333333333,"strict_startend_category":0.4126984127,"loose_combination_category":0.4,"loose_detectable_content_category":0.6444444444,"loose_detectable_format_category":0.7551020408,"loose_keywords_category":0.7412587413,"loose_language_category":0.6451612903,"loose_length_constraints_category":0.6910569106,"loose_punctuation_category":0.2166666667,"loose_startend_category":0.4761904762}
|
| 31 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","strict_prompt_accuracy":0.4376367615,"strict_instruction_accuracy":0.5447870778,"loose_prompt_accuracy":0.4748358862,"loose_instruction_accuracy":0.5814977974,"strict_combination_category":0.1384615385,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.768707483,"strict_keywords_category":0.5862068966,"strict_language_category":0.8709677419,"strict_length_constraints_category":0.5853658537,"strict_punctuation_category":0.1803278689,"strict_startend_category":0.3015873016,"loose_combination_category":0.1384615385,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.768707483,"loose_keywords_category":0.6413793103,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6829268293,"loose_punctuation_category":0.2295081967,"loose_startend_category":0.3174603175}
|
| 32 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","strict_prompt_accuracy":0.4245076586,"strict_instruction_accuracy":0.5330396476,"loose_prompt_accuracy":0.4792122538,"loose_instruction_accuracy":0.5814977974,"strict_combination_category":0.2461538462,"strict_detectable_content_category":0.5217391304,"strict_detectable_format_category":0.6258503401,"strict_keywords_category":0.6137931034,"strict_language_category":0.5806451613,"strict_length_constraints_category":0.5772357724,"strict_punctuation_category":0.5409836066,"strict_startend_category":0.3174603175,"loose_combination_category":0.3230769231,"loose_detectable_content_category":0.5217391304,"loose_detectable_format_category":0.6462585034,"loose_keywords_category":0.6551724138,"loose_language_category":0.5806451613,"loose_length_constraints_category":0.674796748,"loose_punctuation_category":0.606557377,"loose_startend_category":0.3650793651}
|
| 33 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","strict_prompt_accuracy":0.3369803063,"strict_instruction_accuracy":0.4405286344,"loose_prompt_accuracy":0.398249453,"loose_instruction_accuracy":0.4948604993,"strict_combination_category":0.0307692308,"strict_detectable_content_category":0.5217391304,"strict_detectable_format_category":0.5034013605,"strict_keywords_category":0.6068965517,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.4146341463,"strict_punctuation_category":0.2459016393,"strict_startend_category":0.2857142857,"loose_combination_category":0.0923076923,"loose_detectable_content_category":0.5217391304,"loose_detectable_format_category":0.5170068027,"loose_keywords_category":0.6482758621,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.5447154472,"loose_punctuation_category":0.3278688525,"loose_startend_category":0.3492063492}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","strict_prompt_accuracy":0.2516411379,"strict_instruction_accuracy":0.3656387665,"loose_prompt_accuracy":0.284463895,"loose_instruction_accuracy":0.4111600587,"strict_combination_category":0.0615384615,"strict_detectable_content_category":0.3913043478,"strict_detectable_format_category":0.3945578231,"strict_keywords_category":0.4,"strict_language_category":0.5161290323,"strict_length_constraints_category":0.4959349593,"strict_punctuation_category":0.3606557377,"strict_startend_category":0.1904761905,"loose_combination_category":0.0615384615,"loose_detectable_content_category":0.3913043478,"loose_detectable_format_category":0.4013605442,"loose_keywords_category":0.4551724138,"loose_language_category":0.5161290323,"loose_length_constraints_category":0.5691056911,"loose_punctuation_category":0.4918032787,"loose_startend_category":0.2698412698}
|
| 35 |
-
{"Model Name":"o4-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":null,"strict_instruction_accuracy":null,"loose_prompt_accuracy":null,"loose_instruction_accuracy":null,"strict_combination_category":null,"strict_detectable_content_category":null,"strict_detectable_format_category":null,"strict_keywords_category":null,"strict_language_category":null,"strict_length_constraints_category":null,"strict_punctuation_category":null,"strict_startend_category":null,"loose_combination_category":null,"loose_detectable_content_category":null,"loose_detectable_format_category":null,"loose_keywords_category":null,"loose_language_category":null,"loose_length_constraints_category":null,"loose_punctuation_category":null,"loose_startend_category":null}
|
|
|
|
| 1 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8967032967,"strict_instruction_accuracy":0.926035503,"loose_prompt_accuracy":0.9076923077,"loose_instruction_accuracy":0.9378698225,"strict_combination_category":0.859375,"strict_detectable_content_category":0.9565217391,"strict_detectable_format_category":0.8979591837,"strict_keywords_category":0.965034965,"strict_language_category":1.0,"strict_length_constraints_category":0.8852459016,"strict_punctuation_category":0.95,"strict_startend_category":0.9682539683,"loose_combination_category":0.875,"loose_detectable_content_category":0.9565217391,"loose_detectable_format_category":0.8979591837,"loose_keywords_category":0.979020979,"loose_language_category":1.0,"loose_length_constraints_category":0.9016393443,"loose_punctuation_category":1.0,"loose_startend_category":0.9682539683}
|
| 2 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8464912281,"strict_instruction_accuracy":0.8911764706,"loose_prompt_accuracy":0.8815789474,"loose_instruction_accuracy":0.9191176471,"strict_combination_category":0.8461538462,"strict_detectable_content_category":0.9777777778,"strict_detectable_format_category":0.8707482993,"strict_keywords_category":0.875862069,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.8211382114,"strict_punctuation_category":1.0,"strict_startend_category":0.9523809524,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9777777778,"loose_detectable_format_category":0.8707482993,"loose_keywords_category":0.9172413793,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.9268292683,"loose_punctuation_category":1.0,"loose_startend_category":0.9523809524}
|
| 3 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8336980306,"strict_instruction_accuracy":0.8810572687,"loose_prompt_accuracy":0.8774617068,"loose_instruction_accuracy":0.9148311307,"strict_combination_category":0.8307692308,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.8965517241,"strict_language_category":1.0,"strict_length_constraints_category":0.756097561,"strict_punctuation_category":0.9508196721,"strict_startend_category":0.9523809524,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.9448275862,"loose_language_category":1.0,"loose_length_constraints_category":0.8536585366,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.9682539683}
|
| 4 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.836689038,"strict_instruction_accuracy":0.8796992481,"loose_prompt_accuracy":0.8680089485,"loose_instruction_accuracy":0.9067669173,"strict_combination_category":0.8,"strict_detectable_content_category":0.9772727273,"strict_detectable_format_category":0.8689655172,"strict_keywords_category":0.8623188406,"strict_language_category":0.935483871,"strict_length_constraints_category":0.8166666667,"strict_punctuation_category":0.9666666667,"strict_startend_category":0.9677419355,"loose_combination_category":0.8307692308,"loose_detectable_content_category":0.9772727273,"loose_detectable_format_category":0.875862069,"loose_keywords_category":0.9130434783,"loose_language_category":0.935483871,"loose_length_constraints_category":0.8666666667,"loose_punctuation_category":1.0,"loose_startend_category":0.9677419355}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.8140043764,"strict_instruction_accuracy":0.8634361233,"loose_prompt_accuracy":0.8512035011,"loose_instruction_accuracy":0.8942731278,"strict_combination_category":0.7846153846,"strict_detectable_content_category":0.9347826087,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.8482758621,"strict_language_category":1.0,"strict_length_constraints_category":0.7804878049,"strict_punctuation_category":0.868852459,"strict_startend_category":0.9523809524,"loose_combination_category":0.8153846154,"loose_detectable_content_category":0.9347826087,"loose_detectable_format_category":0.9047619048,"loose_keywords_category":0.9103448276,"loose_language_category":1.0,"loose_length_constraints_category":0.837398374,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.9523809524}
|
| 6 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.7916666667,"strict_instruction_accuracy":0.8497790869,"loose_prompt_accuracy":0.8245614035,"loose_instruction_accuracy":0.8777614138,"strict_combination_category":0.6875,"strict_detectable_content_category":0.9130434783,"strict_detectable_format_category":0.9047619048,"strict_keywords_category":0.7916666667,"strict_language_category":1.0,"strict_length_constraints_category":0.7642276423,"strict_punctuation_category":0.9672131148,"strict_startend_category":0.9523809524,"loose_combination_category":0.703125,"loose_detectable_content_category":0.9130434783,"loose_detectable_format_category":0.9047619048,"loose_keywords_category":0.8541666667,"loose_language_category":1.0,"loose_length_constraints_category":0.837398374,"loose_punctuation_category":0.9672131148,"loose_startend_category":0.9523809524}
|
| 7 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7802197802,"strict_instruction_accuracy":0.8438880707,"loose_prompt_accuracy":0.832967033,"loose_instruction_accuracy":0.88365243,"strict_combination_category":0.8461538462,"strict_detectable_content_category":0.9347826087,"strict_detectable_format_category":0.8979591837,"strict_keywords_category":0.7793103448,"strict_language_category":1.0,"strict_length_constraints_category":0.7685950413,"strict_punctuation_category":0.8032786885,"strict_startend_category":0.9047619048,"loose_combination_category":0.8461538462,"loose_detectable_content_category":0.9347826087,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.8482758621,"loose_language_category":1.0,"loose_length_constraints_category":0.8429752066,"loose_punctuation_category":0.868852459,"loose_startend_category":0.9365079365}
|
| 8 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.7921225383,"strict_instruction_accuracy":0.8414096916,"loose_prompt_accuracy":0.8161925602,"loose_instruction_accuracy":0.8649045521,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.9130434783,"strict_detectable_format_category":0.8367346939,"strict_keywords_category":0.8137931034,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7642276423,"strict_punctuation_category":0.9672131148,"strict_startend_category":0.9523809524,"loose_combination_category":0.7384615385,"loose_detectable_content_category":0.9130434783,"loose_detectable_format_category":0.8367346939,"loose_keywords_category":0.875862069,"loose_language_category":1.0,"loose_length_constraints_category":0.8048780488,"loose_punctuation_category":0.9672131148,"loose_startend_category":0.9523809524}
|
| 9 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7702407002,"strict_instruction_accuracy":0.8370044053,"loose_prompt_accuracy":0.8140043764,"loose_instruction_accuracy":0.8707782673,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8503401361,"strict_keywords_category":0.7862068966,"strict_language_category":1.0,"strict_length_constraints_category":0.756097561,"strict_punctuation_category":0.9836065574,"strict_startend_category":0.8571428571,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.8571428571,"loose_keywords_category":0.8551724138,"loose_language_category":1.0,"loose_length_constraints_category":0.8048780488,"loose_punctuation_category":1.0,"loose_startend_category":0.9206349206}
|
| 10 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.772428884,"strict_instruction_accuracy":0.8340675477,"loose_prompt_accuracy":0.7986870897,"loose_instruction_accuracy":0.8575624082,"strict_combination_category":0.6923076923,"strict_detectable_content_category":0.847826087,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.8344827586,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7154471545,"strict_punctuation_category":0.9180327869,"strict_startend_category":0.9523809524,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.847826087,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.8827586207,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.756097561,"loose_punctuation_category":0.9344262295,"loose_startend_category":0.9523809524}
|
| 11 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","strict_prompt_accuracy":0.7662037037,"strict_instruction_accuracy":0.8325508607,"loose_prompt_accuracy":0.8078703704,"loose_instruction_accuracy":0.8638497653,"strict_combination_category":0.6349206349,"strict_detectable_content_category":0.8837209302,"strict_detectable_format_category":0.9136690647,"strict_keywords_category":0.7954545455,"strict_language_category":0.9655172414,"strict_length_constraints_category":0.7192982456,"strict_punctuation_category":0.9655172414,"strict_startend_category":0.9180327869,"loose_combination_category":0.7936507937,"loose_detectable_content_category":0.8837209302,"loose_detectable_format_category":0.928057554,"loose_keywords_category":0.8257575758,"loose_language_category":0.9655172414,"loose_length_constraints_category":0.7543859649,"loose_punctuation_category":0.9655172414,"loose_startend_category":0.9180327869}
|
| 12 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","strict_prompt_accuracy":0.7702407002,"strict_instruction_accuracy":0.8311306902,"loose_prompt_accuracy":0.8205689278,"loose_instruction_accuracy":0.8693098385,"strict_combination_category":0.7846153846,"strict_detectable_content_category":1.0,"strict_detectable_format_category":0.8503401361,"strict_keywords_category":0.8,"strict_language_category":1.0,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.9672131148,"strict_startend_category":0.8888888889,"loose_combination_category":0.8153846154,"loose_detectable_content_category":1.0,"loose_detectable_format_category":0.8639455782,"loose_keywords_category":0.8551724138,"loose_language_category":1.0,"loose_length_constraints_category":0.7479674797,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.9365079365}
|
| 13 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.772428884,"strict_instruction_accuracy":0.8296622614,"loose_prompt_accuracy":0.8140043764,"loose_instruction_accuracy":0.8649045521,"strict_combination_category":0.8,"strict_detectable_content_category":0.9782608696,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.7793103448,"strict_language_category":1.0,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.8360655738,"strict_startend_category":0.9206349206,"loose_combination_category":0.8153846154,"loose_detectable_content_category":0.9782608696,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.8344827586,"loose_language_category":1.0,"loose_length_constraints_category":0.7479674797,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.9365079365}
|
| 14 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","strict_prompt_accuracy":0.7636761488,"strict_instruction_accuracy":0.8296622614,"loose_prompt_accuracy":0.8052516411,"loose_instruction_accuracy":0.8634361233,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8095238095,"strict_keywords_category":0.7931034483,"strict_language_category":1.0,"strict_length_constraints_category":0.7886178862,"strict_punctuation_category":0.9836065574,"strict_startend_category":0.873015873,"loose_combination_category":0.7230769231,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8095238095,"loose_keywords_category":0.875862069,"loose_language_category":1.0,"loose_length_constraints_category":0.8780487805,"loose_punctuation_category":0.9836065574,"loose_startend_category":0.873015873}
|
| 15 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.761487965,"strict_instruction_accuracy":0.825256975,"loose_prompt_accuracy":0.7877461707,"loose_instruction_accuracy":0.8516886931,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.9565217391,"strict_detectable_format_category":0.8571428571,"strict_keywords_category":0.8413793103,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6097560976,"strict_punctuation_category":1.0,"strict_startend_category":0.8888888889,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.9565217391,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.875862069,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.6666666667,"loose_punctuation_category":1.0,"loose_startend_category":0.9206349206}
|
| 16 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","strict_prompt_accuracy":0.7396061269,"strict_instruction_accuracy":0.8149779736,"loose_prompt_accuracy":0.7877461707,"loose_instruction_accuracy":0.8516886931,"strict_combination_category":0.6923076923,"strict_detectable_content_category":0.9130434783,"strict_detectable_format_category":0.8911564626,"strict_keywords_category":0.7586206897,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.7479674797,"strict_punctuation_category":0.8032786885,"strict_startend_category":0.8888888889,"loose_combination_category":0.6923076923,"loose_detectable_content_category":0.9130434783,"loose_detectable_format_category":0.8911564626,"loose_keywords_category":0.8344827586,"loose_language_category":1.0,"loose_length_constraints_category":0.8292682927,"loose_punctuation_category":0.8360655738,"loose_startend_category":0.9047619048}
|
| 17 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","strict_prompt_accuracy":0.7483588621,"strict_instruction_accuracy":0.8105726872,"loose_prompt_accuracy":0.7899343545,"loose_instruction_accuracy":0.845814978,"strict_combination_category":0.7230769231,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8979591837,"strict_keywords_category":0.7724137931,"strict_language_category":0.935483871,"strict_length_constraints_category":0.7479674797,"strict_punctuation_category":0.7049180328,"strict_startend_category":0.8888888889,"loose_combination_category":0.7692307692,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.8413793103,"loose_language_category":0.935483871,"loose_length_constraints_category":0.7642276423,"loose_punctuation_category":0.7540983607,"loose_startend_category":0.9523809524}
|
| 18 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","strict_prompt_accuracy":0.7342342342,"strict_instruction_accuracy":0.803030303,"loose_prompt_accuracy":0.786036036,"loose_instruction_accuracy":0.846969697,"strict_combination_category":0.625,"strict_detectable_content_category":0.9111111111,"strict_detectable_format_category":0.8951048951,"strict_keywords_category":0.7785714286,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6140350877,"strict_punctuation_category":0.9333333333,"strict_startend_category":0.8888888889,"loose_combination_category":0.6875,"loose_detectable_content_category":0.9111111111,"loose_detectable_format_category":0.9090909091,"loose_keywords_category":0.85,"loose_language_category":1.0,"loose_length_constraints_category":0.6666666667,"loose_punctuation_category":0.95,"loose_startend_category":0.9682539683}
|
| 19 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.7308533917,"strict_instruction_accuracy":0.8017621145,"loose_prompt_accuracy":0.772428884,"loose_instruction_accuracy":0.8355359765,"strict_combination_category":0.7384615385,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.843537415,"strict_keywords_category":0.7379310345,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.9016393443,"strict_startend_category":0.9365079365,"loose_combination_category":0.7538461538,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8775510204,"loose_keywords_category":0.7793103448,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7398373984,"loose_punctuation_category":0.9344262295,"loose_startend_category":0.9365079365}
|
| 20 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":0.6827133479,"strict_instruction_accuracy":0.7577092511,"loose_prompt_accuracy":0.7199124726,"loose_instruction_accuracy":0.7885462555,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8027210884,"strict_keywords_category":0.7793103448,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.7704918033,"strict_startend_category":0.8095238095,"loose_combination_category":0.6153846154,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8095238095,"loose_keywords_category":0.8344827586,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7154471545,"loose_punctuation_category":0.8360655738,"loose_startend_category":0.8412698413}
|
| 21 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","strict_prompt_accuracy":0.6717724289,"strict_instruction_accuracy":0.7577092511,"loose_prompt_accuracy":0.7264770241,"loose_instruction_accuracy":0.798825257,"strict_combination_category":0.5384615385,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.9047619048,"strict_keywords_category":0.7172413793,"strict_language_category":0.8387096774,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.8196721311,"strict_startend_category":0.7142857143,"loose_combination_category":0.5692307692,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.9115646259,"loose_keywords_category":0.7931034483,"loose_language_category":0.8709677419,"loose_length_constraints_category":0.7235772358,"loose_punctuation_category":0.8852459016,"loose_startend_category":0.746031746}
|
| 22 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6644144144,"strict_instruction_accuracy":0.7526555387,"loose_prompt_accuracy":0.7274774775,"loose_instruction_accuracy":0.7996965099,"strict_combination_category":0.6153846154,"strict_detectable_content_category":0.8444444444,"strict_detectable_format_category":0.8014184397,"strict_keywords_category":0.7642857143,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.641025641,"strict_punctuation_category":0.7333333333,"strict_startend_category":0.8166666667,"loose_combination_category":0.6615384615,"loose_detectable_content_category":0.8444444444,"loose_detectable_format_category":0.8156028369,"loose_keywords_category":0.8071428571,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.7606837607,"loose_punctuation_category":0.8,"loose_startend_category":0.85}
|
| 23 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","strict_prompt_accuracy":0.6674418605,"strict_instruction_accuracy":0.7503924647,"loose_prompt_accuracy":0.7139534884,"loose_instruction_accuracy":0.7912087912,"strict_combination_category":0.5396825397,"strict_detectable_content_category":0.7435897436,"strict_detectable_format_category":0.8581560284,"strict_keywords_category":0.7067669173,"strict_language_category":1.0,"strict_length_constraints_category":0.6754385965,"strict_punctuation_category":0.8644067797,"strict_startend_category":0.7192982456,"loose_combination_category":0.5873015873,"loose_detectable_content_category":0.7435897436,"loose_detectable_format_category":0.8723404255,"loose_keywords_category":0.7518796992,"loose_language_category":1.0,"loose_length_constraints_category":0.7719298246,"loose_punctuation_category":0.8813559322,"loose_startend_category":0.7719298246}
|
| 24 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","strict_prompt_accuracy":0.6542669584,"strict_instruction_accuracy":0.7474302496,"loose_prompt_accuracy":0.7089715536,"loose_instruction_accuracy":0.7900146843,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.8260869565,"strict_detectable_format_category":0.8775510204,"strict_keywords_category":0.6689655172,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.6666666667,"strict_punctuation_category":0.7213114754,"strict_startend_category":0.8412698413,"loose_combination_category":0.6461538462,"loose_detectable_content_category":0.8260869565,"loose_detectable_format_category":0.8843537415,"loose_keywords_category":0.7655172414,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6991869919,"loose_punctuation_category":0.7868852459,"loose_startend_category":0.873015873}
|
| 25 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","strict_prompt_accuracy":0.6652078775,"strict_instruction_accuracy":0.7444933921,"loose_prompt_accuracy":0.7177242888,"loose_instruction_accuracy":0.7914831131,"strict_combination_category":0.5076923077,"strict_detectable_content_category":0.8913043478,"strict_detectable_format_category":0.8299319728,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.6829268293,"strict_punctuation_category":0.7540983607,"strict_startend_category":0.7301587302,"loose_combination_category":0.5538461538,"loose_detectable_content_category":0.8913043478,"loose_detectable_format_category":0.8367346939,"loose_keywords_category":0.7931034483,"loose_language_category":1.0,"loose_length_constraints_category":0.7804878049,"loose_punctuation_category":0.8032786885,"loose_startend_category":0.7619047619}
|
| 26 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","strict_prompt_accuracy":0.6208425721,"strict_instruction_accuracy":0.7125925926,"loose_prompt_accuracy":0.6518847007,"loose_instruction_accuracy":0.7392592593,"strict_combination_category":0.65625,"strict_detectable_content_category":0.8043478261,"strict_detectable_format_category":0.8333333333,"strict_keywords_category":0.7342657343,"strict_language_category":0.9677419355,"strict_length_constraints_category":0.5365853659,"strict_punctuation_category":0.7213114754,"strict_startend_category":0.5873015873,"loose_combination_category":0.65625,"loose_detectable_content_category":0.8043478261,"loose_detectable_format_category":0.8402777778,"loose_keywords_category":0.7692307692,"loose_language_category":0.9677419355,"loose_length_constraints_category":0.593495935,"loose_punctuation_category":0.7540983607,"loose_startend_category":0.6349206349}
|
| 27 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","strict_prompt_accuracy":0.6114790287,"strict_instruction_accuracy":0.7007407407,"loose_prompt_accuracy":0.6379690949,"loose_instruction_accuracy":0.7274074074,"strict_combination_category":0.5846153846,"strict_detectable_content_category":0.8,"strict_detectable_format_category":0.801369863,"strict_keywords_category":0.7872340426,"strict_language_category":0.6774193548,"strict_length_constraints_category":0.6422764228,"strict_punctuation_category":0.6229508197,"strict_startend_category":0.5238095238,"loose_combination_category":0.5846153846,"loose_detectable_content_category":0.8,"loose_detectable_format_category":0.8287671233,"loose_keywords_category":0.8156028369,"loose_language_category":0.6774193548,"loose_length_constraints_category":0.7154471545,"loose_punctuation_category":0.6393442623,"loose_startend_category":0.5238095238}
|
| 28 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","strict_prompt_accuracy":0.6126914661,"strict_instruction_accuracy":0.6989720999,"loose_prompt_accuracy":0.6805251641,"loose_instruction_accuracy":0.7547723935,"strict_combination_category":0.5230769231,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.8571428571,"strict_keywords_category":0.7172413793,"strict_language_category":1.0,"strict_length_constraints_category":0.593495935,"strict_punctuation_category":0.2950819672,"strict_startend_category":0.873015873,"loose_combination_category":0.6307692308,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.8979591837,"loose_keywords_category":0.7379310345,"loose_language_category":1.0,"loose_length_constraints_category":0.7317073171,"loose_punctuation_category":0.3606557377,"loose_startend_category":0.8888888889}
|
| 29 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","strict_prompt_accuracy":0.5898617512,"strict_instruction_accuracy":0.6801242236,"loose_prompt_accuracy":0.6520737327,"loose_instruction_accuracy":0.7313664596,"strict_combination_category":0.606557377,"strict_detectable_content_category":0.9285714286,"strict_detectable_format_category":0.8028169014,"strict_keywords_category":0.7338129496,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.5840707965,"strict_punctuation_category":0.1964285714,"strict_startend_category":0.6833333333,"loose_combination_category":0.6393442623,"loose_detectable_content_category":0.9285714286,"loose_detectable_format_category":0.8169014085,"loose_keywords_category":0.7625899281,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6814159292,"loose_punctuation_category":0.3392857143,"loose_startend_category":0.7833333333}
|
| 30 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","strict_prompt_accuracy":0.4835164835,"strict_instruction_accuracy":0.5790251108,"loose_prompt_accuracy":0.5384615385,"loose_instruction_accuracy":0.6203840473,"strict_combination_category":0.3384615385,"strict_detectable_content_category":0.6444444444,"strict_detectable_format_category":0.7414965986,"strict_keywords_category":0.7062937063,"strict_language_category":0.6451612903,"strict_length_constraints_category":0.6260162602,"strict_punctuation_category":0.1333333333,"strict_startend_category":0.4126984127,"loose_combination_category":0.4,"loose_detectable_content_category":0.6444444444,"loose_detectable_format_category":0.7551020408,"loose_keywords_category":0.7412587413,"loose_language_category":0.6451612903,"loose_length_constraints_category":0.6910569106,"loose_punctuation_category":0.2166666667,"loose_startend_category":0.4761904762}
|
| 31 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","strict_prompt_accuracy":0.4376367615,"strict_instruction_accuracy":0.5447870778,"loose_prompt_accuracy":0.4748358862,"loose_instruction_accuracy":0.5814977974,"strict_combination_category":0.1384615385,"strict_detectable_content_category":0.7608695652,"strict_detectable_format_category":0.768707483,"strict_keywords_category":0.5862068966,"strict_language_category":0.8709677419,"strict_length_constraints_category":0.5853658537,"strict_punctuation_category":0.1803278689,"strict_startend_category":0.3015873016,"loose_combination_category":0.1384615385,"loose_detectable_content_category":0.7608695652,"loose_detectable_format_category":0.768707483,"loose_keywords_category":0.6413793103,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.6829268293,"loose_punctuation_category":0.2295081967,"loose_startend_category":0.3174603175}
|
| 32 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","strict_prompt_accuracy":0.4245076586,"strict_instruction_accuracy":0.5330396476,"loose_prompt_accuracy":0.4792122538,"loose_instruction_accuracy":0.5814977974,"strict_combination_category":0.2461538462,"strict_detectable_content_category":0.5217391304,"strict_detectable_format_category":0.6258503401,"strict_keywords_category":0.6137931034,"strict_language_category":0.5806451613,"strict_length_constraints_category":0.5772357724,"strict_punctuation_category":0.5409836066,"strict_startend_category":0.3174603175,"loose_combination_category":0.3230769231,"loose_detectable_content_category":0.5217391304,"loose_detectable_format_category":0.6462585034,"loose_keywords_category":0.6551724138,"loose_language_category":0.5806451613,"loose_length_constraints_category":0.674796748,"loose_punctuation_category":0.606557377,"loose_startend_category":0.3650793651}
|
| 33 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","strict_prompt_accuracy":0.3369803063,"strict_instruction_accuracy":0.4405286344,"loose_prompt_accuracy":0.398249453,"loose_instruction_accuracy":0.4948604993,"strict_combination_category":0.0307692308,"strict_detectable_content_category":0.5217391304,"strict_detectable_format_category":0.5034013605,"strict_keywords_category":0.6068965517,"strict_language_category":0.9032258065,"strict_length_constraints_category":0.4146341463,"strict_punctuation_category":0.2459016393,"strict_startend_category":0.2857142857,"loose_combination_category":0.0923076923,"loose_detectable_content_category":0.5217391304,"loose_detectable_format_category":0.5170068027,"loose_keywords_category":0.6482758621,"loose_language_category":0.9032258065,"loose_length_constraints_category":0.5447154472,"loose_punctuation_category":0.3278688525,"loose_startend_category":0.3492063492}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","strict_prompt_accuracy":0.2516411379,"strict_instruction_accuracy":0.3656387665,"loose_prompt_accuracy":0.284463895,"loose_instruction_accuracy":0.4111600587,"strict_combination_category":0.0615384615,"strict_detectable_content_category":0.3913043478,"strict_detectable_format_category":0.3945578231,"strict_keywords_category":0.4,"strict_language_category":0.5161290323,"strict_length_constraints_category":0.4959349593,"strict_punctuation_category":0.3606557377,"strict_startend_category":0.1904761905,"loose_combination_category":0.0615384615,"loose_detectable_content_category":0.3913043478,"loose_detectable_format_category":0.4013605442,"loose_keywords_category":0.4551724138,"loose_language_category":0.5161290323,"loose_length_constraints_category":0.5691056911,"loose_punctuation_category":0.4918032787,"loose_startend_category":0.2698412698}
|
| 35 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","strict_prompt_accuracy":null,"strict_instruction_accuracy":null,"loose_prompt_accuracy":null,"loose_instruction_accuracy":null,"strict_combination_category":null,"strict_detectable_content_category":null,"strict_detectable_format_category":null,"strict_keywords_category":null,"strict_language_category":null,"strict_length_constraints_category":null,"strict_punctuation_category":null,"strict_startend_category":null,"loose_combination_category":null,"loose_detectable_content_category":null,"loose_detectable_format_category":null,"loose_keywords_category":null,"loose_language_category":null,"loose_length_constraints_category":null,"loose_punctuation_category":null,"loose_startend_category":null}
|
leaderboard/boards_data/keyword-extraction_SynKeywords.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2115068728,"keyword-extraction_SynKeywords_precision_mean":0.1912410205,"keyword-extraction_SynKeywords_recall_mean":0.2483695652,"nlu_score":0.7143086066}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.229921048,"keyword-extraction_SynKeywords_precision_mean":0.21147343,"keyword-extraction_SynKeywords_recall_mean":0.2634963768,"nlu_score":0.6241793507}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0860842686,"keyword-extraction_SynKeywords_precision_mean":0.0757882818,"keyword-extraction_SynKeywords_recall_mean":0.1065217391,"nlu_score":0.6297634971}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2645652392,"keyword-extraction_SynKeywords_precision_mean":0.2349391249,"keyword-extraction_SynKeywords_recall_mean":0.3166666667,"nlu_score":0.6552152029}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1217550899,"keyword-extraction_SynKeywords_precision_mean":0.1020894964,"keyword-extraction_SynKeywords_recall_mean":0.1608695652,"nlu_score":0.6758278127}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":null,"keyword-extraction_SynKeywords_precision_mean":null,"keyword-extraction_SynKeywords_recall_mean":null,"nlu_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3352048805,"keyword-extraction_SynKeywords_precision_mean":0.2914121808,"keyword-extraction_SynKeywords_recall_mean":0.4166666667,"nlu_score":0.699116864}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2384077673,"keyword-extraction_SynKeywords_precision_mean":0.2041836259,"keyword-extraction_SynKeywords_recall_mean":0.3015398551,"nlu_score":0.6898261633}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.233766167,"keyword-extraction_SynKeywords_precision_mean":0.1893302534,"keyword-extraction_SynKeywords_recall_mean":0.3297101449,"nlu_score":0.6460328733}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1369232983,"keyword-extraction_SynKeywords_precision_mean":0.1117212542,"keyword-extraction_SynKeywords_recall_mean":0.1863224638,"nlu_score":0.6714091535}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2568096145,"keyword-extraction_SynKeywords_precision_mean":0.2483731877,"keyword-extraction_SynKeywords_recall_mean":0.2765873016,"nlu_score":0.3749414991}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1942845429,"keyword-extraction_SynKeywords_precision_mean":0.168197784,"keyword-extraction_SynKeywords_recall_mean":0.2451992754,"nlu_score":0.5661558794}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1409784417,"keyword-extraction_SynKeywords_precision_mean":0.1216706248,"keyword-extraction_SynKeywords_recall_mean":0.1832427536,"nlu_score":0.456845738}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.261926093,"keyword-extraction_SynKeywords_precision_mean":0.2173028298,"keyword-extraction_SynKeywords_recall_mean":0.3492753623,"nlu_score":0.6752949557}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2175605056,"keyword-extraction_SynKeywords_precision_mean":0.1768294437,"keyword-extraction_SynKeywords_recall_mean":0.3029891304,"nlu_score":0.5121418762}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2097414246,"keyword-extraction_SynKeywords_precision_mean":0.1802822781,"keyword-extraction_SynKeywords_recall_mean":0.2621376812,"nlu_score":0.3619547874}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2199224821,"keyword-extraction_SynKeywords_precision_mean":0.1924904051,"keyword-extraction_SynKeywords_recall_mean":0.2695652174,"nlu_score":0.3928685253}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2876907753,"keyword-extraction_SynKeywords_precision_mean":0.2733133111,"keyword-extraction_SynKeywords_recall_mean":0.322192029,"nlu_score":0.6800109206}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1856116909,"keyword-extraction_SynKeywords_precision_mean":0.157770465,"keyword-extraction_SynKeywords_recall_mean":0.2412137681,"nlu_score":0.6833497104}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1555238683,"keyword-extraction_SynKeywords_precision_mean":0.1317069998,"keyword-extraction_SynKeywords_recall_mean":0.2076992754,"nlu_score":0.7207167537}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2783400189,"keyword-extraction_SynKeywords_precision_mean":0.2250927598,"keyword-extraction_SynKeywords_recall_mean":0.3842391304,"nlu_score":0.6459120734}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1901147976,"keyword-extraction_SynKeywords_precision_mean":0.1676428493,"keyword-extraction_SynKeywords_recall_mean":0.2307065217,"nlu_score":0.4824528512}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.205802897,"keyword-extraction_SynKeywords_precision_mean":0.1860666658,"keyword-extraction_SynKeywords_recall_mean":0.2421195652,"nlu_score":0.7050532433}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1986622723,"keyword-extraction_SynKeywords_precision_mean":0.1812999953,"keyword-extraction_SynKeywords_recall_mean":0.2295289855,"nlu_score":0.6944128198}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1470867235,"keyword-extraction_SynKeywords_precision_mean":0.1387418439,"keyword-extraction_SynKeywords_recall_mean":0.1666666667,"nlu_score":0.6914202844}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0012077295,"keyword-extraction_SynKeywords_precision_mean":0.0013586957,"keyword-extraction_SynKeywords_recall_mean":0.0010869565,"nlu_score":0.531045981}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1690961,"keyword-extraction_SynKeywords_precision_mean":0.1495665943,"keyword-extraction_SynKeywords_recall_mean":0.2049818841,"nlu_score":0.6262096694}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2080704644,"keyword-extraction_SynKeywords_precision_mean":0.1673321849,"keyword-extraction_SynKeywords_recall_mean":0.294384058,"nlu_score":0.5968415875}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2510623051,"keyword-extraction_SynKeywords_precision_mean":0.1899292026,"keyword-extraction_SynKeywords_recall_mean":0.4099637681,"nlu_score":0.3916645306}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2160808904,"keyword-extraction_SynKeywords_precision_mean":0.1901842722,"keyword-extraction_SynKeywords_recall_mean":0.2683876812,"nlu_score":0.7146808531}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3267802104,"keyword-extraction_SynKeywords_precision_mean":0.2985915051,"keyword-extraction_SynKeywords_recall_mean":0.3825181159,"nlu_score":0.6361186163}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2600489896,"keyword-extraction_SynKeywords_precision_mean":0.2150796745,"keyword-extraction_SynKeywords_recall_mean":0.3497282609,"nlu_score":0.6255818412}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0224485659,"keyword-extraction_SynKeywords_precision_mean":0.0230331263,"keyword-extraction_SynKeywords_recall_mean":0.022826087,"nlu_score":0.1368924446}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0,"keyword-extraction_SynKeywords_precision_mean":0.0,"keyword-extraction_SynKeywords_recall_mean":0.0,"nlu_score":0.046805056}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1559734933,"keyword-extraction_SynKeywords_precision_mean":0.1449240072,"keyword-extraction_SynKeywords_recall_mean":0.1766304348,"nlu_score":0.6992555201}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2115068728,"keyword-extraction_SynKeywords_precision_mean":0.1912410205,"keyword-extraction_SynKeywords_recall_mean":0.2483695652,"nlu_score":0.7143086066}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.229921048,"keyword-extraction_SynKeywords_precision_mean":0.21147343,"keyword-extraction_SynKeywords_recall_mean":0.2634963768,"nlu_score":0.6241793507}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0860842686,"keyword-extraction_SynKeywords_precision_mean":0.0757882818,"keyword-extraction_SynKeywords_recall_mean":0.1065217391,"nlu_score":0.6297634971}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2645652392,"keyword-extraction_SynKeywords_precision_mean":0.2349391249,"keyword-extraction_SynKeywords_recall_mean":0.3166666667,"nlu_score":0.6552152029}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1217550899,"keyword-extraction_SynKeywords_precision_mean":0.1020894964,"keyword-extraction_SynKeywords_recall_mean":0.1608695652,"nlu_score":0.6758278127}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":null,"keyword-extraction_SynKeywords_precision_mean":null,"keyword-extraction_SynKeywords_recall_mean":null,"nlu_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3352048805,"keyword-extraction_SynKeywords_precision_mean":0.2914121808,"keyword-extraction_SynKeywords_recall_mean":0.4166666667,"nlu_score":0.699116864}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2384077673,"keyword-extraction_SynKeywords_precision_mean":0.2041836259,"keyword-extraction_SynKeywords_recall_mean":0.3015398551,"nlu_score":0.6898261633}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.233766167,"keyword-extraction_SynKeywords_precision_mean":0.1893302534,"keyword-extraction_SynKeywords_recall_mean":0.3297101449,"nlu_score":0.6460328733}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1369232983,"keyword-extraction_SynKeywords_precision_mean":0.1117212542,"keyword-extraction_SynKeywords_recall_mean":0.1863224638,"nlu_score":0.6714091535}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2568096145,"keyword-extraction_SynKeywords_precision_mean":0.2483731877,"keyword-extraction_SynKeywords_recall_mean":0.2765873016,"nlu_score":0.3749414991}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1942845429,"keyword-extraction_SynKeywords_precision_mean":0.168197784,"keyword-extraction_SynKeywords_recall_mean":0.2451992754,"nlu_score":0.5661558794}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1409784417,"keyword-extraction_SynKeywords_precision_mean":0.1216706248,"keyword-extraction_SynKeywords_recall_mean":0.1832427536,"nlu_score":0.456845738}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.261926093,"keyword-extraction_SynKeywords_precision_mean":0.2173028298,"keyword-extraction_SynKeywords_recall_mean":0.3492753623,"nlu_score":0.6752949557}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2175605056,"keyword-extraction_SynKeywords_precision_mean":0.1768294437,"keyword-extraction_SynKeywords_recall_mean":0.3029891304,"nlu_score":0.5121418762}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2097414246,"keyword-extraction_SynKeywords_precision_mean":0.1802822781,"keyword-extraction_SynKeywords_recall_mean":0.2621376812,"nlu_score":0.3619547874}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2199224821,"keyword-extraction_SynKeywords_precision_mean":0.1924904051,"keyword-extraction_SynKeywords_recall_mean":0.2695652174,"nlu_score":0.3928685253}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2876907753,"keyword-extraction_SynKeywords_precision_mean":0.2733133111,"keyword-extraction_SynKeywords_recall_mean":0.322192029,"nlu_score":0.6800109206}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1856116909,"keyword-extraction_SynKeywords_precision_mean":0.157770465,"keyword-extraction_SynKeywords_recall_mean":0.2412137681,"nlu_score":0.6833497104}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1555238683,"keyword-extraction_SynKeywords_precision_mean":0.1317069998,"keyword-extraction_SynKeywords_recall_mean":0.2076992754,"nlu_score":0.7207167537}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2783400189,"keyword-extraction_SynKeywords_precision_mean":0.2250927598,"keyword-extraction_SynKeywords_recall_mean":0.3842391304,"nlu_score":0.6459120734}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.1901147976,"keyword-extraction_SynKeywords_precision_mean":0.1676428493,"keyword-extraction_SynKeywords_recall_mean":0.2307065217,"nlu_score":0.4824528512}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.205802897,"keyword-extraction_SynKeywords_precision_mean":0.1860666658,"keyword-extraction_SynKeywords_recall_mean":0.2421195652,"nlu_score":0.7050532433}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1986622723,"keyword-extraction_SynKeywords_precision_mean":0.1812999953,"keyword-extraction_SynKeywords_recall_mean":0.2295289855,"nlu_score":0.6944128198}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1470867235,"keyword-extraction_SynKeywords_precision_mean":0.1387418439,"keyword-extraction_SynKeywords_recall_mean":0.1666666667,"nlu_score":0.6914202844}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0012077295,"keyword-extraction_SynKeywords_precision_mean":0.0013586957,"keyword-extraction_SynKeywords_recall_mean":0.0010869565,"nlu_score":0.531045981}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1690961,"keyword-extraction_SynKeywords_precision_mean":0.1495665943,"keyword-extraction_SynKeywords_recall_mean":0.2049818841,"nlu_score":0.6262096694}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2080704644,"keyword-extraction_SynKeywords_precision_mean":0.1673321849,"keyword-extraction_SynKeywords_recall_mean":0.294384058,"nlu_score":0.5968415875}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2510623051,"keyword-extraction_SynKeywords_precision_mean":0.1899292026,"keyword-extraction_SynKeywords_recall_mean":0.4099637681,"nlu_score":0.3916645306}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.2160808904,"keyword-extraction_SynKeywords_precision_mean":0.1901842722,"keyword-extraction_SynKeywords_recall_mean":0.2683876812,"nlu_score":0.7146808531}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.3267802104,"keyword-extraction_SynKeywords_precision_mean":0.2985915051,"keyword-extraction_SynKeywords_recall_mean":0.3825181159,"nlu_score":0.6361186163}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.2600489896,"keyword-extraction_SynKeywords_precision_mean":0.2150796745,"keyword-extraction_SynKeywords_recall_mean":0.3497282609,"nlu_score":0.6255818412}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0224485659,"keyword-extraction_SynKeywords_precision_mean":0.0230331263,"keyword-extraction_SynKeywords_recall_mean":0.022826087,"nlu_score":0.1368924446}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","keyword-extraction_SynKeywords_f1_mean":0.0,"keyword-extraction_SynKeywords_precision_mean":0.0,"keyword-extraction_SynKeywords_recall_mean":0.0,"nlu_score":0.046805056}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","keyword-extraction_SynKeywords_f1_mean":0.1559734933,"keyword-extraction_SynKeywords_precision_mean":0.1449240072,"keyword-extraction_SynKeywords_recall_mean":0.1766304348,"nlu_score":0.6992555201}
|
leaderboard/boards_data/mt_bench.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"o3","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":9.1428571429,"score_mean":9.1375,"writing_score_w_mean":8.8148148148,"writing_score_mean":8.8083333333,"roleplay_score_w_mean":8.5357142857,"roleplay_score_mean":8.55,"reasoning_score_w_mean":9.7,"reasoning_score_mean":9.675,"math_score_w_mean":9.652173913,"math_score_mean":9.6,"coding_score_w_mean":9.15,"coding_score_mean":9.15,"extraction_score_w_mean":8.95,"extraction_score_mean":8.95,"stem_score_w_mean":9.35,"stem_score_mean":9.35,"humanities_score_w_mean":9.1,"humanities_score_mean":9.1,"persian_general_knowledge_score_w_mean":8.4444444444,"persian_general_knowledge_score_mean":8.5,"chatbot_rag_score_w_mean":9.7,"chatbot_rag_score_mean":9.6916666667}
|
| 2 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":9.1020408163,"score_mean":9.1075,"writing_score_w_mean":8.6296296296,"writing_score_mean":8.6416666667,"roleplay_score_w_mean":8.7857142857,"roleplay_score_mean":8.7916666667,"reasoning_score_w_mean":9.0666666667,"reasoning_score_mean":8.9666666667,"math_score_w_mean":9.6956521739,"math_score_mean":9.65,"coding_score_w_mean":9.35,"coding_score_mean":9.35,"extraction_score_w_mean":9.3,"extraction_score_mean":9.3,"stem_score_w_mean":8.75,"stem_score_mean":8.75,"humanities_score_w_mean":9.15,"humanities_score_mean":9.15,"persian_general_knowledge_score_w_mean":9.1851851852,"persian_general_knowledge_score_mean":9.2583333333,"chatbot_rag_score_w_mean":9.2333333333,"chatbot_rag_score_mean":9.2166666667}
|
| 3 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.7983539095,"score_mean":8.8122895623,"writing_score_w_mean":8.52,"writing_score_mean":8.5462962963,"roleplay_score_w_mean":8.2857142857,"roleplay_score_mean":8.2583333333,"reasoning_score_w_mean":8.4666666667,"reasoning_score_mean":8.3666666667,"math_score_w_mean":9.652173913,"math_score_mean":9.6,"coding_score_w_mean":8.8,"coding_score_mean":8.8,"extraction_score_w_mean":8.75,"extraction_score_mean":8.75,"stem_score_w_mean":8.7,"stem_score_mean":8.7,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":8.8148148148,"persian_general_knowledge_score_mean":8.9166666667,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1583333333}
|
| 4 |
-
{"Model Name":"gpt-4.1","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.7428571429,"score_mean":8.7325,"writing_score_w_mean":8.4074074074,"writing_score_mean":8.3833333333,"roleplay_score_w_mean":8.6071428571,"roleplay_score_mean":8.625,"reasoning_score_w_mean":9.0666666667,"reasoning_score_mean":8.9666666667,"math_score_w_mean":9.7826086957,"math_score_mean":9.75,"coding_score_w_mean":8.15,"coding_score_mean":8.15,"extraction_score_w_mean":8.45,"extraction_score_mean":8.45,"stem_score_w_mean":8.7,"stem_score_mean":8.7,"humanities_score_w_mean":9.1,"humanities_score_mean":9.1,"persian_general_knowledge_score_w_mean":7.7777777778,"persian_general_knowledge_score_mean":7.9333333333,"chatbot_rag_score_w_mean":9.3,"chatbot_rag_score_mean":9.2666666667}
|
| 5 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.7510204082,"score_mean":8.695,"writing_score_w_mean":8.5925925926,"writing_score_mean":8.625,"roleplay_score_w_mean":8.2142857143,"roleplay_score_mean":8.2166666667,"reasoning_score_w_mean":9.0666666667,"reasoning_score_mean":8.7833333333,"math_score_w_mean":9.7826086957,"math_score_mean":9.75,"coding_score_w_mean":7.85,"coding_score_mean":7.85,"extraction_score_w_mean":8.6,"extraction_score_mean":8.6,"stem_score_w_mean":8.55,"stem_score_mean":8.55,"humanities_score_w_mean":9.2,"humanities_score_mean":9.2,"persian_general_knowledge_score_w_mean":8.1481481481,"persian_general_knowledge_score_mean":8.0,"chatbot_rag_score_w_mean":9.3666666667,"chatbot_rag_score_mean":9.375}
|
| 6 |
-
{"Model Name":"deepseek-reasoner","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","score_w_mean":8.5795918367,"score_mean":8.6175,"writing_score_w_mean":8.6296296296,"writing_score_mean":8.65,"roleplay_score_w_mean":8.1785714286,"roleplay_score_mean":8.225,"reasoning_score_w_mean":8.9,"reasoning_score_mean":8.7416666667,"math_score_w_mean":9.3043478261,"math_score_mean":9.2,"coding_score_w_mean":8.75,"coding_score_mean":8.75,"extraction_score_w_mean":8.5,"extraction_score_mean":8.5,"stem_score_w_mean":8.55,"stem_score_mean":8.55,"humanities_score_w_mean":9.15,"humanities_score_mean":9.15,"persian_general_knowledge_score_w_mean":6.8148148148,"persian_general_knowledge_score_mean":7.2416666667,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1666666667}
|
| 7 |
-
{"Model Name":"deepseek-chat","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","score_w_mean":8.5102040816,"score_mean":8.6008333333,"writing_score_w_mean":8.4444444444,"writing_score_mean":8.4916666667,"roleplay_score_w_mean":8.9285714286,"roleplay_score_mean":8.9666666667,"reasoning_score_w_mean":8.3666666667,"reasoning_score_mean":8.3083333333,"math_score_w_mean":9.1304347826,"math_score_mean":9.0,"coding_score_w_mean":9.35,"coding_score_mean":9.35,"extraction_score_w_mean":8.65,"extraction_score_mean":8.65,"stem_score_w_mean":9.05,"stem_score_mean":9.05,"humanities_score_w_mean":9.25,"humanities_score_mean":9.25,"persian_general_knowledge_score_w_mean":5.0740740741,"persian_general_knowledge_score_mean":5.4916666667,"chatbot_rag_score_w_mean":9.4333333333,"chatbot_rag_score_mean":9.45}
|
| 8 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.3183673469,"score_mean":8.4183333333,"writing_score_w_mean":8.1111111111,"writing_score_mean":8.125,"roleplay_score_w_mean":8.0714285714,"roleplay_score_mean":8.0333333333,"reasoning_score_w_mean":8.1333333333,"reasoning_score_mean":8.0833333333,"math_score_w_mean":9.4347826087,"math_score_mean":9.35,"coding_score_w_mean":8.85,"coding_score_mean":8.85,"extraction_score_w_mean":8.6,"extraction_score_mean":8.6,"stem_score_w_mean":8.9,"stem_score_mean":8.9,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":5.2222222222,"persian_general_knowledge_score_mean":5.8083333333,"chatbot_rag_score_w_mean":9.4666666667,"chatbot_rag_score_mean":9.4333333333}
|
| 9 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.4522821577,"score_mean":8.3897306397,"writing_score_w_mean":8.4444444444,"writing_score_mean":8.475,"roleplay_score_w_mean":8.6785714286,"roleplay_score_mean":8.65,"reasoning_score_w_mean":8.4,"reasoning_score_mean":8.3333333333,"math_score_w_mean":9.0434782609,"math_score_mean":8.9,"coding_score_w_mean":7.05,"coding_score_mean":7.05,"extraction_score_w_mean":7.6,"extraction_score_mean":7.6,"stem_score_w_mean":8.4,"stem_score_mean":8.4,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":8.4074074074,"persian_general_knowledge_score_mean":8.4166666667,"chatbot_rag_score_w_mean":9.1538461538,"chatbot_rag_score_mean":9.1481481481}
|
| 10 |
-
{"Model Name":"gpt-4o","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.3551020408,"score_mean":8.3716666667,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.175,"roleplay_score_w_mean":7.5,"roleplay_score_mean":7.45,"reasoning_score_w_mean":8.4666666667,"reasoning_score_mean":8.4833333333,"math_score_w_mean":8.7391304348,"math_score_mean":8.8416666667,"coding_score_w_mean":7.85,"coding_score_mean":7.85,"extraction_score_w_mean":8.25,"extraction_score_mean":8.25,"stem_score_w_mean":8.6,"stem_score_mean":8.6,"humanities_score_w_mean":9.05,"humanities_score_mean":9.05,"persian_general_knowledge_score_w_mean":7.8888888889,"persian_general_knowledge_score_mean":7.9,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1166666667}
|
| 11 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.212244898,"score_mean":8.2441666667,"writing_score_w_mean":8.4074074074,"writing_score_mean":8.3166666667,"roleplay_score_w_mean":7.7857142857,"roleplay_score_mean":7.7583333333,"reasoning_score_w_mean":6.7666666667,"reasoning_score_mean":6.8,"math_score_w_mean":9.2608695652,"math_score_mean":9.3,"coding_score_w_mean":8.4,"coding_score_mean":8.4,"extraction_score_w_mean":8.1,"extraction_score_mean":8.1,"stem_score_w_mean":8.35,"stem_score_mean":8.35,"humanities_score_w_mean":8.8,"humanities_score_mean":8.8,"persian_general_knowledge_score_w_mean":7.5925925926,"persian_general_knowledge_score_mean":7.5083333333,"chatbot_rag_score_w_mean":9.1,"chatbot_rag_score_mean":9.1083333333}
|
| 12 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","score_w_mean":8.1795918367,"score_mean":8.2191666667,"writing_score_w_mean":8.1851851852,"writing_score_mean":8.25,"roleplay_score_w_mean":8.2857142857,"roleplay_score_mean":8.3333333333,"reasoning_score_w_mean":7.5333333333,"reasoning_score_mean":7.5416666667,"math_score_w_mean":9.0869565217,"math_score_mean":8.95,"coding_score_w_mean":8.3,"coding_score_mean":8.3,"extraction_score_w_mean":7.7,"extraction_score_mean":7.7,"stem_score_w_mean":8.4,"stem_score_mean":8.4,"humanities_score_w_mean":8.9,"humanities_score_mean":8.9,"persian_general_knowledge_score_w_mean":6.2962962963,"persian_general_knowledge_score_mean":6.4916666667,"chatbot_rag_score_w_mean":9.3333333333,"chatbot_rag_score_mean":9.325}
|
| 13 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","score_w_mean":7.8653061224,"score_mean":7.96,"writing_score_w_mean":8.3333333333,"writing_score_mean":8.3083333333,"roleplay_score_w_mean":7.6785714286,"roleplay_score_mean":7.5916666667,"reasoning_score_w_mean":7.0666666667,"reasoning_score_mean":7.2083333333,"math_score_w_mean":8.7826086957,"math_score_mean":8.65,"coding_score_w_mean":7.5,"coding_score_mean":7.5,"extraction_score_w_mean":8.4,"extraction_score_mean":8.4,"stem_score_w_mean":8.65,"stem_score_mean":8.65,"humanities_score_w_mean":8.85,"humanities_score_mean":8.85,"persian_general_knowledge_score_w_mean":4.8518518519,"persian_general_knowledge_score_mean":5.3083333333,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1333333333}
|
| 14 |
-
{"Model Name":"gpt-4o-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":7.8081632653,"score_mean":7.8916666667,"writing_score_w_mean":7.962962963,"writing_score_mean":7.9666666667,"roleplay_score_w_mean":7.6071428571,"roleplay_score_mean":7.5083333333,"reasoning_score_w_mean":6.7666666667,"reasoning_score_mean":6.7166666667,"math_score_w_mean":8.2608695652,"math_score_mean":8.225,"coding_score_w_mean":7.85,"coding_score_mean":7.85,"extraction_score_w_mean":8.3,"extraction_score_mean":8.3,"stem_score_w_mean":8.5,"stem_score_mean":8.5,"humanities_score_w_mean":8.8,"humanities_score_mean":8.8,"persian_general_knowledge_score_w_mean":5.3703703704,"persian_general_knowledge_score_mean":5.7666666667,"chatbot_rag_score_w_mean":9.2666666667,"chatbot_rag_score_mean":9.2833333333}
|
| 15 |
-
{"Model Name":"Qwen3-32B","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","score_w_mean":7.5371900826,"score_mean":7.632996633,"writing_score_w_mean":7.4074074074,"writing_score_mean":7.4083333333,"roleplay_score_w_mean":7.4642857143,"roleplay_score_mean":7.2833333333,"reasoning_score_w_mean":7.5185185185,"reasoning_score_mean":7.6296296296,"math_score_w_mean":9.0,"math_score_mean":8.85,"coding_score_w_mean":8.1,"coding_score_mean":8.1,"extraction_score_w_mean":7.75,"extraction_score_mean":7.75,"stem_score_w_mean":9.0,"stem_score_mean":9.0,"humanities_score_w_mean":9.05,"humanities_score_mean":9.05,"persian_general_knowledge_score_w_mean":1.8148148148,"persian_general_knowledge_score_mean":2.0083333333,"chatbot_rag_score_w_mean":9.2666666667,"chatbot_rag_score_mean":9.25}
|
| 16 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":7.4857142857,"score_mean":7.585,"writing_score_w_mean":8.1111111111,"writing_score_mean":8.0416666667,"roleplay_score_w_mean":7.0357142857,"roleplay_score_mean":7.1833333333,"reasoning_score_w_mean":5.8,"reasoning_score_mean":5.65,"math_score_w_mean":8.4782608696,"math_score_mean":8.25,"coding_score_w_mean":8.05,"coding_score_mean":8.05,"extraction_score_w_mean":7.9,"extraction_score_mean":7.9,"stem_score_w_mean":7.7,"stem_score_mean":7.7,"humanities_score_w_mean":8.5,"humanities_score_mean":8.5,"persian_general_knowledge_score_w_mean":4.8518518519,"persian_general_knowledge_score_mean":5.375,"chatbot_rag_score_w_mean":9.1666666667,"chatbot_rag_score_mean":9.2}
|
| 17 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","score_w_mean":7.3918367347,"score_mean":7.5125,"writing_score_w_mean":7.8888888889,"writing_score_mean":7.95,"roleplay_score_w_mean":7.7142857143,"roleplay_score_mean":7.6416666667,"reasoning_score_w_mean":5.4333333333,"reasoning_score_mean":5.3166666667,"math_score_w_mean":8.7391304348,"math_score_mean":8.7,"coding_score_w_mean":7.55,"coding_score_mean":7.55,"extraction_score_w_mean":7.7,"extraction_score_mean":7.7,"stem_score_w_mean":8.6,"stem_score_mean":8.6,"humanities_score_w_mean":8.9,"humanities_score_mean":8.9,"persian_general_knowledge_score_w_mean":3.1851851852,"persian_general_knowledge_score_mean":3.5166666667,"chatbot_rag_score_w_mean":9.2333333333,"chatbot_rag_score_mean":9.25}
|
| 18 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","score_w_mean":7.3613445378,"score_mean":7.4312714777,"writing_score_w_mean":7.0,"writing_score_mean":6.9083333333,"roleplay_score_w_mean":7.2307692308,"roleplay_score_mean":7.0185185185,"reasoning_score_w_mean":6.8518518519,"reasoning_score_mean":6.8981481481,"math_score_w_mean":9.5652173913,"math_score_mean":9.5,"coding_score_w_mean":8.6,"coding_score_mean":8.6,"extraction_score_w_mean":6.75,"extraction_score_mean":6.75,"stem_score_w_mean":8.2,"stem_score_mean":8.2,"humanities_score_w_mean":8.3,"humanities_score_mean":8.3,"persian_general_knowledge_score_w_mean":2.16,"persian_general_knowledge_score_mean":2.1851851852,"chatbot_rag_score_w_mean":9.3,"chatbot_rag_score_mean":9.3333333333}
|
| 19 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":7.2367346939,"score_mean":7.3633333333,"writing_score_w_mean":7.7407407407,"writing_score_mean":7.6666666667,"roleplay_score_w_mean":7.6428571429,"roleplay_score_mean":7.5416666667,"reasoning_score_w_mean":5.7,"reasoning_score_mean":6.0416666667,"math_score_w_mean":8.1304347826,"math_score_mean":8.25,"coding_score_w_mean":8.05,"coding_score_mean":8.05,"extraction_score_w_mean":5.95,"extraction_score_mean":5.95,"stem_score_w_mean":7.85,"stem_score_mean":7.85,"humanities_score_w_mean":8.7,"humanities_score_mean":8.7,"persian_general_knowledge_score_w_mean":3.9259259259,"persian_general_knowledge_score_mean":4.3833333333,"chatbot_rag_score_w_mean":9.1666666667,"chatbot_rag_score_mean":9.2}
|
| 20 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","score_w_mean":7.2448979592,"score_mean":7.2908333333,"writing_score_w_mean":7.2592592593,"writing_score_mean":7.2333333333,"roleplay_score_w_mean":6.8214285714,"roleplay_score_mean":6.6666666667,"reasoning_score_w_mean":6.3333333333,"reasoning_score_mean":6.2,"math_score_w_mean":8.5217391304,"math_score_mean":8.3,"coding_score_w_mean":7.6,"coding_score_mean":7.6,"extraction_score_w_mean":7.95,"extraction_score_mean":7.95,"stem_score_w_mean":7.3,"stem_score_mean":7.3,"humanities_score_w_mean":8.7,"humanities_score_mean":8.7,"persian_general_knowledge_score_w_mean":3.1481481481,"persian_general_knowledge_score_mean":3.4583333333,"chatbot_rag_score_w_mean":9.5333333333,"chatbot_rag_score_mean":9.5}
|
| 21 |
-
{"Model Name":"Qwen3-14B","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","score_w_mean":7.1769547325,"score_mean":7.2045454545,"writing_score_w_mean":7.2222222222,"writing_score_mean":7.2,"roleplay_score_w_mean":7.3214285714,"roleplay_score_mean":7.2583333333,"reasoning_score_w_mean":7.3666666667,"reasoning_score_mean":7.2833333333,"math_score_w_mean":8.3913043478,"math_score_mean":8.325,"coding_score_w_mean":6.7,"coding_score_mean":6.7,"extraction_score_w_mean":7.05,"extraction_score_mean":7.05,"stem_score_w_mean":8.5,"stem_score_mean":8.5,"humanities_score_w_mean":8.7222222222,"humanities_score_mean":8.7222222222,"persian_general_knowledge_score_w_mean":1.6666666667,"persian_general_knowledge_score_mean":1.7333333333,"chatbot_rag_score_w_mean":9.4333333333,"chatbot_rag_score_mean":9.425}
|
| 22 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","score_w_mean":7.132231405,"score_mean":7.1725589226,"writing_score_w_mean":7.2962962963,"writing_score_mean":7.35,"roleplay_score_w_mean":7.1428571429,"roleplay_score_mean":6.9416666667,"reasoning_score_w_mean":6.8148148148,"reasoning_score_mean":6.962962963,"math_score_w_mean":8.0434782609,"math_score_mean":7.925,"coding_score_w_mean":5.9,"coding_score_mean":5.9,"extraction_score_w_mean":7.4,"extraction_score_mean":7.4,"stem_score_w_mean":7.25,"stem_score_mean":7.25,"humanities_score_w_mean":8.35,"humanities_score_mean":8.35,"persian_general_knowledge_score_w_mean":4.2962962963,"persian_general_knowledge_score_mean":4.75,"chatbot_rag_score_w_mean":8.8666666667,"chatbot_rag_score_mean":8.875}
|
| 23 |
-
{"Model Name":"aya-expanse-32b","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","score_w_mean":7.0367346939,"score_mean":7.0858333333,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.1416666667,"roleplay_score_w_mean":6.8214285714,"roleplay_score_mean":6.6083333333,"reasoning_score_w_mean":5.3,"reasoning_score_mean":5.2416666667,"math_score_w_mean":7.0434782609,"math_score_mean":7.0166666667,"coding_score_w_mean":6.0,"coding_score_mean":6.0,"extraction_score_w_mean":7.2,"extraction_score_mean":7.2,"stem_score_w_mean":7.95,"stem_score_mean":7.95,"humanities_score_w_mean":8.7,"humanities_score_mean":8.7,"persian_general_knowledge_score_w_mean":4.1851851852,"persian_general_knowledge_score_mean":4.6333333333,"chatbot_rag_score_w_mean":9.4,"chatbot_rag_score_mean":9.3666666667}
|
| 24 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","score_w_mean":6.8285714286,"score_mean":6.88,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.1833333333,"roleplay_score_w_mean":6.8571428571,"roleplay_score_mean":6.6083333333,"reasoning_score_w_mean":4.5666666667,"reasoning_score_mean":4.5416666667,"math_score_w_mean":5.4347826087,"math_score_mean":5.2833333333,"coding_score_w_mean":5.9,"coding_score_mean":5.9,"extraction_score_w_mean":7.35,"extraction_score_mean":7.35,"stem_score_w_mean":7.7,"stem_score_mean":7.7,"humanities_score_w_mean":8.6,"humanities_score_mean":8.6,"persian_general_knowledge_score_w_mean":4.7407407407,"persian_general_knowledge_score_mean":5.3,"chatbot_rag_score_w_mean":9.3333333333,"chatbot_rag_score_mean":9.3333333333}
|
| 25 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","score_w_mean":6.7684729064,"score_mean":6.8639240506,"writing_score_w_mean":7.3333333333,"writing_score_mean":7.3416666667,"roleplay_score_w_mean":7.3214285714,"roleplay_score_mean":7.225,"reasoning_score_w_mean":5.4642857143,"reasoning_score_mean":5.4444444444,"math_score_w_mean":8.7826086957,"math_score_mean":8.85,"coding_score_w_mean":6.5714285714,"coding_score_mean":6.5714285714,"extraction_score_w_mean":null,"extraction_score_mean":null,"stem_score_w_mean":7.75,"stem_score_mean":7.75,"humanities_score_w_mean":8.1666666667,"humanities_score_mean":8.1666666667,"persian_general_knowledge_score_w_mean":1.5185185185,"persian_general_knowledge_score_mean":1.6833333333,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.175}
|
| 26 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","score_w_mean":6.6693877551,"score_mean":6.7416666667,"writing_score_w_mean":7.0,"writing_score_mean":6.75,"roleplay_score_w_mean":6.1071428571,"roleplay_score_mean":5.7666666667,"reasoning_score_w_mean":4.7666666667,"reasoning_score_mean":4.7583333333,"math_score_w_mean":9.652173913,"math_score_mean":9.6,"coding_score_w_mean":7.3,"coding_score_mean":7.3,"extraction_score_w_mean":7.2,"extraction_score_mean":7.2,"stem_score_w_mean":7.3,"stem_score_mean":7.3,"humanities_score_w_mean":8.6,"humanities_score_mean":8.6,"persian_general_knowledge_score_w_mean":1.037037037,"persian_general_knowledge_score_mean":1.025,"chatbot_rag_score_w_mean":9.1,"chatbot_rag_score_mean":9.1166666667}
|
| 27 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","score_w_mean":6.612244898,"score_mean":6.6825,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.075,"roleplay_score_w_mean":7.0357142857,"roleplay_score_mean":6.8666666667,"reasoning_score_w_mean":4.1666666667,"reasoning_score_mean":4.0333333333,"math_score_w_mean":8.3913043478,"math_score_mean":8.4916666667,"coding_score_w_mean":6.65,"coding_score_mean":6.65,"extraction_score_w_mean":6.0,"extraction_score_mean":6.0,"stem_score_w_mean":7.75,"stem_score_mean":7.75,"humanities_score_w_mean":8.4,"humanities_score_mean":8.4,"persian_general_knowledge_score_w_mean":1.2962962963,"persian_general_knowledge_score_mean":1.4,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1583333333}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","score_w_mean":6.5991189427,"score_mean":6.6075268817,"writing_score_w_mean":6.375,"writing_score_mean":6.2037037037,"roleplay_score_w_mean":6.5263157895,"roleplay_score_mean":6.2738095238,"reasoning_score_w_mean":6.2142857143,"reasoning_score_mean":6.1944444444,"math_score_w_mean":8.652173913,"math_score_mean":8.45,"coding_score_w_mean":5.95,"coding_score_mean":5.95,"extraction_score_w_mean":7.15,"extraction_score_mean":7.15,"stem_score_w_mean":7.3,"stem_score_mean":7.3,"humanities_score_w_mean":7.5555555556,"humanities_score_mean":7.5555555556,"persian_general_knowledge_score_w_mean":1.08,"persian_general_knowledge_score_mean":1.1111111111,"chatbot_rag_score_w_mean":9.2333333333,"chatbot_rag_score_mean":9.25}
|
| 29 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","score_w_mean":6.0816326531,"score_mean":6.0908333333,"writing_score_w_mean":7.8148148148,"writing_score_mean":7.925,"roleplay_score_w_mean":6.6071428571,"roleplay_score_mean":6.275,"reasoning_score_w_mean":3.8666666667,"reasoning_score_mean":3.7416666667,"math_score_w_mean":3.652173913,"math_score_mean":3.475,"coding_score_w_mean":5.2,"coding_score_mean":5.2,"extraction_score_w_mean":5.95,"extraction_score_mean":5.95,"stem_score_w_mean":6.85,"stem_score_mean":6.85,"humanities_score_w_mean":8.45,"humanities_score_mean":8.45,"persian_general_knowledge_score_w_mean":3.2962962963,"persian_general_knowledge_score_mean":3.8583333333,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1833333333}
|
| 30 |
-
{"Model Name":"Qwen3-4B","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","score_w_mean":5.6592920354,"score_mean":5.5994623656,"writing_score_w_mean":5.5416666667,"writing_score_mean":5.1944444444,"roleplay_score_w_mean":5.4230769231,"roleplay_score_mean":4.962962963,"reasoning_score_w_mean":3.6923076923,"reasoning_score_mean":3.9351851852,"math_score_w_mean":7.9565217391,"math_score_mean":7.9416666667,"coding_score_w_mean":6.3888888889,"coding_score_mean":6.3888888889,"extraction_score_w_mean":5.85,"extraction_score_mean":5.85,"stem_score_w_mean":4.85,"stem_score_mean":4.85,"humanities_score_w_mean":5.6111111111,"humanities_score_mean":5.6111111111,"persian_general_knowledge_score_w_mean":1.0476190476,"persian_general_knowledge_score_mean":1.0416666667,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1166666667}
|
| 31 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","score_w_mean":4.3829787234,"score_mean":4.3333333333,"writing_score_w_mean":5.5185185185,"writing_score_mean":5.3083333333,"roleplay_score_w_mean":5.6428571429,"roleplay_score_mean":5.2916666667,"reasoning_score_w_mean":2.7666666667,"reasoning_score_mean":2.925,"math_score_w_mean":4.2857142857,"math_score_mean":4.4444444444,"coding_score_w_mean":3.35,"coding_score_mean":3.35,"extraction_score_w_mean":2.3333333333,"extraction_score_mean":2.3333333333,"stem_score_w_mean":4.6875,"stem_score_mean":4.6875,"humanities_score_w_mean":7.1111111111,"humanities_score_mean":7.1111111111,"persian_general_knowledge_score_w_mean":1.2222222222,"persian_general_knowledge_score_mean":1.2166666667,"chatbot_rag_score_w_mean":6.8333333333,"chatbot_rag_score_mean":6.825}
|
| 32 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","score_w_mean":3.9224806202,"score_mean":3.7564102564,"writing_score_w_mean":3.8333333333,"writing_score_mean":3.7261904762,"roleplay_score_w_mean":2.8571428571,"roleplay_score_mean":2.7333333333,"reasoning_score_w_mean":null,"reasoning_score_mean":null,"math_score_w_mean":null,"math_score_mean":null,"coding_score_w_mean":null,"coding_score_mean":null,"extraction_score_w_mean":4.1,"extraction_score_mean":4.1,"stem_score_w_mean":3.8125,"stem_score_mean":3.8125,"humanities_score_w_mean":2.6428571429,"humanities_score_mean":2.6428571429,"persian_general_knowledge_score_w_mean":1.1111111111,"persian_general_knowledge_score_mean":1.15,"chatbot_rag_score_w_mean":7.6,"chatbot_rag_score_mean":7.4583333333}
|
| 33 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","score_w_mean":3.6885245902,"score_mean":3.3982683983,"writing_score_w_mean":3.8461538462,"writing_score_mean":3.1166666667,"roleplay_score_w_mean":1.9375,"roleplay_score_mean":1.8333333333,"reasoning_score_w_mean":2.2173913043,"reasoning_score_mean":2.125,"math_score_w_mean":2.8260869565,"math_score_mean":2.675,"coding_score_w_mean":4.1666666667,"coding_score_mean":4.1666666667,"extraction_score_w_mean":3.2777777778,"extraction_score_mean":3.2777777778,"stem_score_w_mean":1.6666666667,"stem_score_mean":1.6666666667,"humanities_score_w_mean":2.125,"humanities_score_mean":2.125,"persian_general_knowledge_score_w_mean":1.0,"persian_general_knowledge_score_mean":1.0,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1333333333}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","score_w_mean":3.1510791367,"score_mean":2.9521604938,"writing_score_w_mean":1.6666666667,"writing_score_mean":1.6666666667,"roleplay_score_w_mean":1.7272727273,"roleplay_score_mean":1.625,"reasoning_score_w_mean":2.6086956522,"reasoning_score_mean":2.6145833333,"math_score_w_mean":1.7333333333,"math_score_mean":1.7857142857,"coding_score_w_mean":2.5,"coding_score_mean":2.5,"extraction_score_w_mean":null,"extraction_score_mean":null,"stem_score_w_mean":null,"stem_score_mean":null,"humanities_score_w_mean":1.0,"humanities_score_mean":1.0,"persian_general_knowledge_score_w_mean":1.0,"persian_general_knowledge_score_mean":1.0,"chatbot_rag_score_w_mean":8.1666666667,"chatbot_rag_score_mean":8.15}
|
| 35 |
-
{"Model Name":"o4-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":null,"score_mean":null,"writing_score_w_mean":null,"writing_score_mean":null,"roleplay_score_w_mean":null,"roleplay_score_mean":null,"reasoning_score_w_mean":null,"reasoning_score_mean":null,"math_score_w_mean":null,"math_score_mean":null,"coding_score_w_mean":null,"coding_score_mean":null,"extraction_score_w_mean":null,"extraction_score_mean":null,"stem_score_w_mean":null,"stem_score_mean":null,"humanities_score_w_mean":null,"humanities_score_mean":null,"persian_general_knowledge_score_w_mean":null,"persian_general_knowledge_score_mean":null,"chatbot_rag_score_w_mean":null,"chatbot_rag_score_mean":null}
|
|
|
|
| 1 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":9.1428571429,"score_mean":9.1375,"writing_score_w_mean":8.8148148148,"writing_score_mean":8.8083333333,"roleplay_score_w_mean":8.5357142857,"roleplay_score_mean":8.55,"reasoning_score_w_mean":9.7,"reasoning_score_mean":9.675,"math_score_w_mean":9.652173913,"math_score_mean":9.6,"coding_score_w_mean":9.15,"coding_score_mean":9.15,"extraction_score_w_mean":8.95,"extraction_score_mean":8.95,"stem_score_w_mean":9.35,"stem_score_mean":9.35,"humanities_score_w_mean":9.1,"humanities_score_mean":9.1,"persian_general_knowledge_score_w_mean":8.4444444444,"persian_general_knowledge_score_mean":8.5,"chatbot_rag_score_w_mean":9.7,"chatbot_rag_score_mean":9.6916666667}
|
| 2 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":9.1020408163,"score_mean":9.1075,"writing_score_w_mean":8.6296296296,"writing_score_mean":8.6416666667,"roleplay_score_w_mean":8.7857142857,"roleplay_score_mean":8.7916666667,"reasoning_score_w_mean":9.0666666667,"reasoning_score_mean":8.9666666667,"math_score_w_mean":9.6956521739,"math_score_mean":9.65,"coding_score_w_mean":9.35,"coding_score_mean":9.35,"extraction_score_w_mean":9.3,"extraction_score_mean":9.3,"stem_score_w_mean":8.75,"stem_score_mean":8.75,"humanities_score_w_mean":9.15,"humanities_score_mean":9.15,"persian_general_knowledge_score_w_mean":9.1851851852,"persian_general_knowledge_score_mean":9.2583333333,"chatbot_rag_score_w_mean":9.2333333333,"chatbot_rag_score_mean":9.2166666667}
|
| 3 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.7983539095,"score_mean":8.8122895623,"writing_score_w_mean":8.52,"writing_score_mean":8.5462962963,"roleplay_score_w_mean":8.2857142857,"roleplay_score_mean":8.2583333333,"reasoning_score_w_mean":8.4666666667,"reasoning_score_mean":8.3666666667,"math_score_w_mean":9.652173913,"math_score_mean":9.6,"coding_score_w_mean":8.8,"coding_score_mean":8.8,"extraction_score_w_mean":8.75,"extraction_score_mean":8.75,"stem_score_w_mean":8.7,"stem_score_mean":8.7,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":8.8148148148,"persian_general_knowledge_score_mean":8.9166666667,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1583333333}
|
| 4 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.7428571429,"score_mean":8.7325,"writing_score_w_mean":8.4074074074,"writing_score_mean":8.3833333333,"roleplay_score_w_mean":8.6071428571,"roleplay_score_mean":8.625,"reasoning_score_w_mean":9.0666666667,"reasoning_score_mean":8.9666666667,"math_score_w_mean":9.7826086957,"math_score_mean":9.75,"coding_score_w_mean":8.15,"coding_score_mean":8.15,"extraction_score_w_mean":8.45,"extraction_score_mean":8.45,"stem_score_w_mean":8.7,"stem_score_mean":8.7,"humanities_score_w_mean":9.1,"humanities_score_mean":9.1,"persian_general_knowledge_score_w_mean":7.7777777778,"persian_general_knowledge_score_mean":7.9333333333,"chatbot_rag_score_w_mean":9.3,"chatbot_rag_score_mean":9.2666666667}
|
| 5 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.7510204082,"score_mean":8.695,"writing_score_w_mean":8.5925925926,"writing_score_mean":8.625,"roleplay_score_w_mean":8.2142857143,"roleplay_score_mean":8.2166666667,"reasoning_score_w_mean":9.0666666667,"reasoning_score_mean":8.7833333333,"math_score_w_mean":9.7826086957,"math_score_mean":9.75,"coding_score_w_mean":7.85,"coding_score_mean":7.85,"extraction_score_w_mean":8.6,"extraction_score_mean":8.6,"stem_score_w_mean":8.55,"stem_score_mean":8.55,"humanities_score_w_mean":9.2,"humanities_score_mean":9.2,"persian_general_knowledge_score_w_mean":8.1481481481,"persian_general_knowledge_score_mean":8.0,"chatbot_rag_score_w_mean":9.3666666667,"chatbot_rag_score_mean":9.375}
|
| 6 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","score_w_mean":8.5795918367,"score_mean":8.6175,"writing_score_w_mean":8.6296296296,"writing_score_mean":8.65,"roleplay_score_w_mean":8.1785714286,"roleplay_score_mean":8.225,"reasoning_score_w_mean":8.9,"reasoning_score_mean":8.7416666667,"math_score_w_mean":9.3043478261,"math_score_mean":9.2,"coding_score_w_mean":8.75,"coding_score_mean":8.75,"extraction_score_w_mean":8.5,"extraction_score_mean":8.5,"stem_score_w_mean":8.55,"stem_score_mean":8.55,"humanities_score_w_mean":9.15,"humanities_score_mean":9.15,"persian_general_knowledge_score_w_mean":6.8148148148,"persian_general_knowledge_score_mean":7.2416666667,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1666666667}
|
| 7 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","score_w_mean":8.5102040816,"score_mean":8.6008333333,"writing_score_w_mean":8.4444444444,"writing_score_mean":8.4916666667,"roleplay_score_w_mean":8.9285714286,"roleplay_score_mean":8.9666666667,"reasoning_score_w_mean":8.3666666667,"reasoning_score_mean":8.3083333333,"math_score_w_mean":9.1304347826,"math_score_mean":9.0,"coding_score_w_mean":9.35,"coding_score_mean":9.35,"extraction_score_w_mean":8.65,"extraction_score_mean":8.65,"stem_score_w_mean":9.05,"stem_score_mean":9.05,"humanities_score_w_mean":9.25,"humanities_score_mean":9.25,"persian_general_knowledge_score_w_mean":5.0740740741,"persian_general_knowledge_score_mean":5.4916666667,"chatbot_rag_score_w_mean":9.4333333333,"chatbot_rag_score_mean":9.45}
|
| 8 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.3183673469,"score_mean":8.4183333333,"writing_score_w_mean":8.1111111111,"writing_score_mean":8.125,"roleplay_score_w_mean":8.0714285714,"roleplay_score_mean":8.0333333333,"reasoning_score_w_mean":8.1333333333,"reasoning_score_mean":8.0833333333,"math_score_w_mean":9.4347826087,"math_score_mean":9.35,"coding_score_w_mean":8.85,"coding_score_mean":8.85,"extraction_score_w_mean":8.6,"extraction_score_mean":8.6,"stem_score_w_mean":8.9,"stem_score_mean":8.9,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":5.2222222222,"persian_general_knowledge_score_mean":5.8083333333,"chatbot_rag_score_w_mean":9.4666666667,"chatbot_rag_score_mean":9.4333333333}
|
| 9 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.4522821577,"score_mean":8.3897306397,"writing_score_w_mean":8.4444444444,"writing_score_mean":8.475,"roleplay_score_w_mean":8.6785714286,"roleplay_score_mean":8.65,"reasoning_score_w_mean":8.4,"reasoning_score_mean":8.3333333333,"math_score_w_mean":9.0434782609,"math_score_mean":8.9,"coding_score_w_mean":7.05,"coding_score_mean":7.05,"extraction_score_w_mean":7.6,"extraction_score_mean":7.6,"stem_score_w_mean":8.4,"stem_score_mean":8.4,"humanities_score_w_mean":9.0,"humanities_score_mean":9.0,"persian_general_knowledge_score_w_mean":8.4074074074,"persian_general_knowledge_score_mean":8.4166666667,"chatbot_rag_score_w_mean":9.1538461538,"chatbot_rag_score_mean":9.1481481481}
|
| 10 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.3551020408,"score_mean":8.3716666667,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.175,"roleplay_score_w_mean":7.5,"roleplay_score_mean":7.45,"reasoning_score_w_mean":8.4666666667,"reasoning_score_mean":8.4833333333,"math_score_w_mean":8.7391304348,"math_score_mean":8.8416666667,"coding_score_w_mean":7.85,"coding_score_mean":7.85,"extraction_score_w_mean":8.25,"extraction_score_mean":8.25,"stem_score_w_mean":8.6,"stem_score_mean":8.6,"humanities_score_w_mean":9.05,"humanities_score_mean":9.05,"persian_general_knowledge_score_w_mean":7.8888888889,"persian_general_knowledge_score_mean":7.9,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1166666667}
|
| 11 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":8.212244898,"score_mean":8.2441666667,"writing_score_w_mean":8.4074074074,"writing_score_mean":8.3166666667,"roleplay_score_w_mean":7.7857142857,"roleplay_score_mean":7.7583333333,"reasoning_score_w_mean":6.7666666667,"reasoning_score_mean":6.8,"math_score_w_mean":9.2608695652,"math_score_mean":9.3,"coding_score_w_mean":8.4,"coding_score_mean":8.4,"extraction_score_w_mean":8.1,"extraction_score_mean":8.1,"stem_score_w_mean":8.35,"stem_score_mean":8.35,"humanities_score_w_mean":8.8,"humanities_score_mean":8.8,"persian_general_knowledge_score_w_mean":7.5925925926,"persian_general_knowledge_score_mean":7.5083333333,"chatbot_rag_score_w_mean":9.1,"chatbot_rag_score_mean":9.1083333333}
|
| 12 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","score_w_mean":8.1795918367,"score_mean":8.2191666667,"writing_score_w_mean":8.1851851852,"writing_score_mean":8.25,"roleplay_score_w_mean":8.2857142857,"roleplay_score_mean":8.3333333333,"reasoning_score_w_mean":7.5333333333,"reasoning_score_mean":7.5416666667,"math_score_w_mean":9.0869565217,"math_score_mean":8.95,"coding_score_w_mean":8.3,"coding_score_mean":8.3,"extraction_score_w_mean":7.7,"extraction_score_mean":7.7,"stem_score_w_mean":8.4,"stem_score_mean":8.4,"humanities_score_w_mean":8.9,"humanities_score_mean":8.9,"persian_general_knowledge_score_w_mean":6.2962962963,"persian_general_knowledge_score_mean":6.4916666667,"chatbot_rag_score_w_mean":9.3333333333,"chatbot_rag_score_mean":9.325}
|
| 13 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","score_w_mean":7.8653061224,"score_mean":7.96,"writing_score_w_mean":8.3333333333,"writing_score_mean":8.3083333333,"roleplay_score_w_mean":7.6785714286,"roleplay_score_mean":7.5916666667,"reasoning_score_w_mean":7.0666666667,"reasoning_score_mean":7.2083333333,"math_score_w_mean":8.7826086957,"math_score_mean":8.65,"coding_score_w_mean":7.5,"coding_score_mean":7.5,"extraction_score_w_mean":8.4,"extraction_score_mean":8.4,"stem_score_w_mean":8.65,"stem_score_mean":8.65,"humanities_score_w_mean":8.85,"humanities_score_mean":8.85,"persian_general_knowledge_score_w_mean":4.8518518519,"persian_general_knowledge_score_mean":5.3083333333,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1333333333}
|
| 14 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":7.8081632653,"score_mean":7.8916666667,"writing_score_w_mean":7.962962963,"writing_score_mean":7.9666666667,"roleplay_score_w_mean":7.6071428571,"roleplay_score_mean":7.5083333333,"reasoning_score_w_mean":6.7666666667,"reasoning_score_mean":6.7166666667,"math_score_w_mean":8.2608695652,"math_score_mean":8.225,"coding_score_w_mean":7.85,"coding_score_mean":7.85,"extraction_score_w_mean":8.3,"extraction_score_mean":8.3,"stem_score_w_mean":8.5,"stem_score_mean":8.5,"humanities_score_w_mean":8.8,"humanities_score_mean":8.8,"persian_general_knowledge_score_w_mean":5.3703703704,"persian_general_knowledge_score_mean":5.7666666667,"chatbot_rag_score_w_mean":9.2666666667,"chatbot_rag_score_mean":9.2833333333}
|
| 15 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","score_w_mean":7.5371900826,"score_mean":7.632996633,"writing_score_w_mean":7.4074074074,"writing_score_mean":7.4083333333,"roleplay_score_w_mean":7.4642857143,"roleplay_score_mean":7.2833333333,"reasoning_score_w_mean":7.5185185185,"reasoning_score_mean":7.6296296296,"math_score_w_mean":9.0,"math_score_mean":8.85,"coding_score_w_mean":8.1,"coding_score_mean":8.1,"extraction_score_w_mean":7.75,"extraction_score_mean":7.75,"stem_score_w_mean":9.0,"stem_score_mean":9.0,"humanities_score_w_mean":9.05,"humanities_score_mean":9.05,"persian_general_knowledge_score_w_mean":1.8148148148,"persian_general_knowledge_score_mean":2.0083333333,"chatbot_rag_score_w_mean":9.2666666667,"chatbot_rag_score_mean":9.25}
|
| 16 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":7.4857142857,"score_mean":7.585,"writing_score_w_mean":8.1111111111,"writing_score_mean":8.0416666667,"roleplay_score_w_mean":7.0357142857,"roleplay_score_mean":7.1833333333,"reasoning_score_w_mean":5.8,"reasoning_score_mean":5.65,"math_score_w_mean":8.4782608696,"math_score_mean":8.25,"coding_score_w_mean":8.05,"coding_score_mean":8.05,"extraction_score_w_mean":7.9,"extraction_score_mean":7.9,"stem_score_w_mean":7.7,"stem_score_mean":7.7,"humanities_score_w_mean":8.5,"humanities_score_mean":8.5,"persian_general_knowledge_score_w_mean":4.8518518519,"persian_general_knowledge_score_mean":5.375,"chatbot_rag_score_w_mean":9.1666666667,"chatbot_rag_score_mean":9.2}
|
| 17 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","score_w_mean":7.3918367347,"score_mean":7.5125,"writing_score_w_mean":7.8888888889,"writing_score_mean":7.95,"roleplay_score_w_mean":7.7142857143,"roleplay_score_mean":7.6416666667,"reasoning_score_w_mean":5.4333333333,"reasoning_score_mean":5.3166666667,"math_score_w_mean":8.7391304348,"math_score_mean":8.7,"coding_score_w_mean":7.55,"coding_score_mean":7.55,"extraction_score_w_mean":7.7,"extraction_score_mean":7.7,"stem_score_w_mean":8.6,"stem_score_mean":8.6,"humanities_score_w_mean":8.9,"humanities_score_mean":8.9,"persian_general_knowledge_score_w_mean":3.1851851852,"persian_general_knowledge_score_mean":3.5166666667,"chatbot_rag_score_w_mean":9.2333333333,"chatbot_rag_score_mean":9.25}
|
| 18 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","score_w_mean":7.3613445378,"score_mean":7.4312714777,"writing_score_w_mean":7.0,"writing_score_mean":6.9083333333,"roleplay_score_w_mean":7.2307692308,"roleplay_score_mean":7.0185185185,"reasoning_score_w_mean":6.8518518519,"reasoning_score_mean":6.8981481481,"math_score_w_mean":9.5652173913,"math_score_mean":9.5,"coding_score_w_mean":8.6,"coding_score_mean":8.6,"extraction_score_w_mean":6.75,"extraction_score_mean":6.75,"stem_score_w_mean":8.2,"stem_score_mean":8.2,"humanities_score_w_mean":8.3,"humanities_score_mean":8.3,"persian_general_knowledge_score_w_mean":2.16,"persian_general_knowledge_score_mean":2.1851851852,"chatbot_rag_score_w_mean":9.3,"chatbot_rag_score_mean":9.3333333333}
|
| 19 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":7.2367346939,"score_mean":7.3633333333,"writing_score_w_mean":7.7407407407,"writing_score_mean":7.6666666667,"roleplay_score_w_mean":7.6428571429,"roleplay_score_mean":7.5416666667,"reasoning_score_w_mean":5.7,"reasoning_score_mean":6.0416666667,"math_score_w_mean":8.1304347826,"math_score_mean":8.25,"coding_score_w_mean":8.05,"coding_score_mean":8.05,"extraction_score_w_mean":5.95,"extraction_score_mean":5.95,"stem_score_w_mean":7.85,"stem_score_mean":7.85,"humanities_score_w_mean":8.7,"humanities_score_mean":8.7,"persian_general_knowledge_score_w_mean":3.9259259259,"persian_general_knowledge_score_mean":4.3833333333,"chatbot_rag_score_w_mean":9.1666666667,"chatbot_rag_score_mean":9.2}
|
| 20 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","score_w_mean":7.2448979592,"score_mean":7.2908333333,"writing_score_w_mean":7.2592592593,"writing_score_mean":7.2333333333,"roleplay_score_w_mean":6.8214285714,"roleplay_score_mean":6.6666666667,"reasoning_score_w_mean":6.3333333333,"reasoning_score_mean":6.2,"math_score_w_mean":8.5217391304,"math_score_mean":8.3,"coding_score_w_mean":7.6,"coding_score_mean":7.6,"extraction_score_w_mean":7.95,"extraction_score_mean":7.95,"stem_score_w_mean":7.3,"stem_score_mean":7.3,"humanities_score_w_mean":8.7,"humanities_score_mean":8.7,"persian_general_knowledge_score_w_mean":3.1481481481,"persian_general_knowledge_score_mean":3.4583333333,"chatbot_rag_score_w_mean":9.5333333333,"chatbot_rag_score_mean":9.5}
|
| 21 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","score_w_mean":7.1769547325,"score_mean":7.2045454545,"writing_score_w_mean":7.2222222222,"writing_score_mean":7.2,"roleplay_score_w_mean":7.3214285714,"roleplay_score_mean":7.2583333333,"reasoning_score_w_mean":7.3666666667,"reasoning_score_mean":7.2833333333,"math_score_w_mean":8.3913043478,"math_score_mean":8.325,"coding_score_w_mean":6.7,"coding_score_mean":6.7,"extraction_score_w_mean":7.05,"extraction_score_mean":7.05,"stem_score_w_mean":8.5,"stem_score_mean":8.5,"humanities_score_w_mean":8.7222222222,"humanities_score_mean":8.7222222222,"persian_general_knowledge_score_w_mean":1.6666666667,"persian_general_knowledge_score_mean":1.7333333333,"chatbot_rag_score_w_mean":9.4333333333,"chatbot_rag_score_mean":9.425}
|
| 22 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","score_w_mean":7.132231405,"score_mean":7.1725589226,"writing_score_w_mean":7.2962962963,"writing_score_mean":7.35,"roleplay_score_w_mean":7.1428571429,"roleplay_score_mean":6.9416666667,"reasoning_score_w_mean":6.8148148148,"reasoning_score_mean":6.962962963,"math_score_w_mean":8.0434782609,"math_score_mean":7.925,"coding_score_w_mean":5.9,"coding_score_mean":5.9,"extraction_score_w_mean":7.4,"extraction_score_mean":7.4,"stem_score_w_mean":7.25,"stem_score_mean":7.25,"humanities_score_w_mean":8.35,"humanities_score_mean":8.35,"persian_general_knowledge_score_w_mean":4.2962962963,"persian_general_knowledge_score_mean":4.75,"chatbot_rag_score_w_mean":8.8666666667,"chatbot_rag_score_mean":8.875}
|
| 23 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","score_w_mean":7.0367346939,"score_mean":7.0858333333,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.1416666667,"roleplay_score_w_mean":6.8214285714,"roleplay_score_mean":6.6083333333,"reasoning_score_w_mean":5.3,"reasoning_score_mean":5.2416666667,"math_score_w_mean":7.0434782609,"math_score_mean":7.0166666667,"coding_score_w_mean":6.0,"coding_score_mean":6.0,"extraction_score_w_mean":7.2,"extraction_score_mean":7.2,"stem_score_w_mean":7.95,"stem_score_mean":7.95,"humanities_score_w_mean":8.7,"humanities_score_mean":8.7,"persian_general_knowledge_score_w_mean":4.1851851852,"persian_general_knowledge_score_mean":4.6333333333,"chatbot_rag_score_w_mean":9.4,"chatbot_rag_score_mean":9.3666666667}
|
| 24 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","score_w_mean":6.8285714286,"score_mean":6.88,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.1833333333,"roleplay_score_w_mean":6.8571428571,"roleplay_score_mean":6.6083333333,"reasoning_score_w_mean":4.5666666667,"reasoning_score_mean":4.5416666667,"math_score_w_mean":5.4347826087,"math_score_mean":5.2833333333,"coding_score_w_mean":5.9,"coding_score_mean":5.9,"extraction_score_w_mean":7.35,"extraction_score_mean":7.35,"stem_score_w_mean":7.7,"stem_score_mean":7.7,"humanities_score_w_mean":8.6,"humanities_score_mean":8.6,"persian_general_knowledge_score_w_mean":4.7407407407,"persian_general_knowledge_score_mean":5.3,"chatbot_rag_score_w_mean":9.3333333333,"chatbot_rag_score_mean":9.3333333333}
|
| 25 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","score_w_mean":6.7684729064,"score_mean":6.8639240506,"writing_score_w_mean":7.3333333333,"writing_score_mean":7.3416666667,"roleplay_score_w_mean":7.3214285714,"roleplay_score_mean":7.225,"reasoning_score_w_mean":5.4642857143,"reasoning_score_mean":5.4444444444,"math_score_w_mean":8.7826086957,"math_score_mean":8.85,"coding_score_w_mean":6.5714285714,"coding_score_mean":6.5714285714,"extraction_score_w_mean":null,"extraction_score_mean":null,"stem_score_w_mean":7.75,"stem_score_mean":7.75,"humanities_score_w_mean":8.1666666667,"humanities_score_mean":8.1666666667,"persian_general_knowledge_score_w_mean":1.5185185185,"persian_general_knowledge_score_mean":1.6833333333,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.175}
|
| 26 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","score_w_mean":6.6693877551,"score_mean":6.7416666667,"writing_score_w_mean":7.0,"writing_score_mean":6.75,"roleplay_score_w_mean":6.1071428571,"roleplay_score_mean":5.7666666667,"reasoning_score_w_mean":4.7666666667,"reasoning_score_mean":4.7583333333,"math_score_w_mean":9.652173913,"math_score_mean":9.6,"coding_score_w_mean":7.3,"coding_score_mean":7.3,"extraction_score_w_mean":7.2,"extraction_score_mean":7.2,"stem_score_w_mean":7.3,"stem_score_mean":7.3,"humanities_score_w_mean":8.6,"humanities_score_mean":8.6,"persian_general_knowledge_score_w_mean":1.037037037,"persian_general_knowledge_score_mean":1.025,"chatbot_rag_score_w_mean":9.1,"chatbot_rag_score_mean":9.1166666667}
|
| 27 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","score_w_mean":6.612244898,"score_mean":6.6825,"writing_score_w_mean":8.1481481481,"writing_score_mean":8.075,"roleplay_score_w_mean":7.0357142857,"roleplay_score_mean":6.8666666667,"reasoning_score_w_mean":4.1666666667,"reasoning_score_mean":4.0333333333,"math_score_w_mean":8.3913043478,"math_score_mean":8.4916666667,"coding_score_w_mean":6.65,"coding_score_mean":6.65,"extraction_score_w_mean":6.0,"extraction_score_mean":6.0,"stem_score_w_mean":7.75,"stem_score_mean":7.75,"humanities_score_w_mean":8.4,"humanities_score_mean":8.4,"persian_general_knowledge_score_w_mean":1.2962962963,"persian_general_knowledge_score_mean":1.4,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1583333333}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","score_w_mean":6.5991189427,"score_mean":6.6075268817,"writing_score_w_mean":6.375,"writing_score_mean":6.2037037037,"roleplay_score_w_mean":6.5263157895,"roleplay_score_mean":6.2738095238,"reasoning_score_w_mean":6.2142857143,"reasoning_score_mean":6.1944444444,"math_score_w_mean":8.652173913,"math_score_mean":8.45,"coding_score_w_mean":5.95,"coding_score_mean":5.95,"extraction_score_w_mean":7.15,"extraction_score_mean":7.15,"stem_score_w_mean":7.3,"stem_score_mean":7.3,"humanities_score_w_mean":7.5555555556,"humanities_score_mean":7.5555555556,"persian_general_knowledge_score_w_mean":1.08,"persian_general_knowledge_score_mean":1.1111111111,"chatbot_rag_score_w_mean":9.2333333333,"chatbot_rag_score_mean":9.25}
|
| 29 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","score_w_mean":6.0816326531,"score_mean":6.0908333333,"writing_score_w_mean":7.8148148148,"writing_score_mean":7.925,"roleplay_score_w_mean":6.6071428571,"roleplay_score_mean":6.275,"reasoning_score_w_mean":3.8666666667,"reasoning_score_mean":3.7416666667,"math_score_w_mean":3.652173913,"math_score_mean":3.475,"coding_score_w_mean":5.2,"coding_score_mean":5.2,"extraction_score_w_mean":5.95,"extraction_score_mean":5.95,"stem_score_w_mean":6.85,"stem_score_mean":6.85,"humanities_score_w_mean":8.45,"humanities_score_mean":8.45,"persian_general_knowledge_score_w_mean":3.2962962963,"persian_general_knowledge_score_mean":3.8583333333,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1833333333}
|
| 30 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","score_w_mean":5.6592920354,"score_mean":5.5994623656,"writing_score_w_mean":5.5416666667,"writing_score_mean":5.1944444444,"roleplay_score_w_mean":5.4230769231,"roleplay_score_mean":4.962962963,"reasoning_score_w_mean":3.6923076923,"reasoning_score_mean":3.9351851852,"math_score_w_mean":7.9565217391,"math_score_mean":7.9416666667,"coding_score_w_mean":6.3888888889,"coding_score_mean":6.3888888889,"extraction_score_w_mean":5.85,"extraction_score_mean":5.85,"stem_score_w_mean":4.85,"stem_score_mean":4.85,"humanities_score_w_mean":5.6111111111,"humanities_score_mean":5.6111111111,"persian_general_knowledge_score_w_mean":1.0476190476,"persian_general_knowledge_score_mean":1.0416666667,"chatbot_rag_score_w_mean":9.1333333333,"chatbot_rag_score_mean":9.1166666667}
|
| 31 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","score_w_mean":4.3829787234,"score_mean":4.3333333333,"writing_score_w_mean":5.5185185185,"writing_score_mean":5.3083333333,"roleplay_score_w_mean":5.6428571429,"roleplay_score_mean":5.2916666667,"reasoning_score_w_mean":2.7666666667,"reasoning_score_mean":2.925,"math_score_w_mean":4.2857142857,"math_score_mean":4.4444444444,"coding_score_w_mean":3.35,"coding_score_mean":3.35,"extraction_score_w_mean":2.3333333333,"extraction_score_mean":2.3333333333,"stem_score_w_mean":4.6875,"stem_score_mean":4.6875,"humanities_score_w_mean":7.1111111111,"humanities_score_mean":7.1111111111,"persian_general_knowledge_score_w_mean":1.2222222222,"persian_general_knowledge_score_mean":1.2166666667,"chatbot_rag_score_w_mean":6.8333333333,"chatbot_rag_score_mean":6.825}
|
| 32 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","score_w_mean":3.9224806202,"score_mean":3.7564102564,"writing_score_w_mean":3.8333333333,"writing_score_mean":3.7261904762,"roleplay_score_w_mean":2.8571428571,"roleplay_score_mean":2.7333333333,"reasoning_score_w_mean":null,"reasoning_score_mean":null,"math_score_w_mean":null,"math_score_mean":null,"coding_score_w_mean":null,"coding_score_mean":null,"extraction_score_w_mean":4.1,"extraction_score_mean":4.1,"stem_score_w_mean":3.8125,"stem_score_mean":3.8125,"humanities_score_w_mean":2.6428571429,"humanities_score_mean":2.6428571429,"persian_general_knowledge_score_w_mean":1.1111111111,"persian_general_knowledge_score_mean":1.15,"chatbot_rag_score_w_mean":7.6,"chatbot_rag_score_mean":7.4583333333}
|
| 33 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","score_w_mean":3.6885245902,"score_mean":3.3982683983,"writing_score_w_mean":3.8461538462,"writing_score_mean":3.1166666667,"roleplay_score_w_mean":1.9375,"roleplay_score_mean":1.8333333333,"reasoning_score_w_mean":2.2173913043,"reasoning_score_mean":2.125,"math_score_w_mean":2.8260869565,"math_score_mean":2.675,"coding_score_w_mean":4.1666666667,"coding_score_mean":4.1666666667,"extraction_score_w_mean":3.2777777778,"extraction_score_mean":3.2777777778,"stem_score_w_mean":1.6666666667,"stem_score_mean":1.6666666667,"humanities_score_w_mean":2.125,"humanities_score_mean":2.125,"persian_general_knowledge_score_w_mean":1.0,"persian_general_knowledge_score_mean":1.0,"chatbot_rag_score_w_mean":9.2,"chatbot_rag_score_mean":9.1333333333}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","score_w_mean":3.1510791367,"score_mean":2.9521604938,"writing_score_w_mean":1.6666666667,"writing_score_mean":1.6666666667,"roleplay_score_w_mean":1.7272727273,"roleplay_score_mean":1.625,"reasoning_score_w_mean":2.6086956522,"reasoning_score_mean":2.6145833333,"math_score_w_mean":1.7333333333,"math_score_mean":1.7857142857,"coding_score_w_mean":2.5,"coding_score_mean":2.5,"extraction_score_w_mean":null,"extraction_score_mean":null,"stem_score_w_mean":null,"stem_score_mean":null,"humanities_score_w_mean":1.0,"humanities_score_mean":1.0,"persian_general_knowledge_score_w_mean":1.0,"persian_general_knowledge_score_mean":1.0,"chatbot_rag_score_w_mean":8.1666666667,"chatbot_rag_score_mean":8.15}
|
| 35 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","score_w_mean":null,"score_mean":null,"writing_score_w_mean":null,"writing_score_mean":null,"roleplay_score_w_mean":null,"roleplay_score_mean":null,"reasoning_score_w_mean":null,"reasoning_score_mean":null,"math_score_w_mean":null,"math_score_mean":null,"coding_score_w_mean":null,"coding_score_mean":null,"extraction_score_w_mean":null,"extraction_score_mean":null,"stem_score_w_mean":null,"stem_score_mean":null,"humanities_score_w_mean":null,"humanities_score_mean":null,"persian_general_knowledge_score_w_mean":null,"persian_general_knowledge_score_mean":null,"chatbot_rag_score_w_mean":null,"chatbot_rag_score_mean":null}
|
leaderboard/boards_data/ner_arman.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.578306047,"ner_arman_precision_mean":0.5583631307,"ner_arman_recall_mean":0.6250099325,"nlu_score":0.7143086066}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","ner_arman_f1_mean":0.3839211973,"ner_arman_precision_mean":0.3292326466,"ner_arman_recall_mean":0.5049662296,"nlu_score":0.6241793507}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0031613599,"ner_arman_precision_mean":0.0024235201,"ner_arman_recall_mean":0.0047675805,"nlu_score":0.6297634971}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","ner_arman_f1_mean":0.5030535945,"ner_arman_precision_mean":0.4617288155,"ner_arman_recall_mean":0.5952522845,"nlu_score":0.6552152029}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3097820535,"ner_arman_precision_mean":0.2833333333,"ner_arman_recall_mean":0.3710568137,"nlu_score":0.6758278127}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":null,"ner_arman_precision_mean":null,"ner_arman_recall_mean":null,"nlu_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","ner_arman_f1_mean":0.4764396046,"ner_arman_precision_mean":0.4205999205,"ner_arman_recall_mean":0.5997417561,"nlu_score":0.699116864}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","ner_arman_f1_mean":0.5091463761,"ner_arman_precision_mean":0.4719705999,"ner_arman_recall_mean":0.5898887565,"nlu_score":0.6898261633}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","ner_arman_f1_mean":0.492138652,"ner_arman_precision_mean":0.4553833929,"ner_arman_recall_mean":0.5783671037,"nlu_score":0.6460328733}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","ner_arman_f1_mean":0.4408498401,"ner_arman_precision_mean":0.4206197855,"ner_arman_recall_mean":0.487067938,"nlu_score":0.6714091535}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.0134154417,"ner_arman_precision_mean":0.0131505761,"ner_arman_recall_mean":0.0147993643,"nlu_score":0.3749414991}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","ner_arman_f1_mean":0.028185021,"ner_arman_precision_mean":0.0278440732,"ner_arman_recall_mean":0.0304295943,"nlu_score":0.5661558794}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","ner_arman_f1_mean":0.1404403172,"ner_arman_precision_mean":0.1243629037,"ner_arman_recall_mean":0.1826181963,"nlu_score":0.456845738}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","ner_arman_f1_mean":0.4737820913,"ner_arman_precision_mean":0.4382598331,"ner_arman_recall_mean":0.5517481128,"nlu_score":0.6752949557}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","ner_arman_f1_mean":0.3426542402,"ner_arman_precision_mean":0.3283122387,"ner_arman_recall_mean":0.3950735002,"nlu_score":0.5121418762}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.3619547874}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","ner_arman_f1_mean":0.369949366,"ner_arman_precision_mean":0.3251050003,"ner_arman_recall_mean":0.4785061581,"nlu_score":0.3928685253}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","ner_arman_f1_mean":0.46241695,"ner_arman_precision_mean":0.4338001589,"ner_arman_recall_mean":0.5298768375,"nlu_score":0.6800109206}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3636093611,"ner_arman_precision_mean":0.3377433453,"ner_arman_recall_mean":0.4240365515,"nlu_score":0.6833497104}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4897596643,"ner_arman_precision_mean":0.4627021965,"ner_arman_recall_mean":0.5499602702,"nlu_score":0.7207167537}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.0374396958,"ner_arman_precision_mean":0.0342669845,"ner_arman_recall_mean":0.0448549861,"nlu_score":0.6459120734}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","ner_arman_f1_mean":0.492822101,"ner_arman_precision_mean":0.4530827367,"ner_arman_recall_mean":0.580708035,"nlu_score":0.4824528512}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5838038137,"ner_arman_precision_mean":0.5621374652,"ner_arman_recall_mean":0.6348629321,"nlu_score":0.7050532433}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.6048141039,"ner_arman_precision_mean":0.5945967422,"ner_arman_recall_mean":0.6437822805,"nlu_score":0.6944128198}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5777782078,"ner_arman_precision_mean":0.5722089789,"ner_arman_recall_mean":0.6065156933,"nlu_score":0.6914202844}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.531045981}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4520824626,"ner_arman_precision_mean":0.4047789318,"ner_arman_recall_mean":0.5640246325,"nlu_score":0.6262096694}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","ner_arman_f1_mean":0.1587859697,"ner_arman_precision_mean":0.1553465009,"ner_arman_recall_mean":0.1764799364,"nlu_score":0.5968415875}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","ner_arman_f1_mean":0.1625858448,"ner_arman_precision_mean":0.158174414,"ner_arman_recall_mean":0.1884982122,"nlu_score":0.3916645306}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5492720496,"ner_arman_precision_mean":0.5296185936,"ner_arman_recall_mean":0.5959078268,"nlu_score":0.7146808531}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","ner_arman_f1_mean":0.247080201,"ner_arman_precision_mean":0.2176003178,"ner_arman_recall_mean":0.3168653159,"nlu_score":0.6361186163}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","ner_arman_f1_mean":0.5000495531,"ner_arman_precision_mean":0.4607965832,"ner_arman_recall_mean":0.5927493047,"nlu_score":0.6255818412}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","ner_arman_f1_mean":0.0638846321,"ner_arman_precision_mean":0.0494466201,"ner_arman_recall_mean":0.1084425904,"nlu_score":0.1368924446}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.046805056}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4666381162,"ner_arman_precision_mean":0.4301038651,"ner_arman_recall_mean":0.5461462058,"nlu_score":0.6992555201}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.578306047,"ner_arman_precision_mean":0.5583631307,"ner_arman_recall_mean":0.6250099325,"nlu_score":0.7143086066}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","ner_arman_f1_mean":0.3839211973,"ner_arman_precision_mean":0.3292326466,"ner_arman_recall_mean":0.5049662296,"nlu_score":0.6241793507}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0031613599,"ner_arman_precision_mean":0.0024235201,"ner_arman_recall_mean":0.0047675805,"nlu_score":0.6297634971}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","ner_arman_f1_mean":0.5030535945,"ner_arman_precision_mean":0.4617288155,"ner_arman_recall_mean":0.5952522845,"nlu_score":0.6552152029}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3097820535,"ner_arman_precision_mean":0.2833333333,"ner_arman_recall_mean":0.3710568137,"nlu_score":0.6758278127}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":null,"ner_arman_precision_mean":null,"ner_arman_recall_mean":null,"nlu_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","ner_arman_f1_mean":0.4764396046,"ner_arman_precision_mean":0.4205999205,"ner_arman_recall_mean":0.5997417561,"nlu_score":0.699116864}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","ner_arman_f1_mean":0.5091463761,"ner_arman_precision_mean":0.4719705999,"ner_arman_recall_mean":0.5898887565,"nlu_score":0.6898261633}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","ner_arman_f1_mean":0.492138652,"ner_arman_precision_mean":0.4553833929,"ner_arman_recall_mean":0.5783671037,"nlu_score":0.6460328733}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","ner_arman_f1_mean":0.4408498401,"ner_arman_precision_mean":0.4206197855,"ner_arman_recall_mean":0.487067938,"nlu_score":0.6714091535}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.0134154417,"ner_arman_precision_mean":0.0131505761,"ner_arman_recall_mean":0.0147993643,"nlu_score":0.3749414991}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","ner_arman_f1_mean":0.028185021,"ner_arman_precision_mean":0.0278440732,"ner_arman_recall_mean":0.0304295943,"nlu_score":0.5661558794}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","ner_arman_f1_mean":0.1404403172,"ner_arman_precision_mean":0.1243629037,"ner_arman_recall_mean":0.1826181963,"nlu_score":0.456845738}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","ner_arman_f1_mean":0.4737820913,"ner_arman_precision_mean":0.4382598331,"ner_arman_recall_mean":0.5517481128,"nlu_score":0.6752949557}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","ner_arman_f1_mean":0.3426542402,"ner_arman_precision_mean":0.3283122387,"ner_arman_recall_mean":0.3950735002,"nlu_score":0.5121418762}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.3619547874}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","ner_arman_f1_mean":0.369949366,"ner_arman_precision_mean":0.3251050003,"ner_arman_recall_mean":0.4785061581,"nlu_score":0.3928685253}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","ner_arman_f1_mean":0.46241695,"ner_arman_precision_mean":0.4338001589,"ner_arman_recall_mean":0.5298768375,"nlu_score":0.6800109206}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.3636093611,"ner_arman_precision_mean":0.3377433453,"ner_arman_recall_mean":0.4240365515,"nlu_score":0.6833497104}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4897596643,"ner_arman_precision_mean":0.4627021965,"ner_arman_recall_mean":0.5499602702,"nlu_score":0.7207167537}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.0374396958,"ner_arman_precision_mean":0.0342669845,"ner_arman_recall_mean":0.0448549861,"nlu_score":0.6459120734}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","ner_arman_f1_mean":0.492822101,"ner_arman_precision_mean":0.4530827367,"ner_arman_recall_mean":0.580708035,"nlu_score":0.4824528512}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5838038137,"ner_arman_precision_mean":0.5621374652,"ner_arman_recall_mean":0.6348629321,"nlu_score":0.7050532433}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.6048141039,"ner_arman_precision_mean":0.5945967422,"ner_arman_recall_mean":0.6437822805,"nlu_score":0.6944128198}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5777782078,"ner_arman_precision_mean":0.5722089789,"ner_arman_recall_mean":0.6065156933,"nlu_score":0.6914202844}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.531045981}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4520824626,"ner_arman_precision_mean":0.4047789318,"ner_arman_recall_mean":0.5640246325,"nlu_score":0.6262096694}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","ner_arman_f1_mean":0.1587859697,"ner_arman_precision_mean":0.1553465009,"ner_arman_recall_mean":0.1764799364,"nlu_score":0.5968415875}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","ner_arman_f1_mean":0.1625858448,"ner_arman_precision_mean":0.158174414,"ner_arman_recall_mean":0.1884982122,"nlu_score":0.3916645306}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.5492720496,"ner_arman_precision_mean":0.5296185936,"ner_arman_recall_mean":0.5959078268,"nlu_score":0.7146808531}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","ner_arman_f1_mean":0.247080201,"ner_arman_precision_mean":0.2176003178,"ner_arman_recall_mean":0.3168653159,"nlu_score":0.6361186163}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","ner_arman_f1_mean":0.5000495531,"ner_arman_precision_mean":0.4607965832,"ner_arman_recall_mean":0.5927493047,"nlu_score":0.6255818412}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","ner_arman_f1_mean":0.0638846321,"ner_arman_precision_mean":0.0494466201,"ner_arman_recall_mean":0.1084425904,"nlu_score":0.1368924446}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","ner_arman_f1_mean":0.0,"ner_arman_precision_mean":0.0,"ner_arman_recall_mean":0.0,"nlu_score":0.046805056}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","ner_arman_f1_mean":0.4666381162,"ner_arman_precision_mean":0.4301038651,"ner_arman_recall_mean":0.5461462058,"nlu_score":0.6992555201}
|
leaderboard/boards_data/nli_farstail.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.868286445,"nli_farstail_precision_modified":0.8795611895,"nli_farstail_recall_modified":0.8694171245,"nli_farstail_fscore_modified":0.8680818161,"nli_farstail_acc":0.868286445,"nli_farstail_precision":0.8795611895,"nli_farstail_recall":0.8694171245,"nli_farstail_fscore":0.8680818161,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6361892583,"nli_farstail_precision_modified":0.6743240456,"nli_farstail_recall_modified":0.6374538968,"nli_farstail_fscore_modified":0.621131875,"nli_farstail_acc":0.6370038412,"nli_farstail_precision":0.6751874567,"nli_farstail_recall":0.638270099,"nli_farstail_fscore":0.6219271782,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.6241793507}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6898976982,"nli_farstail_precision_modified":0.752223151,"nli_farstail_recall_modified":0.691698665,"nli_farstail_fscore_modified":0.6834607357,"nli_farstail_acc":0.6898976982,"nli_farstail_precision":0.752223151,"nli_farstail_recall":0.691698665,"nli_farstail_fscore":0.6834607357,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7078005115,"nli_farstail_precision_modified":0.7418983007,"nli_farstail_recall_modified":0.70995102,"nli_farstail_fscore_modified":0.6987179454,"nli_farstail_acc":0.7091607944,"nli_farstail_precision":0.7433241143,"nli_farstail_recall":0.7113154358,"nli_farstail_fscore":0.700060773,"nli_farstail_valid_output_ratio":0.9980818414,"nlu_score":0.6552152029}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8069053708,"nli_farstail_precision_modified":0.8274480721,"nli_farstail_recall_modified":0.8078020735,"nli_farstail_fscore_modified":0.8055860349,"nli_farstail_acc":0.8069053708,"nli_farstail_precision":0.8274480721,"nli_farstail_recall":0.8078020735,"nli_farstail_fscore":0.8055860349,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":null,"nli_farstail_precision_modified":null,"nli_farstail_recall_modified":null,"nli_farstail_fscore_modified":null,"nli_farstail_acc":null,"nli_farstail_precision":null,"nli_farstail_recall":null,"nli_farstail_fscore":null,"nli_farstail_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6534526854,"nli_farstail_precision_modified":0.7279477253,"nli_farstail_recall_modified":0.6559403118,"nli_farstail_fscore_modified":0.6402480245,"nli_farstail_acc":0.6534526854,"nli_farstail_precision":0.7279477253,"nli_farstail_recall":0.6559403118,"nli_farstail_fscore":0.6402480245,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6918158568,"nli_farstail_precision_modified":0.76120773,"nli_farstail_recall_modified":0.696633339,"nli_farstail_fscore_modified":0.6637995215,"nli_farstail_acc":0.6918158568,"nli_farstail_precision":0.76120773,"nli_farstail_recall":0.696633339,"nli_farstail_fscore":0.6637995215,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7007672634,"nli_farstail_precision_modified":0.7596784307,"nli_farstail_recall_modified":0.7039816989,"nli_farstail_fscore_modified":0.6834876952,"nli_farstail_acc":0.7007672634,"nli_farstail_precision":0.7596784307,"nli_farstail_recall":0.7039816989,"nli_farstail_fscore":0.6834876952,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","nli_farstail_acc_modified":0.716112532,"nli_farstail_precision_modified":0.7794942647,"nli_farstail_recall_modified":0.7185458002,"nli_farstail_fscore_modified":0.7094139725,"nli_farstail_acc":0.716112532,"nli_farstail_precision":0.7794942647,"nli_farstail_recall":0.7185458002,"nli_farstail_fscore":0.7094139725,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.0249360614,"nli_farstail_precision_modified":0.0160900081,"nli_farstail_recall_modified":0.0174126172,"nli_farstail_fscore_modified":0.0167242212,"nli_farstail_acc":0.9069767442,"nli_farstail_precision":0.5852272727,"nli_farstail_recall":0.6333333333,"nli_farstail_fscore":0.6082949309,"nli_farstail_valid_output_ratio":0.0274936061,"nlu_score":0.3749414991}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7461636829,"nli_farstail_precision_modified":0.8279044878,"nli_farstail_recall_modified":0.7431719278,"nli_farstail_fscore_modified":0.7484099134,"nli_farstail_acc":0.7461636829,"nli_farstail_precision":0.8279044878,"nli_farstail_recall":0.7431719278,"nli_farstail_fscore":0.7484099134,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.5661558794}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","nli_farstail_acc_modified":0.618286445,"nli_farstail_precision_modified":0.6499905475,"nli_farstail_recall_modified":0.6180562888,"nli_farstail_fscore_modified":0.612547215,"nli_farstail_acc":0.6254851229,"nli_farstail_precision":0.6575583547,"nli_farstail_recall":0.625252287,"nli_farstail_fscore":0.6196790713,"nli_farstail_valid_output_ratio":0.9884910486,"nlu_score":0.456845738}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7405509289,"nli_farstail_precision_modified":0.79378989,"nli_farstail_recall_modified":0.7441180803,"nli_farstail_fscore_modified":0.7266455427,"nli_farstail_acc":0.7405509289,"nli_farstail_precision":0.79378989,"nli_farstail_recall":0.7441180803,"nli_farstail_fscore":0.7266455427,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","nli_farstail_acc_modified":0.1726342711,"nli_farstail_precision_modified":0.1856398147,"nli_farstail_recall_modified":0.156398243,"nli_farstail_fscore_modified":0.1549950666,"nli_farstail_acc":0.7277628032,"nli_farstail_precision":0.7825894076,"nli_farstail_recall":0.6593176606,"nli_farstail_fscore":0.6534023831,"nli_farstail_valid_output_ratio":0.2372122762,"nlu_score":0.5121418762}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.3433503836,"nli_farstail_precision_modified":0.5618320225,"nli_farstail_recall_modified":0.3440157631,"nli_farstail_fscore_modified":0.279029917,"nli_farstail_acc":0.3435700576,"nli_farstail_precision":0.56219148,"nli_farstail_recall":0.3442358627,"nli_farstail_fscore":0.279208439,"nli_farstail_valid_output_ratio":0.9993606138,"nlu_score":0.3619547874}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","nli_farstail_acc_modified":0.378516624,"nli_farstail_precision_modified":0.4433198503,"nli_farstail_recall_modified":0.3422920715,"nli_farstail_fscore_modified":0.347492956,"nli_farstail_acc":0.6932084309,"nli_farstail_precision":0.8118878757,"nli_farstail_recall":0.626867447,"nli_farstail_fscore":0.636392252,"nli_farstail_valid_output_ratio":0.5460358056,"nlu_score":0.3928685253}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","nli_farstail_acc_modified":0.726342711,"nli_farstail_precision_modified":0.8062451443,"nli_farstail_recall_modified":0.7314466615,"nli_farstail_fscore_modified":0.6980605986,"nli_farstail_acc":0.726342711,"nli_farstail_precision":0.8062451443,"nli_farstail_recall":0.7314466615,"nli_farstail_fscore":0.6980605986,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7410485934,"nli_farstail_precision_modified":0.7633275849,"nli_farstail_recall_modified":0.7423464162,"nli_farstail_fscore_modified":0.7375659033,"nli_farstail_acc":0.7410485934,"nli_farstail_precision":0.7633275849,"nli_farstail_recall":0.7423464162,"nli_farstail_fscore":0.7375659033,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8689258312,"nli_farstail_precision_modified":0.8809296764,"nli_farstail_recall_modified":0.8678121788,"nli_farstail_fscore_modified":0.8682707156,"nli_farstail_acc":0.8689258312,"nli_farstail_precision":0.8809296764,"nli_farstail_recall":0.8678121788,"nli_farstail_fscore":0.8682707156,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7647058824,"nli_farstail_precision_modified":0.7814499507,"nli_farstail_recall_modified":0.7670439826,"nli_farstail_fscore_modified":0.7573199649,"nli_farstail_acc":0.7656850192,"nli_farstail_precision":0.7824505269,"nli_farstail_recall":0.7680261132,"nli_farstail_fscore":0.7582896447,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.6459120734}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.5051150895,"nli_farstail_precision_modified":0.5526701994,"nli_farstail_recall_modified":0.4383899815,"nli_farstail_fscore_modified":0.4569544839,"nli_farstail_acc":0.8085977482,"nli_farstail_precision":0.8847248637,"nli_farstail_recall":0.7017829387,"nli_farstail_fscore":0.7315013438,"nli_farstail_valid_output_ratio":0.6246803069,"nlu_score":0.4824528512}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.6943734015,"nli_farstail_precision_modified":0.7566862174,"nli_farstail_recall_modified":0.698049667,"nli_farstail_fscore_modified":0.679445114,"nli_farstail_acc":0.6943734015,"nli_farstail_precision":0.7566862174,"nli_farstail_recall":0.698049667,"nli_farstail_fscore":0.679445114,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7333759591,"nli_farstail_precision_modified":0.7691251939,"nli_farstail_recall_modified":0.7368411575,"nli_farstail_fscore_modified":0.7229770101,"nli_farstail_acc":0.7333759591,"nli_farstail_precision":0.7691251939,"nli_farstail_recall":0.7368411575,"nli_farstail_fscore":0.7229770101,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.6617647059,"nli_farstail_precision_modified":0.7729519221,"nli_farstail_recall_modified":0.6672320962,"nli_farstail_fscore_modified":0.6191223906,"nli_farstail_acc":0.6617647059,"nli_farstail_precision":0.7729519221,"nli_farstail_recall":0.6672320962,"nli_farstail_fscore":0.6191223906,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.537084399,"nli_farstail_precision_modified":0.7429511025,"nli_farstail_recall_modified":0.5428343437,"nli_farstail_fscore_modified":0.4522202373,"nli_farstail_acc":0.537084399,"nli_farstail_precision":0.7429511025,"nli_farstail_recall":0.5428343437,"nli_farstail_fscore":0.4522202373,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.531045981}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7001278772,"nli_farstail_precision_modified":0.7089877668,"nli_farstail_recall_modified":0.701635311,"nli_farstail_fscore_modified":0.6963810855,"nli_farstail_acc":0.7001278772,"nli_farstail_precision":0.7089877668,"nli_farstail_recall":0.701635311,"nli_farstail_fscore":0.6963810855,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6592071611,"nli_farstail_precision_modified":0.7292371837,"nli_farstail_recall_modified":0.6555663858,"nli_farstail_fscore_modified":0.6172863539,"nli_farstail_acc":0.6621708414,"nli_farstail_precision":0.7325157067,"nli_farstail_recall":0.6585136977,"nli_farstail_fscore":0.6200615655,"nli_farstail_valid_output_ratio":0.9955242967,"nlu_score":0.5968415875}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","nli_farstail_acc_modified":0.5421994885,"nli_farstail_precision_modified":0.5647711826,"nli_farstail_recall_modified":0.5444660793,"nli_farstail_fscore_modified":0.5273172992,"nli_farstail_acc":0.5449871465,"nli_farstail_precision":0.5676748904,"nli_farstail_recall":0.5472653908,"nli_farstail_fscore":0.5300284421,"nli_farstail_valid_output_ratio":0.9948849105,"nlu_score":0.3916645306}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8254475703,"nli_farstail_precision_modified":0.8384109819,"nli_farstail_recall_modified":0.8264814456,"nli_farstail_fscore_modified":0.8238714462,"nli_farstail_acc":0.8254475703,"nli_farstail_precision":0.8384109819,"nli_farstail_recall":0.8264814456,"nli_farstail_fscore":0.8238714462,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6854219949,"nli_farstail_precision_modified":0.7452254514,"nli_farstail_recall_modified":0.6884495258,"nli_farstail_fscore_modified":0.6690112082,"nli_farstail_acc":0.6858605246,"nli_farstail_precision":0.7457022432,"nli_farstail_recall":0.6888899926,"nli_farstail_fscore":0.6694392384,"nli_farstail_valid_output_ratio":0.9993606138,"nlu_score":0.6361186163}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6086956522,"nli_farstail_precision_modified":0.6940003558,"nli_farstail_recall_modified":0.6092669096,"nli_farstail_fscore_modified":0.5908473619,"nli_farstail_acc":0.6110397946,"nli_farstail_precision":0.6966730144,"nli_farstail_recall":0.611613252,"nli_farstail_fscore":0.593122769,"nli_farstail_valid_output_ratio":0.9961636829,"nlu_score":0.6255818412}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.1368924446}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.046805056}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7677543186,"nli_farstail_precision_modified":0.8124976099,"nli_farstail_recall_modified":0.77106749,"nli_farstail_fscore_modified":0.7600055287,"nli_farstail_acc":0.7677543186,"nli_farstail_precision":0.8124976099,"nli_farstail_recall":0.77106749,"nli_farstail_fscore":0.7600055287,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.868286445,"nli_farstail_precision_modified":0.8795611895,"nli_farstail_recall_modified":0.8694171245,"nli_farstail_fscore_modified":0.8680818161,"nli_farstail_acc":0.868286445,"nli_farstail_precision":0.8795611895,"nli_farstail_recall":0.8694171245,"nli_farstail_fscore":0.8680818161,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6361892583,"nli_farstail_precision_modified":0.6743240456,"nli_farstail_recall_modified":0.6374538968,"nli_farstail_fscore_modified":0.621131875,"nli_farstail_acc":0.6370038412,"nli_farstail_precision":0.6751874567,"nli_farstail_recall":0.638270099,"nli_farstail_fscore":0.6219271782,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.6241793507}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6898976982,"nli_farstail_precision_modified":0.752223151,"nli_farstail_recall_modified":0.691698665,"nli_farstail_fscore_modified":0.6834607357,"nli_farstail_acc":0.6898976982,"nli_farstail_precision":0.752223151,"nli_farstail_recall":0.691698665,"nli_farstail_fscore":0.6834607357,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7078005115,"nli_farstail_precision_modified":0.7418983007,"nli_farstail_recall_modified":0.70995102,"nli_farstail_fscore_modified":0.6987179454,"nli_farstail_acc":0.7091607944,"nli_farstail_precision":0.7433241143,"nli_farstail_recall":0.7113154358,"nli_farstail_fscore":0.700060773,"nli_farstail_valid_output_ratio":0.9980818414,"nlu_score":0.6552152029}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8069053708,"nli_farstail_precision_modified":0.8274480721,"nli_farstail_recall_modified":0.8078020735,"nli_farstail_fscore_modified":0.8055860349,"nli_farstail_acc":0.8069053708,"nli_farstail_precision":0.8274480721,"nli_farstail_recall":0.8078020735,"nli_farstail_fscore":0.8055860349,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":null,"nli_farstail_precision_modified":null,"nli_farstail_recall_modified":null,"nli_farstail_fscore_modified":null,"nli_farstail_acc":null,"nli_farstail_precision":null,"nli_farstail_recall":null,"nli_farstail_fscore":null,"nli_farstail_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6534526854,"nli_farstail_precision_modified":0.7279477253,"nli_farstail_recall_modified":0.6559403118,"nli_farstail_fscore_modified":0.6402480245,"nli_farstail_acc":0.6534526854,"nli_farstail_precision":0.7279477253,"nli_farstail_recall":0.6559403118,"nli_farstail_fscore":0.6402480245,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6918158568,"nli_farstail_precision_modified":0.76120773,"nli_farstail_recall_modified":0.696633339,"nli_farstail_fscore_modified":0.6637995215,"nli_farstail_acc":0.6918158568,"nli_farstail_precision":0.76120773,"nli_farstail_recall":0.696633339,"nli_farstail_fscore":0.6637995215,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7007672634,"nli_farstail_precision_modified":0.7596784307,"nli_farstail_recall_modified":0.7039816989,"nli_farstail_fscore_modified":0.6834876952,"nli_farstail_acc":0.7007672634,"nli_farstail_precision":0.7596784307,"nli_farstail_recall":0.7039816989,"nli_farstail_fscore":0.6834876952,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","nli_farstail_acc_modified":0.716112532,"nli_farstail_precision_modified":0.7794942647,"nli_farstail_recall_modified":0.7185458002,"nli_farstail_fscore_modified":0.7094139725,"nli_farstail_acc":0.716112532,"nli_farstail_precision":0.7794942647,"nli_farstail_recall":0.7185458002,"nli_farstail_fscore":0.7094139725,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.0249360614,"nli_farstail_precision_modified":0.0160900081,"nli_farstail_recall_modified":0.0174126172,"nli_farstail_fscore_modified":0.0167242212,"nli_farstail_acc":0.9069767442,"nli_farstail_precision":0.5852272727,"nli_farstail_recall":0.6333333333,"nli_farstail_fscore":0.6082949309,"nli_farstail_valid_output_ratio":0.0274936061,"nlu_score":0.3749414991}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7461636829,"nli_farstail_precision_modified":0.8279044878,"nli_farstail_recall_modified":0.7431719278,"nli_farstail_fscore_modified":0.7484099134,"nli_farstail_acc":0.7461636829,"nli_farstail_precision":0.8279044878,"nli_farstail_recall":0.7431719278,"nli_farstail_fscore":0.7484099134,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.5661558794}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","nli_farstail_acc_modified":0.618286445,"nli_farstail_precision_modified":0.6499905475,"nli_farstail_recall_modified":0.6180562888,"nli_farstail_fscore_modified":0.612547215,"nli_farstail_acc":0.6254851229,"nli_farstail_precision":0.6575583547,"nli_farstail_recall":0.625252287,"nli_farstail_fscore":0.6196790713,"nli_farstail_valid_output_ratio":0.9884910486,"nlu_score":0.456845738}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.7405509289,"nli_farstail_precision_modified":0.79378989,"nli_farstail_recall_modified":0.7441180803,"nli_farstail_fscore_modified":0.7266455427,"nli_farstail_acc":0.7405509289,"nli_farstail_precision":0.79378989,"nli_farstail_recall":0.7441180803,"nli_farstail_fscore":0.7266455427,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","nli_farstail_acc_modified":0.1726342711,"nli_farstail_precision_modified":0.1856398147,"nli_farstail_recall_modified":0.156398243,"nli_farstail_fscore_modified":0.1549950666,"nli_farstail_acc":0.7277628032,"nli_farstail_precision":0.7825894076,"nli_farstail_recall":0.6593176606,"nli_farstail_fscore":0.6534023831,"nli_farstail_valid_output_ratio":0.2372122762,"nlu_score":0.5121418762}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.3433503836,"nli_farstail_precision_modified":0.5618320225,"nli_farstail_recall_modified":0.3440157631,"nli_farstail_fscore_modified":0.279029917,"nli_farstail_acc":0.3435700576,"nli_farstail_precision":0.56219148,"nli_farstail_recall":0.3442358627,"nli_farstail_fscore":0.279208439,"nli_farstail_valid_output_ratio":0.9993606138,"nlu_score":0.3619547874}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","nli_farstail_acc_modified":0.378516624,"nli_farstail_precision_modified":0.4433198503,"nli_farstail_recall_modified":0.3422920715,"nli_farstail_fscore_modified":0.347492956,"nli_farstail_acc":0.6932084309,"nli_farstail_precision":0.8118878757,"nli_farstail_recall":0.626867447,"nli_farstail_fscore":0.636392252,"nli_farstail_valid_output_ratio":0.5460358056,"nlu_score":0.3928685253}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","nli_farstail_acc_modified":0.726342711,"nli_farstail_precision_modified":0.8062451443,"nli_farstail_recall_modified":0.7314466615,"nli_farstail_fscore_modified":0.6980605986,"nli_farstail_acc":0.726342711,"nli_farstail_precision":0.8062451443,"nli_farstail_recall":0.7314466615,"nli_farstail_fscore":0.6980605986,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7410485934,"nli_farstail_precision_modified":0.7633275849,"nli_farstail_recall_modified":0.7423464162,"nli_farstail_fscore_modified":0.7375659033,"nli_farstail_acc":0.7410485934,"nli_farstail_precision":0.7633275849,"nli_farstail_recall":0.7423464162,"nli_farstail_fscore":0.7375659033,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8689258312,"nli_farstail_precision_modified":0.8809296764,"nli_farstail_recall_modified":0.8678121788,"nli_farstail_fscore_modified":0.8682707156,"nli_farstail_acc":0.8689258312,"nli_farstail_precision":0.8809296764,"nli_farstail_recall":0.8678121788,"nli_farstail_fscore":0.8682707156,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7647058824,"nli_farstail_precision_modified":0.7814499507,"nli_farstail_recall_modified":0.7670439826,"nli_farstail_fscore_modified":0.7573199649,"nli_farstail_acc":0.7656850192,"nli_farstail_precision":0.7824505269,"nli_farstail_recall":0.7680261132,"nli_farstail_fscore":0.7582896447,"nli_farstail_valid_output_ratio":0.9987212276,"nlu_score":0.6459120734}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.5051150895,"nli_farstail_precision_modified":0.5526701994,"nli_farstail_recall_modified":0.4383899815,"nli_farstail_fscore_modified":0.4569544839,"nli_farstail_acc":0.8085977482,"nli_farstail_precision":0.8847248637,"nli_farstail_recall":0.7017829387,"nli_farstail_fscore":0.7315013438,"nli_farstail_valid_output_ratio":0.6246803069,"nlu_score":0.4824528512}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.6943734015,"nli_farstail_precision_modified":0.7566862174,"nli_farstail_recall_modified":0.698049667,"nli_farstail_fscore_modified":0.679445114,"nli_farstail_acc":0.6943734015,"nli_farstail_precision":0.7566862174,"nli_farstail_recall":0.698049667,"nli_farstail_fscore":0.679445114,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7333759591,"nli_farstail_precision_modified":0.7691251939,"nli_farstail_recall_modified":0.7368411575,"nli_farstail_fscore_modified":0.7229770101,"nli_farstail_acc":0.7333759591,"nli_farstail_precision":0.7691251939,"nli_farstail_recall":0.7368411575,"nli_farstail_fscore":0.7229770101,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.6617647059,"nli_farstail_precision_modified":0.7729519221,"nli_farstail_recall_modified":0.6672320962,"nli_farstail_fscore_modified":0.6191223906,"nli_farstail_acc":0.6617647059,"nli_farstail_precision":0.7729519221,"nli_farstail_recall":0.6672320962,"nli_farstail_fscore":0.6191223906,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.537084399,"nli_farstail_precision_modified":0.7429511025,"nli_farstail_recall_modified":0.5428343437,"nli_farstail_fscore_modified":0.4522202373,"nli_farstail_acc":0.537084399,"nli_farstail_precision":0.7429511025,"nli_farstail_recall":0.5428343437,"nli_farstail_fscore":0.4522202373,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.531045981}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7001278772,"nli_farstail_precision_modified":0.7089877668,"nli_farstail_recall_modified":0.701635311,"nli_farstail_fscore_modified":0.6963810855,"nli_farstail_acc":0.7001278772,"nli_farstail_precision":0.7089877668,"nli_farstail_recall":0.701635311,"nli_farstail_fscore":0.6963810855,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6592071611,"nli_farstail_precision_modified":0.7292371837,"nli_farstail_recall_modified":0.6555663858,"nli_farstail_fscore_modified":0.6172863539,"nli_farstail_acc":0.6621708414,"nli_farstail_precision":0.7325157067,"nli_farstail_recall":0.6585136977,"nli_farstail_fscore":0.6200615655,"nli_farstail_valid_output_ratio":0.9955242967,"nlu_score":0.5968415875}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","nli_farstail_acc_modified":0.5421994885,"nli_farstail_precision_modified":0.5647711826,"nli_farstail_recall_modified":0.5444660793,"nli_farstail_fscore_modified":0.5273172992,"nli_farstail_acc":0.5449871465,"nli_farstail_precision":0.5676748904,"nli_farstail_recall":0.5472653908,"nli_farstail_fscore":0.5300284421,"nli_farstail_valid_output_ratio":0.9948849105,"nlu_score":0.3916645306}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.8254475703,"nli_farstail_precision_modified":0.8384109819,"nli_farstail_recall_modified":0.8264814456,"nli_farstail_fscore_modified":0.8238714462,"nli_farstail_acc":0.8254475703,"nli_farstail_precision":0.8384109819,"nli_farstail_recall":0.8264814456,"nli_farstail_fscore":0.8238714462,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6854219949,"nli_farstail_precision_modified":0.7452254514,"nli_farstail_recall_modified":0.6884495258,"nli_farstail_fscore_modified":0.6690112082,"nli_farstail_acc":0.6858605246,"nli_farstail_precision":0.7457022432,"nli_farstail_recall":0.6888899926,"nli_farstail_fscore":0.6694392384,"nli_farstail_valid_output_ratio":0.9993606138,"nlu_score":0.6361186163}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","nli_farstail_acc_modified":0.6086956522,"nli_farstail_precision_modified":0.6940003558,"nli_farstail_recall_modified":0.6092669096,"nli_farstail_fscore_modified":0.5908473619,"nli_farstail_acc":0.6110397946,"nli_farstail_precision":0.6966730144,"nli_farstail_recall":0.611613252,"nli_farstail_fscore":0.593122769,"nli_farstail_valid_output_ratio":0.9961636829,"nlu_score":0.6255818412}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.1368924446}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","nli_farstail_acc_modified":0.0,"nli_farstail_precision_modified":0.0,"nli_farstail_recall_modified":0.0,"nli_farstail_fscore_modified":0.0,"nli_farstail_acc":0.0,"nli_farstail_precision":0.0,"nli_farstail_recall":0.0,"nli_farstail_fscore":0.0,"nli_farstail_valid_output_ratio":0.0,"nlu_score":0.046805056}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","nli_farstail_acc_modified":0.7677543186,"nli_farstail_precision_modified":0.8124976099,"nli_farstail_recall_modified":0.77106749,"nli_farstail_fscore_modified":0.7600055287,"nli_farstail_acc":0.7677543186,"nli_farstail_precision":0.8124976099,"nli_farstail_recall":0.77106749,"nli_farstail_fscore":0.7600055287,"nli_farstail_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
leaderboard/boards_data/paraphrase-detection_FarsiParaphraseDetection.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_acc":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6241793507}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":null,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":null,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":null,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":null,"paraphrase-detection_FarsiParaphraseDetection_acc":null,"paraphrase-detection_FarsiParaphraseDetection_precision":null,"paraphrase-detection_FarsiParaphraseDetection_recall":null,"paraphrase-detection_FarsiParaphraseDetection_fscore":null,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8812260536,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9126075915,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8615704957,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8725766572,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8812260536,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9126075915,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8615704957,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8725766572,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9029374202,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9262452107,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8869436301,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.897066353,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9029374202,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9262452107,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8869436301,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.897066353,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8952745849,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9226415094,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8776119403,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8883498185,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8952745849,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9226415094,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8776119403,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8883498185,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8960759725,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8314059789,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8433777185,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8634020619,"paraphrase-detection_FarsiParaphraseDetection_precision":0.904159132,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8389057751,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8509855072,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255,"nlu_score":0.3749414991}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_acc":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.5661558794}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.3627075351,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.3434772816,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.3695590113,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.353505411,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9220779221,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8731906218,"paraphrase-detection_FarsiParaphraseDetection_recall":0.9394957983,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8986842105,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.3933588761,"nlu_score":0.456845738}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.846743295,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8943661972,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8208955224,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8318539728,"paraphrase-detection_FarsiParaphraseDetection_acc":0.846743295,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8943661972,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8208955224,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8318539728,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5823754789,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.6860358387,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.5643977685,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5413041169,"paraphrase-detection_FarsiParaphraseDetection_acc":0.6940639269,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8176043557,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6726384365,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.6451158653,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.8390804598,"nlu_score":0.5121418762}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5095785441,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.6043173519,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.5590051972,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.4734880854,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5095785441,"paraphrase-detection_FarsiParaphraseDetection_precision":0.6043173519,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5590051972,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.4734880854,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3619547874}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.908045977,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9307692308,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8925373134,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9026092485,"paraphrase-detection_FarsiParaphraseDetection_acc":0.908045977,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9307692308,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8925373134,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9026092485,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3928685253}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_acc":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8326947637,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8868739206,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8044776119,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.81470067,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8326947637,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8868739206,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8044776119,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.81470067,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8659003831,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9050632911,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8432835821,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8546320936,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8659003831,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9050632911,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8432835821,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8546320936,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8876117497,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9179104478,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8686567164,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8796831783,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8876117497,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9179104478,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8686567164,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8796831783,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.4824528512}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8888888889,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9186915888,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8701492537,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8811336459,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8888888889,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9186915888,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8701492537,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8811336459,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8939974457,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9218455744,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.876119403,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8869113391,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8939974457,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9218455744,"paraphrase-detection_FarsiParaphraseDetection_recall":0.876119403,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8869113391,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9169859515,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9233165065,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.9095332885,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9144938271,"paraphrase-detection_FarsiParaphraseDetection_acc":0.925257732,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9316453926,"paraphrase-detection_FarsiParaphraseDetection_recall":0.9177378414,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9227431271,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255,"nlu_score":0.531045981}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8045977011,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8727121464,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.771641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.779104351,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8045977011,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8727121464,"paraphrase-detection_FarsiParaphraseDetection_recall":0.771641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.779104351,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_acc":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8722860792,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9057423702,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8514992004,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8624887603,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8722860792,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057423702,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8514992004,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8624887603,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3916645306}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8799489144,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9132841328,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8597014925,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8709280303,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8799489144,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9132841328,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8597014925,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8709280303,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6361186163}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7777777778,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8343364681,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7575227312,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7618590799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8152610442,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8745454545,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7940298507,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7985751802,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9540229885,"nlu_score":0.6255818412}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5210727969,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.2605363985,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.4559386973,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.3315917799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5714285714,"paraphrase-detection_FarsiParaphraseDetection_precision":0.2857142857,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.3636363636,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9118773946,"nlu_score":0.1368924446}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.091954023,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.0459770115,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.0791826309,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.0581749941,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5806451613,"paraphrase-detection_FarsiParaphraseDetection_precision":0.2903225806,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.3673469388,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.1583652618,"nlu_score":0.046805056}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8992869875,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8313432836,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8425676986,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8992869875,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8313432836,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8425676986,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_acc":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8697318008,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057190558,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8481376599,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8593214965,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6241793507}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8901660281,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9194756554,"paraphrase-detection_FarsiParaphraseDetection_recall":0.871641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8825816756,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8595146871,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9014336918,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8358208955,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8471137461,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":null,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":null,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":null,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":null,"paraphrase-detection_FarsiParaphraseDetection_acc":null,"paraphrase-detection_FarsiParaphraseDetection_precision":null,"paraphrase-detection_FarsiParaphraseDetection_recall":null,"paraphrase-detection_FarsiParaphraseDetection_fscore":null,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8812260536,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9126075915,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8615704957,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8725766572,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8812260536,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9126075915,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8615704957,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8725766572,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9029374202,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9262452107,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8869436301,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.897066353,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9029374202,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9262452107,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8869436301,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.897066353,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8390804598,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8902439024,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8119402985,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8225473409,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8952745849,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9226415094,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8776119403,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8883498185,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8952745849,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9226415094,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8776119403,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8883498185,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8960759725,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8314059789,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8433777185,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8634020619,"paraphrase-detection_FarsiParaphraseDetection_precision":0.904159132,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8389057751,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8509855072,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255,"nlu_score":0.3749414991}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_acc":0.680715198,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8209169054,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6268656716,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.5933059088,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.5661558794}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.3627075351,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.3434772816,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.3695590113,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.353505411,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9220779221,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8731906218,"paraphrase-detection_FarsiParaphraseDetection_recall":0.9394957983,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8986842105,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.3933588761,"nlu_score":0.456845738}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.846743295,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8943661972,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8208955224,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8318539728,"paraphrase-detection_FarsiParaphraseDetection_acc":0.846743295,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8943661972,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8208955224,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8318539728,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5823754789,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.6860358387,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.5643977685,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.5413041169,"paraphrase-detection_FarsiParaphraseDetection_acc":0.6940639269,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8176043557,"paraphrase-detection_FarsiParaphraseDetection_recall":0.6726384365,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.6451158653,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.8390804598,"nlu_score":0.5121418762}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5095785441,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.6043173519,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.5590051972,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.4734880854,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5095785441,"paraphrase-detection_FarsiParaphraseDetection_precision":0.6043173519,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5590051972,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.4734880854,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3619547874}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.908045977,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9307692308,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8925373134,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9026092485,"paraphrase-detection_FarsiParaphraseDetection_acc":0.908045977,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9307692308,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8925373134,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9026092485,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3928685253}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_acc":0.877394636,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9117647059,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8567164179,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8679892098,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8326947637,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8868739206,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8044776119,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.81470067,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8326947637,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8868739206,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8044776119,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.81470067,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8659003831,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9050632911,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8432835821,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8546320936,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8659003831,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9050632911,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8432835821,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8546320936,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8607918263,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9021543986,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8373134328,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8486231942,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8876117497,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9179104478,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8686567164,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8796831783,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8876117497,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9179104478,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8686567164,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8796831783,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.4824528512}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8888888889,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9186915888,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8701492537,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8811336459,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8888888889,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9186915888,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8701492537,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8811336459,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8212005109,"paraphrase-detection_FarsiParaphraseDetection_precision":0.880952381,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7910447761,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8003569607,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8939974457,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9218455744,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.876119403,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8869113391,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8939974457,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9218455744,"paraphrase-detection_FarsiParaphraseDetection_recall":0.876119403,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8869113391,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9169859515,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9233165065,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.9095332885,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9144938271,"paraphrase-detection_FarsiParaphraseDetection_acc":0.925257732,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9316453926,"paraphrase-detection_FarsiParaphraseDetection_recall":0.9177378414,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9227431271,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9910600255,"nlu_score":0.531045981}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8045977011,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8727121464,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.771641791,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.779104351,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8045977011,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8727121464,"paraphrase-detection_FarsiParaphraseDetection_recall":0.771641791,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.779104351,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_acc":0.7982120051,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8696369637,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7641791045,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7707505633,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8722860792,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9057423702,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8514992004,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8624887603,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8722860792,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9057423702,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8514992004,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8624887603,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.3916645306}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8799489144,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9132841328,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8597014925,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8709280303,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8799489144,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9132841328,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8597014925,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8709280303,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_acc":0.9131545338,"paraphrase-detection_FarsiParaphraseDetection_precision":0.9329661667,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8988839286,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.9083655739,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6361186163}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.7777777778,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8343364681,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.7575227312,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.7618590799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8152610442,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8745454545,"paraphrase-detection_FarsiParaphraseDetection_recall":0.7940298507,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.7985751802,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9540229885,"nlu_score":0.6255818412}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.5210727969,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.2605363985,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.4559386973,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.3315917799,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5714285714,"paraphrase-detection_FarsiParaphraseDetection_precision":0.2857142857,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.3636363636,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.9118773946,"nlu_score":0.1368924446}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.091954023,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.0459770115,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.0791826309,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.0581749941,"paraphrase-detection_FarsiParaphraseDetection_acc":0.5806451613,"paraphrase-detection_FarsiParaphraseDetection_precision":0.2903225806,"paraphrase-detection_FarsiParaphraseDetection_recall":0.5,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.3673469388,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":0.1583652618,"nlu_score":0.046805056}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_FarsiParaphraseDetection_acc_modified":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision_modified":0.8992869875,"paraphrase-detection_FarsiParaphraseDetection_recall_modified":0.8313432836,"paraphrase-detection_FarsiParaphraseDetection_fscore_modified":0.8425676986,"paraphrase-detection_FarsiParaphraseDetection_acc":0.8556832695,"paraphrase-detection_FarsiParaphraseDetection_precision":0.8992869875,"paraphrase-detection_FarsiParaphraseDetection_recall":0.8313432836,"paraphrase-detection_FarsiParaphraseDetection_fscore":0.8425676986,"paraphrase-detection_FarsiParaphraseDetection_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
leaderboard/boards_data/paraphrase-detection_parsinlu.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.844,"paraphrase-detection_parsinlu_precision_modified":0.8671683358,"paraphrase-detection_parsinlu_recall_modified":0.8248878009,"paraphrase-detection_parsinlu_fscore_modified":0.8334216056,"paraphrase-detection_parsinlu_acc":0.844,"paraphrase-detection_parsinlu_precision":0.8671683358,"paraphrase-detection_parsinlu_recall":0.8248878009,"paraphrase-detection_parsinlu_fscore":0.8334216056,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8416530278,"paraphrase-detection_parsinlu_recall_modified":0.8270501836,"paraphrase-detection_parsinlu_fscore_modified":0.8316645261,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8416530278,"paraphrase-detection_parsinlu_recall":0.8270501836,"paraphrase-detection_parsinlu_fscore":0.8316645261,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6241793507}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.872,"paraphrase-detection_parsinlu_precision_modified":0.872593554,"paraphrase-detection_parsinlu_recall_modified":0.8654426765,"paraphrase-detection_parsinlu_fscore_modified":0.8682824025,"paraphrase-detection_parsinlu_acc":0.872,"paraphrase-detection_parsinlu_precision":0.872593554,"paraphrase-detection_parsinlu_recall":0.8654426765,"paraphrase-detection_parsinlu_fscore":0.8682824025,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.866,"paraphrase-detection_parsinlu_precision_modified":0.8771198269,"paraphrase-detection_parsinlu_recall_modified":0.852753978,"paraphrase-detection_parsinlu_fscore_modified":0.8595688134,"paraphrase-detection_parsinlu_acc":0.866,"paraphrase-detection_parsinlu_precision":0.8771198269,"paraphrase-detection_parsinlu_recall":0.852753978,"paraphrase-detection_parsinlu_fscore":0.8595688134,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.861267166,"paraphrase-detection_parsinlu_recall_modified":0.802243982,"paraphrase-detection_parsinlu_fscore_modified":0.8109673691,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.861267166,"paraphrase-detection_parsinlu_recall":0.802243982,"paraphrase-detection_parsinlu_fscore":0.8109673691,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":null,"paraphrase-detection_parsinlu_precision_modified":null,"paraphrase-detection_parsinlu_recall_modified":null,"paraphrase-detection_parsinlu_fscore_modified":null,"paraphrase-detection_parsinlu_acc":null,"paraphrase-detection_parsinlu_precision":null,"paraphrase-detection_parsinlu_recall":null,"paraphrase-detection_parsinlu_fscore":null,"paraphrase-detection_parsinlu_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.878,"paraphrase-detection_parsinlu_precision_modified":0.8823390152,"paraphrase-detection_parsinlu_recall_modified":0.8689922481,"paraphrase-detection_parsinlu_fscore_modified":0.8736280355,"paraphrase-detection_parsinlu_acc":0.878,"paraphrase-detection_parsinlu_precision":0.8823390152,"paraphrase-detection_parsinlu_recall":0.8689922481,"paraphrase-detection_parsinlu_fscore":0.8736280355,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.876,"paraphrase-detection_parsinlu_precision_modified":0.8806824921,"paraphrase-detection_parsinlu_recall_modified":0.8666666667,"paraphrase-detection_parsinlu_fscore_modified":0.871456768,"paraphrase-detection_parsinlu_acc":0.876,"paraphrase-detection_parsinlu_precision":0.8806824921,"paraphrase-detection_parsinlu_recall":0.8666666667,"paraphrase-detection_parsinlu_fscore":0.871456768,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.854,"paraphrase-detection_parsinlu_precision_modified":0.8742015099,"paraphrase-detection_parsinlu_recall_modified":0.8365157079,"paraphrase-detection_parsinlu_fscore_modified":0.8449177639,"paraphrase-detection_parsinlu_acc":0.854,"paraphrase-detection_parsinlu_precision":0.8742015099,"paraphrase-detection_parsinlu_recall":0.8365157079,"paraphrase-detection_parsinlu_fscore":0.8449177639,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.876,"paraphrase-detection_parsinlu_precision_modified":0.8735210118,"paraphrase-detection_parsinlu_recall_modified":0.8735210118,"paraphrase-detection_parsinlu_fscore_modified":0.8735210118,"paraphrase-detection_parsinlu_acc":0.876,"paraphrase-detection_parsinlu_precision":0.8735210118,"paraphrase-detection_parsinlu_recall":0.8735210118,"paraphrase-detection_parsinlu_fscore":0.8735210118,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.744,"paraphrase-detection_parsinlu_precision_modified":0.7517357255,"paraphrase-detection_parsinlu_recall_modified":0.7357334824,"paraphrase-detection_parsinlu_fscore_modified":0.7395,"paraphrase-detection_parsinlu_acc":0.8416289593,"paraphrase-detection_parsinlu_precision":0.85037978,"paraphrase-detection_parsinlu_recall":0.832277695,"paraphrase-detection_parsinlu_fscore":0.8365384615,"paraphrase-detection_parsinlu_valid_output_ratio":0.884,"nlu_score":0.3749414991}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.66,"paraphrase-detection_parsinlu_precision_modified":0.7933390651,"paraphrase-detection_parsinlu_recall_modified":0.6057935537,"paraphrase-detection_parsinlu_fscore_modified":0.5625411726,"paraphrase-detection_parsinlu_acc":0.66,"paraphrase-detection_parsinlu_precision":0.7933390651,"paraphrase-detection_parsinlu_recall":0.6057935537,"paraphrase-detection_parsinlu_fscore":0.5625411726,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.5661558794}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.274,"paraphrase-detection_parsinlu_precision_modified":0.2627667984,"paraphrase-detection_parsinlu_recall_modified":0.2313142857,"paraphrase-detection_parsinlu_fscore_modified":0.2414462011,"paraphrase-detection_parsinlu_acc":0.85625,"paraphrase-detection_parsinlu_precision":0.8211462451,"paraphrase-detection_parsinlu_recall":0.7228571429,"paraphrase-detection_parsinlu_fscore":0.7545193783,"paraphrase-detection_parsinlu_valid_output_ratio":0.32,"nlu_score":0.456845738}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.764,"paraphrase-detection_parsinlu_precision_modified":0.8406006212,"paraphrase-detection_parsinlu_recall_modified":0.7272949816,"paraphrase-detection_parsinlu_fscore_modified":0.7279551449,"paraphrase-detection_parsinlu_acc":0.764,"paraphrase-detection_parsinlu_precision":0.8406006212,"paraphrase-detection_parsinlu_recall":0.7272949816,"paraphrase-detection_parsinlu_fscore":0.7279551449,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.672,"paraphrase-detection_parsinlu_precision_modified":0.7868195779,"paraphrase-detection_parsinlu_recall_modified":0.6265486372,"paraphrase-detection_parsinlu_fscore_modified":0.5973050157,"paraphrase-detection_parsinlu_acc":0.6801619433,"paraphrase-detection_parsinlu_precision":0.796376091,"paraphrase-detection_parsinlu_recall":0.6341585396,"paraphrase-detection_parsinlu_fscore":0.6045597325,"paraphrase-detection_parsinlu_valid_output_ratio":0.988,"nlu_score":0.5121418762}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.576,"paraphrase-detection_parsinlu_precision_modified":0.6352777945,"paraphrase-detection_parsinlu_recall_modified":0.6097919217,"paraphrase-detection_parsinlu_fscore_modified":0.5654241624,"paraphrase-detection_parsinlu_acc":0.576,"paraphrase-detection_parsinlu_precision":0.6352777945,"paraphrase-detection_parsinlu_recall":0.6097919217,"paraphrase-detection_parsinlu_fscore":0.5654241624,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3619547874}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.864,"paraphrase-detection_parsinlu_precision_modified":0.8615596015,"paraphrase-detection_parsinlu_recall_modified":0.8607099143,"paraphrase-detection_parsinlu_fscore_modified":0.8611201882,"paraphrase-detection_parsinlu_acc":0.864,"paraphrase-detection_parsinlu_precision":0.8615596015,"paraphrase-detection_parsinlu_recall":0.8607099143,"paraphrase-detection_parsinlu_fscore":0.8611201882,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3928685253}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.828,"paraphrase-detection_parsinlu_precision_modified":0.8504577445,"paraphrase-detection_parsinlu_recall_modified":0.807996736,"paraphrase-detection_parsinlu_fscore_modified":0.8159372646,"paraphrase-detection_parsinlu_acc":0.828,"paraphrase-detection_parsinlu_precision":0.8504577445,"paraphrase-detection_parsinlu_recall":0.807996736,"paraphrase-detection_parsinlu_fscore":0.8159372646,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.804,"paraphrase-detection_parsinlu_precision_modified":0.8556500813,"paraphrase-detection_parsinlu_recall_modified":0.7749490004,"paraphrase-detection_parsinlu_fscore_modified":0.7822570611,"paraphrase-detection_parsinlu_acc":0.804,"paraphrase-detection_parsinlu_precision":0.8556500813,"paraphrase-detection_parsinlu_recall":0.7749490004,"paraphrase-detection_parsinlu_fscore":0.7822570611,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.85,"paraphrase-detection_parsinlu_precision_modified":0.8811252269,"paraphrase-detection_parsinlu_recall_modified":0.8290085679,"paraphrase-detection_parsinlu_fscore_modified":0.8385784573,"paraphrase-detection_parsinlu_acc":0.85,"paraphrase-detection_parsinlu_precision":0.8811252269,"paraphrase-detection_parsinlu_recall":0.8290085679,"paraphrase-detection_parsinlu_fscore":0.8385784573,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8576776974,"paraphrase-detection_parsinlu_recall_modified":0.819624643,"paraphrase-detection_parsinlu_fscore_modified":0.8275649186,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8576776974,"paraphrase-detection_parsinlu_recall":0.819624643,"paraphrase-detection_parsinlu_fscore":0.8275649186,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.88,"paraphrase-detection_parsinlu_precision_modified":0.8945054945,"paraphrase-detection_parsinlu_recall_modified":0.8661770706,"paraphrase-detection_parsinlu_fscore_modified":0.8738965952,"paraphrase-detection_parsinlu_acc":0.88,"paraphrase-detection_parsinlu_precision":0.8945054945,"paraphrase-detection_parsinlu_recall":0.8661770706,"paraphrase-detection_parsinlu_fscore":0.8738965952,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.4824528512}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.868,"paraphrase-detection_parsinlu_precision_modified":0.8786613063,"paraphrase-detection_parsinlu_recall_modified":0.8550795594,"paraphrase-detection_parsinlu_fscore_modified":0.8617882093,"paraphrase-detection_parsinlu_acc":0.868,"paraphrase-detection_parsinlu_precision":0.8786613063,"paraphrase-detection_parsinlu_recall":0.8550795594,"paraphrase-detection_parsinlu_fscore":0.8617882093,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.802,"paraphrase-detection_parsinlu_precision_modified":0.854490281,"paraphrase-detection_parsinlu_recall_modified":0.772623419,"paraphrase-detection_parsinlu_fscore_modified":0.7797249009,"paraphrase-detection_parsinlu_acc":0.802,"paraphrase-detection_parsinlu_precision":0.854490281,"paraphrase-detection_parsinlu_recall":0.772623419,"paraphrase-detection_parsinlu_fscore":0.7797249009,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.888,"paraphrase-detection_parsinlu_precision_modified":0.8885737506,"paraphrase-detection_parsinlu_recall_modified":0.8823337413,"paraphrase-detection_parsinlu_fscore_modified":0.8849043884,"paraphrase-detection_parsinlu_acc":0.888,"paraphrase-detection_parsinlu_precision":0.8885737506,"paraphrase-detection_parsinlu_recall":0.8823337413,"paraphrase-detection_parsinlu_fscore":0.8849043884,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.778,"paraphrase-detection_parsinlu_precision_modified":0.8054432653,"paraphrase-detection_parsinlu_recall_modified":0.7971712985,"paraphrase-detection_parsinlu_fscore_modified":0.7776855183,"paraphrase-detection_parsinlu_acc":0.7842741935,"paraphrase-detection_parsinlu_precision":0.8119387755,"paraphrase-detection_parsinlu_recall":0.8036000993,"paraphrase-detection_parsinlu_fscore":0.7839571757,"paraphrase-detection_parsinlu_valid_output_ratio":0.992,"nlu_score":0.531045981}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.792,"paraphrase-detection_parsinlu_precision_modified":0.8426666667,"paraphrase-detection_parsinlu_recall_modified":0.7621379029,"paraphrase-detection_parsinlu_fscore_modified":0.7682709447,"paraphrase-detection_parsinlu_acc":0.792,"paraphrase-detection_parsinlu_precision":0.8426666667,"paraphrase-detection_parsinlu_recall":0.7621379029,"paraphrase-detection_parsinlu_fscore":0.7682709447,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.798,"paraphrase-detection_parsinlu_precision_modified":0.8383696273,"paraphrase-detection_parsinlu_recall_modified":0.7708282334,"paraphrase-detection_parsinlu_fscore_modified":0.7777278949,"paraphrase-detection_parsinlu_acc":0.798,"paraphrase-detection_parsinlu_precision":0.8383696273,"paraphrase-detection_parsinlu_recall":0.7708282334,"paraphrase-detection_parsinlu_fscore":0.7777278949,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.634,"paraphrase-detection_parsinlu_precision_modified":0.7237713267,"paraphrase-detection_parsinlu_recall_modified":0.6720930233,"paraphrase-detection_parsinlu_fscore_modified":0.6220242152,"paraphrase-detection_parsinlu_acc":0.634,"paraphrase-detection_parsinlu_precision":0.7237713267,"paraphrase-detection_parsinlu_recall":0.6720930233,"paraphrase-detection_parsinlu_fscore":0.6220242152,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3916645306}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.848,"paraphrase-detection_parsinlu_precision_modified":0.8717792656,"paraphrase-detection_parsinlu_recall_modified":0.8289677683,"paraphrase-detection_parsinlu_fscore_modified":0.8376928465,"paraphrase-detection_parsinlu_acc":0.848,"paraphrase-detection_parsinlu_precision":0.8717792656,"paraphrase-detection_parsinlu_recall":0.8289677683,"paraphrase-detection_parsinlu_fscore":0.8376928465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.8267131595,"paraphrase-detection_parsinlu_recall_modified":0.8165238678,"paraphrase-detection_parsinlu_fscore_modified":0.8200389709,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.8267131595,"paraphrase-detection_parsinlu_recall":0.8165238678,"paraphrase-detection_parsinlu_fscore":0.8200389709,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6361186163}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.824,"paraphrase-detection_parsinlu_precision_modified":0.8599831541,"paraphrase-detection_parsinlu_recall_modified":0.7999184007,"paraphrase-detection_parsinlu_fscore_modified":0.8085591465,"paraphrase-detection_parsinlu_acc":0.824,"paraphrase-detection_parsinlu_precision":0.8599831541,"paraphrase-detection_parsinlu_recall":0.7999184007,"paraphrase-detection_parsinlu_fscore":0.8085591465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6255818412}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.57,"paraphrase-detection_parsinlu_precision_modified":0.285,"paraphrase-detection_parsinlu_recall_modified":0.5,"paraphrase-detection_parsinlu_fscore_modified":0.3630573248,"paraphrase-detection_parsinlu_acc":0.57,"paraphrase-detection_parsinlu_precision":0.285,"paraphrase-detection_parsinlu_recall":0.5,"paraphrase-detection_parsinlu_fscore":0.3630573248,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.1368924446}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.072,"paraphrase-detection_parsinlu_precision_modified":0.036,"paraphrase-detection_parsinlu_recall_modified":0.066,"paraphrase-detection_parsinlu_fscore_modified":0.0465882353,"paraphrase-detection_parsinlu_acc":0.5454545455,"paraphrase-detection_parsinlu_precision":0.2727272727,"paraphrase-detection_parsinlu_recall":0.5,"paraphrase-detection_parsinlu_fscore":0.3529411765,"paraphrase-detection_parsinlu_valid_output_ratio":0.132,"nlu_score":0.046805056}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.814,"paraphrase-detection_parsinlu_precision_modified":0.8488669447,"paraphrase-detection_parsinlu_recall_modified":0.7894328845,"paraphrase-detection_parsinlu_fscore_modified":0.7974306363,"paraphrase-detection_parsinlu_acc":0.814,"paraphrase-detection_parsinlu_precision":0.8488669447,"paraphrase-detection_parsinlu_recall":0.7894328845,"paraphrase-detection_parsinlu_fscore":0.7974306363,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.844,"paraphrase-detection_parsinlu_precision_modified":0.8671683358,"paraphrase-detection_parsinlu_recall_modified":0.8248878009,"paraphrase-detection_parsinlu_fscore_modified":0.8334216056,"paraphrase-detection_parsinlu_acc":0.844,"paraphrase-detection_parsinlu_precision":0.8671683358,"paraphrase-detection_parsinlu_recall":0.8248878009,"paraphrase-detection_parsinlu_fscore":0.8334216056,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8416530278,"paraphrase-detection_parsinlu_recall_modified":0.8270501836,"paraphrase-detection_parsinlu_fscore_modified":0.8316645261,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8416530278,"paraphrase-detection_parsinlu_recall":0.8270501836,"paraphrase-detection_parsinlu_fscore":0.8316645261,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6241793507}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.872,"paraphrase-detection_parsinlu_precision_modified":0.872593554,"paraphrase-detection_parsinlu_recall_modified":0.8654426765,"paraphrase-detection_parsinlu_fscore_modified":0.8682824025,"paraphrase-detection_parsinlu_acc":0.872,"paraphrase-detection_parsinlu_precision":0.872593554,"paraphrase-detection_parsinlu_recall":0.8654426765,"paraphrase-detection_parsinlu_fscore":0.8682824025,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.866,"paraphrase-detection_parsinlu_precision_modified":0.8771198269,"paraphrase-detection_parsinlu_recall_modified":0.852753978,"paraphrase-detection_parsinlu_fscore_modified":0.8595688134,"paraphrase-detection_parsinlu_acc":0.866,"paraphrase-detection_parsinlu_precision":0.8771198269,"paraphrase-detection_parsinlu_recall":0.852753978,"paraphrase-detection_parsinlu_fscore":0.8595688134,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.861267166,"paraphrase-detection_parsinlu_recall_modified":0.802243982,"paraphrase-detection_parsinlu_fscore_modified":0.8109673691,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.861267166,"paraphrase-detection_parsinlu_recall":0.802243982,"paraphrase-detection_parsinlu_fscore":0.8109673691,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":null,"paraphrase-detection_parsinlu_precision_modified":null,"paraphrase-detection_parsinlu_recall_modified":null,"paraphrase-detection_parsinlu_fscore_modified":null,"paraphrase-detection_parsinlu_acc":null,"paraphrase-detection_parsinlu_precision":null,"paraphrase-detection_parsinlu_recall":null,"paraphrase-detection_parsinlu_fscore":null,"paraphrase-detection_parsinlu_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.878,"paraphrase-detection_parsinlu_precision_modified":0.8823390152,"paraphrase-detection_parsinlu_recall_modified":0.8689922481,"paraphrase-detection_parsinlu_fscore_modified":0.8736280355,"paraphrase-detection_parsinlu_acc":0.878,"paraphrase-detection_parsinlu_precision":0.8823390152,"paraphrase-detection_parsinlu_recall":0.8689922481,"paraphrase-detection_parsinlu_fscore":0.8736280355,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.876,"paraphrase-detection_parsinlu_precision_modified":0.8806824921,"paraphrase-detection_parsinlu_recall_modified":0.8666666667,"paraphrase-detection_parsinlu_fscore_modified":0.871456768,"paraphrase-detection_parsinlu_acc":0.876,"paraphrase-detection_parsinlu_precision":0.8806824921,"paraphrase-detection_parsinlu_recall":0.8666666667,"paraphrase-detection_parsinlu_fscore":0.871456768,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.854,"paraphrase-detection_parsinlu_precision_modified":0.8742015099,"paraphrase-detection_parsinlu_recall_modified":0.8365157079,"paraphrase-detection_parsinlu_fscore_modified":0.8449177639,"paraphrase-detection_parsinlu_acc":0.854,"paraphrase-detection_parsinlu_precision":0.8742015099,"paraphrase-detection_parsinlu_recall":0.8365157079,"paraphrase-detection_parsinlu_fscore":0.8449177639,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.876,"paraphrase-detection_parsinlu_precision_modified":0.8735210118,"paraphrase-detection_parsinlu_recall_modified":0.8735210118,"paraphrase-detection_parsinlu_fscore_modified":0.8735210118,"paraphrase-detection_parsinlu_acc":0.876,"paraphrase-detection_parsinlu_precision":0.8735210118,"paraphrase-detection_parsinlu_recall":0.8735210118,"paraphrase-detection_parsinlu_fscore":0.8735210118,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.744,"paraphrase-detection_parsinlu_precision_modified":0.7517357255,"paraphrase-detection_parsinlu_recall_modified":0.7357334824,"paraphrase-detection_parsinlu_fscore_modified":0.7395,"paraphrase-detection_parsinlu_acc":0.8416289593,"paraphrase-detection_parsinlu_precision":0.85037978,"paraphrase-detection_parsinlu_recall":0.832277695,"paraphrase-detection_parsinlu_fscore":0.8365384615,"paraphrase-detection_parsinlu_valid_output_ratio":0.884,"nlu_score":0.3749414991}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.66,"paraphrase-detection_parsinlu_precision_modified":0.7933390651,"paraphrase-detection_parsinlu_recall_modified":0.6057935537,"paraphrase-detection_parsinlu_fscore_modified":0.5625411726,"paraphrase-detection_parsinlu_acc":0.66,"paraphrase-detection_parsinlu_precision":0.7933390651,"paraphrase-detection_parsinlu_recall":0.6057935537,"paraphrase-detection_parsinlu_fscore":0.5625411726,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.5661558794}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.274,"paraphrase-detection_parsinlu_precision_modified":0.2627667984,"paraphrase-detection_parsinlu_recall_modified":0.2313142857,"paraphrase-detection_parsinlu_fscore_modified":0.2414462011,"paraphrase-detection_parsinlu_acc":0.85625,"paraphrase-detection_parsinlu_precision":0.8211462451,"paraphrase-detection_parsinlu_recall":0.7228571429,"paraphrase-detection_parsinlu_fscore":0.7545193783,"paraphrase-detection_parsinlu_valid_output_ratio":0.32,"nlu_score":0.456845738}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.764,"paraphrase-detection_parsinlu_precision_modified":0.8406006212,"paraphrase-detection_parsinlu_recall_modified":0.7272949816,"paraphrase-detection_parsinlu_fscore_modified":0.7279551449,"paraphrase-detection_parsinlu_acc":0.764,"paraphrase-detection_parsinlu_precision":0.8406006212,"paraphrase-detection_parsinlu_recall":0.7272949816,"paraphrase-detection_parsinlu_fscore":0.7279551449,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.672,"paraphrase-detection_parsinlu_precision_modified":0.7868195779,"paraphrase-detection_parsinlu_recall_modified":0.6265486372,"paraphrase-detection_parsinlu_fscore_modified":0.5973050157,"paraphrase-detection_parsinlu_acc":0.6801619433,"paraphrase-detection_parsinlu_precision":0.796376091,"paraphrase-detection_parsinlu_recall":0.6341585396,"paraphrase-detection_parsinlu_fscore":0.6045597325,"paraphrase-detection_parsinlu_valid_output_ratio":0.988,"nlu_score":0.5121418762}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.576,"paraphrase-detection_parsinlu_precision_modified":0.6352777945,"paraphrase-detection_parsinlu_recall_modified":0.6097919217,"paraphrase-detection_parsinlu_fscore_modified":0.5654241624,"paraphrase-detection_parsinlu_acc":0.576,"paraphrase-detection_parsinlu_precision":0.6352777945,"paraphrase-detection_parsinlu_recall":0.6097919217,"paraphrase-detection_parsinlu_fscore":0.5654241624,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3619547874}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.864,"paraphrase-detection_parsinlu_precision_modified":0.8615596015,"paraphrase-detection_parsinlu_recall_modified":0.8607099143,"paraphrase-detection_parsinlu_fscore_modified":0.8611201882,"paraphrase-detection_parsinlu_acc":0.864,"paraphrase-detection_parsinlu_precision":0.8615596015,"paraphrase-detection_parsinlu_recall":0.8607099143,"paraphrase-detection_parsinlu_fscore":0.8611201882,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3928685253}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.828,"paraphrase-detection_parsinlu_precision_modified":0.8504577445,"paraphrase-detection_parsinlu_recall_modified":0.807996736,"paraphrase-detection_parsinlu_fscore_modified":0.8159372646,"paraphrase-detection_parsinlu_acc":0.828,"paraphrase-detection_parsinlu_precision":0.8504577445,"paraphrase-detection_parsinlu_recall":0.807996736,"paraphrase-detection_parsinlu_fscore":0.8159372646,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.804,"paraphrase-detection_parsinlu_precision_modified":0.8556500813,"paraphrase-detection_parsinlu_recall_modified":0.7749490004,"paraphrase-detection_parsinlu_fscore_modified":0.7822570611,"paraphrase-detection_parsinlu_acc":0.804,"paraphrase-detection_parsinlu_precision":0.8556500813,"paraphrase-detection_parsinlu_recall":0.7749490004,"paraphrase-detection_parsinlu_fscore":0.7822570611,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.85,"paraphrase-detection_parsinlu_precision_modified":0.8811252269,"paraphrase-detection_parsinlu_recall_modified":0.8290085679,"paraphrase-detection_parsinlu_fscore_modified":0.8385784573,"paraphrase-detection_parsinlu_acc":0.85,"paraphrase-detection_parsinlu_precision":0.8811252269,"paraphrase-detection_parsinlu_recall":0.8290085679,"paraphrase-detection_parsinlu_fscore":0.8385784573,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.838,"paraphrase-detection_parsinlu_precision_modified":0.8576776974,"paraphrase-detection_parsinlu_recall_modified":0.819624643,"paraphrase-detection_parsinlu_fscore_modified":0.8275649186,"paraphrase-detection_parsinlu_acc":0.838,"paraphrase-detection_parsinlu_precision":0.8576776974,"paraphrase-detection_parsinlu_recall":0.819624643,"paraphrase-detection_parsinlu_fscore":0.8275649186,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.88,"paraphrase-detection_parsinlu_precision_modified":0.8945054945,"paraphrase-detection_parsinlu_recall_modified":0.8661770706,"paraphrase-detection_parsinlu_fscore_modified":0.8738965952,"paraphrase-detection_parsinlu_acc":0.88,"paraphrase-detection_parsinlu_precision":0.8945054945,"paraphrase-detection_parsinlu_recall":0.8661770706,"paraphrase-detection_parsinlu_fscore":0.8738965952,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.4824528512}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.868,"paraphrase-detection_parsinlu_precision_modified":0.8786613063,"paraphrase-detection_parsinlu_recall_modified":0.8550795594,"paraphrase-detection_parsinlu_fscore_modified":0.8617882093,"paraphrase-detection_parsinlu_acc":0.868,"paraphrase-detection_parsinlu_precision":0.8786613063,"paraphrase-detection_parsinlu_recall":0.8550795594,"paraphrase-detection_parsinlu_fscore":0.8617882093,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.802,"paraphrase-detection_parsinlu_precision_modified":0.854490281,"paraphrase-detection_parsinlu_recall_modified":0.772623419,"paraphrase-detection_parsinlu_fscore_modified":0.7797249009,"paraphrase-detection_parsinlu_acc":0.802,"paraphrase-detection_parsinlu_precision":0.854490281,"paraphrase-detection_parsinlu_recall":0.772623419,"paraphrase-detection_parsinlu_fscore":0.7797249009,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.888,"paraphrase-detection_parsinlu_precision_modified":0.8885737506,"paraphrase-detection_parsinlu_recall_modified":0.8823337413,"paraphrase-detection_parsinlu_fscore_modified":0.8849043884,"paraphrase-detection_parsinlu_acc":0.888,"paraphrase-detection_parsinlu_precision":0.8885737506,"paraphrase-detection_parsinlu_recall":0.8823337413,"paraphrase-detection_parsinlu_fscore":0.8849043884,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.778,"paraphrase-detection_parsinlu_precision_modified":0.8054432653,"paraphrase-detection_parsinlu_recall_modified":0.7971712985,"paraphrase-detection_parsinlu_fscore_modified":0.7776855183,"paraphrase-detection_parsinlu_acc":0.7842741935,"paraphrase-detection_parsinlu_precision":0.8119387755,"paraphrase-detection_parsinlu_recall":0.8036000993,"paraphrase-detection_parsinlu_fscore":0.7839571757,"paraphrase-detection_parsinlu_valid_output_ratio":0.992,"nlu_score":0.531045981}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.792,"paraphrase-detection_parsinlu_precision_modified":0.8426666667,"paraphrase-detection_parsinlu_recall_modified":0.7621379029,"paraphrase-detection_parsinlu_fscore_modified":0.7682709447,"paraphrase-detection_parsinlu_acc":0.792,"paraphrase-detection_parsinlu_precision":0.8426666667,"paraphrase-detection_parsinlu_recall":0.7621379029,"paraphrase-detection_parsinlu_fscore":0.7682709447,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.798,"paraphrase-detection_parsinlu_precision_modified":0.8383696273,"paraphrase-detection_parsinlu_recall_modified":0.7708282334,"paraphrase-detection_parsinlu_fscore_modified":0.7777278949,"paraphrase-detection_parsinlu_acc":0.798,"paraphrase-detection_parsinlu_precision":0.8383696273,"paraphrase-detection_parsinlu_recall":0.7708282334,"paraphrase-detection_parsinlu_fscore":0.7777278949,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.634,"paraphrase-detection_parsinlu_precision_modified":0.7237713267,"paraphrase-detection_parsinlu_recall_modified":0.6720930233,"paraphrase-detection_parsinlu_fscore_modified":0.6220242152,"paraphrase-detection_parsinlu_acc":0.634,"paraphrase-detection_parsinlu_precision":0.7237713267,"paraphrase-detection_parsinlu_recall":0.6720930233,"paraphrase-detection_parsinlu_fscore":0.6220242152,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.3916645306}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.848,"paraphrase-detection_parsinlu_precision_modified":0.8717792656,"paraphrase-detection_parsinlu_recall_modified":0.8289677683,"paraphrase-detection_parsinlu_fscore_modified":0.8376928465,"paraphrase-detection_parsinlu_acc":0.848,"paraphrase-detection_parsinlu_precision":0.8717792656,"paraphrase-detection_parsinlu_recall":0.8289677683,"paraphrase-detection_parsinlu_fscore":0.8376928465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.826,"paraphrase-detection_parsinlu_precision_modified":0.8267131595,"paraphrase-detection_parsinlu_recall_modified":0.8165238678,"paraphrase-detection_parsinlu_fscore_modified":0.8200389709,"paraphrase-detection_parsinlu_acc":0.826,"paraphrase-detection_parsinlu_precision":0.8267131595,"paraphrase-detection_parsinlu_recall":0.8165238678,"paraphrase-detection_parsinlu_fscore":0.8200389709,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6361186163}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.824,"paraphrase-detection_parsinlu_precision_modified":0.8599831541,"paraphrase-detection_parsinlu_recall_modified":0.7999184007,"paraphrase-detection_parsinlu_fscore_modified":0.8085591465,"paraphrase-detection_parsinlu_acc":0.824,"paraphrase-detection_parsinlu_precision":0.8599831541,"paraphrase-detection_parsinlu_recall":0.7999184007,"paraphrase-detection_parsinlu_fscore":0.8085591465,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6255818412}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.57,"paraphrase-detection_parsinlu_precision_modified":0.285,"paraphrase-detection_parsinlu_recall_modified":0.5,"paraphrase-detection_parsinlu_fscore_modified":0.3630573248,"paraphrase-detection_parsinlu_acc":0.57,"paraphrase-detection_parsinlu_precision":0.285,"paraphrase-detection_parsinlu_recall":0.5,"paraphrase-detection_parsinlu_fscore":0.3630573248,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.1368924446}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","paraphrase-detection_parsinlu_acc_modified":0.072,"paraphrase-detection_parsinlu_precision_modified":0.036,"paraphrase-detection_parsinlu_recall_modified":0.066,"paraphrase-detection_parsinlu_fscore_modified":0.0465882353,"paraphrase-detection_parsinlu_acc":0.5454545455,"paraphrase-detection_parsinlu_precision":0.2727272727,"paraphrase-detection_parsinlu_recall":0.5,"paraphrase-detection_parsinlu_fscore":0.3529411765,"paraphrase-detection_parsinlu_valid_output_ratio":0.132,"nlu_score":0.046805056}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","paraphrase-detection_parsinlu_acc_modified":0.814,"paraphrase-detection_parsinlu_precision_modified":0.8488669447,"paraphrase-detection_parsinlu_recall_modified":0.7894328845,"paraphrase-detection_parsinlu_fscore_modified":0.7974306363,"paraphrase-detection_parsinlu_acc":0.814,"paraphrase-detection_parsinlu_precision":0.8488669447,"paraphrase-detection_parsinlu_recall":0.7894328845,"paraphrase-detection_parsinlu_fscore":0.7974306363,"paraphrase-detection_parsinlu_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
leaderboard/boards_data/persian_csr.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.9427585507,"acc_strict":0.9427585507,"donyaeeqtesad_acc":0.9449541284,"isna_acc":0.9488188976,"ninisite_article_acc":0.9371584699,"virgool_4_acc":0.9601226994,"khabaronline_acc":0.9390243902,"digiato_acc":0.9493670886,"doctoreto_acc":0.9242424242,"sarzamindownload_acc":0.8926174497,"hamgardi_acc":0.9139465875,"bigbangpage_acc":0.9803921569,"wiki_ahlolbait_acc":0.96,"virgool_3_acc":0.9606060606,"virgool_2_acc":0.9566563467,"virgool_1_acc":0.9516129032,"hamshahrionline_acc":0.9566666667,"tabnak_acc":0.9535864979,"alibaba_acc":0.9370860927,"digikala_mag_acc":0.9693877551,"yjc_acc":0.9298245614,"beytoote_acc":0.9555555556,"asriran_acc":0.9215686275,"ecoiran_acc":0.9163987138,"hawzah_acc":0.9745762712,"zoomit_acc":0.9593301435,"wikipedia_acc":0.9854368932,"namnak_acc":0.9168975069,"khodro45_acc":0.9248120301,"fidibo_acc":0.9513274336,"newmiind_acc":0.8776223776,"taaghche_acc":0.9675324675,"motamem_acc":0.9680851064,"varzesh3_acc":0.9652777778,"mehrnews_acc":0.9306122449,"tasnim_acc":0.9365079365,"magerta_acc":0.8675213675,"radiokodak_book_acc":0.8181818182,"vipofilm_acc":1.0,"wikishia_acc":1.0,"voolak_acc":0.9523809524,"farsroid_acc":0.8333333333,"parsiday_acc":0.9322033898,"soft98_acc":0.9,"ninisite_discussion_acc":0.9}
|
| 2 |
-
{"Model Name":"o3","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.9218,"acc_strict":0.9218,"donyaeeqtesad_acc":0.9214501511,"isna_acc":0.90234375,"ninisite_article_acc":0.9027027027,"virgool_4_acc":0.9335347432,"khabaronline_acc":0.92,"digiato_acc":0.9185803758,"doctoreto_acc":0.92,"sarzamindownload_acc":0.9346405229,"hamgardi_acc":0.8879056047,"bigbangpage_acc":0.949044586,"wiki_ahlolbait_acc":0.9539473684,"virgool_3_acc":0.9373134328,"virgool_2_acc":0.9418960245,"virgool_1_acc":0.9240506329,"hamshahrionline_acc":0.9278688525,"tabnak_acc":0.9294605809,"alibaba_acc":0.9117647059,"digikala_mag_acc":0.9458917836,"yjc_acc":0.908045977,"beytoote_acc":0.9335180055,"asriran_acc":0.9130434783,"ecoiran_acc":0.8825396825,"hawzah_acc":0.9578651685,"zoomit_acc":0.9577464789,"wikipedia_acc":0.9761904762,"namnak_acc":0.9019073569,"khodro45_acc":0.9191176471,"fidibo_acc":0.9471365639,"newmiind_acc":0.8784722222,"taaghche_acc":0.9487179487,"motamem_acc":0.9894736842,"varzesh3_acc":0.9565217391,"mehrnews_acc":0.8991935484,"tasnim_acc":0.8884615385,"magerta_acc":0.8403361345,"radiokodak_book_acc":0.8260869565,"vipofilm_acc":0.9230769231,"wikishia_acc":0.9696969697,"voolak_acc":0.8372093023,"farsroid_acc":0.8157894737,"parsiday_acc":0.8916666667,"soft98_acc":1.0,"ninisite_discussion_acc":0.6}
|
| 3 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.9117647059,"acc_strict":0.9117647059,"donyaeeqtesad_acc":0.918429003,"isna_acc":0.90625,"ninisite_article_acc":0.9027027027,"virgool_4_acc":0.9244712991,"khabaronline_acc":0.936,"digiato_acc":0.9184100418,"doctoreto_acc":0.89,"sarzamindownload_acc":0.8758169935,"hamgardi_acc":0.8702064897,"bigbangpage_acc":0.9363057325,"wiki_ahlolbait_acc":0.9671052632,"virgool_3_acc":0.9223880597,"virgool_2_acc":0.9235474006,"virgool_1_acc":0.914556962,"hamshahrionline_acc":0.9440789474,"tabnak_acc":0.9253112033,"alibaba_acc":0.9117647059,"digikala_mag_acc":0.9198396794,"yjc_acc":0.908045977,"beytoote_acc":0.9168975069,"asriran_acc":0.8888888889,"ecoiran_acc":0.8825396825,"hawzah_acc":0.9382022472,"zoomit_acc":0.9341176471,"wikipedia_acc":0.9476190476,"namnak_acc":0.8882833787,"khodro45_acc":0.9117647059,"fidibo_acc":0.9162995595,"newmiind_acc":0.8257839721,"taaghche_acc":0.9358974359,"motamem_acc":0.9684210526,"varzesh3_acc":0.9397993311,"mehrnews_acc":0.9233870968,"tasnim_acc":0.9115384615,"magerta_acc":0.8571428571,"radiokodak_book_acc":0.8695652174,"vipofilm_acc":1.0,"wikishia_acc":1.0,"voolak_acc":0.8604651163,"farsroid_acc":0.7894736842,"parsiday_acc":0.8333333333,"soft98_acc":1.0,"ninisite_discussion_acc":0.8}
|
| 4 |
-
{"Model Name":"gpt-4.1","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8839,"acc_strict":0.8839,"donyaeeqtesad_acc":0.8610271903,"isna_acc":0.8359375,"ninisite_article_acc":0.8702702703,"virgool_4_acc":0.9033232628,"khabaronline_acc":0.884,"digiato_acc":0.8705636743,"doctoreto_acc":0.885,"sarzamindownload_acc":0.8562091503,"hamgardi_acc":0.8377581121,"bigbangpage_acc":0.923566879,"wiki_ahlolbait_acc":0.9276315789,"virgool_3_acc":0.9104477612,"virgool_2_acc":0.8837920489,"virgool_1_acc":0.8607594937,"hamshahrionline_acc":0.9114754098,"tabnak_acc":0.887966805,"alibaba_acc":0.9117647059,"digikala_mag_acc":0.9118236473,"yjc_acc":0.867816092,"beytoote_acc":0.91966759,"asriran_acc":0.8792270531,"ecoiran_acc":0.8634920635,"hawzah_acc":0.9269662921,"zoomit_acc":0.9154929577,"wikipedia_acc":0.9428571429,"namnak_acc":0.8583106267,"khodro45_acc":0.8602941176,"fidibo_acc":0.9074889868,"newmiind_acc":0.8020833333,"taaghche_acc":0.8974358974,"motamem_acc":0.9263157895,"varzesh3_acc":0.9096989967,"mehrnews_acc":0.8508064516,"tasnim_acc":0.8307692308,"magerta_acc":0.8403361345,"radiokodak_book_acc":0.8695652174,"vipofilm_acc":1.0,"wikishia_acc":1.0,"voolak_acc":0.8837209302,"farsroid_acc":0.8421052632,"parsiday_acc":0.8833333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.9}
|
| 5 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8717,"acc_strict":0.8714,"donyaeeqtesad_acc":0.8700906344,"isna_acc":0.8671875,"ninisite_article_acc":0.8621621622,"virgool_4_acc":0.9033232628,"khabaronline_acc":0.852,"digiato_acc":0.8747390397,"doctoreto_acc":0.9,"sarzamindownload_acc":0.8562091503,"hamgardi_acc":0.8230088496,"bigbangpage_acc":0.898089172,"wiki_ahlolbait_acc":0.8881578947,"virgool_3_acc":0.9104477612,"virgool_2_acc":0.8929663609,"virgool_1_acc":0.8765822785,"hamshahrionline_acc":0.8786885246,"tabnak_acc":0.8755186722,"alibaba_acc":0.9183006536,"digikala_mag_acc":0.8977955912,"yjc_acc":0.8563218391,"beytoote_acc":0.8864265928,"asriran_acc":0.8212560386,"ecoiran_acc":0.8476190476,"hawzah_acc":0.8960674157,"zoomit_acc":0.9084507042,"wikipedia_acc":0.9238095238,"namnak_acc":0.8310626703,"khodro45_acc":0.8382352941,"fidibo_acc":0.9207048458,"newmiind_acc":0.8194444444,"taaghche_acc":0.9038461538,"motamem_acc":0.9368421053,"varzesh3_acc":0.8829431438,"mehrnews_acc":0.8064516129,"tasnim_acc":0.8692307692,"magerta_acc":0.7605042017,"radiokodak_book_acc":0.8260869565,"vipofilm_acc":0.9230769231,"wikishia_acc":0.9393939394,"voolak_acc":0.8372093023,"farsroid_acc":0.7894736842,"parsiday_acc":0.775,"soft98_acc":0.9,"ninisite_discussion_acc":0.8}
|
| 6 |
-
{"Model Name":"gpt-4o","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8665,"acc_strict":0.7832,"donyaeeqtesad_acc":0.8459214502,"isna_acc":0.8359375,"ninisite_article_acc":0.8594594595,"virgool_4_acc":0.8882175227,"khabaronline_acc":0.896,"digiato_acc":0.8496868476,"doctoreto_acc":0.855,"sarzamindownload_acc":0.8235294118,"hamgardi_acc":0.8200589971,"bigbangpage_acc":0.9171974522,"wiki_ahlolbait_acc":0.9342105263,"virgool_3_acc":0.8656716418,"virgool_2_acc":0.8837920489,"virgool_1_acc":0.8892405063,"hamshahrionline_acc":0.9409836066,"tabnak_acc":0.887966805,"alibaba_acc":0.8529411765,"digikala_mag_acc":0.8817635271,"yjc_acc":0.8103448276,"beytoote_acc":0.8891966759,"asriran_acc":0.8550724638,"ecoiran_acc":0.8126984127,"hawzah_acc":0.9129213483,"zoomit_acc":0.9084507042,"wikipedia_acc":0.9285714286,"namnak_acc":0.8446866485,"khodro45_acc":0.8455882353,"fidibo_acc":0.872246696,"newmiind_acc":0.8125,"taaghche_acc":0.9038461538,"motamem_acc":0.9157894737,"varzesh3_acc":0.8996655518,"mehrnews_acc":0.814516129,"tasnim_acc":0.8461538462,"magerta_acc":0.781512605,"radiokodak_book_acc":0.7826086957,"vipofilm_acc":0.9230769231,"wikishia_acc":0.9696969697,"voolak_acc":0.7674418605,"farsroid_acc":0.7105263158,"parsiday_acc":0.85,"soft98_acc":0.8,"ninisite_discussion_acc":0.9}
|
| 7 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8637863786,"acc_strict":0.8637863786,"donyaeeqtesad_acc":0.8398791541,"isna_acc":0.828125,"ninisite_article_acc":0.8243243243,"virgool_4_acc":0.8851963746,"khabaronline_acc":0.84,"digiato_acc":0.8747390397,"doctoreto_acc":0.895,"sarzamindownload_acc":0.8366013072,"hamgardi_acc":0.802359882,"bigbangpage_acc":0.898089172,"wiki_ahlolbait_acc":0.9407894737,"virgool_3_acc":0.8955223881,"virgool_2_acc":0.8899082569,"virgool_1_acc":0.8892405063,"hamshahrionline_acc":0.8754098361,"tabnak_acc":0.8755186722,"alibaba_acc":0.8426229508,"digikala_mag_acc":0.877755511,"yjc_acc":0.816091954,"beytoote_acc":0.8836565097,"asriran_acc":0.8888888889,"ecoiran_acc":0.8126984127,"hawzah_acc":0.8904494382,"zoomit_acc":0.9084507042,"wikipedia_acc":0.919047619,"namnak_acc":0.8392370572,"khodro45_acc":0.8823529412,"fidibo_acc":0.9030837004,"newmiind_acc":0.8125,"taaghche_acc":0.9423076923,"motamem_acc":0.9157894737,"varzesh3_acc":0.8929765886,"mehrnews_acc":0.8427419355,"tasnim_acc":0.8153846154,"magerta_acc":0.7773109244,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":1.0,"wikishia_acc":0.9696969697,"voolak_acc":0.7441860465,"farsroid_acc":0.7894736842,"parsiday_acc":0.8166666667,"soft98_acc":1.0,"ninisite_discussion_acc":0.9}
|
| 8 |
-
{"Model Name":"o4-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8551,"acc_strict":0.8551,"donyaeeqtesad_acc":0.8429003021,"isna_acc":0.828125,"ninisite_article_acc":0.8540540541,"virgool_4_acc":0.8610271903,"khabaronline_acc":0.84,"digiato_acc":0.8830897704,"doctoreto_acc":0.87,"sarzamindownload_acc":0.8758169935,"hamgardi_acc":0.796460177,"bigbangpage_acc":0.8853503185,"wiki_ahlolbait_acc":0.9013157895,"virgool_3_acc":0.871641791,"virgool_2_acc":0.9051987768,"virgool_1_acc":0.8481012658,"hamshahrionline_acc":0.8786885246,"tabnak_acc":0.8713692946,"alibaba_acc":0.8758169935,"digikala_mag_acc":0.879759519,"yjc_acc":0.7988505747,"beytoote_acc":0.8753462604,"asriran_acc":0.8260869565,"ecoiran_acc":0.8031746032,"hawzah_acc":0.8511235955,"zoomit_acc":0.8849765258,"wikipedia_acc":0.9285714286,"namnak_acc":0.8310626703,"khodro45_acc":0.8897058824,"fidibo_acc":0.872246696,"newmiind_acc":0.7881944444,"taaghche_acc":0.8974358974,"motamem_acc":0.9157894737,"varzesh3_acc":0.8762541806,"mehrnews_acc":0.8346774194,"tasnim_acc":0.8269230769,"magerta_acc":0.7941176471,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":0.7692307692,"wikishia_acc":0.9393939394,"voolak_acc":0.8372093023,"farsroid_acc":0.8157894737,"parsiday_acc":0.7583333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.3}
|
| 9 |
-
{"Model Name":"deepseek-reasoner","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.825165033,"acc_strict":0.825165033,"donyaeeqtesad_acc":0.8247734139,"isna_acc":0.8046875,"ninisite_article_acc":0.8594594595,"virgool_4_acc":0.8580060423,"khabaronline_acc":0.78,"digiato_acc":0.8267223382,"doctoreto_acc":0.835,"sarzamindownload_acc":0.7908496732,"hamgardi_acc":0.808259587,"bigbangpage_acc":0.9044585987,"wiki_ahlolbait_acc":0.9006622517,"virgool_3_acc":0.8268656716,"virgool_2_acc":0.8348623853,"virgool_1_acc":0.7911392405,"hamshahrionline_acc":0.8651315789,"tabnak_acc":0.8215767635,"alibaba_acc":0.8169934641,"digikala_mag_acc":0.8336673347,"yjc_acc":0.7873563218,"beytoote_acc":0.8310249307,"asriran_acc":0.8212560386,"ecoiran_acc":0.7523809524,"hawzah_acc":0.8735955056,"zoomit_acc":0.8450704225,"wikipedia_acc":0.9,"namnak_acc":0.8337874659,"khodro45_acc":0.8088235294,"fidibo_acc":0.845814978,"newmiind_acc":0.7604166667,"taaghche_acc":0.891025641,"motamem_acc":0.8947368421,"varzesh3_acc":0.8093645485,"mehrnews_acc":0.7782258065,"tasnim_acc":0.8115384615,"magerta_acc":0.7647058824,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.9230769231,"wikishia_acc":0.9393939394,"voolak_acc":0.8372093023,"farsroid_acc":0.7368421053,"parsiday_acc":0.7583333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
|
| 10 |
-
{"Model Name":"deepseek-chat","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.8241,"acc_strict":0.5115,"donyaeeqtesad_acc":0.8096676737,"isna_acc":0.7734375,"ninisite_article_acc":0.8216216216,"virgool_4_acc":0.8580060423,"khabaronline_acc":0.812,"digiato_acc":0.8141962422,"doctoreto_acc":0.845,"sarzamindownload_acc":0.7843137255,"hamgardi_acc":0.7669616519,"bigbangpage_acc":0.8598726115,"wiki_ahlolbait_acc":0.8947368421,"virgool_3_acc":0.8298507463,"virgool_2_acc":0.8532110092,"virgool_1_acc":0.8164556962,"hamshahrionline_acc":0.868852459,"tabnak_acc":0.8174273859,"alibaba_acc":0.8464052288,"digikala_mag_acc":0.8236472946,"yjc_acc":0.7931034483,"beytoote_acc":0.8282548476,"asriran_acc":0.8019323671,"ecoiran_acc":0.7523809524,"hawzah_acc":0.8651685393,"zoomit_acc":0.8568075117,"wikipedia_acc":0.9,"namnak_acc":0.8310626703,"khodro45_acc":0.8161764706,"fidibo_acc":0.8810572687,"newmiind_acc":0.7569444444,"taaghche_acc":0.9166666667,"motamem_acc":0.9052631579,"varzesh3_acc":0.8327759197,"mehrnews_acc":0.7822580645,"tasnim_acc":0.8038461538,"magerta_acc":0.768907563,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.9696969697,"voolak_acc":0.7906976744,"farsroid_acc":0.7368421053,"parsiday_acc":0.7666666667,"soft98_acc":0.8,"ninisite_discussion_acc":0.7}
|
| 11 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8143,"acc_strict":0.8143,"donyaeeqtesad_acc":0.7975830816,"isna_acc":0.77734375,"ninisite_article_acc":0.8,"virgool_4_acc":0.833836858,"khabaronline_acc":0.796,"digiato_acc":0.8037578288,"doctoreto_acc":0.82,"sarzamindownload_acc":0.7712418301,"hamgardi_acc":0.7669616519,"bigbangpage_acc":0.8598726115,"wiki_ahlolbait_acc":0.8947368421,"virgool_3_acc":0.8298507463,"virgool_2_acc":0.8409785933,"virgool_1_acc":0.8196202532,"hamshahrionline_acc":0.8557377049,"tabnak_acc":0.8215767635,"alibaba_acc":0.8202614379,"digikala_mag_acc":0.8176352705,"yjc_acc":0.8045977011,"beytoote_acc":0.811634349,"asriran_acc":0.7874396135,"ecoiran_acc":0.7682539683,"hawzah_acc":0.8511235955,"zoomit_acc":0.8474178404,"wikipedia_acc":0.8952380952,"namnak_acc":0.7847411444,"khodro45_acc":0.8382352941,"fidibo_acc":0.845814978,"newmiind_acc":0.7708333333,"taaghche_acc":0.8525641026,"motamem_acc":0.9157894737,"varzesh3_acc":0.8394648829,"mehrnews_acc":0.7459677419,"tasnim_acc":0.8230769231,"magerta_acc":0.7478991597,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":1.0,"wikishia_acc":1.0,"voolak_acc":0.7906976744,"farsroid_acc":0.7105263158,"parsiday_acc":0.7416666667,"soft98_acc":0.9,"ninisite_discussion_acc":0.6}
|
| 12 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","acc":0.798859772,"acc_strict":0.7983596719,"donyaeeqtesad_acc":0.7673716012,"isna_acc":0.76953125,"ninisite_article_acc":0.7696476965,"virgool_4_acc":0.8398791541,"khabaronline_acc":0.78,"digiato_acc":0.7870563674,"doctoreto_acc":0.82,"sarzamindownload_acc":0.7712418301,"hamgardi_acc":0.7610619469,"bigbangpage_acc":0.8789808917,"wiki_ahlolbait_acc":0.8486842105,"virgool_3_acc":0.8119402985,"virgool_2_acc":0.8226299694,"virgool_1_acc":0.8037974684,"hamshahrionline_acc":0.8557377049,"tabnak_acc":0.8298755187,"alibaba_acc":0.8562091503,"digikala_mag_acc":0.8152610442,"yjc_acc":0.7471264368,"beytoote_acc":0.8005540166,"asriran_acc":0.7922705314,"ecoiran_acc":0.7333333333,"hawzah_acc":0.8342696629,"zoomit_acc":0.8427230047,"wikipedia_acc":0.9095238095,"namnak_acc":0.7738419619,"khodro45_acc":0.8088235294,"fidibo_acc":0.845814978,"newmiind_acc":0.7222222222,"taaghche_acc":0.8397435897,"motamem_acc":0.8947368421,"varzesh3_acc":0.7993311037,"mehrnews_acc":0.7338709677,"tasnim_acc":0.7730769231,"magerta_acc":0.6974789916,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.6578947368,"parsiday_acc":0.7166666667,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
|
| 13 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","acc":0.7956,"acc_strict":0.1123,"donyaeeqtesad_acc":0.7764350453,"isna_acc":0.76171875,"ninisite_article_acc":0.772972973,"virgool_4_acc":0.8549848943,"khabaronline_acc":0.78,"digiato_acc":0.7954070981,"doctoreto_acc":0.755,"sarzamindownload_acc":0.7385620915,"hamgardi_acc":0.7492625369,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8421052632,"virgool_3_acc":0.8029850746,"virgool_2_acc":0.8287461774,"virgool_1_acc":0.8259493671,"hamshahrionline_acc":0.862295082,"tabnak_acc":0.8257261411,"alibaba_acc":0.8366013072,"digikala_mag_acc":0.8076152305,"yjc_acc":0.7356321839,"beytoote_acc":0.7922437673,"asriran_acc":0.7874396135,"ecoiran_acc":0.7142857143,"hawzah_acc":0.845505618,"zoomit_acc":0.8403755869,"wikipedia_acc":0.9047619048,"namnak_acc":0.7874659401,"khodro45_acc":0.7941176471,"fidibo_acc":0.8414096916,"newmiind_acc":0.7465277778,"taaghche_acc":0.8076923077,"motamem_acc":0.8947368421,"varzesh3_acc":0.7959866221,"mehrnews_acc":0.7419354839,"tasnim_acc":0.7346153846,"magerta_acc":0.6848739496,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8787878788,"voolak_acc":0.7906976744,"farsroid_acc":0.6578947368,"parsiday_acc":0.7583333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.5}
|
| 14 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7712,"acc_strict":0.7712,"donyaeeqtesad_acc":0.7703927492,"isna_acc":0.75390625,"ninisite_article_acc":0.7648648649,"virgool_4_acc":0.8247734139,"khabaronline_acc":0.76,"digiato_acc":0.7745302714,"doctoreto_acc":0.785,"sarzamindownload_acc":0.7581699346,"hamgardi_acc":0.6784660767,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8223684211,"virgool_3_acc":0.7910447761,"virgool_2_acc":0.7920489297,"virgool_1_acc":0.7943037975,"hamshahrionline_acc":0.8295081967,"tabnak_acc":0.7634854772,"alibaba_acc":0.7973856209,"digikala_mag_acc":0.8056112224,"yjc_acc":0.724137931,"beytoote_acc":0.7783933518,"asriran_acc":0.7777777778,"ecoiran_acc":0.7079365079,"hawzah_acc":0.7724719101,"zoomit_acc":0.8098591549,"wikipedia_acc":0.8761904762,"namnak_acc":0.7547683924,"khodro45_acc":0.7941176471,"fidibo_acc":0.7841409692,"newmiind_acc":0.6875,"taaghche_acc":0.8269230769,"motamem_acc":0.8631578947,"varzesh3_acc":0.7926421405,"mehrnews_acc":0.7056451613,"tasnim_acc":0.7076923077,"magerta_acc":0.6890756303,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8181818182,"voolak_acc":0.6279069767,"farsroid_acc":0.6578947368,"parsiday_acc":0.7083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.5}
|
| 15 |
-
{"Model Name":"Qwen3-32B","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","acc":0.7654,"acc_strict":0.7653,"donyaeeqtesad_acc":0.749244713,"isna_acc":0.75,"ninisite_article_acc":0.7621621622,"virgool_4_acc":0.7824773414,"khabaronline_acc":0.724,"digiato_acc":0.8037578288,"doctoreto_acc":0.8,"sarzamindownload_acc":0.7450980392,"hamgardi_acc":0.6991150442,"bigbangpage_acc":0.8025477707,"wiki_ahlolbait_acc":0.8157894737,"virgool_3_acc":0.8029850746,"virgool_2_acc":0.8073394495,"virgool_1_acc":0.7943037975,"hamshahrionline_acc":0.8,"tabnak_acc":0.7634854772,"alibaba_acc":0.8039215686,"digikala_mag_acc":0.7875751503,"yjc_acc":0.6896551724,"beytoote_acc":0.7783933518,"asriran_acc":0.7632850242,"ecoiran_acc":0.6793650794,"hawzah_acc":0.7724719101,"zoomit_acc":0.8215962441,"wikipedia_acc":0.8523809524,"namnak_acc":0.7520435967,"khodro45_acc":0.8088235294,"fidibo_acc":0.7665198238,"newmiind_acc":0.6909722222,"taaghche_acc":0.7564102564,"motamem_acc":0.8736842105,"varzesh3_acc":0.762541806,"mehrnews_acc":0.689516129,"tasnim_acc":0.7192307692,"magerta_acc":0.7268907563,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8484848485,"voolak_acc":0.6744186047,"farsroid_acc":0.6578947368,"parsiday_acc":0.675,"soft98_acc":0.7,"ninisite_discussion_acc":0.7}
|
| 16 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","acc":0.7628,"acc_strict":0.7628,"donyaeeqtesad_acc":0.6978851964,"isna_acc":0.7265625,"ninisite_article_acc":0.7621621622,"virgool_4_acc":0.8187311178,"khabaronline_acc":0.74,"digiato_acc":0.7661795407,"doctoreto_acc":0.78,"sarzamindownload_acc":0.6993464052,"hamgardi_acc":0.7109144543,"bigbangpage_acc":0.821656051,"wiki_ahlolbait_acc":0.8026315789,"virgool_3_acc":0.7940298507,"virgool_2_acc":0.755351682,"virgool_1_acc":0.7784810127,"hamshahrionline_acc":0.8229508197,"tabnak_acc":0.8174273859,"alibaba_acc":0.7843137255,"digikala_mag_acc":0.7975951904,"yjc_acc":0.7126436782,"beytoote_acc":0.7534626039,"asriran_acc":0.7391304348,"ecoiran_acc":0.7079365079,"hawzah_acc":0.7752808989,"zoomit_acc":0.7957746479,"wikipedia_acc":0.8428571429,"namnak_acc":0.7493188011,"khodro45_acc":0.7867647059,"fidibo_acc":0.8237885463,"newmiind_acc":0.6909722222,"taaghche_acc":0.7820512821,"motamem_acc":0.8315789474,"varzesh3_acc":0.7993311037,"mehrnews_acc":0.6975806452,"tasnim_acc":0.7307692308,"magerta_acc":0.6722689076,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":0.8461538462,"wikishia_acc":0.9393939394,"voolak_acc":0.6976744186,"farsroid_acc":0.6315789474,"parsiday_acc":0.7,"soft98_acc":0.8,"ninisite_discussion_acc":0.7}
|
| 17 |
-
{"Model Name":"gpt-4o-mini","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7598,"acc_strict":0.7598,"donyaeeqtesad_acc":0.749244713,"isna_acc":0.71484375,"ninisite_article_acc":0.7351351351,"virgool_4_acc":0.8006042296,"khabaronline_acc":0.736,"digiato_acc":0.7599164927,"doctoreto_acc":0.775,"sarzamindownload_acc":0.6535947712,"hamgardi_acc":0.7197640118,"bigbangpage_acc":0.7961783439,"wiki_ahlolbait_acc":0.8289473684,"virgool_3_acc":0.7492537313,"virgool_2_acc":0.7828746177,"virgool_1_acc":0.8006329114,"hamshahrionline_acc":0.8131147541,"tabnak_acc":0.7427385892,"alibaba_acc":0.7810457516,"digikala_mag_acc":0.7615230461,"yjc_acc":0.7643678161,"beytoote_acc":0.7783933518,"asriran_acc":0.7536231884,"ecoiran_acc":0.6952380952,"hawzah_acc":0.7668539326,"zoomit_acc":0.7957746479,"wikipedia_acc":0.8761904762,"namnak_acc":0.7765667575,"khodro45_acc":0.7573529412,"fidibo_acc":0.7621145374,"newmiind_acc":0.6909722222,"taaghche_acc":0.7820512821,"motamem_acc":0.8736842105,"varzesh3_acc":0.8060200669,"mehrnews_acc":0.6733870968,"tasnim_acc":0.75,"magerta_acc":0.6764705882,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.7631578947,"parsiday_acc":0.7083333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
| 18 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","acc":0.7364,"acc_strict":0.7364,"donyaeeqtesad_acc":0.752265861,"isna_acc":0.67578125,"ninisite_article_acc":0.7054054054,"virgool_4_acc":0.746223565,"khabaronline_acc":0.724,"digiato_acc":0.7223382046,"doctoreto_acc":0.7,"sarzamindownload_acc":0.6993464052,"hamgardi_acc":0.7020648968,"bigbangpage_acc":0.8089171975,"wiki_ahlolbait_acc":0.8486842105,"virgool_3_acc":0.7194029851,"virgool_2_acc":0.7339449541,"virgool_1_acc":0.7246835443,"hamshahrionline_acc":0.8262295082,"tabnak_acc":0.7178423237,"alibaba_acc":0.7712418301,"digikala_mag_acc":0.7715430862,"yjc_acc":0.7183908046,"beytoote_acc":0.7479224377,"asriran_acc":0.768115942,"ecoiran_acc":0.6698412698,"hawzah_acc":0.7415730337,"zoomit_acc":0.79342723,"wikipedia_acc":0.819047619,"namnak_acc":0.7220708447,"khodro45_acc":0.75,"fidibo_acc":0.7665198238,"newmiind_acc":0.6631944444,"taaghche_acc":0.7820512821,"motamem_acc":0.8631578947,"varzesh3_acc":0.7525083612,"mehrnews_acc":0.6653225806,"tasnim_acc":0.75,"magerta_acc":0.6134453782,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.5,"parsiday_acc":0.7083333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
|
| 19 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7160432086,"acc_strict":0.7160432086,"donyaeeqtesad_acc":0.6888217523,"isna_acc":0.69140625,"ninisite_article_acc":0.7,"virgool_4_acc":0.7311178248,"khabaronline_acc":0.712,"digiato_acc":0.7181628392,"doctoreto_acc":0.745,"sarzamindownload_acc":0.6013071895,"hamgardi_acc":0.6755162242,"bigbangpage_acc":0.7770700637,"wiki_ahlolbait_acc":0.8684210526,"virgool_3_acc":0.7194029851,"virgool_2_acc":0.7064220183,"virgool_1_acc":0.7056962025,"hamshahrionline_acc":0.7803278689,"tabnak_acc":0.6887966805,"alibaba_acc":0.7483660131,"digikala_mag_acc":0.7434869739,"yjc_acc":0.6724137931,"beytoote_acc":0.728531856,"asriran_acc":0.7487922705,"ecoiran_acc":0.6761904762,"hawzah_acc":0.7584269663,"zoomit_acc":0.7558685446,"wikipedia_acc":0.780952381,"namnak_acc":0.7002724796,"khodro45_acc":0.7279411765,"fidibo_acc":0.7665198238,"newmiind_acc":0.6202090592,"taaghche_acc":0.7628205128,"motamem_acc":0.8404255319,"varzesh3_acc":0.7324414716,"mehrnews_acc":0.6169354839,"tasnim_acc":0.6923076923,"magerta_acc":0.6680672269,"radiokodak_book_acc":0.5652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.7878787879,"voolak_acc":0.6511627907,"farsroid_acc":0.7105263158,"parsiday_acc":0.575,"soft98_acc":0.9,"ninisite_discussion_acc":0.3}
|
| 20 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","acc":0.7094,"acc_strict":0.7094,"donyaeeqtesad_acc":0.6586102719,"isna_acc":0.65625,"ninisite_article_acc":0.7243243243,"virgool_4_acc":0.7643504532,"khabaronline_acc":0.704,"digiato_acc":0.7369519833,"doctoreto_acc":0.76,"sarzamindownload_acc":0.6797385621,"hamgardi_acc":0.6666666667,"bigbangpage_acc":0.7515923567,"wiki_ahlolbait_acc":0.7631578947,"virgool_3_acc":0.7223880597,"virgool_2_acc":0.7584097859,"virgool_1_acc":0.7183544304,"hamshahrionline_acc":0.7213114754,"tabnak_acc":0.7219917012,"alibaba_acc":0.6830065359,"digikala_mag_acc":0.7354709419,"yjc_acc":0.6206896552,"beytoote_acc":0.7146814404,"asriran_acc":0.7198067633,"ecoiran_acc":0.6603174603,"hawzah_acc":0.702247191,"zoomit_acc":0.7323943662,"wikipedia_acc":0.7714285714,"namnak_acc":0.7329700272,"khodro45_acc":0.7352941176,"fidibo_acc":0.718061674,"newmiind_acc":0.6493055556,"taaghche_acc":0.7564102564,"motamem_acc":0.8210526316,"varzesh3_acc":0.7157190635,"mehrnews_acc":0.6088709677,"tasnim_acc":0.6576923077,"magerta_acc":0.6302521008,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8787878788,"voolak_acc":0.6976744186,"farsroid_acc":0.7368421053,"parsiday_acc":0.6583333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.8}
|
| 21 |
-
{"Model Name":"Qwen3-14B","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","acc":0.6958,"acc_strict":0.6958,"donyaeeqtesad_acc":0.6495468278,"isna_acc":0.62890625,"ninisite_article_acc":0.6972972973,"virgool_4_acc":0.7069486405,"khabaronline_acc":0.652,"digiato_acc":0.7202505219,"doctoreto_acc":0.77,"sarzamindownload_acc":0.614379085,"hamgardi_acc":0.6430678466,"bigbangpage_acc":0.7579617834,"wiki_ahlolbait_acc":0.7631578947,"virgool_3_acc":0.7373134328,"virgool_2_acc":0.7155963303,"virgool_1_acc":0.7278481013,"hamshahrionline_acc":0.7278688525,"tabnak_acc":0.6970954357,"alibaba_acc":0.7254901961,"digikala_mag_acc":0.7074148297,"yjc_acc":0.6379310345,"beytoote_acc":0.6842105263,"asriran_acc":0.6859903382,"ecoiran_acc":0.653968254,"hawzah_acc":0.7078651685,"zoomit_acc":0.7676056338,"wikipedia_acc":0.8142857143,"namnak_acc":0.6621253406,"khodro45_acc":0.7647058824,"fidibo_acc":0.731277533,"newmiind_acc":0.6597222222,"taaghche_acc":0.6987179487,"motamem_acc":0.8105263158,"varzesh3_acc":0.6220735786,"mehrnews_acc":0.625,"tasnim_acc":0.6692307692,"magerta_acc":0.6596638655,"radiokodak_book_acc":0.5652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6279069767,"farsroid_acc":0.6052631579,"parsiday_acc":0.5666666667,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
|
| 22 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","acc":0.6894,"acc_strict":0.6894,"donyaeeqtesad_acc":0.670694864,"isna_acc":0.63671875,"ninisite_article_acc":0.6945945946,"virgool_4_acc":0.7039274924,"khabaronline_acc":0.664,"digiato_acc":0.6826722338,"doctoreto_acc":0.755,"sarzamindownload_acc":0.6339869281,"hamgardi_acc":0.6342182891,"bigbangpage_acc":0.7452229299,"wiki_ahlolbait_acc":0.7697368421,"virgool_3_acc":0.7014925373,"virgool_2_acc":0.7125382263,"virgool_1_acc":0.7341772152,"hamshahrionline_acc":0.7278688525,"tabnak_acc":0.6307053942,"alibaba_acc":0.7647058824,"digikala_mag_acc":0.7174348697,"yjc_acc":0.5804597701,"beytoote_acc":0.6814404432,"asriran_acc":0.6811594203,"ecoiran_acc":0.6158730159,"hawzah_acc":0.6994382022,"zoomit_acc":0.7441314554,"wikipedia_acc":0.8333333333,"namnak_acc":0.659400545,"khodro45_acc":0.7058823529,"fidibo_acc":0.7268722467,"newmiind_acc":0.6527777778,"taaghche_acc":0.7051282051,"motamem_acc":0.8526315789,"varzesh3_acc":0.6789297659,"mehrnews_acc":0.5887096774,"tasnim_acc":0.6692307692,"magerta_acc":0.6680672269,"radiokodak_book_acc":0.4347826087,"vipofilm_acc":0.6923076923,"wikishia_acc":0.7575757576,"voolak_acc":0.6511627907,"farsroid_acc":0.6842105263,"parsiday_acc":0.55,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
| 23 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","acc":0.688,"acc_strict":0.688,"donyaeeqtesad_acc":0.6465256798,"isna_acc":0.6640625,"ninisite_article_acc":0.7243243243,"virgool_4_acc":0.7311178248,"khabaronline_acc":0.668,"digiato_acc":0.6764091858,"doctoreto_acc":0.765,"sarzamindownload_acc":0.7058823529,"hamgardi_acc":0.6519174041,"bigbangpage_acc":0.8025477707,"wiki_ahlolbait_acc":0.7368421053,"virgool_3_acc":0.7134328358,"virgool_2_acc":0.7003058104,"virgool_1_acc":0.7025316456,"hamshahrionline_acc":0.6819672131,"tabnak_acc":0.7012448133,"alibaba_acc":0.7189542484,"digikala_mag_acc":0.6753507014,"yjc_acc":0.632183908,"beytoote_acc":0.6703601108,"asriran_acc":0.652173913,"ecoiran_acc":0.6126984127,"hawzah_acc":0.7387640449,"zoomit_acc":0.7300469484,"wikipedia_acc":0.7904761905,"namnak_acc":0.6920980926,"khodro45_acc":0.7279411765,"fidibo_acc":0.6872246696,"newmiind_acc":0.6631944444,"taaghche_acc":0.6858974359,"motamem_acc":0.8,"varzesh3_acc":0.6120401338,"mehrnews_acc":0.6129032258,"tasnim_acc":0.65,"magerta_acc":0.6596638655,"radiokodak_book_acc":0.4782608696,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.6578947368,"parsiday_acc":0.6,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
| 24 |
-
{"Model Name":"aya-expanse-32b","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","acc":0.6327,"acc_strict":0.0585,"donyaeeqtesad_acc":0.6223564955,"isna_acc":0.5703125,"ninisite_article_acc":0.6621621622,"virgool_4_acc":0.6435045317,"khabaronline_acc":0.632,"digiato_acc":0.6346555324,"doctoreto_acc":0.65,"sarzamindownload_acc":0.5620915033,"hamgardi_acc":0.6194690265,"bigbangpage_acc":0.7070063694,"wiki_ahlolbait_acc":0.6776315789,"virgool_3_acc":0.6208955224,"virgool_2_acc":0.626911315,"virgool_1_acc":0.6518987342,"hamshahrionline_acc":0.6557377049,"tabnak_acc":0.6639004149,"alibaba_acc":0.6666666667,"digikala_mag_acc":0.627254509,"yjc_acc":0.6206896552,"beytoote_acc":0.6675900277,"asriran_acc":0.6231884058,"ecoiran_acc":0.5904761905,"hawzah_acc":0.6797752809,"zoomit_acc":0.5915492958,"wikipedia_acc":0.7333333333,"namnak_acc":0.6403269755,"khodro45_acc":0.6102941176,"fidibo_acc":0.704845815,"newmiind_acc":0.5416666667,"taaghche_acc":0.6217948718,"motamem_acc":0.7684210526,"varzesh3_acc":0.6254180602,"mehrnews_acc":0.5927419355,"tasnim_acc":0.6230769231,"magerta_acc":0.5672268908,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.6153846154,"wikishia_acc":0.7575757576,"voolak_acc":0.6279069767,"farsroid_acc":0.6578947368,"parsiday_acc":0.5083333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
|
| 25 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","acc":0.6,"acc_strict":0.6,"donyaeeqtesad_acc":0.6042296073,"isna_acc":0.56640625,"ninisite_article_acc":0.572972973,"virgool_4_acc":0.5951661631,"khabaronline_acc":0.6,"digiato_acc":0.5908141962,"doctoreto_acc":0.605,"sarzamindownload_acc":0.5882352941,"hamgardi_acc":0.5722713864,"bigbangpage_acc":0.6369426752,"wiki_ahlolbait_acc":0.6578947368,"virgool_3_acc":0.5731343284,"virgool_2_acc":0.623853211,"virgool_1_acc":0.6139240506,"hamshahrionline_acc":0.6295081967,"tabnak_acc":0.6514522822,"alibaba_acc":0.6307189542,"digikala_mag_acc":0.6152304609,"yjc_acc":0.5747126437,"beytoote_acc":0.5900277008,"asriran_acc":0.5314009662,"ecoiran_acc":0.5619047619,"hawzah_acc":0.6292134831,"zoomit_acc":0.5915492958,"wikipedia_acc":0.6571428571,"namnak_acc":0.6267029973,"khodro45_acc":0.6397058824,"fidibo_acc":0.6872246696,"newmiind_acc":0.4895833333,"taaghche_acc":0.6217948718,"motamem_acc":0.6736842105,"varzesh3_acc":0.635451505,"mehrnews_acc":0.5725806452,"tasnim_acc":0.6115384615,"magerta_acc":0.5672268908,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.6923076923,"wikishia_acc":0.6060606061,"voolak_acc":0.6046511628,"farsroid_acc":0.4736842105,"parsiday_acc":0.5,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
|
| 26 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","acc":0.5915,"acc_strict":0.5915,"donyaeeqtesad_acc":0.5528700906,"isna_acc":0.578125,"ninisite_article_acc":0.5945945946,"virgool_4_acc":0.5981873112,"khabaronline_acc":0.588,"digiato_acc":0.6283924843,"doctoreto_acc":0.615,"sarzamindownload_acc":0.6013071895,"hamgardi_acc":0.5516224189,"bigbangpage_acc":0.6242038217,"wiki_ahlolbait_acc":0.625,"virgool_3_acc":0.6119402985,"virgool_2_acc":0.6085626911,"virgool_1_acc":0.6297468354,"hamshahrionline_acc":0.606557377,"tabnak_acc":0.6141078838,"alibaba_acc":0.6045751634,"digikala_mag_acc":0.6132264529,"yjc_acc":0.6264367816,"beytoote_acc":0.5734072022,"asriran_acc":0.61352657,"ecoiran_acc":0.5428571429,"hawzah_acc":0.5758426966,"zoomit_acc":0.5821596244,"wikipedia_acc":0.6476190476,"namnak_acc":0.6049046322,"khodro45_acc":0.6397058824,"fidibo_acc":0.5594713656,"newmiind_acc":0.5451388889,"taaghche_acc":0.5897435897,"motamem_acc":0.6842105263,"varzesh3_acc":0.602006689,"mehrnews_acc":0.4879032258,"tasnim_acc":0.6153846154,"magerta_acc":0.5420168067,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.6153846154,"wikishia_acc":0.6666666667,"voolak_acc":0.5813953488,"farsroid_acc":0.6052631579,"parsiday_acc":0.425,"soft98_acc":0.6,"ninisite_discussion_acc":0.4}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.5494,"acc_strict":0.5494,"donyaeeqtesad_acc":0.5347432024,"isna_acc":0.4921875,"ninisite_article_acc":0.5864864865,"virgool_4_acc":0.5921450151,"khabaronline_acc":0.556,"digiato_acc":0.5469728601,"doctoreto_acc":0.6,"sarzamindownload_acc":0.522875817,"hamgardi_acc":0.5044247788,"bigbangpage_acc":0.6305732484,"wiki_ahlolbait_acc":0.6644736842,"virgool_3_acc":0.5582089552,"virgool_2_acc":0.5107033639,"virgool_1_acc":0.5949367089,"hamshahrionline_acc":0.5639344262,"tabnak_acc":0.510373444,"alibaba_acc":0.6078431373,"digikala_mag_acc":0.5611222445,"yjc_acc":0.591954023,"beytoote_acc":0.5567867036,"asriran_acc":0.5265700483,"ecoiran_acc":0.4920634921,"hawzah_acc":0.547752809,"zoomit_acc":0.5821596244,"wikipedia_acc":0.6380952381,"namnak_acc":0.5449591281,"khodro45_acc":0.6102941176,"fidibo_acc":0.5726872247,"newmiind_acc":0.5,"taaghche_acc":0.5,"motamem_acc":0.6210526316,"varzesh3_acc":0.4816053512,"mehrnews_acc":0.4838709677,"tasnim_acc":0.5692307692,"magerta_acc":0.5042016807,"radiokodak_book_acc":0.347826087,"vipofilm_acc":0.5384615385,"wikishia_acc":0.5757575758,"voolak_acc":0.488372093,"farsroid_acc":0.4736842105,"parsiday_acc":0.4,"soft98_acc":0.7,"ninisite_discussion_acc":0.4}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","acc":0.5437,"acc_strict":0.5437,"donyaeeqtesad_acc":0.4954682779,"isna_acc":0.515625,"ninisite_article_acc":0.527027027,"virgool_4_acc":0.5649546828,"khabaronline_acc":0.508,"digiato_acc":0.5469728601,"doctoreto_acc":0.605,"sarzamindownload_acc":0.4836601307,"hamgardi_acc":0.5014749263,"bigbangpage_acc":0.6560509554,"wiki_ahlolbait_acc":0.5723684211,"virgool_3_acc":0.6119402985,"virgool_2_acc":0.5626911315,"virgool_1_acc":0.5696202532,"hamshahrionline_acc":0.5540983607,"tabnak_acc":0.5643153527,"alibaba_acc":0.6045751634,"digikala_mag_acc":0.5711422846,"yjc_acc":0.5172413793,"beytoote_acc":0.5152354571,"asriran_acc":0.5555555556,"ecoiran_acc":0.4761904762,"hawzah_acc":0.5926966292,"zoomit_acc":0.5938967136,"wikipedia_acc":0.6761904762,"namnak_acc":0.4741144414,"khodro45_acc":0.5441176471,"fidibo_acc":0.5682819383,"newmiind_acc":0.5104166667,"taaghche_acc":0.5320512821,"motamem_acc":0.6526315789,"varzesh3_acc":0.4648829431,"mehrnews_acc":0.4475806452,"tasnim_acc":0.5153846154,"magerta_acc":0.5630252101,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.6923076923,"wikishia_acc":0.5757575758,"voolak_acc":0.511627907,"farsroid_acc":0.5789473684,"parsiday_acc":0.3916666667,"soft98_acc":0.7,"ninisite_discussion_acc":0.4}
|
| 29 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","acc":0.5272636318,"acc_strict":0.5271635818,"donyaeeqtesad_acc":0.4652567976,"isna_acc":0.5215686275,"ninisite_article_acc":0.5567567568,"virgool_4_acc":0.5649546828,"khabaronline_acc":0.5,"digiato_acc":0.5260960334,"doctoreto_acc":0.585,"sarzamindownload_acc":0.568627451,"hamgardi_acc":0.4955752212,"bigbangpage_acc":0.6242038217,"wiki_ahlolbait_acc":0.5894039735,"virgool_3_acc":0.5628742515,"virgool_2_acc":0.5351681957,"virgool_1_acc":0.5696202532,"hamshahrionline_acc":0.5245901639,"tabnak_acc":0.5020746888,"alibaba_acc":0.5424836601,"digikala_mag_acc":0.5130260521,"yjc_acc":0.5402298851,"beytoote_acc":0.5318559557,"asriran_acc":0.5388349515,"ecoiran_acc":0.4666666667,"hawzah_acc":0.5561797753,"zoomit_acc":0.5680751174,"wikipedia_acc":0.6,"namnak_acc":0.5476839237,"khodro45_acc":0.4485294118,"fidibo_acc":0.5374449339,"newmiind_acc":0.4756944444,"taaghche_acc":0.4871794872,"motamem_acc":0.6105263158,"varzesh3_acc":0.4581939799,"mehrnews_acc":0.4331983806,"tasnim_acc":0.5115384615,"magerta_acc":0.5168067227,"radiokodak_book_acc":0.4347826087,"vipofilm_acc":0.3846153846,"wikishia_acc":0.6666666667,"voolak_acc":0.511627907,"farsroid_acc":0.5263157895,"parsiday_acc":0.45,"soft98_acc":0.7,"ninisite_discussion_acc":0.3}
|
| 30 |
-
{"Model Name":"Qwen3-4B","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","acc":0.5033,"acc_strict":0.5033,"donyaeeqtesad_acc":0.4954682779,"isna_acc":0.48046875,"ninisite_article_acc":0.4810810811,"virgool_4_acc":0.5256797583,"khabaronline_acc":0.504,"digiato_acc":0.5073068894,"doctoreto_acc":0.615,"sarzamindownload_acc":0.4901960784,"hamgardi_acc":0.4601769912,"bigbangpage_acc":0.5414012739,"wiki_ahlolbait_acc":0.5197368421,"virgool_3_acc":0.5731343284,"virgool_2_acc":0.5565749235,"virgool_1_acc":0.5094936709,"hamshahrionline_acc":0.4655737705,"tabnak_acc":0.5145228216,"alibaba_acc":0.5098039216,"digikala_mag_acc":0.5230460922,"yjc_acc":0.5114942529,"beytoote_acc":0.4764542936,"asriran_acc":0.4782608696,"ecoiran_acc":0.4253968254,"hawzah_acc":0.5028089888,"zoomit_acc":0.5328638498,"wikipedia_acc":0.6047619048,"namnak_acc":0.4795640327,"khodro45_acc":0.6102941176,"fidibo_acc":0.550660793,"newmiind_acc":0.4895833333,"taaghche_acc":0.5064102564,"motamem_acc":0.5894736842,"varzesh3_acc":0.3913043478,"mehrnews_acc":0.439516129,"tasnim_acc":0.4807692308,"magerta_acc":0.5546218487,"radiokodak_book_acc":0.347826087,"vipofilm_acc":0.5384615385,"wikishia_acc":0.5454545455,"voolak_acc":0.488372093,"farsroid_acc":0.5263157895,"parsiday_acc":0.3083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.1}
|
| 31 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","acc":0.4832,"acc_strict":0.4832,"donyaeeqtesad_acc":0.498489426,"isna_acc":0.44140625,"ninisite_article_acc":0.4486486486,"virgool_4_acc":0.5075528701,"khabaronline_acc":0.504,"digiato_acc":0.4822546973,"doctoreto_acc":0.5,"sarzamindownload_acc":0.4117647059,"hamgardi_acc":0.5250737463,"bigbangpage_acc":0.5031847134,"wiki_ahlolbait_acc":0.5197368421,"virgool_3_acc":0.4985074627,"virgool_2_acc":0.4495412844,"virgool_1_acc":0.5063291139,"hamshahrionline_acc":0.5344262295,"tabnak_acc":0.4605809129,"alibaba_acc":0.5032679739,"digikala_mag_acc":0.4729458918,"yjc_acc":0.4482758621,"beytoote_acc":0.4903047091,"asriran_acc":0.4734299517,"ecoiran_acc":0.419047619,"hawzah_acc":0.4831460674,"zoomit_acc":0.5305164319,"wikipedia_acc":0.5666666667,"namnak_acc":0.4931880109,"khodro45_acc":0.5294117647,"fidibo_acc":0.4801762115,"newmiind_acc":0.4479166667,"taaghche_acc":0.4230769231,"motamem_acc":0.6421052632,"varzesh3_acc":0.4515050167,"mehrnews_acc":0.4072580645,"tasnim_acc":0.5,"magerta_acc":0.4453781513,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.5384615385,"wikishia_acc":0.6363636364,"voolak_acc":0.4651162791,"farsroid_acc":0.4736842105,"parsiday_acc":0.35,"soft98_acc":0.9,"ninisite_discussion_acc":0.5}
|
| 32 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","acc":0.3015,"acc_strict":0.3011,"donyaeeqtesad_acc":0.2990936556,"isna_acc":0.29296875,"ninisite_article_acc":0.2864864865,"virgool_4_acc":0.2839879154,"khabaronline_acc":0.276,"digiato_acc":0.2922755741,"doctoreto_acc":0.345,"sarzamindownload_acc":0.3267973856,"hamgardi_acc":0.3215339233,"bigbangpage_acc":0.2547770701,"wiki_ahlolbait_acc":0.2894736842,"virgool_3_acc":0.3313432836,"virgool_2_acc":0.3058103976,"virgool_1_acc":0.3512658228,"hamshahrionline_acc":0.262295082,"tabnak_acc":0.3278008299,"alibaba_acc":0.3235294118,"digikala_mag_acc":0.3206412826,"yjc_acc":0.2816091954,"beytoote_acc":0.2991689751,"asriran_acc":0.3188405797,"ecoiran_acc":0.2698412698,"hawzah_acc":0.3174157303,"zoomit_acc":0.3028169014,"wikipedia_acc":0.3380952381,"namnak_acc":0.2888283379,"khodro45_acc":0.3308823529,"fidibo_acc":0.3259911894,"newmiind_acc":0.2916666667,"taaghche_acc":0.2371794872,"motamem_acc":0.4,"varzesh3_acc":0.2240802676,"mehrnews_acc":0.2459677419,"tasnim_acc":0.3346153846,"magerta_acc":0.3361344538,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.4615384615,"wikishia_acc":0.2727272727,"voolak_acc":0.4418604651,"farsroid_acc":0.1578947368,"parsiday_acc":0.2083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.1}
|
| 33 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","acc":0.2599,"acc_strict":0.2599,"donyaeeqtesad_acc":0.2719033233,"isna_acc":0.25,"ninisite_article_acc":0.2567567568,"virgool_4_acc":0.3202416918,"khabaronline_acc":0.276,"digiato_acc":0.2630480167,"doctoreto_acc":0.245,"sarzamindownload_acc":0.2418300654,"hamgardi_acc":0.2979351032,"bigbangpage_acc":0.2484076433,"wiki_ahlolbait_acc":0.2631578947,"virgool_3_acc":0.2507462687,"virgool_2_acc":0.247706422,"virgool_1_acc":0.2594936709,"hamshahrionline_acc":0.2852459016,"tabnak_acc":0.2489626556,"alibaba_acc":0.2712418301,"digikala_mag_acc":0.2705410822,"yjc_acc":0.275862069,"beytoote_acc":0.2603878116,"asriran_acc":0.2608695652,"ecoiran_acc":0.2634920635,"hawzah_acc":0.2724719101,"zoomit_acc":0.2511737089,"wikipedia_acc":0.2857142857,"namnak_acc":0.2098092643,"khodro45_acc":0.2720588235,"fidibo_acc":0.2466960352,"newmiind_acc":0.2222222222,"taaghche_acc":0.25,"motamem_acc":0.2947368421,"varzesh3_acc":0.2441471572,"mehrnews_acc":0.25,"tasnim_acc":0.2692307692,"magerta_acc":0.2352941176,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.3846153846,"wikishia_acc":0.1515151515,"voolak_acc":0.2558139535,"farsroid_acc":0.2631578947,"parsiday_acc":0.1916666667,"soft98_acc":0.1,"ninisite_discussion_acc":0.4}
|
| 34 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","acc":0.2521,"acc_strict":0.2517,"donyaeeqtesad_acc":0.2779456193,"isna_acc":0.29296875,"ninisite_article_acc":0.2594594595,"virgool_4_acc":0.2235649547,"khabaronline_acc":0.2,"digiato_acc":0.24217119,"doctoreto_acc":0.24,"sarzamindownload_acc":0.2352941176,"hamgardi_acc":0.2684365782,"bigbangpage_acc":0.2802547771,"wiki_ahlolbait_acc":0.2368421053,"virgool_3_acc":0.2298507463,"virgool_2_acc":0.2599388379,"virgool_1_acc":0.2689873418,"hamshahrionline_acc":0.2327868852,"tabnak_acc":0.2697095436,"alibaba_acc":0.2124183007,"digikala_mag_acc":0.246492986,"yjc_acc":0.2586206897,"beytoote_acc":0.2631578947,"asriran_acc":0.2898550725,"ecoiran_acc":0.2603174603,"hawzah_acc":0.2556179775,"zoomit_acc":0.2887323944,"wikipedia_acc":0.2238095238,"namnak_acc":0.2561307902,"khodro45_acc":0.25,"fidibo_acc":0.2202643172,"newmiind_acc":0.25,"taaghche_acc":0.2692307692,"motamem_acc":0.2842105263,"varzesh3_acc":0.2107023411,"mehrnews_acc":0.2338709677,"tasnim_acc":0.2307692308,"magerta_acc":0.3235294118,"radiokodak_book_acc":0.1739130435,"vipofilm_acc":0.4615384615,"wikishia_acc":0.3333333333,"voolak_acc":0.2790697674,"farsroid_acc":0.2368421053,"parsiday_acc":0.1833333333,"soft98_acc":0.3,"ninisite_discussion_acc":0.5}
|
| 35 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","acc":0.2412,"acc_strict":0.0079,"donyaeeqtesad_acc":0.253776435,"isna_acc":0.25390625,"ninisite_article_acc":0.2486486486,"virgool_4_acc":0.2809667674,"khabaronline_acc":0.248,"digiato_acc":0.2192066806,"doctoreto_acc":0.245,"sarzamindownload_acc":0.2483660131,"hamgardi_acc":0.2507374631,"bigbangpage_acc":0.2802547771,"wiki_ahlolbait_acc":0.2434210526,"virgool_3_acc":0.2208955224,"virgool_2_acc":0.2140672783,"virgool_1_acc":0.2373417722,"hamshahrionline_acc":0.2983606557,"tabnak_acc":0.2282157676,"alibaba_acc":0.2581699346,"digikala_mag_acc":0.2224448898,"yjc_acc":0.2701149425,"beytoote_acc":0.2520775623,"asriran_acc":0.1884057971,"ecoiran_acc":0.2349206349,"hawzah_acc":0.2696629213,"zoomit_acc":0.2558685446,"wikipedia_acc":0.1761904762,"namnak_acc":0.2343324251,"khodro45_acc":0.2279411765,"fidibo_acc":0.2907488987,"newmiind_acc":0.28125,"taaghche_acc":0.1987179487,"motamem_acc":0.2736842105,"varzesh3_acc":0.2307692308,"mehrnews_acc":0.2096774194,"tasnim_acc":0.2269230769,"magerta_acc":0.1848739496,"radiokodak_book_acc":0.2173913043,"vipofilm_acc":0.1538461538,"wikishia_acc":0.1515151515,"voolak_acc":0.2790697674,"farsroid_acc":0.2105263158,"parsiday_acc":0.225,"soft98_acc":0.2,"ninisite_discussion_acc":0.4}
|
|
|
|
| 1 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.9427585507,"acc_strict":0.9427585507,"donyaeeqtesad_acc":0.9449541284,"isna_acc":0.9488188976,"ninisite_article_acc":0.9371584699,"virgool_4_acc":0.9601226994,"khabaronline_acc":0.9390243902,"digiato_acc":0.9493670886,"doctoreto_acc":0.9242424242,"sarzamindownload_acc":0.8926174497,"hamgardi_acc":0.9139465875,"bigbangpage_acc":0.9803921569,"wiki_ahlolbait_acc":0.96,"virgool_3_acc":0.9606060606,"virgool_2_acc":0.9566563467,"virgool_1_acc":0.9516129032,"hamshahrionline_acc":0.9566666667,"tabnak_acc":0.9535864979,"alibaba_acc":0.9370860927,"digikala_mag_acc":0.9693877551,"yjc_acc":0.9298245614,"beytoote_acc":0.9555555556,"asriran_acc":0.9215686275,"ecoiran_acc":0.9163987138,"hawzah_acc":0.9745762712,"zoomit_acc":0.9593301435,"wikipedia_acc":0.9854368932,"namnak_acc":0.9168975069,"khodro45_acc":0.9248120301,"fidibo_acc":0.9513274336,"newmiind_acc":0.8776223776,"taaghche_acc":0.9675324675,"motamem_acc":0.9680851064,"varzesh3_acc":0.9652777778,"mehrnews_acc":0.9306122449,"tasnim_acc":0.9365079365,"magerta_acc":0.8675213675,"radiokodak_book_acc":0.8181818182,"vipofilm_acc":1.0,"wikishia_acc":1.0,"voolak_acc":0.9523809524,"farsroid_acc":0.8333333333,"parsiday_acc":0.9322033898,"soft98_acc":0.9,"ninisite_discussion_acc":0.9}
|
| 2 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.9218,"acc_strict":0.9218,"donyaeeqtesad_acc":0.9214501511,"isna_acc":0.90234375,"ninisite_article_acc":0.9027027027,"virgool_4_acc":0.9335347432,"khabaronline_acc":0.92,"digiato_acc":0.9185803758,"doctoreto_acc":0.92,"sarzamindownload_acc":0.9346405229,"hamgardi_acc":0.8879056047,"bigbangpage_acc":0.949044586,"wiki_ahlolbait_acc":0.9539473684,"virgool_3_acc":0.9373134328,"virgool_2_acc":0.9418960245,"virgool_1_acc":0.9240506329,"hamshahrionline_acc":0.9278688525,"tabnak_acc":0.9294605809,"alibaba_acc":0.9117647059,"digikala_mag_acc":0.9458917836,"yjc_acc":0.908045977,"beytoote_acc":0.9335180055,"asriran_acc":0.9130434783,"ecoiran_acc":0.8825396825,"hawzah_acc":0.9578651685,"zoomit_acc":0.9577464789,"wikipedia_acc":0.9761904762,"namnak_acc":0.9019073569,"khodro45_acc":0.9191176471,"fidibo_acc":0.9471365639,"newmiind_acc":0.8784722222,"taaghche_acc":0.9487179487,"motamem_acc":0.9894736842,"varzesh3_acc":0.9565217391,"mehrnews_acc":0.8991935484,"tasnim_acc":0.8884615385,"magerta_acc":0.8403361345,"radiokodak_book_acc":0.8260869565,"vipofilm_acc":0.9230769231,"wikishia_acc":0.9696969697,"voolak_acc":0.8372093023,"farsroid_acc":0.8157894737,"parsiday_acc":0.8916666667,"soft98_acc":1.0,"ninisite_discussion_acc":0.6}
|
| 3 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.9117647059,"acc_strict":0.9117647059,"donyaeeqtesad_acc":0.918429003,"isna_acc":0.90625,"ninisite_article_acc":0.9027027027,"virgool_4_acc":0.9244712991,"khabaronline_acc":0.936,"digiato_acc":0.9184100418,"doctoreto_acc":0.89,"sarzamindownload_acc":0.8758169935,"hamgardi_acc":0.8702064897,"bigbangpage_acc":0.9363057325,"wiki_ahlolbait_acc":0.9671052632,"virgool_3_acc":0.9223880597,"virgool_2_acc":0.9235474006,"virgool_1_acc":0.914556962,"hamshahrionline_acc":0.9440789474,"tabnak_acc":0.9253112033,"alibaba_acc":0.9117647059,"digikala_mag_acc":0.9198396794,"yjc_acc":0.908045977,"beytoote_acc":0.9168975069,"asriran_acc":0.8888888889,"ecoiran_acc":0.8825396825,"hawzah_acc":0.9382022472,"zoomit_acc":0.9341176471,"wikipedia_acc":0.9476190476,"namnak_acc":0.8882833787,"khodro45_acc":0.9117647059,"fidibo_acc":0.9162995595,"newmiind_acc":0.8257839721,"taaghche_acc":0.9358974359,"motamem_acc":0.9684210526,"varzesh3_acc":0.9397993311,"mehrnews_acc":0.9233870968,"tasnim_acc":0.9115384615,"magerta_acc":0.8571428571,"radiokodak_book_acc":0.8695652174,"vipofilm_acc":1.0,"wikishia_acc":1.0,"voolak_acc":0.8604651163,"farsroid_acc":0.7894736842,"parsiday_acc":0.8333333333,"soft98_acc":1.0,"ninisite_discussion_acc":0.8}
|
| 4 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8839,"acc_strict":0.8839,"donyaeeqtesad_acc":0.8610271903,"isna_acc":0.8359375,"ninisite_article_acc":0.8702702703,"virgool_4_acc":0.9033232628,"khabaronline_acc":0.884,"digiato_acc":0.8705636743,"doctoreto_acc":0.885,"sarzamindownload_acc":0.8562091503,"hamgardi_acc":0.8377581121,"bigbangpage_acc":0.923566879,"wiki_ahlolbait_acc":0.9276315789,"virgool_3_acc":0.9104477612,"virgool_2_acc":0.8837920489,"virgool_1_acc":0.8607594937,"hamshahrionline_acc":0.9114754098,"tabnak_acc":0.887966805,"alibaba_acc":0.9117647059,"digikala_mag_acc":0.9118236473,"yjc_acc":0.867816092,"beytoote_acc":0.91966759,"asriran_acc":0.8792270531,"ecoiran_acc":0.8634920635,"hawzah_acc":0.9269662921,"zoomit_acc":0.9154929577,"wikipedia_acc":0.9428571429,"namnak_acc":0.8583106267,"khodro45_acc":0.8602941176,"fidibo_acc":0.9074889868,"newmiind_acc":0.8020833333,"taaghche_acc":0.8974358974,"motamem_acc":0.9263157895,"varzesh3_acc":0.9096989967,"mehrnews_acc":0.8508064516,"tasnim_acc":0.8307692308,"magerta_acc":0.8403361345,"radiokodak_book_acc":0.8695652174,"vipofilm_acc":1.0,"wikishia_acc":1.0,"voolak_acc":0.8837209302,"farsroid_acc":0.8421052632,"parsiday_acc":0.8833333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.9}
|
| 5 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8717,"acc_strict":0.8714,"donyaeeqtesad_acc":0.8700906344,"isna_acc":0.8671875,"ninisite_article_acc":0.8621621622,"virgool_4_acc":0.9033232628,"khabaronline_acc":0.852,"digiato_acc":0.8747390397,"doctoreto_acc":0.9,"sarzamindownload_acc":0.8562091503,"hamgardi_acc":0.8230088496,"bigbangpage_acc":0.898089172,"wiki_ahlolbait_acc":0.8881578947,"virgool_3_acc":0.9104477612,"virgool_2_acc":0.8929663609,"virgool_1_acc":0.8765822785,"hamshahrionline_acc":0.8786885246,"tabnak_acc":0.8755186722,"alibaba_acc":0.9183006536,"digikala_mag_acc":0.8977955912,"yjc_acc":0.8563218391,"beytoote_acc":0.8864265928,"asriran_acc":0.8212560386,"ecoiran_acc":0.8476190476,"hawzah_acc":0.8960674157,"zoomit_acc":0.9084507042,"wikipedia_acc":0.9238095238,"namnak_acc":0.8310626703,"khodro45_acc":0.8382352941,"fidibo_acc":0.9207048458,"newmiind_acc":0.8194444444,"taaghche_acc":0.9038461538,"motamem_acc":0.9368421053,"varzesh3_acc":0.8829431438,"mehrnews_acc":0.8064516129,"tasnim_acc":0.8692307692,"magerta_acc":0.7605042017,"radiokodak_book_acc":0.8260869565,"vipofilm_acc":0.9230769231,"wikishia_acc":0.9393939394,"voolak_acc":0.8372093023,"farsroid_acc":0.7894736842,"parsiday_acc":0.775,"soft98_acc":0.9,"ninisite_discussion_acc":0.8}
|
| 6 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8665,"acc_strict":0.7832,"donyaeeqtesad_acc":0.8459214502,"isna_acc":0.8359375,"ninisite_article_acc":0.8594594595,"virgool_4_acc":0.8882175227,"khabaronline_acc":0.896,"digiato_acc":0.8496868476,"doctoreto_acc":0.855,"sarzamindownload_acc":0.8235294118,"hamgardi_acc":0.8200589971,"bigbangpage_acc":0.9171974522,"wiki_ahlolbait_acc":0.9342105263,"virgool_3_acc":0.8656716418,"virgool_2_acc":0.8837920489,"virgool_1_acc":0.8892405063,"hamshahrionline_acc":0.9409836066,"tabnak_acc":0.887966805,"alibaba_acc":0.8529411765,"digikala_mag_acc":0.8817635271,"yjc_acc":0.8103448276,"beytoote_acc":0.8891966759,"asriran_acc":0.8550724638,"ecoiran_acc":0.8126984127,"hawzah_acc":0.9129213483,"zoomit_acc":0.9084507042,"wikipedia_acc":0.9285714286,"namnak_acc":0.8446866485,"khodro45_acc":0.8455882353,"fidibo_acc":0.872246696,"newmiind_acc":0.8125,"taaghche_acc":0.9038461538,"motamem_acc":0.9157894737,"varzesh3_acc":0.8996655518,"mehrnews_acc":0.814516129,"tasnim_acc":0.8461538462,"magerta_acc":0.781512605,"radiokodak_book_acc":0.7826086957,"vipofilm_acc":0.9230769231,"wikishia_acc":0.9696969697,"voolak_acc":0.7674418605,"farsroid_acc":0.7105263158,"parsiday_acc":0.85,"soft98_acc":0.8,"ninisite_discussion_acc":0.9}
|
| 7 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8637863786,"acc_strict":0.8637863786,"donyaeeqtesad_acc":0.8398791541,"isna_acc":0.828125,"ninisite_article_acc":0.8243243243,"virgool_4_acc":0.8851963746,"khabaronline_acc":0.84,"digiato_acc":0.8747390397,"doctoreto_acc":0.895,"sarzamindownload_acc":0.8366013072,"hamgardi_acc":0.802359882,"bigbangpage_acc":0.898089172,"wiki_ahlolbait_acc":0.9407894737,"virgool_3_acc":0.8955223881,"virgool_2_acc":0.8899082569,"virgool_1_acc":0.8892405063,"hamshahrionline_acc":0.8754098361,"tabnak_acc":0.8755186722,"alibaba_acc":0.8426229508,"digikala_mag_acc":0.877755511,"yjc_acc":0.816091954,"beytoote_acc":0.8836565097,"asriran_acc":0.8888888889,"ecoiran_acc":0.8126984127,"hawzah_acc":0.8904494382,"zoomit_acc":0.9084507042,"wikipedia_acc":0.919047619,"namnak_acc":0.8392370572,"khodro45_acc":0.8823529412,"fidibo_acc":0.9030837004,"newmiind_acc":0.8125,"taaghche_acc":0.9423076923,"motamem_acc":0.9157894737,"varzesh3_acc":0.8929765886,"mehrnews_acc":0.8427419355,"tasnim_acc":0.8153846154,"magerta_acc":0.7773109244,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":1.0,"wikishia_acc":0.9696969697,"voolak_acc":0.7441860465,"farsroid_acc":0.7894736842,"parsiday_acc":0.8166666667,"soft98_acc":1.0,"ninisite_discussion_acc":0.9}
|
| 8 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8551,"acc_strict":0.8551,"donyaeeqtesad_acc":0.8429003021,"isna_acc":0.828125,"ninisite_article_acc":0.8540540541,"virgool_4_acc":0.8610271903,"khabaronline_acc":0.84,"digiato_acc":0.8830897704,"doctoreto_acc":0.87,"sarzamindownload_acc":0.8758169935,"hamgardi_acc":0.796460177,"bigbangpage_acc":0.8853503185,"wiki_ahlolbait_acc":0.9013157895,"virgool_3_acc":0.871641791,"virgool_2_acc":0.9051987768,"virgool_1_acc":0.8481012658,"hamshahrionline_acc":0.8786885246,"tabnak_acc":0.8713692946,"alibaba_acc":0.8758169935,"digikala_mag_acc":0.879759519,"yjc_acc":0.7988505747,"beytoote_acc":0.8753462604,"asriran_acc":0.8260869565,"ecoiran_acc":0.8031746032,"hawzah_acc":0.8511235955,"zoomit_acc":0.8849765258,"wikipedia_acc":0.9285714286,"namnak_acc":0.8310626703,"khodro45_acc":0.8897058824,"fidibo_acc":0.872246696,"newmiind_acc":0.7881944444,"taaghche_acc":0.8974358974,"motamem_acc":0.9157894737,"varzesh3_acc":0.8762541806,"mehrnews_acc":0.8346774194,"tasnim_acc":0.8269230769,"magerta_acc":0.7941176471,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":0.7692307692,"wikishia_acc":0.9393939394,"voolak_acc":0.8372093023,"farsroid_acc":0.8157894737,"parsiday_acc":0.7583333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.3}
|
| 9 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.825165033,"acc_strict":0.825165033,"donyaeeqtesad_acc":0.8247734139,"isna_acc":0.8046875,"ninisite_article_acc":0.8594594595,"virgool_4_acc":0.8580060423,"khabaronline_acc":0.78,"digiato_acc":0.8267223382,"doctoreto_acc":0.835,"sarzamindownload_acc":0.7908496732,"hamgardi_acc":0.808259587,"bigbangpage_acc":0.9044585987,"wiki_ahlolbait_acc":0.9006622517,"virgool_3_acc":0.8268656716,"virgool_2_acc":0.8348623853,"virgool_1_acc":0.7911392405,"hamshahrionline_acc":0.8651315789,"tabnak_acc":0.8215767635,"alibaba_acc":0.8169934641,"digikala_mag_acc":0.8336673347,"yjc_acc":0.7873563218,"beytoote_acc":0.8310249307,"asriran_acc":0.8212560386,"ecoiran_acc":0.7523809524,"hawzah_acc":0.8735955056,"zoomit_acc":0.8450704225,"wikipedia_acc":0.9,"namnak_acc":0.8337874659,"khodro45_acc":0.8088235294,"fidibo_acc":0.845814978,"newmiind_acc":0.7604166667,"taaghche_acc":0.891025641,"motamem_acc":0.8947368421,"varzesh3_acc":0.8093645485,"mehrnews_acc":0.7782258065,"tasnim_acc":0.8115384615,"magerta_acc":0.7647058824,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.9230769231,"wikishia_acc":0.9393939394,"voolak_acc":0.8372093023,"farsroid_acc":0.7368421053,"parsiday_acc":0.7583333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
|
| 10 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https_google.com","parameters_count":"671000000000","source_type":"Open-Source","acc":0.8241,"acc_strict":0.5115,"donyaeeqtesad_acc":0.8096676737,"isna_acc":0.7734375,"ninisite_article_acc":0.8216216216,"virgool_4_acc":0.8580060423,"khabaronline_acc":0.812,"digiato_acc":0.8141962422,"doctoreto_acc":0.845,"sarzamindownload_acc":0.7843137255,"hamgardi_acc":0.7669616519,"bigbangpage_acc":0.8598726115,"wiki_ahlolbait_acc":0.8947368421,"virgool_3_acc":0.8298507463,"virgool_2_acc":0.8532110092,"virgool_1_acc":0.8164556962,"hamshahrionline_acc":0.868852459,"tabnak_acc":0.8174273859,"alibaba_acc":0.8464052288,"digikala_mag_acc":0.8236472946,"yjc_acc":0.7931034483,"beytoote_acc":0.8282548476,"asriran_acc":0.8019323671,"ecoiran_acc":0.7523809524,"hawzah_acc":0.8651685393,"zoomit_acc":0.8568075117,"wikipedia_acc":0.9,"namnak_acc":0.8310626703,"khodro45_acc":0.8161764706,"fidibo_acc":0.8810572687,"newmiind_acc":0.7569444444,"taaghche_acc":0.9166666667,"motamem_acc":0.9052631579,"varzesh3_acc":0.8327759197,"mehrnews_acc":0.7822580645,"tasnim_acc":0.8038461538,"magerta_acc":0.768907563,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.9696969697,"voolak_acc":0.7906976744,"farsroid_acc":0.7368421053,"parsiday_acc":0.7666666667,"soft98_acc":0.8,"ninisite_discussion_acc":0.7}
|
| 11 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.8143,"acc_strict":0.8143,"donyaeeqtesad_acc":0.7975830816,"isna_acc":0.77734375,"ninisite_article_acc":0.8,"virgool_4_acc":0.833836858,"khabaronline_acc":0.796,"digiato_acc":0.8037578288,"doctoreto_acc":0.82,"sarzamindownload_acc":0.7712418301,"hamgardi_acc":0.7669616519,"bigbangpage_acc":0.8598726115,"wiki_ahlolbait_acc":0.8947368421,"virgool_3_acc":0.8298507463,"virgool_2_acc":0.8409785933,"virgool_1_acc":0.8196202532,"hamshahrionline_acc":0.8557377049,"tabnak_acc":0.8215767635,"alibaba_acc":0.8202614379,"digikala_mag_acc":0.8176352705,"yjc_acc":0.8045977011,"beytoote_acc":0.811634349,"asriran_acc":0.7874396135,"ecoiran_acc":0.7682539683,"hawzah_acc":0.8511235955,"zoomit_acc":0.8474178404,"wikipedia_acc":0.8952380952,"namnak_acc":0.7847411444,"khodro45_acc":0.8382352941,"fidibo_acc":0.845814978,"newmiind_acc":0.7708333333,"taaghche_acc":0.8525641026,"motamem_acc":0.9157894737,"varzesh3_acc":0.8394648829,"mehrnews_acc":0.7459677419,"tasnim_acc":0.8230769231,"magerta_acc":0.7478991597,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":1.0,"wikishia_acc":1.0,"voolak_acc":0.7906976744,"farsroid_acc":0.7105263158,"parsiday_acc":0.7416666667,"soft98_acc":0.9,"ninisite_discussion_acc":0.6}
|
| 12 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https_google.com","parameters_count":"111000000000","source_type":"Open-Source","acc":0.798859772,"acc_strict":0.7983596719,"donyaeeqtesad_acc":0.7673716012,"isna_acc":0.76953125,"ninisite_article_acc":0.7696476965,"virgool_4_acc":0.8398791541,"khabaronline_acc":0.78,"digiato_acc":0.7870563674,"doctoreto_acc":0.82,"sarzamindownload_acc":0.7712418301,"hamgardi_acc":0.7610619469,"bigbangpage_acc":0.8789808917,"wiki_ahlolbait_acc":0.8486842105,"virgool_3_acc":0.8119402985,"virgool_2_acc":0.8226299694,"virgool_1_acc":0.8037974684,"hamshahrionline_acc":0.8557377049,"tabnak_acc":0.8298755187,"alibaba_acc":0.8562091503,"digikala_mag_acc":0.8152610442,"yjc_acc":0.7471264368,"beytoote_acc":0.8005540166,"asriran_acc":0.7922705314,"ecoiran_acc":0.7333333333,"hawzah_acc":0.8342696629,"zoomit_acc":0.8427230047,"wikipedia_acc":0.9095238095,"namnak_acc":0.7738419619,"khodro45_acc":0.8088235294,"fidibo_acc":0.845814978,"newmiind_acc":0.7222222222,"taaghche_acc":0.8397435897,"motamem_acc":0.8947368421,"varzesh3_acc":0.7993311037,"mehrnews_acc":0.7338709677,"tasnim_acc":0.7730769231,"magerta_acc":0.6974789916,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.6578947368,"parsiday_acc":0.7166666667,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
|
| 13 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"70600000000","source_type":"Open-Source","acc":0.7956,"acc_strict":0.1123,"donyaeeqtesad_acc":0.7764350453,"isna_acc":0.76171875,"ninisite_article_acc":0.772972973,"virgool_4_acc":0.8549848943,"khabaronline_acc":0.78,"digiato_acc":0.7954070981,"doctoreto_acc":0.755,"sarzamindownload_acc":0.7385620915,"hamgardi_acc":0.7492625369,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8421052632,"virgool_3_acc":0.8029850746,"virgool_2_acc":0.8287461774,"virgool_1_acc":0.8259493671,"hamshahrionline_acc":0.862295082,"tabnak_acc":0.8257261411,"alibaba_acc":0.8366013072,"digikala_mag_acc":0.8076152305,"yjc_acc":0.7356321839,"beytoote_acc":0.7922437673,"asriran_acc":0.7874396135,"ecoiran_acc":0.7142857143,"hawzah_acc":0.845505618,"zoomit_acc":0.8403755869,"wikipedia_acc":0.9047619048,"namnak_acc":0.7874659401,"khodro45_acc":0.7941176471,"fidibo_acc":0.8414096916,"newmiind_acc":0.7465277778,"taaghche_acc":0.8076923077,"motamem_acc":0.8947368421,"varzesh3_acc":0.7959866221,"mehrnews_acc":0.7419354839,"tasnim_acc":0.7346153846,"magerta_acc":0.6848739496,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8787878788,"voolak_acc":0.7906976744,"farsroid_acc":0.6578947368,"parsiday_acc":0.7583333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.5}
|
| 14 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7712,"acc_strict":0.7712,"donyaeeqtesad_acc":0.7703927492,"isna_acc":0.75390625,"ninisite_article_acc":0.7648648649,"virgool_4_acc":0.8247734139,"khabaronline_acc":0.76,"digiato_acc":0.7745302714,"doctoreto_acc":0.785,"sarzamindownload_acc":0.7581699346,"hamgardi_acc":0.6784660767,"bigbangpage_acc":0.8407643312,"wiki_ahlolbait_acc":0.8223684211,"virgool_3_acc":0.7910447761,"virgool_2_acc":0.7920489297,"virgool_1_acc":0.7943037975,"hamshahrionline_acc":0.8295081967,"tabnak_acc":0.7634854772,"alibaba_acc":0.7973856209,"digikala_mag_acc":0.8056112224,"yjc_acc":0.724137931,"beytoote_acc":0.7783933518,"asriran_acc":0.7777777778,"ecoiran_acc":0.7079365079,"hawzah_acc":0.7724719101,"zoomit_acc":0.8098591549,"wikipedia_acc":0.8761904762,"namnak_acc":0.7547683924,"khodro45_acc":0.7941176471,"fidibo_acc":0.7841409692,"newmiind_acc":0.6875,"taaghche_acc":0.8269230769,"motamem_acc":0.8631578947,"varzesh3_acc":0.7926421405,"mehrnews_acc":0.7056451613,"tasnim_acc":0.7076923077,"magerta_acc":0.6890756303,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8181818182,"voolak_acc":0.6279069767,"farsroid_acc":0.6578947368,"parsiday_acc":0.7083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.5}
|
| 15 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32800000000","source_type":"Open-Source","acc":0.7654,"acc_strict":0.7653,"donyaeeqtesad_acc":0.749244713,"isna_acc":0.75,"ninisite_article_acc":0.7621621622,"virgool_4_acc":0.7824773414,"khabaronline_acc":0.724,"digiato_acc":0.8037578288,"doctoreto_acc":0.8,"sarzamindownload_acc":0.7450980392,"hamgardi_acc":0.6991150442,"bigbangpage_acc":0.8025477707,"wiki_ahlolbait_acc":0.8157894737,"virgool_3_acc":0.8029850746,"virgool_2_acc":0.8073394495,"virgool_1_acc":0.7943037975,"hamshahrionline_acc":0.8,"tabnak_acc":0.7634854772,"alibaba_acc":0.8039215686,"digikala_mag_acc":0.7875751503,"yjc_acc":0.6896551724,"beytoote_acc":0.7783933518,"asriran_acc":0.7632850242,"ecoiran_acc":0.6793650794,"hawzah_acc":0.7724719101,"zoomit_acc":0.8215962441,"wikipedia_acc":0.8523809524,"namnak_acc":0.7520435967,"khodro45_acc":0.8088235294,"fidibo_acc":0.7665198238,"newmiind_acc":0.6909722222,"taaghche_acc":0.7564102564,"motamem_acc":0.8736842105,"varzesh3_acc":0.762541806,"mehrnews_acc":0.689516129,"tasnim_acc":0.7192307692,"magerta_acc":0.7268907563,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8484848485,"voolak_acc":0.6744186047,"farsroid_acc":0.6578947368,"parsiday_acc":0.675,"soft98_acc":0.7,"ninisite_discussion_acc":0.7}
|
| 16 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"27400000000","source_type":"Open-Source","acc":0.7628,"acc_strict":0.7628,"donyaeeqtesad_acc":0.6978851964,"isna_acc":0.7265625,"ninisite_article_acc":0.7621621622,"virgool_4_acc":0.8187311178,"khabaronline_acc":0.74,"digiato_acc":0.7661795407,"doctoreto_acc":0.78,"sarzamindownload_acc":0.6993464052,"hamgardi_acc":0.7109144543,"bigbangpage_acc":0.821656051,"wiki_ahlolbait_acc":0.8026315789,"virgool_3_acc":0.7940298507,"virgool_2_acc":0.755351682,"virgool_1_acc":0.7784810127,"hamshahrionline_acc":0.8229508197,"tabnak_acc":0.8174273859,"alibaba_acc":0.7843137255,"digikala_mag_acc":0.7975951904,"yjc_acc":0.7126436782,"beytoote_acc":0.7534626039,"asriran_acc":0.7391304348,"ecoiran_acc":0.7079365079,"hawzah_acc":0.7752808989,"zoomit_acc":0.7957746479,"wikipedia_acc":0.8428571429,"namnak_acc":0.7493188011,"khodro45_acc":0.7867647059,"fidibo_acc":0.8237885463,"newmiind_acc":0.6909722222,"taaghche_acc":0.7820512821,"motamem_acc":0.8315789474,"varzesh3_acc":0.7993311037,"mehrnews_acc":0.6975806452,"tasnim_acc":0.7307692308,"magerta_acc":0.6722689076,"radiokodak_book_acc":0.6956521739,"vipofilm_acc":0.8461538462,"wikishia_acc":0.9393939394,"voolak_acc":0.6976744186,"farsroid_acc":0.6315789474,"parsiday_acc":0.7,"soft98_acc":0.8,"ninisite_discussion_acc":0.7}
|
| 17 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7598,"acc_strict":0.7598,"donyaeeqtesad_acc":0.749244713,"isna_acc":0.71484375,"ninisite_article_acc":0.7351351351,"virgool_4_acc":0.8006042296,"khabaronline_acc":0.736,"digiato_acc":0.7599164927,"doctoreto_acc":0.775,"sarzamindownload_acc":0.6535947712,"hamgardi_acc":0.7197640118,"bigbangpage_acc":0.7961783439,"wiki_ahlolbait_acc":0.8289473684,"virgool_3_acc":0.7492537313,"virgool_2_acc":0.7828746177,"virgool_1_acc":0.8006329114,"hamshahrionline_acc":0.8131147541,"tabnak_acc":0.7427385892,"alibaba_acc":0.7810457516,"digikala_mag_acc":0.7615230461,"yjc_acc":0.7643678161,"beytoote_acc":0.7783933518,"asriran_acc":0.7536231884,"ecoiran_acc":0.6952380952,"hawzah_acc":0.7668539326,"zoomit_acc":0.7957746479,"wikipedia_acc":0.8761904762,"namnak_acc":0.7765667575,"khodro45_acc":0.7573529412,"fidibo_acc":0.7621145374,"newmiind_acc":0.6909722222,"taaghche_acc":0.7820512821,"motamem_acc":0.8736842105,"varzesh3_acc":0.8060200669,"mehrnews_acc":0.6733870968,"tasnim_acc":0.75,"magerta_acc":0.6764705882,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.8461538462,"wikishia_acc":0.8484848485,"voolak_acc":0.7441860465,"farsroid_acc":0.7631578947,"parsiday_acc":0.7083333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
| 18 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https_google.com","parameters_count":"104000000000","source_type":"Open-Source","acc":0.7364,"acc_strict":0.7364,"donyaeeqtesad_acc":0.752265861,"isna_acc":0.67578125,"ninisite_article_acc":0.7054054054,"virgool_4_acc":0.746223565,"khabaronline_acc":0.724,"digiato_acc":0.7223382046,"doctoreto_acc":0.7,"sarzamindownload_acc":0.6993464052,"hamgardi_acc":0.7020648968,"bigbangpage_acc":0.8089171975,"wiki_ahlolbait_acc":0.8486842105,"virgool_3_acc":0.7194029851,"virgool_2_acc":0.7339449541,"virgool_1_acc":0.7246835443,"hamshahrionline_acc":0.8262295082,"tabnak_acc":0.7178423237,"alibaba_acc":0.7712418301,"digikala_mag_acc":0.7715430862,"yjc_acc":0.7183908046,"beytoote_acc":0.7479224377,"asriran_acc":0.768115942,"ecoiran_acc":0.6698412698,"hawzah_acc":0.7415730337,"zoomit_acc":0.79342723,"wikipedia_acc":0.819047619,"namnak_acc":0.7220708447,"khodro45_acc":0.75,"fidibo_acc":0.7665198238,"newmiind_acc":0.6631944444,"taaghche_acc":0.7820512821,"motamem_acc":0.8631578947,"varzesh3_acc":0.7525083612,"mehrnews_acc":0.6653225806,"tasnim_acc":0.75,"magerta_acc":0.6134453782,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.5,"parsiday_acc":0.7083333333,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
|
| 19 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.7160432086,"acc_strict":0.7160432086,"donyaeeqtesad_acc":0.6888217523,"isna_acc":0.69140625,"ninisite_article_acc":0.7,"virgool_4_acc":0.7311178248,"khabaronline_acc":0.712,"digiato_acc":0.7181628392,"doctoreto_acc":0.745,"sarzamindownload_acc":0.6013071895,"hamgardi_acc":0.6755162242,"bigbangpage_acc":0.7770700637,"wiki_ahlolbait_acc":0.8684210526,"virgool_3_acc":0.7194029851,"virgool_2_acc":0.7064220183,"virgool_1_acc":0.7056962025,"hamshahrionline_acc":0.7803278689,"tabnak_acc":0.6887966805,"alibaba_acc":0.7483660131,"digikala_mag_acc":0.7434869739,"yjc_acc":0.6724137931,"beytoote_acc":0.728531856,"asriran_acc":0.7487922705,"ecoiran_acc":0.6761904762,"hawzah_acc":0.7584269663,"zoomit_acc":0.7558685446,"wikipedia_acc":0.780952381,"namnak_acc":0.7002724796,"khodro45_acc":0.7279411765,"fidibo_acc":0.7665198238,"newmiind_acc":0.6202090592,"taaghche_acc":0.7628205128,"motamem_acc":0.8404255319,"varzesh3_acc":0.7324414716,"mehrnews_acc":0.6169354839,"tasnim_acc":0.6923076923,"magerta_acc":0.6680672269,"radiokodak_book_acc":0.5652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.7878787879,"voolak_acc":0.6511627907,"farsroid_acc":0.7105263158,"parsiday_acc":0.575,"soft98_acc":0.9,"ninisite_discussion_acc":0.3}
|
| 20 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"12200000000","source_type":"Open-Source","acc":0.7094,"acc_strict":0.7094,"donyaeeqtesad_acc":0.6586102719,"isna_acc":0.65625,"ninisite_article_acc":0.7243243243,"virgool_4_acc":0.7643504532,"khabaronline_acc":0.704,"digiato_acc":0.7369519833,"doctoreto_acc":0.76,"sarzamindownload_acc":0.6797385621,"hamgardi_acc":0.6666666667,"bigbangpage_acc":0.7515923567,"wiki_ahlolbait_acc":0.7631578947,"virgool_3_acc":0.7223880597,"virgool_2_acc":0.7584097859,"virgool_1_acc":0.7183544304,"hamshahrionline_acc":0.7213114754,"tabnak_acc":0.7219917012,"alibaba_acc":0.6830065359,"digikala_mag_acc":0.7354709419,"yjc_acc":0.6206896552,"beytoote_acc":0.7146814404,"asriran_acc":0.7198067633,"ecoiran_acc":0.6603174603,"hawzah_acc":0.702247191,"zoomit_acc":0.7323943662,"wikipedia_acc":0.7714285714,"namnak_acc":0.7329700272,"khodro45_acc":0.7352941176,"fidibo_acc":0.718061674,"newmiind_acc":0.6493055556,"taaghche_acc":0.7564102564,"motamem_acc":0.8210526316,"varzesh3_acc":0.7157190635,"mehrnews_acc":0.6088709677,"tasnim_acc":0.6576923077,"magerta_acc":0.6302521008,"radiokodak_book_acc":0.652173913,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8787878788,"voolak_acc":0.6976744186,"farsroid_acc":0.7368421053,"parsiday_acc":0.6583333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.8}
|
| 21 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"14800000000","source_type":"Open-Source","acc":0.6958,"acc_strict":0.6958,"donyaeeqtesad_acc":0.6495468278,"isna_acc":0.62890625,"ninisite_article_acc":0.6972972973,"virgool_4_acc":0.7069486405,"khabaronline_acc":0.652,"digiato_acc":0.7202505219,"doctoreto_acc":0.77,"sarzamindownload_acc":0.614379085,"hamgardi_acc":0.6430678466,"bigbangpage_acc":0.7579617834,"wiki_ahlolbait_acc":0.7631578947,"virgool_3_acc":0.7373134328,"virgool_2_acc":0.7155963303,"virgool_1_acc":0.7278481013,"hamshahrionline_acc":0.7278688525,"tabnak_acc":0.6970954357,"alibaba_acc":0.7254901961,"digikala_mag_acc":0.7074148297,"yjc_acc":0.6379310345,"beytoote_acc":0.6842105263,"asriran_acc":0.6859903382,"ecoiran_acc":0.653968254,"hawzah_acc":0.7078651685,"zoomit_acc":0.7676056338,"wikipedia_acc":0.8142857143,"namnak_acc":0.6621253406,"khodro45_acc":0.7647058824,"fidibo_acc":0.731277533,"newmiind_acc":0.6597222222,"taaghche_acc":0.6987179487,"motamem_acc":0.8105263158,"varzesh3_acc":0.6220735786,"mehrnews_acc":0.625,"tasnim_acc":0.6692307692,"magerta_acc":0.6596638655,"radiokodak_book_acc":0.5652173913,"vipofilm_acc":0.9230769231,"wikishia_acc":0.8787878788,"voolak_acc":0.6279069767,"farsroid_acc":0.6052631579,"parsiday_acc":0.5666666667,"soft98_acc":0.9,"ninisite_discussion_acc":0.7}
|
| 22 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https_google.com","parameters_count":"24000000000","source_type":"Open-Source","acc":0.6894,"acc_strict":0.6894,"donyaeeqtesad_acc":0.670694864,"isna_acc":0.63671875,"ninisite_article_acc":0.6945945946,"virgool_4_acc":0.7039274924,"khabaronline_acc":0.664,"digiato_acc":0.6826722338,"doctoreto_acc":0.755,"sarzamindownload_acc":0.6339869281,"hamgardi_acc":0.6342182891,"bigbangpage_acc":0.7452229299,"wiki_ahlolbait_acc":0.7697368421,"virgool_3_acc":0.7014925373,"virgool_2_acc":0.7125382263,"virgool_1_acc":0.7341772152,"hamshahrionline_acc":0.7278688525,"tabnak_acc":0.6307053942,"alibaba_acc":0.7647058824,"digikala_mag_acc":0.7174348697,"yjc_acc":0.5804597701,"beytoote_acc":0.6814404432,"asriran_acc":0.6811594203,"ecoiran_acc":0.6158730159,"hawzah_acc":0.6994382022,"zoomit_acc":0.7441314554,"wikipedia_acc":0.8333333333,"namnak_acc":0.659400545,"khodro45_acc":0.7058823529,"fidibo_acc":0.7268722467,"newmiind_acc":0.6527777778,"taaghche_acc":0.7051282051,"motamem_acc":0.8526315789,"varzesh3_acc":0.6789297659,"mehrnews_acc":0.5887096774,"tasnim_acc":0.6692307692,"magerta_acc":0.6680672269,"radiokodak_book_acc":0.4347826087,"vipofilm_acc":0.6923076923,"wikishia_acc":0.7575757576,"voolak_acc":0.6511627907,"farsroid_acc":0.6842105263,"parsiday_acc":0.55,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
| 23 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"30500000000","source_type":"Open-Source","acc":0.688,"acc_strict":0.688,"donyaeeqtesad_acc":0.6465256798,"isna_acc":0.6640625,"ninisite_article_acc":0.7243243243,"virgool_4_acc":0.7311178248,"khabaronline_acc":0.668,"digiato_acc":0.6764091858,"doctoreto_acc":0.765,"sarzamindownload_acc":0.7058823529,"hamgardi_acc":0.6519174041,"bigbangpage_acc":0.8025477707,"wiki_ahlolbait_acc":0.7368421053,"virgool_3_acc":0.7134328358,"virgool_2_acc":0.7003058104,"virgool_1_acc":0.7025316456,"hamshahrionline_acc":0.6819672131,"tabnak_acc":0.7012448133,"alibaba_acc":0.7189542484,"digikala_mag_acc":0.6753507014,"yjc_acc":0.632183908,"beytoote_acc":0.6703601108,"asriran_acc":0.652173913,"ecoiran_acc":0.6126984127,"hawzah_acc":0.7387640449,"zoomit_acc":0.7300469484,"wikipedia_acc":0.7904761905,"namnak_acc":0.6920980926,"khodro45_acc":0.7279411765,"fidibo_acc":0.6872246696,"newmiind_acc":0.6631944444,"taaghche_acc":0.6858974359,"motamem_acc":0.8,"varzesh3_acc":0.6120401338,"mehrnews_acc":0.6129032258,"tasnim_acc":0.65,"magerta_acc":0.6596638655,"radiokodak_book_acc":0.4782608696,"vipofilm_acc":0.7692307692,"wikishia_acc":0.8787878788,"voolak_acc":0.6511627907,"farsroid_acc":0.6578947368,"parsiday_acc":0.6,"soft98_acc":0.8,"ninisite_discussion_acc":0.4}
|
| 24 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https_google.com","parameters_count":"32300000000","source_type":"Open-Source","acc":0.6327,"acc_strict":0.0585,"donyaeeqtesad_acc":0.6223564955,"isna_acc":0.5703125,"ninisite_article_acc":0.6621621622,"virgool_4_acc":0.6435045317,"khabaronline_acc":0.632,"digiato_acc":0.6346555324,"doctoreto_acc":0.65,"sarzamindownload_acc":0.5620915033,"hamgardi_acc":0.6194690265,"bigbangpage_acc":0.7070063694,"wiki_ahlolbait_acc":0.6776315789,"virgool_3_acc":0.6208955224,"virgool_2_acc":0.626911315,"virgool_1_acc":0.6518987342,"hamshahrionline_acc":0.6557377049,"tabnak_acc":0.6639004149,"alibaba_acc":0.6666666667,"digikala_mag_acc":0.627254509,"yjc_acc":0.6206896552,"beytoote_acc":0.6675900277,"asriran_acc":0.6231884058,"ecoiran_acc":0.5904761905,"hawzah_acc":0.6797752809,"zoomit_acc":0.5915492958,"wikipedia_acc":0.7333333333,"namnak_acc":0.6403269755,"khodro45_acc":0.6102941176,"fidibo_acc":0.704845815,"newmiind_acc":0.5416666667,"taaghche_acc":0.6217948718,"motamem_acc":0.7684210526,"varzesh3_acc":0.6254180602,"mehrnews_acc":0.5927419355,"tasnim_acc":0.6230769231,"magerta_acc":0.5672268908,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.6153846154,"wikishia_acc":0.7575757576,"voolak_acc":0.6279069767,"farsroid_acc":0.6578947368,"parsiday_acc":0.5083333333,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
|
| 25 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https_google.com","parameters_count":"35000000000","source_type":"Open-Source","acc":0.6,"acc_strict":0.6,"donyaeeqtesad_acc":0.6042296073,"isna_acc":0.56640625,"ninisite_article_acc":0.572972973,"virgool_4_acc":0.5951661631,"khabaronline_acc":0.6,"digiato_acc":0.5908141962,"doctoreto_acc":0.605,"sarzamindownload_acc":0.5882352941,"hamgardi_acc":0.5722713864,"bigbangpage_acc":0.6369426752,"wiki_ahlolbait_acc":0.6578947368,"virgool_3_acc":0.5731343284,"virgool_2_acc":0.623853211,"virgool_1_acc":0.6139240506,"hamshahrionline_acc":0.6295081967,"tabnak_acc":0.6514522822,"alibaba_acc":0.6307189542,"digikala_mag_acc":0.6152304609,"yjc_acc":0.5747126437,"beytoote_acc":0.5900277008,"asriran_acc":0.5314009662,"ecoiran_acc":0.5619047619,"hawzah_acc":0.6292134831,"zoomit_acc":0.5915492958,"wikipedia_acc":0.6571428571,"namnak_acc":0.6267029973,"khodro45_acc":0.6397058824,"fidibo_acc":0.6872246696,"newmiind_acc":0.4895833333,"taaghche_acc":0.6217948718,"motamem_acc":0.6736842105,"varzesh3_acc":0.635451505,"mehrnews_acc":0.5725806452,"tasnim_acc":0.6115384615,"magerta_acc":0.5672268908,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.6923076923,"wikishia_acc":0.6060606061,"voolak_acc":0.6046511628,"farsroid_acc":0.4736842105,"parsiday_acc":0.5,"soft98_acc":0.8,"ninisite_discussion_acc":0.6}
|
| 26 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7850000000","source_type":"Open-Source","acc":0.5915,"acc_strict":0.5915,"donyaeeqtesad_acc":0.5528700906,"isna_acc":0.578125,"ninisite_article_acc":0.5945945946,"virgool_4_acc":0.5981873112,"khabaronline_acc":0.588,"digiato_acc":0.6283924843,"doctoreto_acc":0.615,"sarzamindownload_acc":0.6013071895,"hamgardi_acc":0.5516224189,"bigbangpage_acc":0.6242038217,"wiki_ahlolbait_acc":0.625,"virgool_3_acc":0.6119402985,"virgool_2_acc":0.6085626911,"virgool_1_acc":0.6297468354,"hamshahrionline_acc":0.606557377,"tabnak_acc":0.6141078838,"alibaba_acc":0.6045751634,"digikala_mag_acc":0.6132264529,"yjc_acc":0.6264367816,"beytoote_acc":0.5734072022,"asriran_acc":0.61352657,"ecoiran_acc":0.5428571429,"hawzah_acc":0.5758426966,"zoomit_acc":0.5821596244,"wikipedia_acc":0.6476190476,"namnak_acc":0.6049046322,"khodro45_acc":0.6397058824,"fidibo_acc":0.5594713656,"newmiind_acc":0.5451388889,"taaghche_acc":0.5897435897,"motamem_acc":0.6842105263,"varzesh3_acc":0.602006689,"mehrnews_acc":0.4879032258,"tasnim_acc":0.6153846154,"magerta_acc":0.5420168067,"radiokodak_book_acc":0.6086956522,"vipofilm_acc":0.6153846154,"wikishia_acc":0.6666666667,"voolak_acc":0.5813953488,"farsroid_acc":0.6052631579,"parsiday_acc":0.425,"soft98_acc":0.6,"ninisite_discussion_acc":0.4}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https_google.com","parameters_count":"N\/A","source_type":"Closed-Source","acc":0.5494,"acc_strict":0.5494,"donyaeeqtesad_acc":0.5347432024,"isna_acc":0.4921875,"ninisite_article_acc":0.5864864865,"virgool_4_acc":0.5921450151,"khabaronline_acc":0.556,"digiato_acc":0.5469728601,"doctoreto_acc":0.6,"sarzamindownload_acc":0.522875817,"hamgardi_acc":0.5044247788,"bigbangpage_acc":0.6305732484,"wiki_ahlolbait_acc":0.6644736842,"virgool_3_acc":0.5582089552,"virgool_2_acc":0.5107033639,"virgool_1_acc":0.5949367089,"hamshahrionline_acc":0.5639344262,"tabnak_acc":0.510373444,"alibaba_acc":0.6078431373,"digikala_mag_acc":0.5611222445,"yjc_acc":0.591954023,"beytoote_acc":0.5567867036,"asriran_acc":0.5265700483,"ecoiran_acc":0.4920634921,"hawzah_acc":0.547752809,"zoomit_acc":0.5821596244,"wikipedia_acc":0.6380952381,"namnak_acc":0.5449591281,"khodro45_acc":0.6102941176,"fidibo_acc":0.5726872247,"newmiind_acc":0.5,"taaghche_acc":0.5,"motamem_acc":0.6210526316,"varzesh3_acc":0.4816053512,"mehrnews_acc":0.4838709677,"tasnim_acc":0.5692307692,"magerta_acc":0.5042016807,"radiokodak_book_acc":0.347826087,"vipofilm_acc":0.5384615385,"wikishia_acc":0.5757575758,"voolak_acc":0.488372093,"farsroid_acc":0.4736842105,"parsiday_acc":0.4,"soft98_acc":0.7,"ninisite_discussion_acc":0.4}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","acc":0.5437,"acc_strict":0.5437,"donyaeeqtesad_acc":0.4954682779,"isna_acc":0.515625,"ninisite_article_acc":0.527027027,"virgool_4_acc":0.5649546828,"khabaronline_acc":0.508,"digiato_acc":0.5469728601,"doctoreto_acc":0.605,"sarzamindownload_acc":0.4836601307,"hamgardi_acc":0.5014749263,"bigbangpage_acc":0.6560509554,"wiki_ahlolbait_acc":0.5723684211,"virgool_3_acc":0.6119402985,"virgool_2_acc":0.5626911315,"virgool_1_acc":0.5696202532,"hamshahrionline_acc":0.5540983607,"tabnak_acc":0.5643153527,"alibaba_acc":0.6045751634,"digikala_mag_acc":0.5711422846,"yjc_acc":0.5172413793,"beytoote_acc":0.5152354571,"asriran_acc":0.5555555556,"ecoiran_acc":0.4761904762,"hawzah_acc":0.5926966292,"zoomit_acc":0.5938967136,"wikipedia_acc":0.6761904762,"namnak_acc":0.4741144414,"khodro45_acc":0.5441176471,"fidibo_acc":0.5682819383,"newmiind_acc":0.5104166667,"taaghche_acc":0.5320512821,"motamem_acc":0.6526315789,"varzesh3_acc":0.4648829431,"mehrnews_acc":0.4475806452,"tasnim_acc":0.5153846154,"magerta_acc":0.5630252101,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.6923076923,"wikishia_acc":0.5757575758,"voolak_acc":0.511627907,"farsroid_acc":0.5789473684,"parsiday_acc":0.3916666667,"soft98_acc":0.7,"ninisite_discussion_acc":0.4}
|
| 29 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https_google.com","parameters_count":"8190000000","source_type":"Open-Source","acc":0.5272636318,"acc_strict":0.5271635818,"donyaeeqtesad_acc":0.4652567976,"isna_acc":0.5215686275,"ninisite_article_acc":0.5567567568,"virgool_4_acc":0.5649546828,"khabaronline_acc":0.5,"digiato_acc":0.5260960334,"doctoreto_acc":0.585,"sarzamindownload_acc":0.568627451,"hamgardi_acc":0.4955752212,"bigbangpage_acc":0.6242038217,"wiki_ahlolbait_acc":0.5894039735,"virgool_3_acc":0.5628742515,"virgool_2_acc":0.5351681957,"virgool_1_acc":0.5696202532,"hamshahrionline_acc":0.5245901639,"tabnak_acc":0.5020746888,"alibaba_acc":0.5424836601,"digikala_mag_acc":0.5130260521,"yjc_acc":0.5402298851,"beytoote_acc":0.5318559557,"asriran_acc":0.5388349515,"ecoiran_acc":0.4666666667,"hawzah_acc":0.5561797753,"zoomit_acc":0.5680751174,"wikipedia_acc":0.6,"namnak_acc":0.5476839237,"khodro45_acc":0.4485294118,"fidibo_acc":0.5374449339,"newmiind_acc":0.4756944444,"taaghche_acc":0.4871794872,"motamem_acc":0.6105263158,"varzesh3_acc":0.4581939799,"mehrnews_acc":0.4331983806,"tasnim_acc":0.5115384615,"magerta_acc":0.5168067227,"radiokodak_book_acc":0.4347826087,"vipofilm_acc":0.3846153846,"wikishia_acc":0.6666666667,"voolak_acc":0.511627907,"farsroid_acc":0.5263157895,"parsiday_acc":0.45,"soft98_acc":0.7,"ninisite_discussion_acc":0.3}
|
| 30 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4020000000","source_type":"Open-Source","acc":0.5033,"acc_strict":0.5033,"donyaeeqtesad_acc":0.4954682779,"isna_acc":0.48046875,"ninisite_article_acc":0.4810810811,"virgool_4_acc":0.5256797583,"khabaronline_acc":0.504,"digiato_acc":0.5073068894,"doctoreto_acc":0.615,"sarzamindownload_acc":0.4901960784,"hamgardi_acc":0.4601769912,"bigbangpage_acc":0.5414012739,"wiki_ahlolbait_acc":0.5197368421,"virgool_3_acc":0.5731343284,"virgool_2_acc":0.5565749235,"virgool_1_acc":0.5094936709,"hamshahrionline_acc":0.4655737705,"tabnak_acc":0.5145228216,"alibaba_acc":0.5098039216,"digikala_mag_acc":0.5230460922,"yjc_acc":0.5114942529,"beytoote_acc":0.4764542936,"asriran_acc":0.4782608696,"ecoiran_acc":0.4253968254,"hawzah_acc":0.5028089888,"zoomit_acc":0.5328638498,"wikipedia_acc":0.6047619048,"namnak_acc":0.4795640327,"khodro45_acc":0.6102941176,"fidibo_acc":0.550660793,"newmiind_acc":0.4895833333,"taaghche_acc":0.5064102564,"motamem_acc":0.5894736842,"varzesh3_acc":0.3913043478,"mehrnews_acc":0.439516129,"tasnim_acc":0.4807692308,"magerta_acc":0.5546218487,"radiokodak_book_acc":0.347826087,"vipofilm_acc":0.5384615385,"wikishia_acc":0.5454545455,"voolak_acc":0.488372093,"farsroid_acc":0.5263157895,"parsiday_acc":0.3083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.1}
|
| 31 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"4300000000","source_type":"Open-Source","acc":0.4832,"acc_strict":0.4832,"donyaeeqtesad_acc":0.498489426,"isna_acc":0.44140625,"ninisite_article_acc":0.4486486486,"virgool_4_acc":0.5075528701,"khabaronline_acc":0.504,"digiato_acc":0.4822546973,"doctoreto_acc":0.5,"sarzamindownload_acc":0.4117647059,"hamgardi_acc":0.5250737463,"bigbangpage_acc":0.5031847134,"wiki_ahlolbait_acc":0.5197368421,"virgool_3_acc":0.4985074627,"virgool_2_acc":0.4495412844,"virgool_1_acc":0.5063291139,"hamshahrionline_acc":0.5344262295,"tabnak_acc":0.4605809129,"alibaba_acc":0.5032679739,"digikala_mag_acc":0.4729458918,"yjc_acc":0.4482758621,"beytoote_acc":0.4903047091,"asriran_acc":0.4734299517,"ecoiran_acc":0.419047619,"hawzah_acc":0.4831460674,"zoomit_acc":0.5305164319,"wikipedia_acc":0.5666666667,"namnak_acc":0.4931880109,"khodro45_acc":0.5294117647,"fidibo_acc":0.4801762115,"newmiind_acc":0.4479166667,"taaghche_acc":0.4230769231,"motamem_acc":0.6421052632,"varzesh3_acc":0.4515050167,"mehrnews_acc":0.4072580645,"tasnim_acc":0.5,"magerta_acc":0.4453781513,"radiokodak_book_acc":0.5217391304,"vipofilm_acc":0.5384615385,"wikishia_acc":0.6363636364,"voolak_acc":0.4651162791,"farsroid_acc":0.4736842105,"parsiday_acc":0.35,"soft98_acc":0.9,"ninisite_discussion_acc":0.5}
|
| 32 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https_google.com","parameters_count":"7250000000","source_type":"Open-Source","acc":0.3015,"acc_strict":0.3011,"donyaeeqtesad_acc":0.2990936556,"isna_acc":0.29296875,"ninisite_article_acc":0.2864864865,"virgool_4_acc":0.2839879154,"khabaronline_acc":0.276,"digiato_acc":0.2922755741,"doctoreto_acc":0.345,"sarzamindownload_acc":0.3267973856,"hamgardi_acc":0.3215339233,"bigbangpage_acc":0.2547770701,"wiki_ahlolbait_acc":0.2894736842,"virgool_3_acc":0.3313432836,"virgool_2_acc":0.3058103976,"virgool_1_acc":0.3512658228,"hamshahrionline_acc":0.262295082,"tabnak_acc":0.3278008299,"alibaba_acc":0.3235294118,"digikala_mag_acc":0.3206412826,"yjc_acc":0.2816091954,"beytoote_acc":0.2991689751,"asriran_acc":0.3188405797,"ecoiran_acc":0.2698412698,"hawzah_acc":0.3174157303,"zoomit_acc":0.3028169014,"wikipedia_acc":0.3380952381,"namnak_acc":0.2888283379,"khodro45_acc":0.3308823529,"fidibo_acc":0.3259911894,"newmiind_acc":0.2916666667,"taaghche_acc":0.2371794872,"motamem_acc":0.4,"varzesh3_acc":0.2240802676,"mehrnews_acc":0.2459677419,"tasnim_acc":0.3346153846,"magerta_acc":0.3361344538,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.4615384615,"wikishia_acc":0.2727272727,"voolak_acc":0.4418604651,"farsroid_acc":0.1578947368,"parsiday_acc":0.2083333333,"soft98_acc":0.7,"ninisite_discussion_acc":0.1}
|
| 33 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1000000000","source_type":"Open-Source","acc":0.2599,"acc_strict":0.2599,"donyaeeqtesad_acc":0.2719033233,"isna_acc":0.25,"ninisite_article_acc":0.2567567568,"virgool_4_acc":0.3202416918,"khabaronline_acc":0.276,"digiato_acc":0.2630480167,"doctoreto_acc":0.245,"sarzamindownload_acc":0.2418300654,"hamgardi_acc":0.2979351032,"bigbangpage_acc":0.2484076433,"wiki_ahlolbait_acc":0.2631578947,"virgool_3_acc":0.2507462687,"virgool_2_acc":0.247706422,"virgool_1_acc":0.2594936709,"hamshahrionline_acc":0.2852459016,"tabnak_acc":0.2489626556,"alibaba_acc":0.2712418301,"digikala_mag_acc":0.2705410822,"yjc_acc":0.275862069,"beytoote_acc":0.2603878116,"asriran_acc":0.2608695652,"ecoiran_acc":0.2634920635,"hawzah_acc":0.2724719101,"zoomit_acc":0.2511737089,"wikipedia_acc":0.2857142857,"namnak_acc":0.2098092643,"khodro45_acc":0.2720588235,"fidibo_acc":0.2466960352,"newmiind_acc":0.2222222222,"taaghche_acc":0.25,"motamem_acc":0.2947368421,"varzesh3_acc":0.2441471572,"mehrnews_acc":0.25,"tasnim_acc":0.2692307692,"magerta_acc":0.2352941176,"radiokodak_book_acc":0.3043478261,"vipofilm_acc":0.3846153846,"wikishia_acc":0.1515151515,"voolak_acc":0.2558139535,"farsroid_acc":0.2631578947,"parsiday_acc":0.1916666667,"soft98_acc":0.1,"ninisite_discussion_acc":0.4}
|
| 34 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"3210000000","source_type":"Open-Source","acc":0.2521,"acc_strict":0.2517,"donyaeeqtesad_acc":0.2779456193,"isna_acc":0.29296875,"ninisite_article_acc":0.2594594595,"virgool_4_acc":0.2235649547,"khabaronline_acc":0.2,"digiato_acc":0.24217119,"doctoreto_acc":0.24,"sarzamindownload_acc":0.2352941176,"hamgardi_acc":0.2684365782,"bigbangpage_acc":0.2802547771,"wiki_ahlolbait_acc":0.2368421053,"virgool_3_acc":0.2298507463,"virgool_2_acc":0.2599388379,"virgool_1_acc":0.2689873418,"hamshahrionline_acc":0.2327868852,"tabnak_acc":0.2697095436,"alibaba_acc":0.2124183007,"digikala_mag_acc":0.246492986,"yjc_acc":0.2586206897,"beytoote_acc":0.2631578947,"asriran_acc":0.2898550725,"ecoiran_acc":0.2603174603,"hawzah_acc":0.2556179775,"zoomit_acc":0.2887323944,"wikipedia_acc":0.2238095238,"namnak_acc":0.2561307902,"khodro45_acc":0.25,"fidibo_acc":0.2202643172,"newmiind_acc":0.25,"taaghche_acc":0.2692307692,"motamem_acc":0.2842105263,"varzesh3_acc":0.2107023411,"mehrnews_acc":0.2338709677,"tasnim_acc":0.2307692308,"magerta_acc":0.3235294118,"radiokodak_book_acc":0.1739130435,"vipofilm_acc":0.4615384615,"wikishia_acc":0.3333333333,"voolak_acc":0.2790697674,"farsroid_acc":0.2368421053,"parsiday_acc":0.1833333333,"soft98_acc":0.3,"ninisite_discussion_acc":0.5}
|
| 35 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https_google.com","parameters_count":"1240000000","source_type":"Open-Source","acc":0.2412,"acc_strict":0.0079,"donyaeeqtesad_acc":0.253776435,"isna_acc":0.25390625,"ninisite_article_acc":0.2486486486,"virgool_4_acc":0.2809667674,"khabaronline_acc":0.248,"digiato_acc":0.2192066806,"doctoreto_acc":0.245,"sarzamindownload_acc":0.2483660131,"hamgardi_acc":0.2507374631,"bigbangpage_acc":0.2802547771,"wiki_ahlolbait_acc":0.2434210526,"virgool_3_acc":0.2208955224,"virgool_2_acc":0.2140672783,"virgool_1_acc":0.2373417722,"hamshahrionline_acc":0.2983606557,"tabnak_acc":0.2282157676,"alibaba_acc":0.2581699346,"digikala_mag_acc":0.2224448898,"yjc_acc":0.2701149425,"beytoote_acc":0.2520775623,"asriran_acc":0.1884057971,"ecoiran_acc":0.2349206349,"hawzah_acc":0.2696629213,"zoomit_acc":0.2558685446,"wikipedia_acc":0.1761904762,"namnak_acc":0.2343324251,"khodro45_acc":0.2279411765,"fidibo_acc":0.2907488987,"newmiind_acc":0.28125,"taaghche_acc":0.1987179487,"motamem_acc":0.2736842105,"varzesh3_acc":0.2307692308,"mehrnews_acc":0.2096774194,"tasnim_acc":0.2269230769,"magerta_acc":0.1848739496,"radiokodak_book_acc":0.2173913043,"vipofilm_acc":0.1538461538,"wikishia_acc":0.1515151515,"voolak_acc":0.2790697674,"farsroid_acc":0.2105263158,"parsiday_acc":0.225,"soft98_acc":0.2,"ninisite_discussion_acc":0.4}
|
leaderboard/boards_data/persian_nlg.jsonl
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboard/boards_data/persian_nlu.jsonl
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
leaderboard/boards_data/question-generation_PersianQA.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1693490122,"question-generation_PersianQA_rougeL_recall":0.3886090827,"question-generation_PersianQA_rougeL_f1_score":0.227277052,"nlg_score":0.1779340777}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1704020873,"question-generation_PersianQA_rougeL_recall":0.3000756202,"question-generation_PersianQA_rougeL_f1_score":0.2079039891,"nlg_score":0.0949943578}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2149535143,"question-generation_PersianQA_rougeL_recall":0.3019561885,"question-generation_PersianQA_rougeL_f1_score":0.2405115465,"nlg_score":0.1880477876}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1365997259,"question-generation_PersianQA_rougeL_recall":0.3257934111,"question-generation_PersianQA_rougeL_f1_score":0.1803398036,"nlg_score":0.0940241349}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2576021626,"question-generation_PersianQA_rougeL_recall":0.3924501003,"question-generation_PersianQA_rougeL_f1_score":0.2985826349,"nlg_score":0.194675133}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":null,"question-generation_PersianQA_rougeL_recall":null,"question-generation_PersianQA_rougeL_f1_score":null,"nlg_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2572991833,"question-generation_PersianQA_rougeL_recall":0.3740225235,"question-generation_PersianQA_rougeL_f1_score":0.2927586837,"nlg_score":0.1196804312}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2041596361,"question-generation_PersianQA_rougeL_recall":0.3456815337,"question-generation_PersianQA_rougeL_f1_score":0.2459732807,"nlg_score":0.1067134448}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1987198912,"question-generation_PersianQA_rougeL_recall":0.3431437262,"question-generation_PersianQA_rougeL_f1_score":0.2419384398,"nlg_score":0.16056333}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2281053588,"question-generation_PersianQA_rougeL_recall":0.370933314,"question-generation_PersianQA_rougeL_f1_score":0.273363418,"nlg_score":0.1679338638}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.0870939736,"question-generation_PersianQA_rougeL_recall":0.3600941065,"question-generation_PersianQA_rougeL_f1_score":0.1336375958,"nlg_score":0.1089333827}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1520819517,"question-generation_PersianQA_rougeL_recall":0.26324767,"question-generation_PersianQA_rougeL_f1_score":0.1843401988,"nlg_score":0.1319091735}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1669255457,"question-generation_PersianQA_rougeL_recall":0.2952488346,"question-generation_PersianQA_rougeL_f1_score":0.2007786564,"nlg_score":0.112015688}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1939037413,"question-generation_PersianQA_rougeL_recall":0.4070822245,"question-generation_PersianQA_rougeL_f1_score":0.2439578999,"nlg_score":0.0934094344}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1744197112,"question-generation_PersianQA_rougeL_recall":0.2697024508,"question-generation_PersianQA_rougeL_f1_score":0.2017710943,"nlg_score":0.1389297212}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.045673941,"question-generation_PersianQA_rougeL_recall":0.0991932753,"question-generation_PersianQA_rougeL_f1_score":0.0576169145,"nlg_score":0.0682994522}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0980160864,"question-generation_PersianQA_rougeL_recall":0.347983913,"question-generation_PersianQA_rougeL_f1_score":0.1443872083,"nlg_score":0.1196400535}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2447184183,"question-generation_PersianQA_rougeL_recall":0.3388367288,"question-generation_PersianQA_rougeL_f1_score":0.269297654,"nlg_score":0.2010896964}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2438951227,"question-generation_PersianQA_rougeL_recall":0.3687301621,"question-generation_PersianQA_rougeL_f1_score":0.2816187853,"nlg_score":0.1901206806}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2126001437,"question-generation_PersianQA_rougeL_recall":0.3731677121,"question-generation_PersianQA_rougeL_f1_score":0.2603121806,"nlg_score":0.1764906292}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1942536013,"question-generation_PersianQA_rougeL_recall":0.3435531442,"question-generation_PersianQA_rougeL_f1_score":0.2369359061,"nlg_score":0.1810678527}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2171998078,"question-generation_PersianQA_rougeL_recall":0.3938560893,"question-generation_PersianQA_rougeL_f1_score":0.268371521,"nlg_score":0.1137933652}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.3141052553,"question-generation_PersianQA_rougeL_recall":0.4102615831,"question-generation_PersianQA_rougeL_f1_score":0.3441804021,"nlg_score":0.178231145}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.3121385499,"question-generation_PersianQA_rougeL_recall":0.4162991047,"question-generation_PersianQA_rougeL_f1_score":0.3445136596,"nlg_score":0.1368740087}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2782492277,"question-generation_PersianQA_rougeL_recall":0.3823213358,"question-generation_PersianQA_rougeL_f1_score":0.3109786075,"nlg_score":0.1659339021}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1546246184,"question-generation_PersianQA_rougeL_recall":0.253394795,"question-generation_PersianQA_rougeL_f1_score":0.1829113647,"nlg_score":0.1641995602}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1622159789,"question-generation_PersianQA_rougeL_recall":0.302597472,"question-generation_PersianQA_rougeL_f1_score":0.2021048057,"nlg_score":0.1665903777}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1965366702,"question-generation_PersianQA_rougeL_recall":0.340760284,"question-generation_PersianQA_rougeL_f1_score":0.2388923895,"nlg_score":0.1557270864}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1127092702,"question-generation_PersianQA_rougeL_recall":0.2982763168,"question-generation_PersianQA_rougeL_f1_score":0.1525970768,"nlg_score":0.0944140383}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2275858051,"question-generation_PersianQA_rougeL_recall":0.3654754607,"question-generation_PersianQA_rougeL_f1_score":0.2679025722,"nlg_score":0.18964968}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1342253144,"question-generation_PersianQA_rougeL_recall":0.4100317735,"question-generation_PersianQA_rougeL_f1_score":0.18410589,"nlg_score":0.0880621978}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1996840686,"question-generation_PersianQA_rougeL_recall":0.3393114266,"question-generation_PersianQA_rougeL_f1_score":0.2417040176,"nlg_score":0.164118288}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1264186031,"question-generation_PersianQA_rougeL_recall":0.2582953109,"question-generation_PersianQA_rougeL_f1_score":0.1600835412,"nlg_score":0.1129755187}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0567952998,"question-generation_PersianQA_rougeL_recall":0.2105979358,"question-generation_PersianQA_rougeL_f1_score":0.0793499521,"nlg_score":0.0823387318}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2978290521,"question-generation_PersianQA_rougeL_recall":0.4184808562,"question-generation_PersianQA_rougeL_f1_score":0.3324485723,"nlg_score":0.1151518212}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1693490122,"question-generation_PersianQA_rougeL_recall":0.3886090827,"question-generation_PersianQA_rougeL_f1_score":0.227277052,"nlg_score":0.1779340777}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1704020873,"question-generation_PersianQA_rougeL_recall":0.3000756202,"question-generation_PersianQA_rougeL_f1_score":0.2079039891,"nlg_score":0.0949943578}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2149535143,"question-generation_PersianQA_rougeL_recall":0.3019561885,"question-generation_PersianQA_rougeL_f1_score":0.2405115465,"nlg_score":0.1880477876}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1365997259,"question-generation_PersianQA_rougeL_recall":0.3257934111,"question-generation_PersianQA_rougeL_f1_score":0.1803398036,"nlg_score":0.0940241349}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2576021626,"question-generation_PersianQA_rougeL_recall":0.3924501003,"question-generation_PersianQA_rougeL_f1_score":0.2985826349,"nlg_score":0.194675133}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":null,"question-generation_PersianQA_rougeL_recall":null,"question-generation_PersianQA_rougeL_f1_score":null,"nlg_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2572991833,"question-generation_PersianQA_rougeL_recall":0.3740225235,"question-generation_PersianQA_rougeL_f1_score":0.2927586837,"nlg_score":0.1196804312}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2041596361,"question-generation_PersianQA_rougeL_recall":0.3456815337,"question-generation_PersianQA_rougeL_f1_score":0.2459732807,"nlg_score":0.1067134448}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1987198912,"question-generation_PersianQA_rougeL_recall":0.3431437262,"question-generation_PersianQA_rougeL_f1_score":0.2419384398,"nlg_score":0.16056333}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2281053588,"question-generation_PersianQA_rougeL_recall":0.370933314,"question-generation_PersianQA_rougeL_f1_score":0.273363418,"nlg_score":0.1679338638}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.0870939736,"question-generation_PersianQA_rougeL_recall":0.3600941065,"question-generation_PersianQA_rougeL_f1_score":0.1336375958,"nlg_score":0.1089333827}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1520819517,"question-generation_PersianQA_rougeL_recall":0.26324767,"question-generation_PersianQA_rougeL_f1_score":0.1843401988,"nlg_score":0.1319091735}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1669255457,"question-generation_PersianQA_rougeL_recall":0.2952488346,"question-generation_PersianQA_rougeL_f1_score":0.2007786564,"nlg_score":0.112015688}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1939037413,"question-generation_PersianQA_rougeL_recall":0.4070822245,"question-generation_PersianQA_rougeL_f1_score":0.2439578999,"nlg_score":0.0934094344}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1744197112,"question-generation_PersianQA_rougeL_recall":0.2697024508,"question-generation_PersianQA_rougeL_f1_score":0.2017710943,"nlg_score":0.1389297212}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.045673941,"question-generation_PersianQA_rougeL_recall":0.0991932753,"question-generation_PersianQA_rougeL_f1_score":0.0576169145,"nlg_score":0.0682994522}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0980160864,"question-generation_PersianQA_rougeL_recall":0.347983913,"question-generation_PersianQA_rougeL_f1_score":0.1443872083,"nlg_score":0.1196400535}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2447184183,"question-generation_PersianQA_rougeL_recall":0.3388367288,"question-generation_PersianQA_rougeL_f1_score":0.269297654,"nlg_score":0.2010896964}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2438951227,"question-generation_PersianQA_rougeL_recall":0.3687301621,"question-generation_PersianQA_rougeL_f1_score":0.2816187853,"nlg_score":0.1901206806}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2126001437,"question-generation_PersianQA_rougeL_recall":0.3731677121,"question-generation_PersianQA_rougeL_f1_score":0.2603121806,"nlg_score":0.1764906292}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1942536013,"question-generation_PersianQA_rougeL_recall":0.3435531442,"question-generation_PersianQA_rougeL_f1_score":0.2369359061,"nlg_score":0.1810678527}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.2171998078,"question-generation_PersianQA_rougeL_recall":0.3938560893,"question-generation_PersianQA_rougeL_f1_score":0.268371521,"nlg_score":0.1137933652}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.3141052553,"question-generation_PersianQA_rougeL_recall":0.4102615831,"question-generation_PersianQA_rougeL_f1_score":0.3441804021,"nlg_score":0.178231145}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.3121385499,"question-generation_PersianQA_rougeL_recall":0.4162991047,"question-generation_PersianQA_rougeL_f1_score":0.3445136596,"nlg_score":0.1368740087}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2782492277,"question-generation_PersianQA_rougeL_recall":0.3823213358,"question-generation_PersianQA_rougeL_f1_score":0.3109786075,"nlg_score":0.1659339021}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1546246184,"question-generation_PersianQA_rougeL_recall":0.253394795,"question-generation_PersianQA_rougeL_f1_score":0.1829113647,"nlg_score":0.1641995602}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.1622159789,"question-generation_PersianQA_rougeL_recall":0.302597472,"question-generation_PersianQA_rougeL_f1_score":0.2021048057,"nlg_score":0.1665903777}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1965366702,"question-generation_PersianQA_rougeL_recall":0.340760284,"question-generation_PersianQA_rougeL_f1_score":0.2388923895,"nlg_score":0.1557270864}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1127092702,"question-generation_PersianQA_rougeL_recall":0.2982763168,"question-generation_PersianQA_rougeL_f1_score":0.1525970768,"nlg_score":0.0944140383}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2275858051,"question-generation_PersianQA_rougeL_recall":0.3654754607,"question-generation_PersianQA_rougeL_f1_score":0.2679025722,"nlg_score":0.18964968}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1342253144,"question-generation_PersianQA_rougeL_recall":0.4100317735,"question-generation_PersianQA_rougeL_f1_score":0.18410589,"nlg_score":0.0880621978}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1996840686,"question-generation_PersianQA_rougeL_recall":0.3393114266,"question-generation_PersianQA_rougeL_f1_score":0.2417040176,"nlg_score":0.164118288}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.1264186031,"question-generation_PersianQA_rougeL_recall":0.2582953109,"question-generation_PersianQA_rougeL_f1_score":0.1600835412,"nlg_score":0.1129755187}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","question-generation_PersianQA_rougeL_precision":0.0567952998,"question-generation_PersianQA_rougeL_recall":0.2105979358,"question-generation_PersianQA_rougeL_f1_score":0.0793499521,"nlg_score":0.0823387318}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","question-generation_PersianQA_rougeL_precision":0.2978290521,"question-generation_PersianQA_rougeL_recall":0.4184808562,"question-generation_PersianQA_rougeL_f1_score":0.3324485723,"nlg_score":0.1151518212}
|
leaderboard/boards_data/sentiment-analysis_deepsentipers.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8058409951,"sentiment-analysis_deepsentipers_precision_modified":0.7717795715,"sentiment-analysis_deepsentipers_recall_modified":0.8211827366,"sentiment-analysis_deepsentipers_fscore_modified":0.7889064935,"sentiment-analysis_deepsentipers_acc":0.8062770563,"sentiment-analysis_deepsentipers_precision":0.7721972011,"sentiment-analysis_deepsentipers_recall":0.8216270995,"sentiment-analysis_deepsentipers_fscore":0.7893333909,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994591671,"nlu_score":0.7143086066}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6817691478,"sentiment-analysis_deepsentipers_precision_modified":0.6546744642,"sentiment-analysis_deepsentipers_recall_modified":0.7378694789,"sentiment-analysis_deepsentipers_fscore_modified":0.6356142977,"sentiment-analysis_deepsentipers_acc":0.6817691478,"sentiment-analysis_deepsentipers_precision":0.6546744642,"sentiment-analysis_deepsentipers_recall":0.7378694789,"sentiment-analysis_deepsentipers_fscore":0.6356142977,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6241793507}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.738403452,"sentiment-analysis_deepsentipers_precision_modified":0.706763853,"sentiment-analysis_deepsentipers_recall_modified":0.7658510846,"sentiment-analysis_deepsentipers_fscore_modified":0.726373242,"sentiment-analysis_deepsentipers_acc":0.738403452,"sentiment-analysis_deepsentipers_precision":0.706763853,"sentiment-analysis_deepsentipers_recall":0.7658510846,"sentiment-analysis_deepsentipers_fscore":0.726373242,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7518878101,"sentiment-analysis_deepsentipers_precision_modified":0.7333601788,"sentiment-analysis_deepsentipers_recall_modified":0.7850018857,"sentiment-analysis_deepsentipers_fscore_modified":0.7214827861,"sentiment-analysis_deepsentipers_acc":0.752293578,"sentiment-analysis_deepsentipers_precision":0.7337559479,"sentiment-analysis_deepsentipers_recall":0.785425524,"sentiment-analysis_deepsentipers_fscore":0.7218721454,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.6552152029}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8047464941,"sentiment-analysis_deepsentipers_precision_modified":0.7661826532,"sentiment-analysis_deepsentipers_recall_modified":0.8089861144,"sentiment-analysis_deepsentipers_fscore_modified":0.7830417049,"sentiment-analysis_deepsentipers_acc":0.8047464941,"sentiment-analysis_deepsentipers_precision":0.7661826532,"sentiment-analysis_deepsentipers_recall":0.8089861144,"sentiment-analysis_deepsentipers_fscore":0.7830417049,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":null,"sentiment-analysis_deepsentipers_precision_modified":null,"sentiment-analysis_deepsentipers_recall_modified":null,"sentiment-analysis_deepsentipers_fscore_modified":null,"sentiment-analysis_deepsentipers_acc":null,"sentiment-analysis_deepsentipers_precision":null,"sentiment-analysis_deepsentipers_recall":null,"sentiment-analysis_deepsentipers_fscore":null,"sentiment-analysis_deepsentipers_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7664509169,"sentiment-analysis_deepsentipers_precision_modified":0.7235774595,"sentiment-analysis_deepsentipers_recall_modified":0.785720049,"sentiment-analysis_deepsentipers_fscore_modified":0.7440236575,"sentiment-analysis_deepsentipers_acc":0.766864544,"sentiment-analysis_deepsentipers_precision":0.7239679492,"sentiment-analysis_deepsentipers_recall":0.786144075,"sentiment-analysis_deepsentipers_fscore":0.7444251813,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.699116864}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7556634304,"sentiment-analysis_deepsentipers_precision_modified":0.7100962569,"sentiment-analysis_deepsentipers_recall_modified":0.796296032,"sentiment-analysis_deepsentipers_fscore_modified":0.7198160026,"sentiment-analysis_deepsentipers_acc":0.7556634304,"sentiment-analysis_deepsentipers_precision":0.7100962569,"sentiment-analysis_deepsentipers_recall":0.796296032,"sentiment-analysis_deepsentipers_fscore":0.7198160026,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7459546926,"sentiment-analysis_deepsentipers_precision_modified":0.696002467,"sentiment-analysis_deepsentipers_recall_modified":0.7725731976,"sentiment-analysis_deepsentipers_fscore_modified":0.7160207999,"sentiment-analysis_deepsentipers_acc":0.7459546926,"sentiment-analysis_deepsentipers_precision":0.696002467,"sentiment-analysis_deepsentipers_recall":0.7725731976,"sentiment-analysis_deepsentipers_fscore":0.7160207999,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7820927724,"sentiment-analysis_deepsentipers_precision_modified":0.7592820571,"sentiment-analysis_deepsentipers_recall_modified":0.7768252647,"sentiment-analysis_deepsentipers_fscore_modified":0.7562669975,"sentiment-analysis_deepsentipers_acc":0.7820927724,"sentiment-analysis_deepsentipers_precision":0.7592820571,"sentiment-analysis_deepsentipers_recall":0.7768252647,"sentiment-analysis_deepsentipers_fscore":0.7562669975,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7521691974,"sentiment-analysis_deepsentipers_precision_modified":0.7149147283,"sentiment-analysis_deepsentipers_recall_modified":0.7661218172,"sentiment-analysis_deepsentipers_fscore_modified":0.7340307684,"sentiment-analysis_deepsentipers_acc":0.7550353838,"sentiment-analysis_deepsentipers_precision":0.7176389542,"sentiment-analysis_deepsentipers_recall":0.7690411709,"sentiment-analysis_deepsentipers_fscore":0.7368278372,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9962039046,"nlu_score":0.3749414991}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.5949298813,"sentiment-analysis_deepsentipers_precision_modified":0.6633283768,"sentiment-analysis_deepsentipers_recall_modified":0.6739827327,"sentiment-analysis_deepsentipers_fscore_modified":0.597099001,"sentiment-analysis_deepsentipers_acc":0.5958941113,"sentiment-analysis_deepsentipers_precision":0.6644034633,"sentiment-analysis_deepsentipers_recall":0.6750750872,"sentiment-analysis_deepsentipers_fscore":0.5980667466,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.5661558794}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7038834951,"sentiment-analysis_deepsentipers_precision_modified":0.6558175114,"sentiment-analysis_deepsentipers_recall_modified":0.7195323034,"sentiment-analysis_deepsentipers_fscore_modified":0.6634075099,"sentiment-analysis_deepsentipers_acc":0.7038834951,"sentiment-analysis_deepsentipers_precision":0.6558175114,"sentiment-analysis_deepsentipers_recall":0.7195323034,"sentiment-analysis_deepsentipers_fscore":0.6634075099,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.456845738}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7669902913,"sentiment-analysis_deepsentipers_precision_modified":0.7411642414,"sentiment-analysis_deepsentipers_recall_modified":0.7990679398,"sentiment-analysis_deepsentipers_fscore_modified":0.7346216275,"sentiment-analysis_deepsentipers_acc":0.7669902913,"sentiment-analysis_deepsentipers_precision":0.7411642414,"sentiment-analysis_deepsentipers_recall":0.7990679398,"sentiment-analysis_deepsentipers_fscore":0.7346216275,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6855447681,"sentiment-analysis_deepsentipers_precision_modified":0.6408552737,"sentiment-analysis_deepsentipers_recall_modified":0.7180772523,"sentiment-analysis_deepsentipers_fscore_modified":0.6446920024,"sentiment-analysis_deepsentipers_acc":0.6855447681,"sentiment-analysis_deepsentipers_precision":0.6408552737,"sentiment-analysis_deepsentipers_recall":0.7180772523,"sentiment-analysis_deepsentipers_fscore":0.6446920024,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.5121418762}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.5199568501,"sentiment-analysis_deepsentipers_precision_modified":0.4907692439,"sentiment-analysis_deepsentipers_recall_modified":0.5047701764,"sentiment-analysis_deepsentipers_fscore_modified":0.4457895794,"sentiment-analysis_deepsentipers_acc":0.571767497,"sentiment-analysis_deepsentipers_precision":0.5396715174,"sentiment-analysis_deepsentipers_recall":0.5550675605,"sentiment-analysis_deepsentipers_fscore":0.4902098934,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9093851133,"nlu_score":0.3619547874}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7642934196,"sentiment-analysis_deepsentipers_precision_modified":0.7287131406,"sentiment-analysis_deepsentipers_recall_modified":0.7801104156,"sentiment-analysis_deepsentipers_fscore_modified":0.7434018552,"sentiment-analysis_deepsentipers_acc":0.7951739618,"sentiment-analysis_deepsentipers_precision":0.7581560958,"sentiment-analysis_deepsentipers_recall":0.8116300284,"sentiment-analysis_deepsentipers_fscore":0.7734382938,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9611650485,"nlu_score":0.3928685253}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7324703344,"sentiment-analysis_deepsentipers_precision_modified":0.7023773257,"sentiment-analysis_deepsentipers_recall_modified":0.7824931708,"sentiment-analysis_deepsentipers_fscore_modified":0.6905624385,"sentiment-analysis_deepsentipers_acc":0.7324703344,"sentiment-analysis_deepsentipers_precision":0.7023773257,"sentiment-analysis_deepsentipers_recall":0.7824931708,"sentiment-analysis_deepsentipers_fscore":0.6905624385,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7869471413,"sentiment-analysis_deepsentipers_precision_modified":0.7486325068,"sentiment-analysis_deepsentipers_recall_modified":0.811119619,"sentiment-analysis_deepsentipers_fscore_modified":0.7669134988,"sentiment-analysis_deepsentipers_acc":0.7869471413,"sentiment-analysis_deepsentipers_precision":0.7486325068,"sentiment-analysis_deepsentipers_recall":0.811119619,"sentiment-analysis_deepsentipers_fscore":0.7669134988,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.806364617,"sentiment-analysis_deepsentipers_precision_modified":0.7700109372,"sentiment-analysis_deepsentipers_recall_modified":0.8303259501,"sentiment-analysis_deepsentipers_fscore_modified":0.7817187645,"sentiment-analysis_deepsentipers_acc":0.806364617,"sentiment-analysis_deepsentipers_precision":0.7700109372,"sentiment-analysis_deepsentipers_recall":0.8303259501,"sentiment-analysis_deepsentipers_fscore":0.7817187645,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7713052859,"sentiment-analysis_deepsentipers_precision_modified":0.7288724929,"sentiment-analysis_deepsentipers_recall_modified":0.803675275,"sentiment-analysis_deepsentipers_fscore_modified":0.753174206,"sentiment-analysis_deepsentipers_acc":0.7713052859,"sentiment-analysis_deepsentipers_precision":0.7288724929,"sentiment-analysis_deepsentipers_recall":0.803675275,"sentiment-analysis_deepsentipers_fscore":0.753174206,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.8047464941,"sentiment-analysis_deepsentipers_precision_modified":0.7692351798,"sentiment-analysis_deepsentipers_recall_modified":0.8125606487,"sentiment-analysis_deepsentipers_fscore_modified":0.7842327246,"sentiment-analysis_deepsentipers_acc":0.8047464941,"sentiment-analysis_deepsentipers_precision":0.7692351798,"sentiment-analysis_deepsentipers_recall":0.8125606487,"sentiment-analysis_deepsentipers_fscore":0.7842327246,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.4824528512}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7740021575,"sentiment-analysis_deepsentipers_precision_modified":0.7295627073,"sentiment-analysis_deepsentipers_recall_modified":0.7969121455,"sentiment-analysis_deepsentipers_fscore_modified":0.7492633779,"sentiment-analysis_deepsentipers_acc":0.7744198597,"sentiment-analysis_deepsentipers_precision":0.729956427,"sentiment-analysis_deepsentipers_recall":0.7973422114,"sentiment-analysis_deepsentipers_fscore":0.7496677294,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.7050532433}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7610571737,"sentiment-analysis_deepsentipers_precision_modified":0.7412673027,"sentiment-analysis_deepsentipers_recall_modified":0.7880284687,"sentiment-analysis_deepsentipers_fscore_modified":0.7263322065,"sentiment-analysis_deepsentipers_acc":0.7610571737,"sentiment-analysis_deepsentipers_precision":0.7412673027,"sentiment-analysis_deepsentipers_recall":0.7880284687,"sentiment-analysis_deepsentipers_fscore":0.7263322065,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7610571737,"sentiment-analysis_deepsentipers_precision_modified":0.7233450689,"sentiment-analysis_deepsentipers_recall_modified":0.7938691015,"sentiment-analysis_deepsentipers_fscore_modified":0.7265888673,"sentiment-analysis_deepsentipers_acc":0.7622906537,"sentiment-analysis_deepsentipers_precision":0.7245174272,"sentiment-analysis_deepsentipers_recall":0.7951557613,"sentiment-analysis_deepsentipers_fscore":0.727766483,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.6914202844}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6154261057,"sentiment-analysis_deepsentipers_precision_modified":0.6519864557,"sentiment-analysis_deepsentipers_recall_modified":0.6762525877,"sentiment-analysis_deepsentipers_fscore_modified":0.5290317996,"sentiment-analysis_deepsentipers_acc":0.6154261057,"sentiment-analysis_deepsentipers_precision":0.6519864557,"sentiment-analysis_deepsentipers_recall":0.6762525877,"sentiment-analysis_deepsentipers_fscore":0.5290317996,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.531045981}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7448759439,"sentiment-analysis_deepsentipers_precision_modified":0.7197594162,"sentiment-analysis_deepsentipers_recall_modified":0.7773395601,"sentiment-analysis_deepsentipers_fscore_modified":0.7035572334,"sentiment-analysis_deepsentipers_acc":0.7448759439,"sentiment-analysis_deepsentipers_precision":0.7197594162,"sentiment-analysis_deepsentipers_recall":0.7773395601,"sentiment-analysis_deepsentipers_fscore":0.7035572334,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7411003236,"sentiment-analysis_deepsentipers_precision_modified":0.7010084925,"sentiment-analysis_deepsentipers_recall_modified":0.7529009939,"sentiment-analysis_deepsentipers_fscore_modified":0.7172781226,"sentiment-analysis_deepsentipers_acc":0.7411003236,"sentiment-analysis_deepsentipers_precision":0.7010084925,"sentiment-analysis_deepsentipers_recall":0.7529009939,"sentiment-analysis_deepsentipers_fscore":0.7172781226,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.3737864078,"sentiment-analysis_deepsentipers_precision_modified":0.4303148768,"sentiment-analysis_deepsentipers_recall_modified":0.3053254234,"sentiment-analysis_deepsentipers_fscore_modified":0.2934454786,"sentiment-analysis_deepsentipers_acc":0.6209677419,"sentiment-analysis_deepsentipers_precision":0.7148779405,"sentiment-analysis_deepsentipers_recall":0.5072341711,"sentiment-analysis_deepsentipers_fscore":0.4874981338,"sentiment-analysis_deepsentipers_valid_output_ratio":0.6019417476,"nlu_score":0.3916645306}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7988133765,"sentiment-analysis_deepsentipers_precision_modified":0.7580375513,"sentiment-analysis_deepsentipers_recall_modified":0.8108044611,"sentiment-analysis_deepsentipers_fscore_modified":0.7757714496,"sentiment-analysis_deepsentipers_acc":0.7988133765,"sentiment-analysis_deepsentipers_precision":0.7580375513,"sentiment-analysis_deepsentipers_recall":0.8108044611,"sentiment-analysis_deepsentipers_fscore":0.7757714496,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6278317152,"sentiment-analysis_deepsentipers_precision_modified":0.5954545705,"sentiment-analysis_deepsentipers_recall_modified":0.6239967818,"sentiment-analysis_deepsentipers_fscore_modified":0.6073033689,"sentiment-analysis_deepsentipers_acc":0.7288666249,"sentiment-analysis_deepsentipers_precision":0.691279132,"sentiment-analysis_deepsentipers_recall":0.7244145482,"sentiment-analysis_deepsentipers_fscore":0.7050347188,"sentiment-analysis_deepsentipers_valid_output_ratio":0.8613807983,"nlu_score":0.6361186163}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7502696872,"sentiment-analysis_deepsentipers_precision_modified":0.7162099301,"sentiment-analysis_deepsentipers_recall_modified":0.7649004728,"sentiment-analysis_deepsentipers_fscore_modified":0.716460892,"sentiment-analysis_deepsentipers_acc":0.7502696872,"sentiment-analysis_deepsentipers_precision":0.7162099301,"sentiment-analysis_deepsentipers_recall":0.7649004728,"sentiment-analysis_deepsentipers_fscore":0.716460892,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6255818412}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.3149946063,"sentiment-analysis_deepsentipers_precision_modified":0.6011059335,"sentiment-analysis_deepsentipers_recall_modified":0.4700288555,"sentiment-analysis_deepsentipers_fscore_modified":0.3135968578,"sentiment-analysis_deepsentipers_acc":0.3155051324,"sentiment-analysis_deepsentipers_precision":0.6020801732,"sentiment-analysis_deepsentipers_recall":0.4707906527,"sentiment-analysis_deepsentipers_fscore":0.3141051185,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.1368924446}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.0787486516,"sentiment-analysis_deepsentipers_precision_modified":0.1370950606,"sentiment-analysis_deepsentipers_recall_modified":0.1281381117,"sentiment-analysis_deepsentipers_fscore_modified":0.0722798642,"sentiment-analysis_deepsentipers_acc":0.2106782107,"sentiment-analysis_deepsentipers_precision":0.3667737986,"sentiment-analysis_deepsentipers_recall":0.3428110522,"sentiment-analysis_deepsentipers_fscore":0.1933721042,"sentiment-analysis_deepsentipers_valid_output_ratio":0.3737864078,"nlu_score":0.046805056}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7939590076,"sentiment-analysis_deepsentipers_precision_modified":0.7547931347,"sentiment-analysis_deepsentipers_recall_modified":0.8064164986,"sentiment-analysis_deepsentipers_fscore_modified":0.7679289467,"sentiment-analysis_deepsentipers_acc":0.7939590076,"sentiment-analysis_deepsentipers_precision":0.7547931347,"sentiment-analysis_deepsentipers_recall":0.8064164986,"sentiment-analysis_deepsentipers_fscore":0.7679289467,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8058409951,"sentiment-analysis_deepsentipers_precision_modified":0.7717795715,"sentiment-analysis_deepsentipers_recall_modified":0.8211827366,"sentiment-analysis_deepsentipers_fscore_modified":0.7889064935,"sentiment-analysis_deepsentipers_acc":0.8062770563,"sentiment-analysis_deepsentipers_precision":0.7721972011,"sentiment-analysis_deepsentipers_recall":0.8216270995,"sentiment-analysis_deepsentipers_fscore":0.7893333909,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994591671,"nlu_score":0.7143086066}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6817691478,"sentiment-analysis_deepsentipers_precision_modified":0.6546744642,"sentiment-analysis_deepsentipers_recall_modified":0.7378694789,"sentiment-analysis_deepsentipers_fscore_modified":0.6356142977,"sentiment-analysis_deepsentipers_acc":0.6817691478,"sentiment-analysis_deepsentipers_precision":0.6546744642,"sentiment-analysis_deepsentipers_recall":0.7378694789,"sentiment-analysis_deepsentipers_fscore":0.6356142977,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6241793507}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.738403452,"sentiment-analysis_deepsentipers_precision_modified":0.706763853,"sentiment-analysis_deepsentipers_recall_modified":0.7658510846,"sentiment-analysis_deepsentipers_fscore_modified":0.726373242,"sentiment-analysis_deepsentipers_acc":0.738403452,"sentiment-analysis_deepsentipers_precision":0.706763853,"sentiment-analysis_deepsentipers_recall":0.7658510846,"sentiment-analysis_deepsentipers_fscore":0.726373242,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7518878101,"sentiment-analysis_deepsentipers_precision_modified":0.7333601788,"sentiment-analysis_deepsentipers_recall_modified":0.7850018857,"sentiment-analysis_deepsentipers_fscore_modified":0.7214827861,"sentiment-analysis_deepsentipers_acc":0.752293578,"sentiment-analysis_deepsentipers_precision":0.7337559479,"sentiment-analysis_deepsentipers_recall":0.785425524,"sentiment-analysis_deepsentipers_fscore":0.7218721454,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.6552152029}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.8047464941,"sentiment-analysis_deepsentipers_precision_modified":0.7661826532,"sentiment-analysis_deepsentipers_recall_modified":0.8089861144,"sentiment-analysis_deepsentipers_fscore_modified":0.7830417049,"sentiment-analysis_deepsentipers_acc":0.8047464941,"sentiment-analysis_deepsentipers_precision":0.7661826532,"sentiment-analysis_deepsentipers_recall":0.8089861144,"sentiment-analysis_deepsentipers_fscore":0.7830417049,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":null,"sentiment-analysis_deepsentipers_precision_modified":null,"sentiment-analysis_deepsentipers_recall_modified":null,"sentiment-analysis_deepsentipers_fscore_modified":null,"sentiment-analysis_deepsentipers_acc":null,"sentiment-analysis_deepsentipers_precision":null,"sentiment-analysis_deepsentipers_recall":null,"sentiment-analysis_deepsentipers_fscore":null,"sentiment-analysis_deepsentipers_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7664509169,"sentiment-analysis_deepsentipers_precision_modified":0.7235774595,"sentiment-analysis_deepsentipers_recall_modified":0.785720049,"sentiment-analysis_deepsentipers_fscore_modified":0.7440236575,"sentiment-analysis_deepsentipers_acc":0.766864544,"sentiment-analysis_deepsentipers_precision":0.7239679492,"sentiment-analysis_deepsentipers_recall":0.786144075,"sentiment-analysis_deepsentipers_fscore":0.7444251813,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.699116864}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7556634304,"sentiment-analysis_deepsentipers_precision_modified":0.7100962569,"sentiment-analysis_deepsentipers_recall_modified":0.796296032,"sentiment-analysis_deepsentipers_fscore_modified":0.7198160026,"sentiment-analysis_deepsentipers_acc":0.7556634304,"sentiment-analysis_deepsentipers_precision":0.7100962569,"sentiment-analysis_deepsentipers_recall":0.796296032,"sentiment-analysis_deepsentipers_fscore":0.7198160026,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7459546926,"sentiment-analysis_deepsentipers_precision_modified":0.696002467,"sentiment-analysis_deepsentipers_recall_modified":0.7725731976,"sentiment-analysis_deepsentipers_fscore_modified":0.7160207999,"sentiment-analysis_deepsentipers_acc":0.7459546926,"sentiment-analysis_deepsentipers_precision":0.696002467,"sentiment-analysis_deepsentipers_recall":0.7725731976,"sentiment-analysis_deepsentipers_fscore":0.7160207999,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7820927724,"sentiment-analysis_deepsentipers_precision_modified":0.7592820571,"sentiment-analysis_deepsentipers_recall_modified":0.7768252647,"sentiment-analysis_deepsentipers_fscore_modified":0.7562669975,"sentiment-analysis_deepsentipers_acc":0.7820927724,"sentiment-analysis_deepsentipers_precision":0.7592820571,"sentiment-analysis_deepsentipers_recall":0.7768252647,"sentiment-analysis_deepsentipers_fscore":0.7562669975,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7521691974,"sentiment-analysis_deepsentipers_precision_modified":0.7149147283,"sentiment-analysis_deepsentipers_recall_modified":0.7661218172,"sentiment-analysis_deepsentipers_fscore_modified":0.7340307684,"sentiment-analysis_deepsentipers_acc":0.7550353838,"sentiment-analysis_deepsentipers_precision":0.7176389542,"sentiment-analysis_deepsentipers_recall":0.7690411709,"sentiment-analysis_deepsentipers_fscore":0.7368278372,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9962039046,"nlu_score":0.3749414991}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.5949298813,"sentiment-analysis_deepsentipers_precision_modified":0.6633283768,"sentiment-analysis_deepsentipers_recall_modified":0.6739827327,"sentiment-analysis_deepsentipers_fscore_modified":0.597099001,"sentiment-analysis_deepsentipers_acc":0.5958941113,"sentiment-analysis_deepsentipers_precision":0.6644034633,"sentiment-analysis_deepsentipers_recall":0.6750750872,"sentiment-analysis_deepsentipers_fscore":0.5980667466,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.5661558794}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7038834951,"sentiment-analysis_deepsentipers_precision_modified":0.6558175114,"sentiment-analysis_deepsentipers_recall_modified":0.7195323034,"sentiment-analysis_deepsentipers_fscore_modified":0.6634075099,"sentiment-analysis_deepsentipers_acc":0.7038834951,"sentiment-analysis_deepsentipers_precision":0.6558175114,"sentiment-analysis_deepsentipers_recall":0.7195323034,"sentiment-analysis_deepsentipers_fscore":0.6634075099,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.456845738}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7669902913,"sentiment-analysis_deepsentipers_precision_modified":0.7411642414,"sentiment-analysis_deepsentipers_recall_modified":0.7990679398,"sentiment-analysis_deepsentipers_fscore_modified":0.7346216275,"sentiment-analysis_deepsentipers_acc":0.7669902913,"sentiment-analysis_deepsentipers_precision":0.7411642414,"sentiment-analysis_deepsentipers_recall":0.7990679398,"sentiment-analysis_deepsentipers_fscore":0.7346216275,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6855447681,"sentiment-analysis_deepsentipers_precision_modified":0.6408552737,"sentiment-analysis_deepsentipers_recall_modified":0.7180772523,"sentiment-analysis_deepsentipers_fscore_modified":0.6446920024,"sentiment-analysis_deepsentipers_acc":0.6855447681,"sentiment-analysis_deepsentipers_precision":0.6408552737,"sentiment-analysis_deepsentipers_recall":0.7180772523,"sentiment-analysis_deepsentipers_fscore":0.6446920024,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.5121418762}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.5199568501,"sentiment-analysis_deepsentipers_precision_modified":0.4907692439,"sentiment-analysis_deepsentipers_recall_modified":0.5047701764,"sentiment-analysis_deepsentipers_fscore_modified":0.4457895794,"sentiment-analysis_deepsentipers_acc":0.571767497,"sentiment-analysis_deepsentipers_precision":0.5396715174,"sentiment-analysis_deepsentipers_recall":0.5550675605,"sentiment-analysis_deepsentipers_fscore":0.4902098934,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9093851133,"nlu_score":0.3619547874}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7642934196,"sentiment-analysis_deepsentipers_precision_modified":0.7287131406,"sentiment-analysis_deepsentipers_recall_modified":0.7801104156,"sentiment-analysis_deepsentipers_fscore_modified":0.7434018552,"sentiment-analysis_deepsentipers_acc":0.7951739618,"sentiment-analysis_deepsentipers_precision":0.7581560958,"sentiment-analysis_deepsentipers_recall":0.8116300284,"sentiment-analysis_deepsentipers_fscore":0.7734382938,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9611650485,"nlu_score":0.3928685253}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7324703344,"sentiment-analysis_deepsentipers_precision_modified":0.7023773257,"sentiment-analysis_deepsentipers_recall_modified":0.7824931708,"sentiment-analysis_deepsentipers_fscore_modified":0.6905624385,"sentiment-analysis_deepsentipers_acc":0.7324703344,"sentiment-analysis_deepsentipers_precision":0.7023773257,"sentiment-analysis_deepsentipers_recall":0.7824931708,"sentiment-analysis_deepsentipers_fscore":0.6905624385,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7869471413,"sentiment-analysis_deepsentipers_precision_modified":0.7486325068,"sentiment-analysis_deepsentipers_recall_modified":0.811119619,"sentiment-analysis_deepsentipers_fscore_modified":0.7669134988,"sentiment-analysis_deepsentipers_acc":0.7869471413,"sentiment-analysis_deepsentipers_precision":0.7486325068,"sentiment-analysis_deepsentipers_recall":0.811119619,"sentiment-analysis_deepsentipers_fscore":0.7669134988,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.806364617,"sentiment-analysis_deepsentipers_precision_modified":0.7700109372,"sentiment-analysis_deepsentipers_recall_modified":0.8303259501,"sentiment-analysis_deepsentipers_fscore_modified":0.7817187645,"sentiment-analysis_deepsentipers_acc":0.806364617,"sentiment-analysis_deepsentipers_precision":0.7700109372,"sentiment-analysis_deepsentipers_recall":0.8303259501,"sentiment-analysis_deepsentipers_fscore":0.7817187645,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7713052859,"sentiment-analysis_deepsentipers_precision_modified":0.7288724929,"sentiment-analysis_deepsentipers_recall_modified":0.803675275,"sentiment-analysis_deepsentipers_fscore_modified":0.753174206,"sentiment-analysis_deepsentipers_acc":0.7713052859,"sentiment-analysis_deepsentipers_precision":0.7288724929,"sentiment-analysis_deepsentipers_recall":0.803675275,"sentiment-analysis_deepsentipers_fscore":0.753174206,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.8047464941,"sentiment-analysis_deepsentipers_precision_modified":0.7692351798,"sentiment-analysis_deepsentipers_recall_modified":0.8125606487,"sentiment-analysis_deepsentipers_fscore_modified":0.7842327246,"sentiment-analysis_deepsentipers_acc":0.8047464941,"sentiment-analysis_deepsentipers_precision":0.7692351798,"sentiment-analysis_deepsentipers_recall":0.8125606487,"sentiment-analysis_deepsentipers_fscore":0.7842327246,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.4824528512}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7740021575,"sentiment-analysis_deepsentipers_precision_modified":0.7295627073,"sentiment-analysis_deepsentipers_recall_modified":0.7969121455,"sentiment-analysis_deepsentipers_fscore_modified":0.7492633779,"sentiment-analysis_deepsentipers_acc":0.7744198597,"sentiment-analysis_deepsentipers_precision":0.729956427,"sentiment-analysis_deepsentipers_recall":0.7973422114,"sentiment-analysis_deepsentipers_fscore":0.7496677294,"sentiment-analysis_deepsentipers_valid_output_ratio":0.9994606257,"nlu_score":0.7050532433}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7610571737,"sentiment-analysis_deepsentipers_precision_modified":0.7412673027,"sentiment-analysis_deepsentipers_recall_modified":0.7880284687,"sentiment-analysis_deepsentipers_fscore_modified":0.7263322065,"sentiment-analysis_deepsentipers_acc":0.7610571737,"sentiment-analysis_deepsentipers_precision":0.7412673027,"sentiment-analysis_deepsentipers_recall":0.7880284687,"sentiment-analysis_deepsentipers_fscore":0.7263322065,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7610571737,"sentiment-analysis_deepsentipers_precision_modified":0.7233450689,"sentiment-analysis_deepsentipers_recall_modified":0.7938691015,"sentiment-analysis_deepsentipers_fscore_modified":0.7265888673,"sentiment-analysis_deepsentipers_acc":0.7622906537,"sentiment-analysis_deepsentipers_precision":0.7245174272,"sentiment-analysis_deepsentipers_recall":0.7951557613,"sentiment-analysis_deepsentipers_fscore":0.727766483,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.6914202844}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6154261057,"sentiment-analysis_deepsentipers_precision_modified":0.6519864557,"sentiment-analysis_deepsentipers_recall_modified":0.6762525877,"sentiment-analysis_deepsentipers_fscore_modified":0.5290317996,"sentiment-analysis_deepsentipers_acc":0.6154261057,"sentiment-analysis_deepsentipers_precision":0.6519864557,"sentiment-analysis_deepsentipers_recall":0.6762525877,"sentiment-analysis_deepsentipers_fscore":0.5290317996,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.531045981}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7448759439,"sentiment-analysis_deepsentipers_precision_modified":0.7197594162,"sentiment-analysis_deepsentipers_recall_modified":0.7773395601,"sentiment-analysis_deepsentipers_fscore_modified":0.7035572334,"sentiment-analysis_deepsentipers_acc":0.7448759439,"sentiment-analysis_deepsentipers_precision":0.7197594162,"sentiment-analysis_deepsentipers_recall":0.7773395601,"sentiment-analysis_deepsentipers_fscore":0.7035572334,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7411003236,"sentiment-analysis_deepsentipers_precision_modified":0.7010084925,"sentiment-analysis_deepsentipers_recall_modified":0.7529009939,"sentiment-analysis_deepsentipers_fscore_modified":0.7172781226,"sentiment-analysis_deepsentipers_acc":0.7411003236,"sentiment-analysis_deepsentipers_precision":0.7010084925,"sentiment-analysis_deepsentipers_recall":0.7529009939,"sentiment-analysis_deepsentipers_fscore":0.7172781226,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.3737864078,"sentiment-analysis_deepsentipers_precision_modified":0.4303148768,"sentiment-analysis_deepsentipers_recall_modified":0.3053254234,"sentiment-analysis_deepsentipers_fscore_modified":0.2934454786,"sentiment-analysis_deepsentipers_acc":0.6209677419,"sentiment-analysis_deepsentipers_precision":0.7148779405,"sentiment-analysis_deepsentipers_recall":0.5072341711,"sentiment-analysis_deepsentipers_fscore":0.4874981338,"sentiment-analysis_deepsentipers_valid_output_ratio":0.6019417476,"nlu_score":0.3916645306}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7988133765,"sentiment-analysis_deepsentipers_precision_modified":0.7580375513,"sentiment-analysis_deepsentipers_recall_modified":0.8108044611,"sentiment-analysis_deepsentipers_fscore_modified":0.7757714496,"sentiment-analysis_deepsentipers_acc":0.7988133765,"sentiment-analysis_deepsentipers_precision":0.7580375513,"sentiment-analysis_deepsentipers_recall":0.8108044611,"sentiment-analysis_deepsentipers_fscore":0.7757714496,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.6278317152,"sentiment-analysis_deepsentipers_precision_modified":0.5954545705,"sentiment-analysis_deepsentipers_recall_modified":0.6239967818,"sentiment-analysis_deepsentipers_fscore_modified":0.6073033689,"sentiment-analysis_deepsentipers_acc":0.7288666249,"sentiment-analysis_deepsentipers_precision":0.691279132,"sentiment-analysis_deepsentipers_recall":0.7244145482,"sentiment-analysis_deepsentipers_fscore":0.7050347188,"sentiment-analysis_deepsentipers_valid_output_ratio":0.8613807983,"nlu_score":0.6361186163}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.7502696872,"sentiment-analysis_deepsentipers_precision_modified":0.7162099301,"sentiment-analysis_deepsentipers_recall_modified":0.7649004728,"sentiment-analysis_deepsentipers_fscore_modified":0.716460892,"sentiment-analysis_deepsentipers_acc":0.7502696872,"sentiment-analysis_deepsentipers_precision":0.7162099301,"sentiment-analysis_deepsentipers_recall":0.7649004728,"sentiment-analysis_deepsentipers_fscore":0.716460892,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6255818412}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.3149946063,"sentiment-analysis_deepsentipers_precision_modified":0.6011059335,"sentiment-analysis_deepsentipers_recall_modified":0.4700288555,"sentiment-analysis_deepsentipers_fscore_modified":0.3135968578,"sentiment-analysis_deepsentipers_acc":0.3155051324,"sentiment-analysis_deepsentipers_precision":0.6020801732,"sentiment-analysis_deepsentipers_recall":0.4707906527,"sentiment-analysis_deepsentipers_fscore":0.3141051185,"sentiment-analysis_deepsentipers_valid_output_ratio":0.998381877,"nlu_score":0.1368924446}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sentiment-analysis_deepsentipers_acc_modified":0.0787486516,"sentiment-analysis_deepsentipers_precision_modified":0.1370950606,"sentiment-analysis_deepsentipers_recall_modified":0.1281381117,"sentiment-analysis_deepsentipers_fscore_modified":0.0722798642,"sentiment-analysis_deepsentipers_acc":0.2106782107,"sentiment-analysis_deepsentipers_precision":0.3667737986,"sentiment-analysis_deepsentipers_recall":0.3428110522,"sentiment-analysis_deepsentipers_fscore":0.1933721042,"sentiment-analysis_deepsentipers_valid_output_ratio":0.3737864078,"nlu_score":0.046805056}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sentiment-analysis_deepsentipers_acc_modified":0.7939590076,"sentiment-analysis_deepsentipers_precision_modified":0.7547931347,"sentiment-analysis_deepsentipers_recall_modified":0.8064164986,"sentiment-analysis_deepsentipers_fscore_modified":0.7679289467,"sentiment-analysis_deepsentipers_acc":0.7939590076,"sentiment-analysis_deepsentipers_precision":0.7547931347,"sentiment-analysis_deepsentipers_recall":0.8064164986,"sentiment-analysis_deepsentipers_fscore":0.7679289467,"sentiment-analysis_deepsentipers_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
leaderboard/boards_data/sts_FarSICK.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8606070195,"sts_FarSICK_corrcoef":0.8606070195,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8471466571,"sts_FarSICK_corrcoef":0.8471466571,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6241793507}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8332013424,"sts_FarSICK_corrcoef":0.8332013424,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8602460091,"sts_FarSICK_corrcoef":0.8602460091,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8624442565,"sts_FarSICK_corrcoef":0.8624442565,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":null,"sts_FarSICK_corrcoef":null,"sts_FarSICK_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8767598269,"sts_FarSICK_corrcoef":0.8767598269,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8796836219,"sts_FarSICK_corrcoef":0.8796836219,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8509006434,"sts_FarSICK_corrcoef":0.8509006434,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8550824218,"sts_FarSICK_corrcoef":0.8550824218,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.2533632205,"sts_FarSICK_corrcoef":0.8617796616,"sts_FarSICK_valid_output_ratio":0.294,"nlu_score":0.3749414991}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8346099969,"sts_FarSICK_corrcoef":0.8346099969,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5661558794}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8020636156,"sts_FarSICK_corrcoef":0.8020636156,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.456845738}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8641781993,"sts_FarSICK_corrcoef":0.8641781993,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8363152655,"sts_FarSICK_corrcoef":0.8430597434,"sts_FarSICK_valid_output_ratio":0.992,"nlu_score":0.5121418762}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.6678492429,"sts_FarSICK_corrcoef":0.6913553239,"sts_FarSICK_valid_output_ratio":0.966,"nlu_score":0.3619547874}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.3928685253}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8274969834,"sts_FarSICK_corrcoef":0.8274969834,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.86471356,"sts_FarSICK_corrcoef":0.86471356,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8663758584,"sts_FarSICK_corrcoef":0.8663758584,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8671704383,"sts_FarSICK_corrcoef":0.8671704383,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.058,"nlu_score":0.4824528512}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8492628764,"sts_FarSICK_corrcoef":0.8492628764,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8367188896,"sts_FarSICK_corrcoef":0.8367188896,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8461251715,"sts_FarSICK_corrcoef":0.8461251715,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8375953381,"sts_FarSICK_corrcoef":0.8375953381,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.531045981}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8720703866,"sts_FarSICK_corrcoef":0.8720703866,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8497629768,"sts_FarSICK_corrcoef":0.8497629768,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.5531047251,"sts_FarSICK_corrcoef":0.8039312865,"sts_FarSICK_valid_output_ratio":0.688,"nlu_score":0.3916645306}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8612153956,"sts_FarSICK_corrcoef":0.8612153956,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8547994421,"sts_FarSICK_corrcoef":0.8547994421,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6361186163}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8643540763,"sts_FarSICK_corrcoef":0.8643540763,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6255818412}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.1368924446}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.018,"nlu_score":0.046805056}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8357730413,"sts_FarSICK_corrcoef":0.8357730413,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8606070195,"sts_FarSICK_corrcoef":0.8606070195,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8471466571,"sts_FarSICK_corrcoef":0.8471466571,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6241793507}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8332013424,"sts_FarSICK_corrcoef":0.8332013424,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8602460091,"sts_FarSICK_corrcoef":0.8602460091,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8624442565,"sts_FarSICK_corrcoef":0.8624442565,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":null,"sts_FarSICK_corrcoef":null,"sts_FarSICK_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8767598269,"sts_FarSICK_corrcoef":0.8767598269,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8796836219,"sts_FarSICK_corrcoef":0.8796836219,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8509006434,"sts_FarSICK_corrcoef":0.8509006434,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8550824218,"sts_FarSICK_corrcoef":0.8550824218,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.2533632205,"sts_FarSICK_corrcoef":0.8617796616,"sts_FarSICK_valid_output_ratio":0.294,"nlu_score":0.3749414991}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8346099969,"sts_FarSICK_corrcoef":0.8346099969,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5661558794}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8020636156,"sts_FarSICK_corrcoef":0.8020636156,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.456845738}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8641781993,"sts_FarSICK_corrcoef":0.8641781993,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8363152655,"sts_FarSICK_corrcoef":0.8430597434,"sts_FarSICK_valid_output_ratio":0.992,"nlu_score":0.5121418762}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.6678492429,"sts_FarSICK_corrcoef":0.6913553239,"sts_FarSICK_valid_output_ratio":0.966,"nlu_score":0.3619547874}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.3928685253}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8274969834,"sts_FarSICK_corrcoef":0.8274969834,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.86471356,"sts_FarSICK_corrcoef":0.86471356,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8663758584,"sts_FarSICK_corrcoef":0.8663758584,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8671704383,"sts_FarSICK_corrcoef":0.8671704383,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.058,"nlu_score":0.4824528512}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8492628764,"sts_FarSICK_corrcoef":0.8492628764,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8367188896,"sts_FarSICK_corrcoef":0.8367188896,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8461251715,"sts_FarSICK_corrcoef":0.8461251715,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8375953381,"sts_FarSICK_corrcoef":0.8375953381,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.531045981}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8720703866,"sts_FarSICK_corrcoef":0.8720703866,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8497629768,"sts_FarSICK_corrcoef":0.8497629768,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.5531047251,"sts_FarSICK_corrcoef":0.8039312865,"sts_FarSICK_valid_output_ratio":0.688,"nlu_score":0.3916645306}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8612153956,"sts_FarSICK_corrcoef":0.8612153956,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8547994421,"sts_FarSICK_corrcoef":0.8547994421,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6361186163}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.8643540763,"sts_FarSICK_corrcoef":0.8643540763,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6255818412}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.0,"nlu_score":0.1368924446}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sts_FarSICK_corrcoef_modified":0.0,"sts_FarSICK_corrcoef":0.0,"sts_FarSICK_valid_output_ratio":0.018,"nlu_score":0.046805056}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_FarSICK_corrcoef_modified":0.8357730413,"sts_FarSICK_corrcoef":0.8357730413,"sts_FarSICK_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
leaderboard/boards_data/sts_SynPerSTS.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9639002346,"sts_SynPerSTS_corrcoef":0.9639002346,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9198771683,"sts_SynPerSTS_corrcoef":0.9198771683,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6241793507}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9445586775,"sts_SynPerSTS_corrcoef":0.9445586775,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9587180795,"sts_SynPerSTS_corrcoef":0.9587180795,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9620104912,"sts_SynPerSTS_corrcoef":0.9620104912,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":null,"sts_SynPerSTS_corrcoef":null,"sts_SynPerSTS_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.950218242,"sts_SynPerSTS_corrcoef":0.950218242,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9583160478,"sts_SynPerSTS_corrcoef":0.9583160478,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9603899338,"sts_SynPerSTS_corrcoef":0.9603899338,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.961773092,"sts_SynPerSTS_corrcoef":0.961773092,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.3217761614,"sts_SynPerSTS_corrcoef":0.8791698399,"sts_SynPerSTS_valid_output_ratio":0.366,"nlu_score":0.3749414991}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9424987971,"sts_SynPerSTS_corrcoef":0.9424987971,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.5661558794}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.8942713775,"sts_SynPerSTS_corrcoef":0.8942713775,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.456845738}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9550693345,"sts_SynPerSTS_corrcoef":0.9550693345,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9175164452,"sts_SynPerSTS_corrcoef":0.9324354118,"sts_SynPerSTS_valid_output_ratio":0.984,"nlu_score":0.5121418762}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.7233454448,"sts_SynPerSTS_corrcoef":0.8037171609,"sts_SynPerSTS_valid_output_ratio":0.9,"nlu_score":0.3619547874}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.0,"nlu_score":0.3928685253}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9409955022,"sts_SynPerSTS_corrcoef":0.9409955022,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9622305784,"sts_SynPerSTS_corrcoef":0.9622305784,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9509371488,"sts_SynPerSTS_corrcoef":0.9509371488,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9590342543,"sts_SynPerSTS_corrcoef":0.9590342543,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.1337524795,"sts_SynPerSTS_corrcoef":0.8057378284,"sts_SynPerSTS_valid_output_ratio":0.166,"nlu_score":0.4824528512}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9665566215,"sts_SynPerSTS_corrcoef":0.9665566215,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9401486573,"sts_SynPerSTS_corrcoef":0.9401486573,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9551219626,"sts_SynPerSTS_corrcoef":0.9551219626,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9009001164,"sts_SynPerSTS_corrcoef":0.9009001164,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.531045981}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.930027057,"sts_SynPerSTS_corrcoef":0.930027057,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9555087155,"sts_SynPerSTS_corrcoef":0.9555087155,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.4432810096,"sts_SynPerSTS_corrcoef":0.7363471921,"sts_SynPerSTS_valid_output_ratio":0.602,"nlu_score":0.3916645306}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9586779662,"sts_SynPerSTS_corrcoef":0.9586779662,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9581074422,"sts_SynPerSTS_corrcoef":0.9581074422,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6361186163}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9421493238,"sts_SynPerSTS_corrcoef":0.9421493238,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6255818412}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.0,"nlu_score":0.1368924446}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.02,"nlu_score":0.046805056}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9446265171,"sts_SynPerSTS_corrcoef":0.9446265171,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9639002346,"sts_SynPerSTS_corrcoef":0.9639002346,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9198771683,"sts_SynPerSTS_corrcoef":0.9198771683,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6241793507}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9445586775,"sts_SynPerSTS_corrcoef":0.9445586775,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9587180795,"sts_SynPerSTS_corrcoef":0.9587180795,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6552152029}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9620104912,"sts_SynPerSTS_corrcoef":0.9620104912,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":null,"sts_SynPerSTS_corrcoef":null,"sts_SynPerSTS_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.950218242,"sts_SynPerSTS_corrcoef":0.950218242,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9583160478,"sts_SynPerSTS_corrcoef":0.9583160478,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9603899338,"sts_SynPerSTS_corrcoef":0.9603899338,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.961773092,"sts_SynPerSTS_corrcoef":0.961773092,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.3217761614,"sts_SynPerSTS_corrcoef":0.8791698399,"sts_SynPerSTS_valid_output_ratio":0.366,"nlu_score":0.3749414991}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9424987971,"sts_SynPerSTS_corrcoef":0.9424987971,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.5661558794}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.8942713775,"sts_SynPerSTS_corrcoef":0.8942713775,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.456845738}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9550693345,"sts_SynPerSTS_corrcoef":0.9550693345,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9175164452,"sts_SynPerSTS_corrcoef":0.9324354118,"sts_SynPerSTS_valid_output_ratio":0.984,"nlu_score":0.5121418762}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.7233454448,"sts_SynPerSTS_corrcoef":0.8037171609,"sts_SynPerSTS_valid_output_ratio":0.9,"nlu_score":0.3619547874}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.0,"nlu_score":0.3928685253}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9409955022,"sts_SynPerSTS_corrcoef":0.9409955022,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9622305784,"sts_SynPerSTS_corrcoef":0.9622305784,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9509371488,"sts_SynPerSTS_corrcoef":0.9509371488,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9590342543,"sts_SynPerSTS_corrcoef":0.9590342543,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.1337524795,"sts_SynPerSTS_corrcoef":0.8057378284,"sts_SynPerSTS_valid_output_ratio":0.166,"nlu_score":0.4824528512}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9665566215,"sts_SynPerSTS_corrcoef":0.9665566215,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9401486573,"sts_SynPerSTS_corrcoef":0.9401486573,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9551219626,"sts_SynPerSTS_corrcoef":0.9551219626,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9009001164,"sts_SynPerSTS_corrcoef":0.9009001164,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.531045981}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.930027057,"sts_SynPerSTS_corrcoef":0.930027057,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9555087155,"sts_SynPerSTS_corrcoef":0.9555087155,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.4432810096,"sts_SynPerSTS_corrcoef":0.7363471921,"sts_SynPerSTS_valid_output_ratio":0.602,"nlu_score":0.3916645306}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9586779662,"sts_SynPerSTS_corrcoef":0.9586779662,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9581074422,"sts_SynPerSTS_corrcoef":0.9581074422,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6361186163}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.9421493238,"sts_SynPerSTS_corrcoef":0.9421493238,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6255818412}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.0,"nlu_score":0.1368924446}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","sts_SynPerSTS_corrcoef_modified":0.0,"sts_SynPerSTS_corrcoef":0.0,"sts_SynPerSTS_valid_output_ratio":0.02,"nlu_score":0.046805056}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","sts_SynPerSTS_corrcoef_modified":0.9446265171,"sts_SynPerSTS_corrcoef":0.9446265171,"sts_SynPerSTS_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
leaderboard/boards_data/summarization_PnSummary.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1188323392,"summarization_PnSummary_rougeL_recall":0.3948447809,"summarization_PnSummary_rougeL_f1_score":0.1786530476,"nlg_score":0.1779340777}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1095844839,"summarization_PnSummary_rougeL_recall":0.3735331299,"summarization_PnSummary_rougeL_f1_score":0.1645385252,"nlg_score":0.0949943578}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1227039295,"summarization_PnSummary_rougeL_recall":0.4315497639,"summarization_PnSummary_rougeL_f1_score":0.1856517383,"nlg_score":0.1880477876}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1072486168,"summarization_PnSummary_rougeL_recall":0.3966587345,"summarization_PnSummary_rougeL_f1_score":0.1627029568,"nlg_score":0.0940241349}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1234743619,"summarization_PnSummary_rougeL_recall":0.376111826,"summarization_PnSummary_rougeL_f1_score":0.1808600563,"nlg_score":0.194675133}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":null,"summarization_PnSummary_rougeL_recall":null,"summarization_PnSummary_rougeL_f1_score":null,"nlg_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1156871575,"summarization_PnSummary_rougeL_recall":0.3630716995,"summarization_PnSummary_rougeL_f1_score":0.1697348346,"nlg_score":0.1196804312}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1124574222,"summarization_PnSummary_rougeL_recall":0.3717393409,"summarization_PnSummary_rougeL_f1_score":0.1673025553,"nlg_score":0.1067134448}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1123870374,"summarization_PnSummary_rougeL_recall":0.4032007327,"summarization_PnSummary_rougeL_f1_score":0.17115848,"nlg_score":0.16056333}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1089978489,"summarization_PnSummary_rougeL_recall":0.3936021933,"summarization_PnSummary_rougeL_f1_score":0.1662525669,"nlg_score":0.1679338638}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.132916538,"summarization_PnSummary_rougeL_recall":0.3579358655,"summarization_PnSummary_rougeL_f1_score":0.1887379797,"nlg_score":0.1089333827}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1029257145,"summarization_PnSummary_rougeL_recall":0.4347811424,"summarization_PnSummary_rougeL_f1_score":0.1621438757,"nlg_score":0.1319091735}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0833149314,"summarization_PnSummary_rougeL_recall":0.4027758903,"summarization_PnSummary_rougeL_f1_score":0.1338404051,"nlg_score":0.112015688}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0811186656,"summarization_PnSummary_rougeL_recall":0.3940089293,"summarization_PnSummary_rougeL_f1_score":0.1316106196,"nlg_score":0.0934094344}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1067208324,"summarization_PnSummary_rougeL_recall":0.4109136551,"summarization_PnSummary_rougeL_f1_score":0.1648475797,"nlg_score":0.1389297212}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.111370713,"summarization_PnSummary_rougeL_recall":0.3732014316,"summarization_PnSummary_rougeL_f1_score":0.1661125342,"nlg_score":0.0682994522}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1094933227,"summarization_PnSummary_rougeL_recall":0.3979476995,"summarization_PnSummary_rougeL_f1_score":0.1674664883,"nlg_score":0.1196400535}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.2237919051,"summarization_PnSummary_rougeL_recall":0.3532978852,"summarization_PnSummary_rougeL_f1_score":0.2484855426,"nlg_score":0.2010896964}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1156493376,"summarization_PnSummary_rougeL_recall":0.403347998,"summarization_PnSummary_rougeL_f1_score":0.1750055649,"nlg_score":0.1901206806}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0950296419,"summarization_PnSummary_rougeL_recall":0.3412128078,"summarization_PnSummary_rougeL_f1_score":0.1438085772,"nlg_score":0.1764906292}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1160048958,"summarization_PnSummary_rougeL_recall":0.3980422927,"summarization_PnSummary_rougeL_f1_score":0.1751797476,"nlg_score":0.1810678527}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1116612356,"summarization_PnSummary_rougeL_recall":0.420922163,"summarization_PnSummary_rougeL_f1_score":0.1723099731,"nlg_score":0.1137933652}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1228424623,"summarization_PnSummary_rougeL_recall":0.3750771332,"summarization_PnSummary_rougeL_f1_score":0.1793201723,"nlg_score":0.178231145}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1070140366,"summarization_PnSummary_rougeL_recall":0.4357356292,"summarization_PnSummary_rougeL_f1_score":0.1672508999,"nlg_score":0.1368740087}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1191404443,"summarization_PnSummary_rougeL_recall":0.365434541,"summarization_PnSummary_rougeL_f1_score":0.1744092468,"nlg_score":0.1659339021}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1298447221,"summarization_PnSummary_rougeL_recall":0.3548911672,"summarization_PnSummary_rougeL_f1_score":0.1841564462,"nlg_score":0.1641995602}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1120916238,"summarization_PnSummary_rougeL_recall":0.3610411286,"summarization_PnSummary_rougeL_f1_score":0.1660826543,"nlg_score":0.1665903777}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.109255822,"summarization_PnSummary_rougeL_recall":0.3979273385,"summarization_PnSummary_rougeL_f1_score":0.1669061111,"nlg_score":0.1557270864}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0921640152,"summarization_PnSummary_rougeL_recall":0.4401953868,"summarization_PnSummary_rougeL_f1_score":0.1480945013,"nlg_score":0.0944140383}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1134979628,"summarization_PnSummary_rougeL_recall":0.3909794734,"summarization_PnSummary_rougeL_f1_score":0.1716841943,"nlg_score":0.18964968}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0849469928,"summarization_PnSummary_rougeL_recall":0.3820724231,"summarization_PnSummary_rougeL_f1_score":0.1359575611,"nlg_score":0.0880621978}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1212751301,"summarization_PnSummary_rougeL_recall":0.3923323141,"summarization_PnSummary_rougeL_f1_score":0.1804727387,"nlg_score":0.164118288}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.151465786,"summarization_PnSummary_rougeL_recall":0.3775823327,"summarization_PnSummary_rougeL_f1_score":0.203395452,"nlg_score":0.1129755187}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.2061378815,"summarization_PnSummary_rougeL_recall":0.2755376589,"summarization_PnSummary_rougeL_f1_score":0.2192316506,"nlg_score":0.0823387318}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1091020199,"summarization_PnSummary_rougeL_recall":0.3487472682,"summarization_PnSummary_rougeL_f1_score":0.1614333679,"nlg_score":0.1151518212}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1188323392,"summarization_PnSummary_rougeL_recall":0.3948447809,"summarization_PnSummary_rougeL_f1_score":0.1786530476,"nlg_score":0.1779340777}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1095844839,"summarization_PnSummary_rougeL_recall":0.3735331299,"summarization_PnSummary_rougeL_f1_score":0.1645385252,"nlg_score":0.0949943578}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1227039295,"summarization_PnSummary_rougeL_recall":0.4315497639,"summarization_PnSummary_rougeL_f1_score":0.1856517383,"nlg_score":0.1880477876}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1072486168,"summarization_PnSummary_rougeL_recall":0.3966587345,"summarization_PnSummary_rougeL_f1_score":0.1627029568,"nlg_score":0.0940241349}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1234743619,"summarization_PnSummary_rougeL_recall":0.376111826,"summarization_PnSummary_rougeL_f1_score":0.1808600563,"nlg_score":0.194675133}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":null,"summarization_PnSummary_rougeL_recall":null,"summarization_PnSummary_rougeL_f1_score":null,"nlg_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1156871575,"summarization_PnSummary_rougeL_recall":0.3630716995,"summarization_PnSummary_rougeL_f1_score":0.1697348346,"nlg_score":0.1196804312}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1124574222,"summarization_PnSummary_rougeL_recall":0.3717393409,"summarization_PnSummary_rougeL_f1_score":0.1673025553,"nlg_score":0.1067134448}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1123870374,"summarization_PnSummary_rougeL_recall":0.4032007327,"summarization_PnSummary_rougeL_f1_score":0.17115848,"nlg_score":0.16056333}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1089978489,"summarization_PnSummary_rougeL_recall":0.3936021933,"summarization_PnSummary_rougeL_f1_score":0.1662525669,"nlg_score":0.1679338638}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.132916538,"summarization_PnSummary_rougeL_recall":0.3579358655,"summarization_PnSummary_rougeL_f1_score":0.1887379797,"nlg_score":0.1089333827}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1029257145,"summarization_PnSummary_rougeL_recall":0.4347811424,"summarization_PnSummary_rougeL_f1_score":0.1621438757,"nlg_score":0.1319091735}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0833149314,"summarization_PnSummary_rougeL_recall":0.4027758903,"summarization_PnSummary_rougeL_f1_score":0.1338404051,"nlg_score":0.112015688}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0811186656,"summarization_PnSummary_rougeL_recall":0.3940089293,"summarization_PnSummary_rougeL_f1_score":0.1316106196,"nlg_score":0.0934094344}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1067208324,"summarization_PnSummary_rougeL_recall":0.4109136551,"summarization_PnSummary_rougeL_f1_score":0.1648475797,"nlg_score":0.1389297212}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.111370713,"summarization_PnSummary_rougeL_recall":0.3732014316,"summarization_PnSummary_rougeL_f1_score":0.1661125342,"nlg_score":0.0682994522}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1094933227,"summarization_PnSummary_rougeL_recall":0.3979476995,"summarization_PnSummary_rougeL_f1_score":0.1674664883,"nlg_score":0.1196400535}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.2237919051,"summarization_PnSummary_rougeL_recall":0.3532978852,"summarization_PnSummary_rougeL_f1_score":0.2484855426,"nlg_score":0.2010896964}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1156493376,"summarization_PnSummary_rougeL_recall":0.403347998,"summarization_PnSummary_rougeL_f1_score":0.1750055649,"nlg_score":0.1901206806}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.0950296419,"summarization_PnSummary_rougeL_recall":0.3412128078,"summarization_PnSummary_rougeL_f1_score":0.1438085772,"nlg_score":0.1764906292}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1160048958,"summarization_PnSummary_rougeL_recall":0.3980422927,"summarization_PnSummary_rougeL_f1_score":0.1751797476,"nlg_score":0.1810678527}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1116612356,"summarization_PnSummary_rougeL_recall":0.420922163,"summarization_PnSummary_rougeL_f1_score":0.1723099731,"nlg_score":0.1137933652}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1228424623,"summarization_PnSummary_rougeL_recall":0.3750771332,"summarization_PnSummary_rougeL_f1_score":0.1793201723,"nlg_score":0.178231145}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1070140366,"summarization_PnSummary_rougeL_recall":0.4357356292,"summarization_PnSummary_rougeL_f1_score":0.1672508999,"nlg_score":0.1368740087}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1191404443,"summarization_PnSummary_rougeL_recall":0.365434541,"summarization_PnSummary_rougeL_f1_score":0.1744092468,"nlg_score":0.1659339021}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1298447221,"summarization_PnSummary_rougeL_recall":0.3548911672,"summarization_PnSummary_rougeL_f1_score":0.1841564462,"nlg_score":0.1641995602}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1120916238,"summarization_PnSummary_rougeL_recall":0.3610411286,"summarization_PnSummary_rougeL_f1_score":0.1660826543,"nlg_score":0.1665903777}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.109255822,"summarization_PnSummary_rougeL_recall":0.3979273385,"summarization_PnSummary_rougeL_f1_score":0.1669061111,"nlg_score":0.1557270864}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0921640152,"summarization_PnSummary_rougeL_recall":0.4401953868,"summarization_PnSummary_rougeL_f1_score":0.1480945013,"nlg_score":0.0944140383}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1134979628,"summarization_PnSummary_rougeL_recall":0.3909794734,"summarization_PnSummary_rougeL_f1_score":0.1716841943,"nlg_score":0.18964968}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.0849469928,"summarization_PnSummary_rougeL_recall":0.3820724231,"summarization_PnSummary_rougeL_f1_score":0.1359575611,"nlg_score":0.0880621978}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.1212751301,"summarization_PnSummary_rougeL_recall":0.3923323141,"summarization_PnSummary_rougeL_f1_score":0.1804727387,"nlg_score":0.164118288}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.151465786,"summarization_PnSummary_rougeL_recall":0.3775823327,"summarization_PnSummary_rougeL_f1_score":0.203395452,"nlg_score":0.1129755187}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","summarization_PnSummary_rougeL_precision":0.2061378815,"summarization_PnSummary_rougeL_recall":0.2755376589,"summarization_PnSummary_rougeL_f1_score":0.2192316506,"nlg_score":0.0823387318}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_PnSummary_rougeL_precision":0.1091020199,"summarization_PnSummary_rougeL_recall":0.3487472682,"summarization_PnSummary_rougeL_f1_score":0.1614333679,"nlg_score":0.1151518212}
|
leaderboard/boards_data/summarization_SamSUM-fa.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1538512444,"summarization_SamSUM-fa_rougeL_recall":0.3849531288,"summarization_SamSUM-fa_rougeL_f1_score":0.2115502707,"nlg_score":0.1779340777}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1638274716,"summarization_SamSUM-fa_rougeL_recall":0.3535878882,"summarization_SamSUM-fa_rougeL_f1_score":0.2134854664,"nlg_score":0.0949943578}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1591262985,"summarization_SamSUM-fa_rougeL_recall":0.4163090512,"summarization_SamSUM-fa_rougeL_f1_score":0.2208876443,"nlg_score":0.1880477876}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1468362777,"summarization_SamSUM-fa_rougeL_recall":0.3858089513,"summarization_SamSUM-fa_rougeL_f1_score":0.2041300257,"nlg_score":0.0940241349}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1681357159,"summarization_SamSUM-fa_rougeL_recall":0.3567938895,"summarization_SamSUM-fa_rougeL_f1_score":0.2189693454,"nlg_score":0.194675133}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":null,"summarization_SamSUM-fa_rougeL_recall":null,"summarization_SamSUM-fa_rougeL_f1_score":null,"nlg_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1658145118,"summarization_SamSUM-fa_rougeL_recall":0.3677760479,"summarization_SamSUM-fa_rougeL_f1_score":0.2189237562,"nlg_score":0.1196804312}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1588367988,"summarization_SamSUM-fa_rougeL_recall":0.3735722635,"summarization_SamSUM-fa_rougeL_f1_score":0.2131671502,"nlg_score":0.1067134448}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1469468837,"summarization_SamSUM-fa_rougeL_recall":0.3743807014,"summarization_SamSUM-fa_rougeL_f1_score":0.2022859929,"nlg_score":0.16056333}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1514618398,"summarization_SamSUM-fa_rougeL_recall":0.3683020708,"summarization_SamSUM-fa_rougeL_f1_score":0.2063212948,"nlg_score":0.1679338638}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1772724525,"summarization_SamSUM-fa_rougeL_recall":0.341583677,"summarization_SamSUM-fa_rougeL_f1_score":0.2233271064,"nlg_score":0.1089333827}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.147286408,"summarization_SamSUM-fa_rougeL_recall":0.4066657958,"summarization_SamSUM-fa_rougeL_f1_score":0.2072278176,"nlg_score":0.1319091735}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1137219495,"summarization_SamSUM-fa_rougeL_recall":0.3496708707,"summarization_SamSUM-fa_rougeL_f1_score":0.1628971148,"nlg_score":0.112015688}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.0893319419,"summarization_SamSUM-fa_rougeL_recall":0.3701712252,"summarization_SamSUM-fa_rougeL_f1_score":0.1392333016,"nlg_score":0.0934094344}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1429609514,"summarization_SamSUM-fa_rougeL_recall":0.397717388,"summarization_SamSUM-fa_rougeL_f1_score":0.2013136641,"nlg_score":0.1389297212}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1340334866,"summarization_SamSUM-fa_rougeL_recall":0.3184206946,"summarization_SamSUM-fa_rougeL_f1_score":0.179098961,"nlg_score":0.0682994522}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1338082958,"summarization_SamSUM-fa_rougeL_recall":0.397938928,"summarization_SamSUM-fa_rougeL_f1_score":0.1933390916,"nlg_score":0.1196400535}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1632163927,"summarization_SamSUM-fa_rougeL_recall":0.387510969,"summarization_SamSUM-fa_rougeL_f1_score":0.2157634129,"nlg_score":0.2010896964}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.171454009,"summarization_SamSUM-fa_rougeL_recall":0.3692597258,"summarization_SamSUM-fa_rougeL_f1_score":0.2248722593,"nlg_score":0.1901206806}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1495202929,"summarization_SamSUM-fa_rougeL_recall":0.3342802415,"summarization_SamSUM-fa_rougeL_f1_score":0.1977642173,"nlg_score":0.1764906292}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1578034675,"summarization_SamSUM-fa_rougeL_recall":0.3902121243,"summarization_SamSUM-fa_rougeL_f1_score":0.2156396673,"nlg_score":0.1810678527}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.141334615,"summarization_SamSUM-fa_rougeL_recall":0.4016833546,"summarization_SamSUM-fa_rougeL_f1_score":0.2005260444,"nlg_score":0.1137933652}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1808561992,"summarization_SamSUM-fa_rougeL_recall":0.414509553,"summarization_SamSUM-fa_rougeL_f1_score":0.2406998552,"nlg_score":0.178231145}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1525978605,"summarization_SamSUM-fa_rougeL_recall":0.3945587249,"summarization_SamSUM-fa_rougeL_f1_score":0.209852471,"nlg_score":0.1368740087}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1810410242,"summarization_SamSUM-fa_rougeL_recall":0.4016180552,"summarization_SamSUM-fa_rougeL_f1_score":0.2380560527,"nlg_score":0.1659339021}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1944265929,"summarization_SamSUM-fa_rougeL_recall":0.3761499249,"summarization_SamSUM-fa_rougeL_f1_score":0.242617187,"nlg_score":0.1641995602}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.16175156,"summarization_SamSUM-fa_rougeL_recall":0.3477483743,"summarization_SamSUM-fa_rougeL_f1_score":0.209834706,"nlg_score":0.1665903777}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1463365551,"summarization_SamSUM-fa_rougeL_recall":0.3856017289,"summarization_SamSUM-fa_rougeL_f1_score":0.2024070197,"nlg_score":0.1557270864}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1180795687,"summarization_SamSUM-fa_rougeL_recall":0.3922712004,"summarization_SamSUM-fa_rougeL_f1_score":0.170765794,"nlg_score":0.0944140383}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.165108522,"summarization_SamSUM-fa_rougeL_recall":0.3982318891,"summarization_SamSUM-fa_rougeL_f1_score":0.2240082992,"nlg_score":0.18964968}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1081719649,"summarization_SamSUM-fa_rougeL_recall":0.3726803698,"summarization_SamSUM-fa_rougeL_f1_score":0.1606804283,"nlg_score":0.0880621978}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1523824175,"summarization_SamSUM-fa_rougeL_recall":0.3838683519,"summarization_SamSUM-fa_rougeL_f1_score":0.2083553767,"nlg_score":0.164118288}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1385750553,"summarization_SamSUM-fa_rougeL_recall":0.3133561002,"summarization_SamSUM-fa_rougeL_f1_score":0.1819150852,"nlg_score":0.1129755187}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1565749742,"summarization_SamSUM-fa_rougeL_recall":0.2642298658,"summarization_SamSUM-fa_rougeL_f1_score":0.1759907012,"nlg_score":0.0823387318}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1579878887,"summarization_SamSUM-fa_rougeL_recall":0.3549924347,"summarization_SamSUM-fa_rougeL_f1_score":0.2083528945,"nlg_score":0.1151518212}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1538512444,"summarization_SamSUM-fa_rougeL_recall":0.3849531288,"summarization_SamSUM-fa_rougeL_f1_score":0.2115502707,"nlg_score":0.1779340777}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1638274716,"summarization_SamSUM-fa_rougeL_recall":0.3535878882,"summarization_SamSUM-fa_rougeL_f1_score":0.2134854664,"nlg_score":0.0949943578}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1591262985,"summarization_SamSUM-fa_rougeL_recall":0.4163090512,"summarization_SamSUM-fa_rougeL_f1_score":0.2208876443,"nlg_score":0.1880477876}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1468362777,"summarization_SamSUM-fa_rougeL_recall":0.3858089513,"summarization_SamSUM-fa_rougeL_f1_score":0.2041300257,"nlg_score":0.0940241349}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1681357159,"summarization_SamSUM-fa_rougeL_recall":0.3567938895,"summarization_SamSUM-fa_rougeL_f1_score":0.2189693454,"nlg_score":0.194675133}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":null,"summarization_SamSUM-fa_rougeL_recall":null,"summarization_SamSUM-fa_rougeL_f1_score":null,"nlg_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1658145118,"summarization_SamSUM-fa_rougeL_recall":0.3677760479,"summarization_SamSUM-fa_rougeL_f1_score":0.2189237562,"nlg_score":0.1196804312}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1588367988,"summarization_SamSUM-fa_rougeL_recall":0.3735722635,"summarization_SamSUM-fa_rougeL_f1_score":0.2131671502,"nlg_score":0.1067134448}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1469468837,"summarization_SamSUM-fa_rougeL_recall":0.3743807014,"summarization_SamSUM-fa_rougeL_f1_score":0.2022859929,"nlg_score":0.16056333}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1514618398,"summarization_SamSUM-fa_rougeL_recall":0.3683020708,"summarization_SamSUM-fa_rougeL_f1_score":0.2063212948,"nlg_score":0.1679338638}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1772724525,"summarization_SamSUM-fa_rougeL_recall":0.341583677,"summarization_SamSUM-fa_rougeL_f1_score":0.2233271064,"nlg_score":0.1089333827}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.147286408,"summarization_SamSUM-fa_rougeL_recall":0.4066657958,"summarization_SamSUM-fa_rougeL_f1_score":0.2072278176,"nlg_score":0.1319091735}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1137219495,"summarization_SamSUM-fa_rougeL_recall":0.3496708707,"summarization_SamSUM-fa_rougeL_f1_score":0.1628971148,"nlg_score":0.112015688}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.0893319419,"summarization_SamSUM-fa_rougeL_recall":0.3701712252,"summarization_SamSUM-fa_rougeL_f1_score":0.1392333016,"nlg_score":0.0934094344}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1429609514,"summarization_SamSUM-fa_rougeL_recall":0.397717388,"summarization_SamSUM-fa_rougeL_f1_score":0.2013136641,"nlg_score":0.1389297212}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1340334866,"summarization_SamSUM-fa_rougeL_recall":0.3184206946,"summarization_SamSUM-fa_rougeL_f1_score":0.179098961,"nlg_score":0.0682994522}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1338082958,"summarization_SamSUM-fa_rougeL_recall":0.397938928,"summarization_SamSUM-fa_rougeL_f1_score":0.1933390916,"nlg_score":0.1196400535}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1632163927,"summarization_SamSUM-fa_rougeL_recall":0.387510969,"summarization_SamSUM-fa_rougeL_f1_score":0.2157634129,"nlg_score":0.2010896964}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.171454009,"summarization_SamSUM-fa_rougeL_recall":0.3692597258,"summarization_SamSUM-fa_rougeL_f1_score":0.2248722593,"nlg_score":0.1901206806}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1495202929,"summarization_SamSUM-fa_rougeL_recall":0.3342802415,"summarization_SamSUM-fa_rougeL_f1_score":0.1977642173,"nlg_score":0.1764906292}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1578034675,"summarization_SamSUM-fa_rougeL_recall":0.3902121243,"summarization_SamSUM-fa_rougeL_f1_score":0.2156396673,"nlg_score":0.1810678527}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.141334615,"summarization_SamSUM-fa_rougeL_recall":0.4016833546,"summarization_SamSUM-fa_rougeL_f1_score":0.2005260444,"nlg_score":0.1137933652}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1808561992,"summarization_SamSUM-fa_rougeL_recall":0.414509553,"summarization_SamSUM-fa_rougeL_f1_score":0.2406998552,"nlg_score":0.178231145}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1525978605,"summarization_SamSUM-fa_rougeL_recall":0.3945587249,"summarization_SamSUM-fa_rougeL_f1_score":0.209852471,"nlg_score":0.1368740087}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1810410242,"summarization_SamSUM-fa_rougeL_recall":0.4016180552,"summarization_SamSUM-fa_rougeL_f1_score":0.2380560527,"nlg_score":0.1659339021}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1944265929,"summarization_SamSUM-fa_rougeL_recall":0.3761499249,"summarization_SamSUM-fa_rougeL_f1_score":0.242617187,"nlg_score":0.1641995602}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.16175156,"summarization_SamSUM-fa_rougeL_recall":0.3477483743,"summarization_SamSUM-fa_rougeL_f1_score":0.209834706,"nlg_score":0.1665903777}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1463365551,"summarization_SamSUM-fa_rougeL_recall":0.3856017289,"summarization_SamSUM-fa_rougeL_f1_score":0.2024070197,"nlg_score":0.1557270864}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1180795687,"summarization_SamSUM-fa_rougeL_recall":0.3922712004,"summarization_SamSUM-fa_rougeL_f1_score":0.170765794,"nlg_score":0.0944140383}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.165108522,"summarization_SamSUM-fa_rougeL_recall":0.3982318891,"summarization_SamSUM-fa_rougeL_f1_score":0.2240082992,"nlg_score":0.18964968}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1081719649,"summarization_SamSUM-fa_rougeL_recall":0.3726803698,"summarization_SamSUM-fa_rougeL_f1_score":0.1606804283,"nlg_score":0.0880621978}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1523824175,"summarization_SamSUM-fa_rougeL_recall":0.3838683519,"summarization_SamSUM-fa_rougeL_f1_score":0.2083553767,"nlg_score":0.164118288}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1385750553,"summarization_SamSUM-fa_rougeL_recall":0.3133561002,"summarization_SamSUM-fa_rougeL_f1_score":0.1819150852,"nlg_score":0.1129755187}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","summarization_SamSUM-fa_rougeL_precision":0.1565749742,"summarization_SamSUM-fa_rougeL_recall":0.2642298658,"summarization_SamSUM-fa_rougeL_f1_score":0.1759907012,"nlg_score":0.0823387318}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","summarization_SamSUM-fa_rougeL_precision":0.1579878887,"summarization_SamSUM-fa_rougeL_recall":0.3549924347,"summarization_SamSUM-fa_rougeL_f1_score":0.2083528945,"nlg_score":0.1151518212}
|
leaderboard/boards_data/tone-classification_SynTone.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8198757764,"tone-classification_SynTone_precision_modified":0.8180456965,"tone-classification_SynTone_recall_modified":0.5977640757,"tone-classification_SynTone_fscore_modified":0.6364434216,"tone-classification_SynTone_acc":0.8859060403,"tone-classification_SynTone_precision":0.8839285714,"tone-classification_SynTone_recall":0.6459061489,"tone-classification_SynTone_fscore":0.68770061,"tone-classification_SynTone_valid_output_ratio":0.9254658385,"nlu_score":0.7143086066}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5031055901,"tone-classification_SynTone_precision_modified":0.5228364877,"tone-classification_SynTone_recall_modified":0.5168736971,"tone-classification_SynTone_fscore_modified":0.4644759375,"tone-classification_SynTone_acc":0.5094339623,"tone-classification_SynTone_precision":0.5294130473,"tone-classification_SynTone_recall":0.523375253,"tone-classification_SynTone_fscore":0.4703184021,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6241793507}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6770186335,"tone-classification_SynTone_precision_modified":0.6041666667,"tone-classification_SynTone_recall_modified":0.5499765318,"tone-classification_SynTone_fscore_modified":0.5393404488,"tone-classification_SynTone_acc":0.6770186335,"tone-classification_SynTone_precision":0.6041666667,"tone-classification_SynTone_recall":0.5499765318,"tone-classification_SynTone_fscore":0.5393404488,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5776397516,"tone-classification_SynTone_precision_modified":0.4784671979,"tone-classification_SynTone_recall_modified":0.4996211806,"tone-classification_SynTone_fscore_modified":0.4487276462,"tone-classification_SynTone_acc":0.5849056604,"tone-classification_SynTone_precision":0.4844856532,"tone-classification_SynTone_recall":0.5059057238,"tone-classification_SynTone_fscore":0.4543720191,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6552152029}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.8396595026,"tone-classification_SynTone_recall_modified":0.7058371736,"tone-classification_SynTone_fscore_modified":0.748745873,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.8396595026,"tone-classification_SynTone_recall":0.7058371736,"tone-classification_SynTone_fscore":0.748745873,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":null,"tone-classification_SynTone_precision_modified":null,"tone-classification_SynTone_recall_modified":null,"tone-classification_SynTone_fscore_modified":null,"tone-classification_SynTone_acc":null,"tone-classification_SynTone_precision":null,"tone-classification_SynTone_recall":null,"tone-classification_SynTone_fscore":null,"tone-classification_SynTone_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7005172378,"tone-classification_SynTone_recall_modified":0.6637288786,"tone-classification_SynTone_fscore_modified":0.6775611485,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7005172378,"tone-classification_SynTone_recall":0.6637288786,"tone-classification_SynTone_fscore":0.6775611485,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7888198758,"tone-classification_SynTone_precision_modified":0.6529418051,"tone-classification_SynTone_recall_modified":0.7187467998,"tone-classification_SynTone_fscore_modified":0.6745690521,"tone-classification_SynTone_acc":0.7888198758,"tone-classification_SynTone_precision":0.6529418051,"tone-classification_SynTone_recall":0.7187467998,"tone-classification_SynTone_fscore":0.6745690521,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.4968944099,"tone-classification_SynTone_precision_modified":0.5363835928,"tone-classification_SynTone_recall_modified":0.5772956136,"tone-classification_SynTone_fscore_modified":0.4755414981,"tone-classification_SynTone_acc":0.4968944099,"tone-classification_SynTone_precision":0.5363835928,"tone-classification_SynTone_recall":0.5772956136,"tone-classification_SynTone_fscore":0.4755414981,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8074534161,"tone-classification_SynTone_precision_modified":0.6799424424,"tone-classification_SynTone_recall_modified":0.6338304318,"tone-classification_SynTone_fscore_modified":0.6490392995,"tone-classification_SynTone_acc":0.8074534161,"tone-classification_SynTone_precision":0.6799424424,"tone-classification_SynTone_recall":0.6338304318,"tone-classification_SynTone_fscore":0.6490392995,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.3749414991}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6774948824,"tone-classification_SynTone_recall_modified":0.67683866,"tone-classification_SynTone_fscore_modified":0.668356732,"tone-classification_SynTone_acc":0.7911392405,"tone-classification_SynTone_precision":0.6903587093,"tone-classification_SynTone_recall":0.689690027,"tone-classification_SynTone_fscore":0.6810470497,"tone-classification_SynTone_valid_output_ratio":0.9813664596,"nlu_score":0.5661558794}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.1552795031,"tone-classification_SynTone_precision_modified":0.1130928528,"tone-classification_SynTone_recall_modified":0.2155401891,"tone-classification_SynTone_fscore_modified":0.1465713376,"tone-classification_SynTone_acc":0.3424657534,"tone-classification_SynTone_precision":0.2494239631,"tone-classification_SynTone_recall":0.4753694581,"tone-classification_SynTone_fscore":0.3232600733,"tone-classification_SynTone_valid_output_ratio":0.4534161491,"nlu_score":0.456845738}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7339379455,"tone-classification_SynTone_recall_modified":0.6971283495,"tone-classification_SynTone_fscore_modified":0.7109712868,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7339379455,"tone-classification_SynTone_recall":0.6971283495,"tone-classification_SynTone_fscore":0.7109712868,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6583850932,"tone-classification_SynTone_precision_modified":0.541342126,"tone-classification_SynTone_recall_modified":0.5629697742,"tone-classification_SynTone_fscore_modified":0.5384255059,"tone-classification_SynTone_acc":0.6666666667,"tone-classification_SynTone_precision":0.5481514609,"tone-classification_SynTone_recall":0.570051155,"tone-classification_SynTone_fscore":0.5451981537,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.5121418762}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0248447205,"tone-classification_SynTone_precision_modified":0.0258799172,"tone-classification_SynTone_recall_modified":0.0266193434,"tone-classification_SynTone_fscore_modified":0.0174833218,"tone-classification_SynTone_acc":0.4,"tone-classification_SynTone_precision":0.4166666667,"tone-classification_SynTone_recall":0.4285714286,"tone-classification_SynTone_fscore":0.2814814815,"tone-classification_SynTone_valid_output_ratio":0.0621118012,"nlu_score":0.3619547874}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.3928685253}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6832298137,"tone-classification_SynTone_precision_modified":0.6414593698,"tone-classification_SynTone_recall_modified":0.7386456733,"tone-classification_SynTone_fscore_modified":0.6520765046,"tone-classification_SynTone_acc":0.6832298137,"tone-classification_SynTone_precision":0.6414593698,"tone-classification_SynTone_recall":0.7386456733,"tone-classification_SynTone_fscore":0.6520765046,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.7790862291,"tone-classification_SynTone_recall_modified":0.747838795,"tone-classification_SynTone_fscore_modified":0.7624467793,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.7790862291,"tone-classification_SynTone_recall":0.747838795,"tone-classification_SynTone_fscore":0.7624467793,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8881987578,"tone-classification_SynTone_precision_modified":0.7743534483,"tone-classification_SynTone_recall_modified":0.6258725892,"tone-classification_SynTone_fscore_modified":0.6586899664,"tone-classification_SynTone_acc":0.8881987578,"tone-classification_SynTone_precision":0.7743534483,"tone-classification_SynTone_recall":0.6258725892,"tone-classification_SynTone_fscore":0.6586899664,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.850931677,"tone-classification_SynTone_precision_modified":0.7478203083,"tone-classification_SynTone_recall_modified":0.6482356204,"tone-classification_SynTone_fscore_modified":0.6776329308,"tone-classification_SynTone_acc":0.850931677,"tone-classification_SynTone_precision":0.7478203083,"tone-classification_SynTone_recall":0.6482356204,"tone-classification_SynTone_fscore":0.6776329308,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6335403727,"tone-classification_SynTone_precision_modified":0.5112987647,"tone-classification_SynTone_recall_modified":0.4686215462,"tone-classification_SynTone_fscore_modified":0.4864069611,"tone-classification_SynTone_acc":0.8947368421,"tone-classification_SynTone_precision":0.7220973783,"tone-classification_SynTone_recall":0.6618251661,"tone-classification_SynTone_fscore":0.6869431644,"tone-classification_SynTone_valid_output_ratio":0.7080745342,"nlu_score":0.4824528512}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6667611945,"tone-classification_SynTone_recall_modified":0.6423579109,"tone-classification_SynTone_fscore_modified":0.6405012061,"tone-classification_SynTone_acc":0.7763975155,"tone-classification_SynTone_precision":0.6667611945,"tone-classification_SynTone_recall":0.6423579109,"tone-classification_SynTone_fscore":0.6405012061,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.850931677,"tone-classification_SynTone_precision_modified":0.7233493732,"tone-classification_SynTone_recall_modified":0.6079611555,"tone-classification_SynTone_fscore_modified":0.6452957705,"tone-classification_SynTone_acc":0.8616352201,"tone-classification_SynTone_precision":0.7324481074,"tone-classification_SynTone_recall":0.6156084656,"tone-classification_SynTone_fscore":0.6534126984,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6944128198}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7210630279,"tone-classification_SynTone_recall_modified":0.6034242192,"tone-classification_SynTone_fscore_modified":0.6394308021,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7210630279,"tone-classification_SynTone_recall":0.6034242192,"tone-classification_SynTone_fscore":0.6394308021,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7453416149,"tone-classification_SynTone_precision_modified":0.5770588432,"tone-classification_SynTone_recall_modified":0.5070698686,"tone-classification_SynTone_fscore_modified":0.5221093948,"tone-classification_SynTone_acc":0.7547169811,"tone-classification_SynTone_precision":0.584317445,"tone-classification_SynTone_recall":0.513448106,"tone-classification_SynTone_fscore":0.5286768085,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.531045981}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8322981366,"tone-classification_SynTone_precision_modified":0.7291450859,"tone-classification_SynTone_recall_modified":0.5849377027,"tone-classification_SynTone_fscore_modified":0.6077296942,"tone-classification_SynTone_acc":0.8322981366,"tone-classification_SynTone_precision":0.7291450859,"tone-classification_SynTone_recall":0.5849377027,"tone-classification_SynTone_fscore":0.6077296942,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6397515528,"tone-classification_SynTone_precision_modified":0.5483185514,"tone-classification_SynTone_recall_modified":0.590333248,"tone-classification_SynTone_fscore_modified":0.530467546,"tone-classification_SynTone_acc":0.6397515528,"tone-classification_SynTone_precision":0.5483185514,"tone-classification_SynTone_recall":0.590333248,"tone-classification_SynTone_fscore":0.530467546,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.2919254658,"tone-classification_SynTone_precision_modified":0.193413297,"tone-classification_SynTone_recall_modified":0.2361166008,"tone-classification_SynTone_fscore_modified":0.1873840673,"tone-classification_SynTone_acc":0.5802469136,"tone-classification_SynTone_precision":0.3844387755,"tone-classification_SynTone_recall":0.4693181818,"tone-classification_SynTone_fscore":0.3724547511,"tone-classification_SynTone_valid_output_ratio":0.5031055901,"nlu_score":0.3916645306}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9068322981,"tone-classification_SynTone_precision_modified":0.8215992694,"tone-classification_SynTone_recall_modified":0.7311721283,"tone-classification_SynTone_fscore_modified":0.7679761027,"tone-classification_SynTone_acc":0.9068322981,"tone-classification_SynTone_precision":0.8215992694,"tone-classification_SynTone_recall":0.7311721283,"tone-classification_SynTone_fscore":0.7679761027,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.4347826087,"tone-classification_SynTone_precision_modified":0.3863322077,"tone-classification_SynTone_recall_modified":0.383431452,"tone-classification_SynTone_fscore_modified":0.3837887153,"tone-classification_SynTone_acc":0.7865168539,"tone-classification_SynTone_precision":0.6988706228,"tone-classification_SynTone_recall":0.6936231884,"tone-classification_SynTone_fscore":0.6942694738,"tone-classification_SynTone_valid_output_ratio":0.5527950311,"nlu_score":0.6361186163}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.347826087,"tone-classification_SynTone_precision_modified":0.4069335674,"tone-classification_SynTone_recall_modified":0.3420272309,"tone-classification_SynTone_fscore_modified":0.3323819164,"tone-classification_SynTone_acc":0.5333333333,"tone-classification_SynTone_precision":0.6239648033,"tone-classification_SynTone_recall":0.5244417541,"tone-classification_SynTone_fscore":0.5096522718,"tone-classification_SynTone_valid_output_ratio":0.652173913,"nlu_score":0.6255818412}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.1368924446}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0062111801,"nlu_score":0.046805056}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8757763975,"tone-classification_SynTone_precision_modified":0.7230276907,"tone-classification_SynTone_recall_modified":0.6373933265,"tone-classification_SynTone_fscore_modified":0.669281794,"tone-classification_SynTone_acc":0.8757763975,"tone-classification_SynTone_precision":0.7230276907,"tone-classification_SynTone_recall":0.6373933265,"tone-classification_SynTone_fscore":0.669281794,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8198757764,"tone-classification_SynTone_precision_modified":0.8180456965,"tone-classification_SynTone_recall_modified":0.5977640757,"tone-classification_SynTone_fscore_modified":0.6364434216,"tone-classification_SynTone_acc":0.8859060403,"tone-classification_SynTone_precision":0.8839285714,"tone-classification_SynTone_recall":0.6459061489,"tone-classification_SynTone_fscore":0.68770061,"tone-classification_SynTone_valid_output_ratio":0.9254658385,"nlu_score":0.7143086066}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5031055901,"tone-classification_SynTone_precision_modified":0.5228364877,"tone-classification_SynTone_recall_modified":0.5168736971,"tone-classification_SynTone_fscore_modified":0.4644759375,"tone-classification_SynTone_acc":0.5094339623,"tone-classification_SynTone_precision":0.5294130473,"tone-classification_SynTone_recall":0.523375253,"tone-classification_SynTone_fscore":0.4703184021,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6241793507}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6770186335,"tone-classification_SynTone_precision_modified":0.6041666667,"tone-classification_SynTone_recall_modified":0.5499765318,"tone-classification_SynTone_fscore_modified":0.5393404488,"tone-classification_SynTone_acc":0.6770186335,"tone-classification_SynTone_precision":0.6041666667,"tone-classification_SynTone_recall":0.5499765318,"tone-classification_SynTone_fscore":0.5393404488,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6297634971}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.5776397516,"tone-classification_SynTone_precision_modified":0.4784671979,"tone-classification_SynTone_recall_modified":0.4996211806,"tone-classification_SynTone_fscore_modified":0.4487276462,"tone-classification_SynTone_acc":0.5849056604,"tone-classification_SynTone_precision":0.4844856532,"tone-classification_SynTone_recall":0.5059057238,"tone-classification_SynTone_fscore":0.4543720191,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6552152029}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.8396595026,"tone-classification_SynTone_recall_modified":0.7058371736,"tone-classification_SynTone_fscore_modified":0.748745873,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.8396595026,"tone-classification_SynTone_recall":0.7058371736,"tone-classification_SynTone_fscore":0.748745873,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":null,"tone-classification_SynTone_precision_modified":null,"tone-classification_SynTone_recall_modified":null,"tone-classification_SynTone_fscore_modified":null,"tone-classification_SynTone_acc":null,"tone-classification_SynTone_precision":null,"tone-classification_SynTone_recall":null,"tone-classification_SynTone_fscore":null,"tone-classification_SynTone_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7005172378,"tone-classification_SynTone_recall_modified":0.6637288786,"tone-classification_SynTone_fscore_modified":0.6775611485,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7005172378,"tone-classification_SynTone_recall":0.6637288786,"tone-classification_SynTone_fscore":0.6775611485,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.699116864}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7888198758,"tone-classification_SynTone_precision_modified":0.6529418051,"tone-classification_SynTone_recall_modified":0.7187467998,"tone-classification_SynTone_fscore_modified":0.6745690521,"tone-classification_SynTone_acc":0.7888198758,"tone-classification_SynTone_precision":0.6529418051,"tone-classification_SynTone_recall":0.7187467998,"tone-classification_SynTone_fscore":0.6745690521,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6898261633}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.4968944099,"tone-classification_SynTone_precision_modified":0.5363835928,"tone-classification_SynTone_recall_modified":0.5772956136,"tone-classification_SynTone_fscore_modified":0.4755414981,"tone-classification_SynTone_acc":0.4968944099,"tone-classification_SynTone_precision":0.5363835928,"tone-classification_SynTone_recall":0.5772956136,"tone-classification_SynTone_fscore":0.4755414981,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8074534161,"tone-classification_SynTone_precision_modified":0.6799424424,"tone-classification_SynTone_recall_modified":0.6338304318,"tone-classification_SynTone_fscore_modified":0.6490392995,"tone-classification_SynTone_acc":0.8074534161,"tone-classification_SynTone_precision":0.6799424424,"tone-classification_SynTone_recall":0.6338304318,"tone-classification_SynTone_fscore":0.6490392995,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6714091535}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.3749414991}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6774948824,"tone-classification_SynTone_recall_modified":0.67683866,"tone-classification_SynTone_fscore_modified":0.668356732,"tone-classification_SynTone_acc":0.7911392405,"tone-classification_SynTone_precision":0.6903587093,"tone-classification_SynTone_recall":0.689690027,"tone-classification_SynTone_fscore":0.6810470497,"tone-classification_SynTone_valid_output_ratio":0.9813664596,"nlu_score":0.5661558794}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.1552795031,"tone-classification_SynTone_precision_modified":0.1130928528,"tone-classification_SynTone_recall_modified":0.2155401891,"tone-classification_SynTone_fscore_modified":0.1465713376,"tone-classification_SynTone_acc":0.3424657534,"tone-classification_SynTone_precision":0.2494239631,"tone-classification_SynTone_recall":0.4753694581,"tone-classification_SynTone_fscore":0.3232600733,"tone-classification_SynTone_valid_output_ratio":0.4534161491,"nlu_score":0.456845738}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7339379455,"tone-classification_SynTone_recall_modified":0.6971283495,"tone-classification_SynTone_fscore_modified":0.7109712868,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7339379455,"tone-classification_SynTone_recall":0.6971283495,"tone-classification_SynTone_fscore":0.7109712868,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6583850932,"tone-classification_SynTone_precision_modified":0.541342126,"tone-classification_SynTone_recall_modified":0.5629697742,"tone-classification_SynTone_fscore_modified":0.5384255059,"tone-classification_SynTone_acc":0.6666666667,"tone-classification_SynTone_precision":0.5481514609,"tone-classification_SynTone_recall":0.570051155,"tone-classification_SynTone_fscore":0.5451981537,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.5121418762}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0248447205,"tone-classification_SynTone_precision_modified":0.0258799172,"tone-classification_SynTone_recall_modified":0.0266193434,"tone-classification_SynTone_fscore_modified":0.0174833218,"tone-classification_SynTone_acc":0.4,"tone-classification_SynTone_precision":0.4166666667,"tone-classification_SynTone_recall":0.4285714286,"tone-classification_SynTone_fscore":0.2814814815,"tone-classification_SynTone_valid_output_ratio":0.0621118012,"nlu_score":0.3619547874}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.3928685253}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6832298137,"tone-classification_SynTone_precision_modified":0.6414593698,"tone-classification_SynTone_recall_modified":0.7386456733,"tone-classification_SynTone_fscore_modified":0.6520765046,"tone-classification_SynTone_acc":0.6832298137,"tone-classification_SynTone_precision":0.6414593698,"tone-classification_SynTone_recall":0.7386456733,"tone-classification_SynTone_fscore":0.6520765046,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6800109206}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9130434783,"tone-classification_SynTone_precision_modified":0.7790862291,"tone-classification_SynTone_recall_modified":0.747838795,"tone-classification_SynTone_fscore_modified":0.7624467793,"tone-classification_SynTone_acc":0.9130434783,"tone-classification_SynTone_precision":0.7790862291,"tone-classification_SynTone_recall":0.747838795,"tone-classification_SynTone_fscore":0.7624467793,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8881987578,"tone-classification_SynTone_precision_modified":0.7743534483,"tone-classification_SynTone_recall_modified":0.6258725892,"tone-classification_SynTone_fscore_modified":0.6586899664,"tone-classification_SynTone_acc":0.8881987578,"tone-classification_SynTone_precision":0.7743534483,"tone-classification_SynTone_recall":0.6258725892,"tone-classification_SynTone_fscore":0.6586899664,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.850931677,"tone-classification_SynTone_precision_modified":0.7478203083,"tone-classification_SynTone_recall_modified":0.6482356204,"tone-classification_SynTone_fscore_modified":0.6776329308,"tone-classification_SynTone_acc":0.850931677,"tone-classification_SynTone_precision":0.7478203083,"tone-classification_SynTone_recall":0.6482356204,"tone-classification_SynTone_fscore":0.6776329308,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6459120734}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6335403727,"tone-classification_SynTone_precision_modified":0.5112987647,"tone-classification_SynTone_recall_modified":0.4686215462,"tone-classification_SynTone_fscore_modified":0.4864069611,"tone-classification_SynTone_acc":0.8947368421,"tone-classification_SynTone_precision":0.7220973783,"tone-classification_SynTone_recall":0.6618251661,"tone-classification_SynTone_fscore":0.6869431644,"tone-classification_SynTone_valid_output_ratio":0.7080745342,"nlu_score":0.4824528512}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.7763975155,"tone-classification_SynTone_precision_modified":0.6667611945,"tone-classification_SynTone_recall_modified":0.6423579109,"tone-classification_SynTone_fscore_modified":0.6405012061,"tone-classification_SynTone_acc":0.7763975155,"tone-classification_SynTone_precision":0.6667611945,"tone-classification_SynTone_recall":0.6423579109,"tone-classification_SynTone_fscore":0.6405012061,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7050532433}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.850931677,"tone-classification_SynTone_precision_modified":0.7233493732,"tone-classification_SynTone_recall_modified":0.6079611555,"tone-classification_SynTone_fscore_modified":0.6452957705,"tone-classification_SynTone_acc":0.8616352201,"tone-classification_SynTone_precision":0.7324481074,"tone-classification_SynTone_recall":0.6156084656,"tone-classification_SynTone_fscore":0.6534126984,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.6944128198}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8260869565,"tone-classification_SynTone_precision_modified":0.7210630279,"tone-classification_SynTone_recall_modified":0.6034242192,"tone-classification_SynTone_fscore_modified":0.6394308021,"tone-classification_SynTone_acc":0.8260869565,"tone-classification_SynTone_precision":0.7210630279,"tone-classification_SynTone_recall":0.6034242192,"tone-classification_SynTone_fscore":0.6394308021,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6914202844}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.7453416149,"tone-classification_SynTone_precision_modified":0.5770588432,"tone-classification_SynTone_recall_modified":0.5070698686,"tone-classification_SynTone_fscore_modified":0.5221093948,"tone-classification_SynTone_acc":0.7547169811,"tone-classification_SynTone_precision":0.584317445,"tone-classification_SynTone_recall":0.513448106,"tone-classification_SynTone_fscore":0.5286768085,"tone-classification_SynTone_valid_output_ratio":0.9875776398,"nlu_score":0.531045981}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8322981366,"tone-classification_SynTone_precision_modified":0.7291450859,"tone-classification_SynTone_recall_modified":0.5849377027,"tone-classification_SynTone_fscore_modified":0.6077296942,"tone-classification_SynTone_acc":0.8322981366,"tone-classification_SynTone_precision":0.7291450859,"tone-classification_SynTone_recall":0.5849377027,"tone-classification_SynTone_fscore":0.6077296942,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6262096694}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.6397515528,"tone-classification_SynTone_precision_modified":0.5483185514,"tone-classification_SynTone_recall_modified":0.590333248,"tone-classification_SynTone_fscore_modified":0.530467546,"tone-classification_SynTone_acc":0.6397515528,"tone-classification_SynTone_precision":0.5483185514,"tone-classification_SynTone_recall":0.590333248,"tone-classification_SynTone_fscore":0.530467546,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.5968415875}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.2919254658,"tone-classification_SynTone_precision_modified":0.193413297,"tone-classification_SynTone_recall_modified":0.2361166008,"tone-classification_SynTone_fscore_modified":0.1873840673,"tone-classification_SynTone_acc":0.5802469136,"tone-classification_SynTone_precision":0.3844387755,"tone-classification_SynTone_recall":0.4693181818,"tone-classification_SynTone_fscore":0.3724547511,"tone-classification_SynTone_valid_output_ratio":0.5031055901,"nlu_score":0.3916645306}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.9068322981,"tone-classification_SynTone_precision_modified":0.8215992694,"tone-classification_SynTone_recall_modified":0.7311721283,"tone-classification_SynTone_fscore_modified":0.7679761027,"tone-classification_SynTone_acc":0.9068322981,"tone-classification_SynTone_precision":0.8215992694,"tone-classification_SynTone_recall":0.7311721283,"tone-classification_SynTone_fscore":0.7679761027,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.4347826087,"tone-classification_SynTone_precision_modified":0.3863322077,"tone-classification_SynTone_recall_modified":0.383431452,"tone-classification_SynTone_fscore_modified":0.3837887153,"tone-classification_SynTone_acc":0.7865168539,"tone-classification_SynTone_precision":0.6988706228,"tone-classification_SynTone_recall":0.6936231884,"tone-classification_SynTone_fscore":0.6942694738,"tone-classification_SynTone_valid_output_ratio":0.5527950311,"nlu_score":0.6361186163}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.347826087,"tone-classification_SynTone_precision_modified":0.4069335674,"tone-classification_SynTone_recall_modified":0.3420272309,"tone-classification_SynTone_fscore_modified":0.3323819164,"tone-classification_SynTone_acc":0.5333333333,"tone-classification_SynTone_precision":0.6239648033,"tone-classification_SynTone_recall":0.5244417541,"tone-classification_SynTone_fscore":0.5096522718,"tone-classification_SynTone_valid_output_ratio":0.652173913,"nlu_score":0.6255818412}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0,"nlu_score":0.1368924446}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","tone-classification_SynTone_acc_modified":0.0,"tone-classification_SynTone_precision_modified":0.0,"tone-classification_SynTone_recall_modified":0.0,"tone-classification_SynTone_fscore_modified":0.0,"tone-classification_SynTone_acc":0.0,"tone-classification_SynTone_precision":0.0,"tone-classification_SynTone_recall":0.0,"tone-classification_SynTone_fscore":0.0,"tone-classification_SynTone_valid_output_ratio":0.0062111801,"nlu_score":0.046805056}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","tone-classification_SynTone_acc_modified":0.8757763975,"tone-classification_SynTone_precision_modified":0.7230276907,"tone-classification_SynTone_recall_modified":0.6373933265,"tone-classification_SynTone_fscore_modified":0.669281794,"tone-classification_SynTone_acc":0.8757763975,"tone-classification_SynTone_precision":0.7230276907,"tone-classification_SynTone_recall":0.6373933265,"tone-classification_SynTone_fscore":0.669281794,"tone-classification_SynTone_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
leaderboard/boards_data/topic-classification_sid.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.792,"topic-classification_sid_precision_modified":0.662532811,"topic-classification_sid_recall_modified":0.6635073397,"topic-classification_sid_fscore_modified":0.6583038933,"topic-classification_sid_acc":0.792,"topic-classification_sid_precision":0.662532811,"topic-classification_sid_recall":0.6635073397,"topic-classification_sid_fscore":0.6583038933,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.562,"topic-classification_sid_precision_modified":0.4846723602,"topic-classification_sid_recall_modified":0.454656985,"topic-classification_sid_fscore_modified":0.424509489,"topic-classification_sid_acc":0.5928270042,"topic-classification_sid_precision":0.511257764,"topic-classification_sid_recall":0.4795959757,"topic-classification_sid_fscore":0.4477948196,"topic-classification_sid_valid_output_ratio":0.948,"nlu_score":0.6241793507}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.682,"topic-classification_sid_precision_modified":0.5730049986,"topic-classification_sid_recall_modified":0.5717337985,"topic-classification_sid_fscore_modified":0.5606248332,"topic-classification_sid_acc":0.7239915074,"topic-classification_sid_precision":0.6082855612,"topic-classification_sid_recall":0.6069360918,"topic-classification_sid_fscore":0.595143135,"topic-classification_sid_valid_output_ratio":0.942,"nlu_score":0.6297634971}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.458,"topic-classification_sid_precision_modified":0.3928192753,"topic-classification_sid_recall_modified":0.4072039595,"topic-classification_sid_fscore_modified":0.3544902396,"topic-classification_sid_acc":0.6378830084,"topic-classification_sid_precision":0.5471020547,"topic-classification_sid_recall":0.5671364338,"topic-classification_sid_fscore":0.4937189966,"topic-classification_sid_valid_output_ratio":0.718,"nlu_score":0.6552152029}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.748,"topic-classification_sid_precision_modified":0.6428566774,"topic-classification_sid_recall_modified":0.6633522535,"topic-classification_sid_fscore_modified":0.628605048,"topic-classification_sid_acc":0.748,"topic-classification_sid_precision":0.6428566774,"topic-classification_sid_recall":0.6633522535,"topic-classification_sid_fscore":0.628605048,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":null,"topic-classification_sid_precision_modified":null,"topic-classification_sid_recall_modified":null,"topic-classification_sid_fscore_modified":null,"topic-classification_sid_acc":null,"topic-classification_sid_precision":null,"topic-classification_sid_recall":null,"topic-classification_sid_fscore":null,"topic-classification_sid_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.658,"topic-classification_sid_precision_modified":0.6006169042,"topic-classification_sid_recall_modified":0.5559595512,"topic-classification_sid_fscore_modified":0.5315039094,"topic-classification_sid_acc":0.6659919028,"topic-classification_sid_precision":0.6079118464,"topic-classification_sid_recall":0.5627120963,"topic-classification_sid_fscore":0.5379594225,"topic-classification_sid_valid_output_ratio":0.988,"nlu_score":0.699116864}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.702,"topic-classification_sid_precision_modified":0.6070538637,"topic-classification_sid_recall_modified":0.5902772191,"topic-classification_sid_fscore_modified":0.5805725473,"topic-classification_sid_acc":0.7076612903,"topic-classification_sid_precision":0.6119494594,"topic-classification_sid_recall":0.5950375192,"topic-classification_sid_fscore":0.585254584,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.6898261633}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.636,"topic-classification_sid_precision_modified":0.6248180645,"topic-classification_sid_recall_modified":0.5200071748,"topic-classification_sid_fscore_modified":0.5212205085,"topic-classification_sid_acc":0.636,"topic-classification_sid_precision":0.6248180645,"topic-classification_sid_recall":0.5200071748,"topic-classification_sid_fscore":0.5212205085,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.742,"topic-classification_sid_precision_modified":0.6167254178,"topic-classification_sid_recall_modified":0.6125584783,"topic-classification_sid_fscore_modified":0.5990165281,"topic-classification_sid_acc":0.7449799197,"topic-classification_sid_precision":0.6192022267,"topic-classification_sid_recall":0.6150185525,"topic-classification_sid_fscore":0.6014222169,"topic-classification_sid_valid_output_ratio":0.996,"nlu_score":0.6714091535}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.75,"topic-classification_sid_precision_modified":0.6327306402,"topic-classification_sid_recall_modified":0.6347455045,"topic-classification_sid_fscore_modified":0.6231971632,"topic-classification_sid_acc":0.75,"topic-classification_sid_precision":0.6327306402,"topic-classification_sid_recall":0.6347455045,"topic-classification_sid_fscore":0.6231971632,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.3749414991}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.656,"topic-classification_sid_precision_modified":0.5819241823,"topic-classification_sid_recall_modified":0.5649560499,"topic-classification_sid_fscore_modified":0.5472284688,"topic-classification_sid_acc":0.7038626609,"topic-classification_sid_precision":0.6243821699,"topic-classification_sid_recall":0.6061760192,"topic-classification_sid_fscore":0.5871550095,"topic-classification_sid_valid_output_ratio":0.932,"nlu_score":0.5661558794}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.664,"topic-classification_sid_precision_modified":0.5946530353,"topic-classification_sid_recall_modified":0.5719701311,"topic-classification_sid_fscore_modified":0.5732259537,"topic-classification_sid_acc":0.6720647773,"topic-classification_sid_precision":0.6018755418,"topic-classification_sid_recall":0.5789171368,"topic-classification_sid_fscore":0.5801882122,"topic-classification_sid_valid_output_ratio":0.988,"nlu_score":0.456845738}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.77,"topic-classification_sid_precision_modified":0.6561573641,"topic-classification_sid_recall_modified":0.6752129415,"topic-classification_sid_fscore_modified":0.6425647774,"topic-classification_sid_acc":0.77,"topic-classification_sid_precision":0.6561573641,"topic-classification_sid_recall":0.6752129415,"topic-classification_sid_fscore":0.6425647774,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.368,"topic-classification_sid_precision_modified":0.6014683953,"topic-classification_sid_recall_modified":0.2970267773,"topic-classification_sid_fscore_modified":0.2767247094,"topic-classification_sid_acc":0.3739837398,"topic-classification_sid_precision":0.6112483692,"topic-classification_sid_recall":0.301856481,"topic-classification_sid_fscore":0.2812242981,"topic-classification_sid_valid_output_ratio":0.984,"nlu_score":0.5121418762}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.38,"topic-classification_sid_precision_modified":0.2019117794,"topic-classification_sid_recall_modified":0.1756256003,"topic-classification_sid_fscore_modified":0.1195613397,"topic-classification_sid_acc":0.3830645161,"topic-classification_sid_precision":0.2035401003,"topic-classification_sid_recall":0.1770419358,"topic-classification_sid_fscore":0.1205255441,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.3619547874}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.628,"topic-classification_sid_precision_modified":0.5459969989,"topic-classification_sid_recall_modified":0.52365232,"topic-classification_sid_fscore_modified":0.5199604173,"topic-classification_sid_acc":0.6840958606,"topic-classification_sid_precision":0.5947679727,"topic-classification_sid_recall":0.5704273638,"topic-classification_sid_fscore":0.5664056834,"topic-classification_sid_valid_output_ratio":0.918,"nlu_score":0.3928685253}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.594,"topic-classification_sid_precision_modified":0.5608272475,"topic-classification_sid_recall_modified":0.5329233323,"topic-classification_sid_fscore_modified":0.527792484,"topic-classification_sid_acc":0.6359743041,"topic-classification_sid_precision":0.6004574384,"topic-classification_sid_recall":0.5705817263,"topic-classification_sid_fscore":0.5650883126,"topic-classification_sid_valid_output_ratio":0.934,"nlu_score":0.6800109206}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.734,"topic-classification_sid_precision_modified":0.6555308571,"topic-classification_sid_recall_modified":0.6460010178,"topic-classification_sid_fscore_modified":0.6267962167,"topic-classification_sid_acc":0.734,"topic-classification_sid_precision":0.6555308571,"topic-classification_sid_recall":0.6460010178,"topic-classification_sid_fscore":0.6267962167,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.812,"topic-classification_sid_precision_modified":0.682958996,"topic-classification_sid_recall_modified":0.6932034561,"topic-classification_sid_fscore_modified":0.6715874758,"topic-classification_sid_acc":0.812,"topic-classification_sid_precision":0.682958996,"topic-classification_sid_recall":0.6932034561,"topic-classification_sid_fscore":0.6715874758,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.704,"topic-classification_sid_precision_modified":0.6679390306,"topic-classification_sid_recall_modified":0.6259469635,"topic-classification_sid_fscore_modified":0.6265189311,"topic-classification_sid_acc":0.7054108216,"topic-classification_sid_precision":0.6692775858,"topic-classification_sid_recall":0.6272013662,"topic-classification_sid_fscore":0.62777448,"topic-classification_sid_valid_output_ratio":0.998,"nlu_score":0.6459120734}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.618,"topic-classification_sid_precision_modified":0.6284518226,"topic-classification_sid_recall_modified":0.548003418,"topic-classification_sid_fscore_modified":0.5631911176,"topic-classification_sid_acc":0.618,"topic-classification_sid_precision":0.6284518226,"topic-classification_sid_recall":0.548003418,"topic-classification_sid_fscore":0.5631911176,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.4824528512}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.784,"topic-classification_sid_precision_modified":0.6819447861,"topic-classification_sid_recall_modified":0.6518325808,"topic-classification_sid_fscore_modified":0.6519138204,"topic-classification_sid_acc":0.7903225806,"topic-classification_sid_precision":0.6874443408,"topic-classification_sid_recall":0.6570892952,"topic-classification_sid_fscore":0.6571711899,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.7050532433}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.728,"topic-classification_sid_precision_modified":0.6159857721,"topic-classification_sid_recall_modified":0.6134167814,"topic-classification_sid_fscore_modified":0.60399392,"topic-classification_sid_acc":0.728,"topic-classification_sid_precision":0.6159857721,"topic-classification_sid_recall":0.6134167814,"topic-classification_sid_fscore":0.60399392,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.74,"topic-classification_sid_precision_modified":0.6308563241,"topic-classification_sid_recall_modified":0.6658780758,"topic-classification_sid_fscore_modified":0.634021247,"topic-classification_sid_acc":0.7505070994,"topic-classification_sid_precision":0.6398137161,"topic-classification_sid_recall":0.6753327341,"topic-classification_sid_fscore":0.6430235771,"topic-classification_sid_valid_output_ratio":0.986,"nlu_score":0.6914202844}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.412,"topic-classification_sid_precision_modified":0.3819473808,"topic-classification_sid_recall_modified":0.2194110821,"topic-classification_sid_fscore_modified":0.166159266,"topic-classification_sid_acc":0.4735632184,"topic-classification_sid_precision":0.439019978,"topic-classification_sid_recall":0.2521966461,"topic-classification_sid_fscore":0.190987662,"topic-classification_sid_valid_output_ratio":0.87,"nlu_score":0.531045981}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.462,"topic-classification_sid_precision_modified":0.4918469172,"topic-classification_sid_recall_modified":0.3261812324,"topic-classification_sid_fscore_modified":0.304777991,"topic-classification_sid_acc":0.5191011236,"topic-classification_sid_precision":0.5526369856,"topic-classification_sid_recall":0.3664957667,"topic-classification_sid_fscore":0.3424471809,"topic-classification_sid_valid_output_ratio":0.89,"nlu_score":0.6262096694}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.56,"topic-classification_sid_precision_modified":0.5309838171,"topic-classification_sid_recall_modified":0.4706044677,"topic-classification_sid_fscore_modified":0.484170357,"topic-classification_sid_acc":0.5702647658,"topic-classification_sid_precision":0.5407167181,"topic-classification_sid_recall":0.4792306188,"topic-classification_sid_fscore":0.4930451701,"topic-classification_sid_valid_output_ratio":0.982,"nlu_score":0.5968415875}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.078,"topic-classification_sid_precision_modified":0.1626278832,"topic-classification_sid_recall_modified":0.0869379377,"topic-classification_sid_fscore_modified":0.061595189,"topic-classification_sid_acc":0.1211180124,"topic-classification_sid_precision":0.2525277689,"topic-classification_sid_recall":0.1349967977,"topic-classification_sid_fscore":0.0956447035,"topic-classification_sid_valid_output_ratio":0.644,"nlu_score":0.3916645306}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.756,"topic-classification_sid_precision_modified":0.6530505866,"topic-classification_sid_recall_modified":0.6684817133,"topic-classification_sid_fscore_modified":0.6358572631,"topic-classification_sid_acc":0.756,"topic-classification_sid_precision":0.6530505866,"topic-classification_sid_recall":0.6684817133,"topic-classification_sid_fscore":0.6358572631,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.608,"topic-classification_sid_precision_modified":0.5971774069,"topic-classification_sid_recall_modified":0.5095088497,"topic-classification_sid_fscore_modified":0.5160494942,"topic-classification_sid_acc":0.6333333333,"topic-classification_sid_precision":0.6220597988,"topic-classification_sid_recall":0.5307383851,"topic-classification_sid_fscore":0.5375515565,"topic-classification_sid_valid_output_ratio":0.96,"nlu_score":0.6361186163}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.586,"topic-classification_sid_precision_modified":0.5883032084,"topic-classification_sid_recall_modified":0.4720717732,"topic-classification_sid_fscore_modified":0.4937437004,"topic-classification_sid_acc":0.586,"topic-classification_sid_precision":0.5883032084,"topic-classification_sid_recall":0.4720717732,"topic-classification_sid_fscore":0.4937437004,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6255818412}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.018,"topic-classification_sid_precision_modified":0.1357154412,"topic-classification_sid_recall_modified":0.1259808206,"topic-classification_sid_fscore_modified":0.0108903706,"topic-classification_sid_acc":0.0184804928,"topic-classification_sid_precision":0.1393382353,"topic-classification_sid_recall":0.1293437584,"topic-classification_sid_fscore":0.0111810786,"topic-classification_sid_valid_output_ratio":0.974,"nlu_score":0.1368924446}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.0,"topic-classification_sid_precision_modified":0.0,"topic-classification_sid_recall_modified":0.0,"topic-classification_sid_fscore_modified":0.0,"topic-classification_sid_acc":0.0,"topic-classification_sid_precision":0.0,"topic-classification_sid_recall":0.0,"topic-classification_sid_fscore":0.0,"topic-classification_sid_valid_output_ratio":0.0,"nlu_score":0.046805056}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.802,"topic-classification_sid_precision_modified":0.674947982,"topic-classification_sid_recall_modified":0.6917120865,"topic-classification_sid_fscore_modified":0.6712251699,"topic-classification_sid_acc":0.802,"topic-classification_sid_precision":0.674947982,"topic-classification_sid_recall":0.6917120865,"topic-classification_sid_fscore":0.6712251699,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.792,"topic-classification_sid_precision_modified":0.662532811,"topic-classification_sid_recall_modified":0.6635073397,"topic-classification_sid_fscore_modified":0.6583038933,"topic-classification_sid_acc":0.792,"topic-classification_sid_precision":0.662532811,"topic-classification_sid_recall":0.6635073397,"topic-classification_sid_fscore":0.6583038933,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7143086066}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.562,"topic-classification_sid_precision_modified":0.4846723602,"topic-classification_sid_recall_modified":0.454656985,"topic-classification_sid_fscore_modified":0.424509489,"topic-classification_sid_acc":0.5928270042,"topic-classification_sid_precision":0.511257764,"topic-classification_sid_recall":0.4795959757,"topic-classification_sid_fscore":0.4477948196,"topic-classification_sid_valid_output_ratio":0.948,"nlu_score":0.6241793507}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.682,"topic-classification_sid_precision_modified":0.5730049986,"topic-classification_sid_recall_modified":0.5717337985,"topic-classification_sid_fscore_modified":0.5606248332,"topic-classification_sid_acc":0.7239915074,"topic-classification_sid_precision":0.6082855612,"topic-classification_sid_recall":0.6069360918,"topic-classification_sid_fscore":0.595143135,"topic-classification_sid_valid_output_ratio":0.942,"nlu_score":0.6297634971}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.458,"topic-classification_sid_precision_modified":0.3928192753,"topic-classification_sid_recall_modified":0.4072039595,"topic-classification_sid_fscore_modified":0.3544902396,"topic-classification_sid_acc":0.6378830084,"topic-classification_sid_precision":0.5471020547,"topic-classification_sid_recall":0.5671364338,"topic-classification_sid_fscore":0.4937189966,"topic-classification_sid_valid_output_ratio":0.718,"nlu_score":0.6552152029}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.748,"topic-classification_sid_precision_modified":0.6428566774,"topic-classification_sid_recall_modified":0.6633522535,"topic-classification_sid_fscore_modified":0.628605048,"topic-classification_sid_acc":0.748,"topic-classification_sid_precision":0.6428566774,"topic-classification_sid_recall":0.6633522535,"topic-classification_sid_fscore":0.628605048,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6758278127}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":null,"topic-classification_sid_precision_modified":null,"topic-classification_sid_recall_modified":null,"topic-classification_sid_fscore_modified":null,"topic-classification_sid_acc":null,"topic-classification_sid_precision":null,"topic-classification_sid_recall":null,"topic-classification_sid_fscore":null,"topic-classification_sid_valid_output_ratio":null,"nlu_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.658,"topic-classification_sid_precision_modified":0.6006169042,"topic-classification_sid_recall_modified":0.5559595512,"topic-classification_sid_fscore_modified":0.5315039094,"topic-classification_sid_acc":0.6659919028,"topic-classification_sid_precision":0.6079118464,"topic-classification_sid_recall":0.5627120963,"topic-classification_sid_fscore":0.5379594225,"topic-classification_sid_valid_output_ratio":0.988,"nlu_score":0.699116864}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.702,"topic-classification_sid_precision_modified":0.6070538637,"topic-classification_sid_recall_modified":0.5902772191,"topic-classification_sid_fscore_modified":0.5805725473,"topic-classification_sid_acc":0.7076612903,"topic-classification_sid_precision":0.6119494594,"topic-classification_sid_recall":0.5950375192,"topic-classification_sid_fscore":0.585254584,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.6898261633}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.636,"topic-classification_sid_precision_modified":0.6248180645,"topic-classification_sid_recall_modified":0.5200071748,"topic-classification_sid_fscore_modified":0.5212205085,"topic-classification_sid_acc":0.636,"topic-classification_sid_precision":0.6248180645,"topic-classification_sid_recall":0.5200071748,"topic-classification_sid_fscore":0.5212205085,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6460328733}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.742,"topic-classification_sid_precision_modified":0.6167254178,"topic-classification_sid_recall_modified":0.6125584783,"topic-classification_sid_fscore_modified":0.5990165281,"topic-classification_sid_acc":0.7449799197,"topic-classification_sid_precision":0.6192022267,"topic-classification_sid_recall":0.6150185525,"topic-classification_sid_fscore":0.6014222169,"topic-classification_sid_valid_output_ratio":0.996,"nlu_score":0.6714091535}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.75,"topic-classification_sid_precision_modified":0.6327306402,"topic-classification_sid_recall_modified":0.6347455045,"topic-classification_sid_fscore_modified":0.6231971632,"topic-classification_sid_acc":0.75,"topic-classification_sid_precision":0.6327306402,"topic-classification_sid_recall":0.6347455045,"topic-classification_sid_fscore":0.6231971632,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.3749414991}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.656,"topic-classification_sid_precision_modified":0.5819241823,"topic-classification_sid_recall_modified":0.5649560499,"topic-classification_sid_fscore_modified":0.5472284688,"topic-classification_sid_acc":0.7038626609,"topic-classification_sid_precision":0.6243821699,"topic-classification_sid_recall":0.6061760192,"topic-classification_sid_fscore":0.5871550095,"topic-classification_sid_valid_output_ratio":0.932,"nlu_score":0.5661558794}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.664,"topic-classification_sid_precision_modified":0.5946530353,"topic-classification_sid_recall_modified":0.5719701311,"topic-classification_sid_fscore_modified":0.5732259537,"topic-classification_sid_acc":0.6720647773,"topic-classification_sid_precision":0.6018755418,"topic-classification_sid_recall":0.5789171368,"topic-classification_sid_fscore":0.5801882122,"topic-classification_sid_valid_output_ratio":0.988,"nlu_score":0.456845738}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.77,"topic-classification_sid_precision_modified":0.6561573641,"topic-classification_sid_recall_modified":0.6752129415,"topic-classification_sid_fscore_modified":0.6425647774,"topic-classification_sid_acc":0.77,"topic-classification_sid_precision":0.6561573641,"topic-classification_sid_recall":0.6752129415,"topic-classification_sid_fscore":0.6425647774,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6752949557}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.368,"topic-classification_sid_precision_modified":0.6014683953,"topic-classification_sid_recall_modified":0.2970267773,"topic-classification_sid_fscore_modified":0.2767247094,"topic-classification_sid_acc":0.3739837398,"topic-classification_sid_precision":0.6112483692,"topic-classification_sid_recall":0.301856481,"topic-classification_sid_fscore":0.2812242981,"topic-classification_sid_valid_output_ratio":0.984,"nlu_score":0.5121418762}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.38,"topic-classification_sid_precision_modified":0.2019117794,"topic-classification_sid_recall_modified":0.1756256003,"topic-classification_sid_fscore_modified":0.1195613397,"topic-classification_sid_acc":0.3830645161,"topic-classification_sid_precision":0.2035401003,"topic-classification_sid_recall":0.1770419358,"topic-classification_sid_fscore":0.1205255441,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.3619547874}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.628,"topic-classification_sid_precision_modified":0.5459969989,"topic-classification_sid_recall_modified":0.52365232,"topic-classification_sid_fscore_modified":0.5199604173,"topic-classification_sid_acc":0.6840958606,"topic-classification_sid_precision":0.5947679727,"topic-classification_sid_recall":0.5704273638,"topic-classification_sid_fscore":0.5664056834,"topic-classification_sid_valid_output_ratio":0.918,"nlu_score":0.3928685253}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.594,"topic-classification_sid_precision_modified":0.5608272475,"topic-classification_sid_recall_modified":0.5329233323,"topic-classification_sid_fscore_modified":0.527792484,"topic-classification_sid_acc":0.6359743041,"topic-classification_sid_precision":0.6004574384,"topic-classification_sid_recall":0.5705817263,"topic-classification_sid_fscore":0.5650883126,"topic-classification_sid_valid_output_ratio":0.934,"nlu_score":0.6800109206}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.734,"topic-classification_sid_precision_modified":0.6555308571,"topic-classification_sid_recall_modified":0.6460010178,"topic-classification_sid_fscore_modified":0.6267962167,"topic-classification_sid_acc":0.734,"topic-classification_sid_precision":0.6555308571,"topic-classification_sid_recall":0.6460010178,"topic-classification_sid_fscore":0.6267962167,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6833497104}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.812,"topic-classification_sid_precision_modified":0.682958996,"topic-classification_sid_recall_modified":0.6932034561,"topic-classification_sid_fscore_modified":0.6715874758,"topic-classification_sid_acc":0.812,"topic-classification_sid_precision":0.682958996,"topic-classification_sid_recall":0.6932034561,"topic-classification_sid_fscore":0.6715874758,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7207167537}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.704,"topic-classification_sid_precision_modified":0.6679390306,"topic-classification_sid_recall_modified":0.6259469635,"topic-classification_sid_fscore_modified":0.6265189311,"topic-classification_sid_acc":0.7054108216,"topic-classification_sid_precision":0.6692775858,"topic-classification_sid_recall":0.6272013662,"topic-classification_sid_fscore":0.62777448,"topic-classification_sid_valid_output_ratio":0.998,"nlu_score":0.6459120734}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.618,"topic-classification_sid_precision_modified":0.6284518226,"topic-classification_sid_recall_modified":0.548003418,"topic-classification_sid_fscore_modified":0.5631911176,"topic-classification_sid_acc":0.618,"topic-classification_sid_precision":0.6284518226,"topic-classification_sid_recall":0.548003418,"topic-classification_sid_fscore":0.5631911176,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.4824528512}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.784,"topic-classification_sid_precision_modified":0.6819447861,"topic-classification_sid_recall_modified":0.6518325808,"topic-classification_sid_fscore_modified":0.6519138204,"topic-classification_sid_acc":0.7903225806,"topic-classification_sid_precision":0.6874443408,"topic-classification_sid_recall":0.6570892952,"topic-classification_sid_fscore":0.6571711899,"topic-classification_sid_valid_output_ratio":0.992,"nlu_score":0.7050532433}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.728,"topic-classification_sid_precision_modified":0.6159857721,"topic-classification_sid_recall_modified":0.6134167814,"topic-classification_sid_fscore_modified":0.60399392,"topic-classification_sid_acc":0.728,"topic-classification_sid_precision":0.6159857721,"topic-classification_sid_recall":0.6134167814,"topic-classification_sid_fscore":0.60399392,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6944128198}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.74,"topic-classification_sid_precision_modified":0.6308563241,"topic-classification_sid_recall_modified":0.6658780758,"topic-classification_sid_fscore_modified":0.634021247,"topic-classification_sid_acc":0.7505070994,"topic-classification_sid_precision":0.6398137161,"topic-classification_sid_recall":0.6753327341,"topic-classification_sid_fscore":0.6430235771,"topic-classification_sid_valid_output_ratio":0.986,"nlu_score":0.6914202844}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.412,"topic-classification_sid_precision_modified":0.3819473808,"topic-classification_sid_recall_modified":0.2194110821,"topic-classification_sid_fscore_modified":0.166159266,"topic-classification_sid_acc":0.4735632184,"topic-classification_sid_precision":0.439019978,"topic-classification_sid_recall":0.2521966461,"topic-classification_sid_fscore":0.190987662,"topic-classification_sid_valid_output_ratio":0.87,"nlu_score":0.531045981}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.462,"topic-classification_sid_precision_modified":0.4918469172,"topic-classification_sid_recall_modified":0.3261812324,"topic-classification_sid_fscore_modified":0.304777991,"topic-classification_sid_acc":0.5191011236,"topic-classification_sid_precision":0.5526369856,"topic-classification_sid_recall":0.3664957667,"topic-classification_sid_fscore":0.3424471809,"topic-classification_sid_valid_output_ratio":0.89,"nlu_score":0.6262096694}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.56,"topic-classification_sid_precision_modified":0.5309838171,"topic-classification_sid_recall_modified":0.4706044677,"topic-classification_sid_fscore_modified":0.484170357,"topic-classification_sid_acc":0.5702647658,"topic-classification_sid_precision":0.5407167181,"topic-classification_sid_recall":0.4792306188,"topic-classification_sid_fscore":0.4930451701,"topic-classification_sid_valid_output_ratio":0.982,"nlu_score":0.5968415875}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.078,"topic-classification_sid_precision_modified":0.1626278832,"topic-classification_sid_recall_modified":0.0869379377,"topic-classification_sid_fscore_modified":0.061595189,"topic-classification_sid_acc":0.1211180124,"topic-classification_sid_precision":0.2525277689,"topic-classification_sid_recall":0.1349967977,"topic-classification_sid_fscore":0.0956447035,"topic-classification_sid_valid_output_ratio":0.644,"nlu_score":0.3916645306}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.756,"topic-classification_sid_precision_modified":0.6530505866,"topic-classification_sid_recall_modified":0.6684817133,"topic-classification_sid_fscore_modified":0.6358572631,"topic-classification_sid_acc":0.756,"topic-classification_sid_precision":0.6530505866,"topic-classification_sid_recall":0.6684817133,"topic-classification_sid_fscore":0.6358572631,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.7146808531}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.608,"topic-classification_sid_precision_modified":0.5971774069,"topic-classification_sid_recall_modified":0.5095088497,"topic-classification_sid_fscore_modified":0.5160494942,"topic-classification_sid_acc":0.6333333333,"topic-classification_sid_precision":0.6220597988,"topic-classification_sid_recall":0.5307383851,"topic-classification_sid_fscore":0.5375515565,"topic-classification_sid_valid_output_ratio":0.96,"nlu_score":0.6361186163}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.586,"topic-classification_sid_precision_modified":0.5883032084,"topic-classification_sid_recall_modified":0.4720717732,"topic-classification_sid_fscore_modified":0.4937437004,"topic-classification_sid_acc":0.586,"topic-classification_sid_precision":0.5883032084,"topic-classification_sid_recall":0.4720717732,"topic-classification_sid_fscore":0.4937437004,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6255818412}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.018,"topic-classification_sid_precision_modified":0.1357154412,"topic-classification_sid_recall_modified":0.1259808206,"topic-classification_sid_fscore_modified":0.0108903706,"topic-classification_sid_acc":0.0184804928,"topic-classification_sid_precision":0.1393382353,"topic-classification_sid_recall":0.1293437584,"topic-classification_sid_fscore":0.0111810786,"topic-classification_sid_valid_output_ratio":0.974,"nlu_score":0.1368924446}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","topic-classification_sid_acc_modified":0.0,"topic-classification_sid_precision_modified":0.0,"topic-classification_sid_recall_modified":0.0,"topic-classification_sid_fscore_modified":0.0,"topic-classification_sid_acc":0.0,"topic-classification_sid_precision":0.0,"topic-classification_sid_recall":0.0,"topic-classification_sid_fscore":0.0,"topic-classification_sid_valid_output_ratio":0.0,"nlu_score":0.046805056}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","topic-classification_sid_acc_modified":0.802,"topic-classification_sid_precision_modified":0.674947982,"topic-classification_sid_recall_modified":0.6917120865,"topic-classification_sid_fscore_modified":0.6712251699,"topic-classification_sid_acc":0.802,"topic-classification_sid_precision":0.674947982,"topic-classification_sid_recall":0.6917120865,"topic-classification_sid_fscore":0.6712251699,"topic-classification_sid_valid_output_ratio":1.0,"nlu_score":0.6992555201}
|
leaderboard/boards_data/translation-ar2fa_ar2fa.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1471879954,"translation-ar2fa_ar2fa_sahife_bleu":0.1294214814,"translation-ar2fa_ar2fa_nahj_bleu":0.0642841927,"translation-ar2fa_ar2fa_quran_bleu":0.2437131219,"nlg_score":0.1779340777}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0186923531,"translation-ar2fa_ar2fa_sahife_bleu":0.0174521967,"translation-ar2fa_ar2fa_nahj_bleu":0.0097734226,"translation-ar2fa_ar2fa_quran_bleu":0.0284054936,"nlg_score":0.0949943578}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1414109272,"translation-ar2fa_ar2fa_sahife_bleu":0.136408042,"translation-ar2fa_ar2fa_nahj_bleu":0.0653197648,"translation-ar2fa_ar2fa_quran_bleu":0.2187004167,"nlg_score":0.1880477876}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0226935201,"translation-ar2fa_ar2fa_sahife_bleu":0.0196359142,"translation-ar2fa_ar2fa_nahj_bleu":0.010693835,"translation-ar2fa_ar2fa_quran_bleu":0.0371508269,"nlg_score":0.0940241349}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1534130086,"translation-ar2fa_ar2fa_sahife_bleu":0.1250461134,"translation-ar2fa_ar2fa_nahj_bleu":0.0624466634,"translation-ar2fa_ar2fa_quran_bleu":0.2681979318,"nlg_score":0.194675133}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":null,"translation-ar2fa_ar2fa_sahife_bleu":null,"translation-ar2fa_ar2fa_nahj_bleu":null,"translation-ar2fa_ar2fa_quran_bleu":null,"nlg_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.032619077,"translation-ar2fa_ar2fa_sahife_bleu":0.0333185867,"translation-ar2fa_ar2fa_nahj_bleu":0.0106299838,"translation-ar2fa_ar2fa_quran_bleu":0.0528092057,"nlg_score":0.1196804312}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0465792583,"translation-ar2fa_ar2fa_sahife_bleu":0.023795336,"translation-ar2fa_ar2fa_nahj_bleu":0.0121091058,"translation-ar2fa_ar2fa_quran_bleu":0.1021098256,"nlg_score":0.1067134448}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0910450298,"translation-ar2fa_ar2fa_sahife_bleu":0.0862679894,"translation-ar2fa_ar2fa_nahj_bleu":0.0558129824,"translation-ar2fa_ar2fa_quran_bleu":0.1292925153,"nlg_score":0.16056333}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0985860814,"translation-ar2fa_ar2fa_sahife_bleu":0.0857687109,"translation-ar2fa_ar2fa_nahj_bleu":0.0622600203,"translation-ar2fa_ar2fa_quran_bleu":0.1459132099,"nlg_score":0.1679338638}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0581992714,"translation-ar2fa_ar2fa_sahife_bleu":0.0540221076,"translation-ar2fa_ar2fa_nahj_bleu":0.0233017704,"translation-ar2fa_ar2fa_quran_bleu":0.095529061,"nlg_score":0.1089333827}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0594554675,"translation-ar2fa_ar2fa_sahife_bleu":0.0539986603,"translation-ar2fa_ar2fa_nahj_bleu":0.035240584,"translation-ar2fa_ar2fa_quran_bleu":0.0879164142,"nlg_score":0.1319091735}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0439654732,"translation-ar2fa_ar2fa_sahife_bleu":0.0313283708,"translation-ar2fa_ar2fa_nahj_bleu":0.0308641232,"translation-ar2fa_ar2fa_quran_bleu":0.0690488581,"nlg_score":0.112015688}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0302818033,"translation-ar2fa_ar2fa_sahife_bleu":0.0272381325,"translation-ar2fa_ar2fa_nahj_bleu":0.0129029913,"translation-ar2fa_ar2fa_quran_bleu":0.0498353456,"nlg_score":0.0934094344}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0636385541,"translation-ar2fa_ar2fa_sahife_bleu":0.0557180428,"translation-ar2fa_ar2fa_nahj_bleu":0.0539968488,"translation-ar2fa_ar2fa_quran_bleu":0.0807186853,"nlg_score":0.1389297212}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.01007549,"translation-ar2fa_ar2fa_sahife_bleu":0.0116017776,"translation-ar2fa_ar2fa_nahj_bleu":0.0067782437,"translation-ar2fa_ar2fa_quran_bleu":0.0116815864,"nlg_score":0.0682994522}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0608470749,"translation-ar2fa_ar2fa_sahife_bleu":0.0636783644,"translation-ar2fa_ar2fa_nahj_bleu":0.0258604511,"translation-ar2fa_ar2fa_quran_bleu":0.091253078,"nlg_score":0.1196400535}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1302111402,"translation-ar2fa_ar2fa_sahife_bleu":0.1104606951,"translation-ar2fa_ar2fa_nahj_bleu":0.0742081609,"translation-ar2fa_ar2fa_quran_bleu":0.2031644157,"nlg_score":0.2010896964}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.143500711,"translation-ar2fa_ar2fa_sahife_bleu":0.1221294429,"translation-ar2fa_ar2fa_nahj_bleu":0.069521493,"translation-ar2fa_ar2fa_quran_bleu":0.235152236,"nlg_score":0.1901206806}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1371181815,"translation-ar2fa_ar2fa_sahife_bleu":0.1148434226,"translation-ar2fa_ar2fa_nahj_bleu":0.0635817712,"translation-ar2fa_ar2fa_quran_bleu":0.2292525303,"nlg_score":0.1764906292}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1397574972,"translation-ar2fa_ar2fa_sahife_bleu":0.1273211367,"translation-ar2fa_ar2fa_nahj_bleu":0.0658485892,"translation-ar2fa_ar2fa_quran_bleu":0.2224073202,"nlg_score":0.1810678527}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0396780688,"translation-ar2fa_ar2fa_sahife_bleu":0.0355366473,"translation-ar2fa_ar2fa_nahj_bleu":0.0160671452,"translation-ar2fa_ar2fa_quran_bleu":0.0662498677,"nlg_score":0.1137933652}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.15661924,"translation-ar2fa_ar2fa_sahife_bleu":0.1122809429,"translation-ar2fa_ar2fa_nahj_bleu":0.0629397909,"translation-ar2fa_ar2fa_quran_bleu":0.2899530138,"nlg_score":0.178231145}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0772188097,"translation-ar2fa_ar2fa_sahife_bleu":0.0610321929,"translation-ar2fa_ar2fa_nahj_bleu":0.0273061824,"translation-ar2fa_ar2fa_quran_bleu":0.1408224224,"nlg_score":0.1368740087}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1430472928,"translation-ar2fa_ar2fa_sahife_bleu":0.1326018858,"translation-ar2fa_ar2fa_nahj_bleu":0.0532180128,"translation-ar2fa_ar2fa_quran_bleu":0.2388305158,"nlg_score":0.1659339021}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1144863268,"translation-ar2fa_ar2fa_sahife_bleu":0.1190971594,"translation-ar2fa_ar2fa_nahj_bleu":0.0648109303,"translation-ar2fa_ar2fa_quran_bleu":0.157067121,"nlg_score":0.1641995602}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1315367808,"translation-ar2fa_ar2fa_sahife_bleu":0.1063921688,"translation-ar2fa_ar2fa_nahj_bleu":0.0642188893,"translation-ar2fa_ar2fa_quran_bleu":0.2206333896,"nlg_score":0.1665903777}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0821020713,"translation-ar2fa_ar2fa_sahife_bleu":0.0730469461,"translation-ar2fa_ar2fa_nahj_bleu":0.0579031327,"translation-ar2fa_ar2fa_quran_bleu":0.1141461882,"nlg_score":0.1557270864}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0352516229,"translation-ar2fa_ar2fa_sahife_bleu":0.031818336,"translation-ar2fa_ar2fa_nahj_bleu":0.0219225394,"translation-ar2fa_ar2fa_quran_bleu":0.0513475391,"nlg_score":0.0944140383}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1542520457,"translation-ar2fa_ar2fa_sahife_bleu":0.1283925803,"translation-ar2fa_ar2fa_nahj_bleu":0.0660434951,"translation-ar2fa_ar2fa_quran_bleu":0.2639096342,"nlg_score":0.18964968}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0334933514,"translation-ar2fa_ar2fa_sahife_bleu":0.0313812328,"translation-ar2fa_ar2fa_nahj_bleu":0.013862611,"translation-ar2fa_ar2fa_quran_bleu":0.0542546733,"nlg_score":0.0880621978}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.090408759,"translation-ar2fa_ar2fa_sahife_bleu":0.0778953352,"translation-ar2fa_ar2fa_nahj_bleu":0.0610049198,"translation-ar2fa_ar2fa_quran_bleu":0.13085583,"nlg_score":0.164118288}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0381647164,"translation-ar2fa_ar2fa_sahife_bleu":0.0517672982,"translation-ar2fa_ar2fa_nahj_bleu":0.0235396776,"translation-ar2fa_ar2fa_quran_bleu":0.0384559215,"nlg_score":0.1129755187}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0056487667,"translation-ar2fa_ar2fa_sahife_bleu":0.0084650778,"translation-ar2fa_ar2fa_nahj_bleu":0.0073044047,"translation-ar2fa_ar2fa_quran_bleu":0.0012595996,"nlg_score":0.0823387318}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0228295116,"translation-ar2fa_ar2fa_sahife_bleu":0.024915354,"translation-ar2fa_ar2fa_nahj_bleu":0.0099254821,"translation-ar2fa_ar2fa_quran_bleu":0.0326798966,"nlg_score":0.1151518212}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1471879954,"translation-ar2fa_ar2fa_sahife_bleu":0.1294214814,"translation-ar2fa_ar2fa_nahj_bleu":0.0642841927,"translation-ar2fa_ar2fa_quran_bleu":0.2437131219,"nlg_score":0.1779340777}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0186923531,"translation-ar2fa_ar2fa_sahife_bleu":0.0174521967,"translation-ar2fa_ar2fa_nahj_bleu":0.0097734226,"translation-ar2fa_ar2fa_quran_bleu":0.0284054936,"nlg_score":0.0949943578}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1414109272,"translation-ar2fa_ar2fa_sahife_bleu":0.136408042,"translation-ar2fa_ar2fa_nahj_bleu":0.0653197648,"translation-ar2fa_ar2fa_quran_bleu":0.2187004167,"nlg_score":0.1880477876}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0226935201,"translation-ar2fa_ar2fa_sahife_bleu":0.0196359142,"translation-ar2fa_ar2fa_nahj_bleu":0.010693835,"translation-ar2fa_ar2fa_quran_bleu":0.0371508269,"nlg_score":0.0940241349}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1534130086,"translation-ar2fa_ar2fa_sahife_bleu":0.1250461134,"translation-ar2fa_ar2fa_nahj_bleu":0.0624466634,"translation-ar2fa_ar2fa_quran_bleu":0.2681979318,"nlg_score":0.194675133}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":null,"translation-ar2fa_ar2fa_sahife_bleu":null,"translation-ar2fa_ar2fa_nahj_bleu":null,"translation-ar2fa_ar2fa_quran_bleu":null,"nlg_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.032619077,"translation-ar2fa_ar2fa_sahife_bleu":0.0333185867,"translation-ar2fa_ar2fa_nahj_bleu":0.0106299838,"translation-ar2fa_ar2fa_quran_bleu":0.0528092057,"nlg_score":0.1196804312}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0465792583,"translation-ar2fa_ar2fa_sahife_bleu":0.023795336,"translation-ar2fa_ar2fa_nahj_bleu":0.0121091058,"translation-ar2fa_ar2fa_quran_bleu":0.1021098256,"nlg_score":0.1067134448}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0910450298,"translation-ar2fa_ar2fa_sahife_bleu":0.0862679894,"translation-ar2fa_ar2fa_nahj_bleu":0.0558129824,"translation-ar2fa_ar2fa_quran_bleu":0.1292925153,"nlg_score":0.16056333}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0985860814,"translation-ar2fa_ar2fa_sahife_bleu":0.0857687109,"translation-ar2fa_ar2fa_nahj_bleu":0.0622600203,"translation-ar2fa_ar2fa_quran_bleu":0.1459132099,"nlg_score":0.1679338638}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0581992714,"translation-ar2fa_ar2fa_sahife_bleu":0.0540221076,"translation-ar2fa_ar2fa_nahj_bleu":0.0233017704,"translation-ar2fa_ar2fa_quran_bleu":0.095529061,"nlg_score":0.1089333827}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0594554675,"translation-ar2fa_ar2fa_sahife_bleu":0.0539986603,"translation-ar2fa_ar2fa_nahj_bleu":0.035240584,"translation-ar2fa_ar2fa_quran_bleu":0.0879164142,"nlg_score":0.1319091735}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0439654732,"translation-ar2fa_ar2fa_sahife_bleu":0.0313283708,"translation-ar2fa_ar2fa_nahj_bleu":0.0308641232,"translation-ar2fa_ar2fa_quran_bleu":0.0690488581,"nlg_score":0.112015688}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0302818033,"translation-ar2fa_ar2fa_sahife_bleu":0.0272381325,"translation-ar2fa_ar2fa_nahj_bleu":0.0129029913,"translation-ar2fa_ar2fa_quran_bleu":0.0498353456,"nlg_score":0.0934094344}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0636385541,"translation-ar2fa_ar2fa_sahife_bleu":0.0557180428,"translation-ar2fa_ar2fa_nahj_bleu":0.0539968488,"translation-ar2fa_ar2fa_quran_bleu":0.0807186853,"nlg_score":0.1389297212}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.01007549,"translation-ar2fa_ar2fa_sahife_bleu":0.0116017776,"translation-ar2fa_ar2fa_nahj_bleu":0.0067782437,"translation-ar2fa_ar2fa_quran_bleu":0.0116815864,"nlg_score":0.0682994522}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0608470749,"translation-ar2fa_ar2fa_sahife_bleu":0.0636783644,"translation-ar2fa_ar2fa_nahj_bleu":0.0258604511,"translation-ar2fa_ar2fa_quran_bleu":0.091253078,"nlg_score":0.1196400535}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1302111402,"translation-ar2fa_ar2fa_sahife_bleu":0.1104606951,"translation-ar2fa_ar2fa_nahj_bleu":0.0742081609,"translation-ar2fa_ar2fa_quran_bleu":0.2031644157,"nlg_score":0.2010896964}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.143500711,"translation-ar2fa_ar2fa_sahife_bleu":0.1221294429,"translation-ar2fa_ar2fa_nahj_bleu":0.069521493,"translation-ar2fa_ar2fa_quran_bleu":0.235152236,"nlg_score":0.1901206806}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1371181815,"translation-ar2fa_ar2fa_sahife_bleu":0.1148434226,"translation-ar2fa_ar2fa_nahj_bleu":0.0635817712,"translation-ar2fa_ar2fa_quran_bleu":0.2292525303,"nlg_score":0.1764906292}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1397574972,"translation-ar2fa_ar2fa_sahife_bleu":0.1273211367,"translation-ar2fa_ar2fa_nahj_bleu":0.0658485892,"translation-ar2fa_ar2fa_quran_bleu":0.2224073202,"nlg_score":0.1810678527}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0396780688,"translation-ar2fa_ar2fa_sahife_bleu":0.0355366473,"translation-ar2fa_ar2fa_nahj_bleu":0.0160671452,"translation-ar2fa_ar2fa_quran_bleu":0.0662498677,"nlg_score":0.1137933652}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.15661924,"translation-ar2fa_ar2fa_sahife_bleu":0.1122809429,"translation-ar2fa_ar2fa_nahj_bleu":0.0629397909,"translation-ar2fa_ar2fa_quran_bleu":0.2899530138,"nlg_score":0.178231145}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0772188097,"translation-ar2fa_ar2fa_sahife_bleu":0.0610321929,"translation-ar2fa_ar2fa_nahj_bleu":0.0273061824,"translation-ar2fa_ar2fa_quran_bleu":0.1408224224,"nlg_score":0.1368740087}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1430472928,"translation-ar2fa_ar2fa_sahife_bleu":0.1326018858,"translation-ar2fa_ar2fa_nahj_bleu":0.0532180128,"translation-ar2fa_ar2fa_quran_bleu":0.2388305158,"nlg_score":0.1659339021}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.1144863268,"translation-ar2fa_ar2fa_sahife_bleu":0.1190971594,"translation-ar2fa_ar2fa_nahj_bleu":0.0648109303,"translation-ar2fa_ar2fa_quran_bleu":0.157067121,"nlg_score":0.1641995602}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1315367808,"translation-ar2fa_ar2fa_sahife_bleu":0.1063921688,"translation-ar2fa_ar2fa_nahj_bleu":0.0642188893,"translation-ar2fa_ar2fa_quran_bleu":0.2206333896,"nlg_score":0.1665903777}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0821020713,"translation-ar2fa_ar2fa_sahife_bleu":0.0730469461,"translation-ar2fa_ar2fa_nahj_bleu":0.0579031327,"translation-ar2fa_ar2fa_quran_bleu":0.1141461882,"nlg_score":0.1557270864}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0352516229,"translation-ar2fa_ar2fa_sahife_bleu":0.031818336,"translation-ar2fa_ar2fa_nahj_bleu":0.0219225394,"translation-ar2fa_ar2fa_quran_bleu":0.0513475391,"nlg_score":0.0944140383}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.1542520457,"translation-ar2fa_ar2fa_sahife_bleu":0.1283925803,"translation-ar2fa_ar2fa_nahj_bleu":0.0660434951,"translation-ar2fa_ar2fa_quran_bleu":0.2639096342,"nlg_score":0.18964968}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0334933514,"translation-ar2fa_ar2fa_sahife_bleu":0.0313812328,"translation-ar2fa_ar2fa_nahj_bleu":0.013862611,"translation-ar2fa_ar2fa_quran_bleu":0.0542546733,"nlg_score":0.0880621978}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.090408759,"translation-ar2fa_ar2fa_sahife_bleu":0.0778953352,"translation-ar2fa_ar2fa_nahj_bleu":0.0610049198,"translation-ar2fa_ar2fa_quran_bleu":0.13085583,"nlg_score":0.164118288}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0381647164,"translation-ar2fa_ar2fa_sahife_bleu":0.0517672982,"translation-ar2fa_ar2fa_nahj_bleu":0.0235396776,"translation-ar2fa_ar2fa_quran_bleu":0.0384559215,"nlg_score":0.1129755187}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-ar2fa_ar2fa_bleu":0.0056487667,"translation-ar2fa_ar2fa_sahife_bleu":0.0084650778,"translation-ar2fa_ar2fa_nahj_bleu":0.0073044047,"translation-ar2fa_ar2fa_quran_bleu":0.0012595996,"nlg_score":0.0823387318}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-ar2fa_ar2fa_bleu":0.0228295116,"translation-ar2fa_ar2fa_sahife_bleu":0.024915354,"translation-ar2fa_ar2fa_nahj_bleu":0.0099254821,"translation-ar2fa_ar2fa_quran_bleu":0.0326798966,"nlg_score":0.1151518212}
|
leaderboard/boards_data/translation-en2fa_en2fa.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1799534349,"translation-en2fa_en2fa_epoque_bleu":0.4004213933,"translation-en2fa_en2fa_mizan_bleu":0.1703393716,"translation-en2fa_en2fa_quran_bleu":0.1225698669,"translation-en2fa_en2fa_sahife_bleu":0.0832764011,"translation-en2fa_en2fa_nahj_bleu":0.0439108113,"translation-en2fa_en2fa_tep_bleu":0.0595417592,"nlg_score":0.1779340777}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0366912467,"translation-en2fa_en2fa_epoque_bleu":0.0623359898,"translation-en2fa_en2fa_mizan_bleu":0.0442763597,"translation-en2fa_en2fa_quran_bleu":0.0309309044,"translation-en2fa_en2fa_sahife_bleu":0.0330663757,"translation-en2fa_en2fa_nahj_bleu":0.0124767847,"translation-en2fa_en2fa_tep_bleu":0.0116612774,"nlg_score":0.0949943578}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2018158808,"translation-en2fa_en2fa_epoque_bleu":0.4332944681,"translation-en2fa_en2fa_mizan_bleu":0.1925182751,"translation-en2fa_en2fa_quran_bleu":0.1530925462,"translation-en2fa_en2fa_sahife_bleu":0.1026499453,"translation-en2fa_en2fa_nahj_bleu":0.051968827,"translation-en2fa_en2fa_tep_bleu":0.0708487287,"nlg_score":0.1880477876}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0600629844,"translation-en2fa_en2fa_epoque_bleu":0.1538216141,"translation-en2fa_en2fa_mizan_bleu":0.0548001026,"translation-en2fa_en2fa_quran_bleu":0.0217436833,"translation-en2fa_en2fa_sahife_bleu":0.0205645274,"translation-en2fa_en2fa_nahj_bleu":0.0172427415,"translation-en2fa_en2fa_tep_bleu":0.0093260061,"nlg_score":0.0940241349}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1974288311,"translation-en2fa_en2fa_epoque_bleu":0.4102902123,"translation-en2fa_en2fa_mizan_bleu":0.1898606624,"translation-en2fa_en2fa_quran_bleu":0.1638084791,"translation-en2fa_en2fa_sahife_bleu":0.1095493859,"translation-en2fa_en2fa_nahj_bleu":0.0487097316,"translation-en2fa_en2fa_tep_bleu":0.0737497745,"nlg_score":0.194675133}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":null,"translation-en2fa_en2fa_epoque_bleu":null,"translation-en2fa_en2fa_mizan_bleu":null,"translation-en2fa_en2fa_quran_bleu":null,"translation-en2fa_en2fa_sahife_bleu":null,"translation-en2fa_en2fa_nahj_bleu":null,"translation-en2fa_en2fa_tep_bleu":null,"nlg_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.08817485,"translation-en2fa_en2fa_epoque_bleu":0.1886801725,"translation-en2fa_en2fa_mizan_bleu":0.0879987558,"translation-en2fa_en2fa_quran_bleu":0.0657922023,"translation-en2fa_en2fa_sahife_bleu":0.0296141618,"translation-en2fa_en2fa_nahj_bleu":0.0192266597,"translation-en2fa_en2fa_tep_bleu":0.0366296874,"nlg_score":0.1196804312}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0439502467,"translation-en2fa_en2fa_epoque_bleu":0.0932804064,"translation-en2fa_en2fa_mizan_bleu":0.0446467932,"translation-en2fa_en2fa_quran_bleu":0.0435800727,"translation-en2fa_en2fa_sahife_bleu":0.0197005921,"translation-en2fa_en2fa_nahj_bleu":0.0132822652,"translation-en2fa_en2fa_tep_bleu":0.0087342692,"nlg_score":0.1067134448}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1535253787,"translation-en2fa_en2fa_epoque_bleu":0.3553678809,"translation-en2fa_en2fa_mizan_bleu":0.1285441922,"translation-en2fa_en2fa_quran_bleu":0.0857809616,"translation-en2fa_en2fa_sahife_bleu":0.0787025343,"translation-en2fa_en2fa_nahj_bleu":0.0404850935,"translation-en2fa_en2fa_tep_bleu":0.0586129062,"nlg_score":0.16056333}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1623218856,"translation-en2fa_en2fa_epoque_bleu":0.3677879105,"translation-en2fa_en2fa_mizan_bleu":0.147599732,"translation-en2fa_en2fa_quran_bleu":0.0938457658,"translation-en2fa_en2fa_sahife_bleu":0.0698903005,"translation-en2fa_en2fa_nahj_bleu":0.0435129812,"translation-en2fa_en2fa_tep_bleu":0.0620337306,"nlg_score":0.1679338638}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.056370201,"translation-en2fa_en2fa_epoque_bleu":0.1154392548,"translation-en2fa_en2fa_mizan_bleu":0.0484324583,"translation-en2fa_en2fa_quran_bleu":0.0612465488,"translation-en2fa_en2fa_sahife_bleu":0.0466818991,"translation-en2fa_en2fa_nahj_bleu":0.0218444477,"translation-en2fa_en2fa_tep_bleu":0.0118186665,"nlg_score":0.1089333827}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1458447378,"translation-en2fa_en2fa_epoque_bleu":0.3541508677,"translation-en2fa_en2fa_mizan_bleu":0.1259468635,"translation-en2fa_en2fa_quran_bleu":0.0887225632,"translation-en2fa_en2fa_sahife_bleu":0.0672732746,"translation-en2fa_en2fa_nahj_bleu":0.0407327793,"translation-en2fa_en2fa_tep_bleu":0.0293172873,"nlg_score":0.1319091735}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.111865825,"translation-en2fa_en2fa_epoque_bleu":0.2732051575,"translation-en2fa_en2fa_mizan_bleu":0.0868500698,"translation-en2fa_en2fa_quran_bleu":0.0529852068,"translation-en2fa_en2fa_sahife_bleu":0.0502293343,"translation-en2fa_en2fa_nahj_bleu":0.0322064942,"translation-en2fa_en2fa_tep_bleu":0.0404507778,"nlg_score":0.112015688}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0438887609,"translation-en2fa_en2fa_epoque_bleu":0.0714603918,"translation-en2fa_en2fa_mizan_bleu":0.0595250407,"translation-en2fa_en2fa_quran_bleu":0.0428487402,"translation-en2fa_en2fa_sahife_bleu":0.0258372032,"translation-en2fa_en2fa_nahj_bleu":0.0133722454,"translation-en2fa_en2fa_tep_bleu":0.0142899909,"nlg_score":0.0934094344}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1217211215,"translation-en2fa_en2fa_epoque_bleu":0.2916268514,"translation-en2fa_en2fa_mizan_bleu":0.091925603,"translation-en2fa_en2fa_quran_bleu":0.065498518,"translation-en2fa_en2fa_sahife_bleu":0.0612237455,"translation-en2fa_en2fa_nahj_bleu":0.0385824628,"translation-en2fa_en2fa_tep_bleu":0.0453883692,"nlg_score":0.1389297212}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0460704645,"translation-en2fa_en2fa_epoque_bleu":0.1309820272,"translation-en2fa_en2fa_mizan_bleu":0.0316650952,"translation-en2fa_en2fa_quran_bleu":0.0134401079,"translation-en2fa_en2fa_sahife_bleu":0.0141114981,"translation-en2fa_en2fa_nahj_bleu":0.0127654414,"translation-en2fa_en2fa_tep_bleu":0.0065463218,"nlg_score":0.0682994522}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0951102411,"translation-en2fa_en2fa_epoque_bleu":0.2204131973,"translation-en2fa_en2fa_mizan_bleu":0.0772021612,"translation-en2fa_en2fa_quran_bleu":0.0914129011,"translation-en2fa_en2fa_sahife_bleu":0.0555605793,"translation-en2fa_en2fa_nahj_bleu":0.0296371925,"translation-en2fa_en2fa_tep_bleu":0.0145962694,"nlg_score":0.1196400535}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2019260724,"translation-en2fa_en2fa_epoque_bleu":0.4752747269,"translation-en2fa_en2fa_mizan_bleu":0.165706346,"translation-en2fa_en2fa_quran_bleu":0.1194336982,"translation-en2fa_en2fa_sahife_bleu":0.0819129449,"translation-en2fa_en2fa_nahj_bleu":0.0545857968,"translation-en2fa_en2fa_tep_bleu":0.0782996247,"nlg_score":0.2010896964}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1979467916,"translation-en2fa_en2fa_epoque_bleu":0.4460981632,"translation-en2fa_en2fa_mizan_bleu":0.1745376389,"translation-en2fa_en2fa_quran_bleu":0.137406774,"translation-en2fa_en2fa_sahife_bleu":0.091586235,"translation-en2fa_en2fa_nahj_bleu":0.0490159552,"translation-en2fa_en2fa_tep_bleu":0.072776086,"nlg_score":0.1901206806}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1745395927,"translation-en2fa_en2fa_epoque_bleu":0.3494300897,"translation-en2fa_en2fa_mizan_bleu":0.1701139909,"translation-en2fa_en2fa_quran_bleu":0.1446863433,"translation-en2fa_en2fa_sahife_bleu":0.0993817781,"translation-en2fa_en2fa_nahj_bleu":0.0486286065,"translation-en2fa_en2fa_tep_bleu":0.0750157717,"nlg_score":0.1764906292}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.2014428857,"translation-en2fa_en2fa_epoque_bleu":0.4710672433,"translation-en2fa_en2fa_mizan_bleu":0.1830885263,"translation-en2fa_en2fa_quran_bleu":0.1141518863,"translation-en2fa_en2fa_sahife_bleu":0.0806159411,"translation-en2fa_en2fa_nahj_bleu":0.0504089542,"translation-en2fa_en2fa_tep_bleu":0.0648627292,"nlg_score":0.1810678527}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0434570993,"translation-en2fa_en2fa_epoque_bleu":0.0864683098,"translation-en2fa_en2fa_mizan_bleu":0.0372126642,"translation-en2fa_en2fa_quran_bleu":0.0580568111,"translation-en2fa_en2fa_sahife_bleu":0.0311184796,"translation-en2fa_en2fa_nahj_bleu":0.0145031404,"translation-en2fa_en2fa_tep_bleu":0.0105327687,"nlg_score":0.1137933652}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1718324934,"translation-en2fa_en2fa_epoque_bleu":0.364783925,"translation-en2fa_en2fa_mizan_bleu":0.1532613543,"translation-en2fa_en2fa_quran_bleu":0.1620975016,"translation-en2fa_en2fa_sahife_bleu":0.0967871625,"translation-en2fa_en2fa_nahj_bleu":0.0457580774,"translation-en2fa_en2fa_tep_bleu":0.05756103,"nlg_score":0.178231145}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.0650794176,"translation-en2fa_en2fa_epoque_bleu":0.1194375779,"translation-en2fa_en2fa_mizan_bleu":0.0640649978,"translation-en2fa_en2fa_quran_bleu":0.0926514743,"translation-en2fa_en2fa_sahife_bleu":0.0392464347,"translation-en2fa_en2fa_nahj_bleu":0.022322883,"translation-en2fa_en2fa_tep_bleu":0.0184227674,"nlg_score":0.1368740087}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1750457687,"translation-en2fa_en2fa_epoque_bleu":0.3740903807,"translation-en2fa_en2fa_mizan_bleu":0.1593083308,"translation-en2fa_en2fa_quran_bleu":0.1325582833,"translation-en2fa_en2fa_sahife_bleu":0.1002994879,"translation-en2fa_en2fa_nahj_bleu":0.0501235873,"translation-en2fa_en2fa_tep_bleu":0.0652393013,"nlg_score":0.1659339021}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1506934102,"translation-en2fa_en2fa_epoque_bleu":0.2951032905,"translation-en2fa_en2fa_mizan_bleu":0.1500681264,"translation-en2fa_en2fa_quran_bleu":0.1104277702,"translation-en2fa_en2fa_sahife_bleu":0.092222972,"translation-en2fa_en2fa_nahj_bleu":0.0497623005,"translation-en2fa_en2fa_tep_bleu":0.0692905167,"nlg_score":0.1641995602}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1831593088,"translation-en2fa_en2fa_epoque_bleu":0.4052150706,"translation-en2fa_en2fa_mizan_bleu":0.1692823494,"translation-en2fa_en2fa_quran_bleu":0.1400476579,"translation-en2fa_en2fa_sahife_bleu":0.0812805634,"translation-en2fa_en2fa_nahj_bleu":0.048146149,"translation-en2fa_en2fa_tep_bleu":0.0610881446,"nlg_score":0.1665903777}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1550276898,"translation-en2fa_en2fa_epoque_bleu":0.3721582216,"translation-en2fa_en2fa_mizan_bleu":0.1231599039,"translation-en2fa_en2fa_quran_bleu":0.0882213453,"translation-en2fa_en2fa_sahife_bleu":0.0725213197,"translation-en2fa_en2fa_nahj_bleu":0.0424186358,"translation-en2fa_en2fa_tep_bleu":0.0528718634,"nlg_score":0.1557270864}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0472831089,"translation-en2fa_en2fa_epoque_bleu":0.0950858392,"translation-en2fa_en2fa_mizan_bleu":0.0348348322,"translation-en2fa_en2fa_quran_bleu":0.0417444578,"translation-en2fa_en2fa_sahife_bleu":0.044168541,"translation-en2fa_en2fa_nahj_bleu":0.0239185439,"translation-en2fa_en2fa_tep_bleu":0.0188699837,"nlg_score":0.0944140383}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.2099911906,"translation-en2fa_en2fa_epoque_bleu":0.4805793807,"translation-en2fa_en2fa_mizan_bleu":0.1904867707,"translation-en2fa_en2fa_quran_bleu":0.1412389522,"translation-en2fa_en2fa_sahife_bleu":0.0861059288,"translation-en2fa_en2fa_nahj_bleu":0.0528683421,"translation-en2fa_en2fa_tep_bleu":0.0688528109,"nlg_score":0.18964968}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0409401575,"translation-en2fa_en2fa_epoque_bleu":0.0902479461,"translation-en2fa_en2fa_mizan_bleu":0.0327725294,"translation-en2fa_en2fa_quran_bleu":0.0443958388,"translation-en2fa_en2fa_sahife_bleu":0.0278897851,"translation-en2fa_en2fa_nahj_bleu":0.0148027555,"translation-en2fa_en2fa_tep_bleu":0.0071499459,"nlg_score":0.0880621978}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1617787549,"translation-en2fa_en2fa_epoque_bleu":0.3821529147,"translation-en2fa_en2fa_mizan_bleu":0.1337537913,"translation-en2fa_en2fa_quran_bleu":0.0860909143,"translation-en2fa_en2fa_sahife_bleu":0.0770506908,"translation-en2fa_en2fa_nahj_bleu":0.0441728515,"translation-en2fa_en2fa_tep_bleu":0.0587014819,"nlg_score":0.164118288}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1074044673,"translation-en2fa_en2fa_epoque_bleu":0.2689676347,"translation-en2fa_en2fa_mizan_bleu":0.0784179406,"translation-en2fa_en2fa_quran_bleu":0.0573255404,"translation-en2fa_en2fa_sahife_bleu":0.0534655564,"translation-en2fa_en2fa_nahj_bleu":0.0373749355,"translation-en2fa_en2fa_tep_bleu":0.0279497965,"nlg_score":0.1129755187}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0373710986,"translation-en2fa_en2fa_epoque_bleu":0.0773774592,"translation-en2fa_en2fa_mizan_bleu":0.034234366,"translation-en2fa_en2fa_quran_bleu":0.0258474786,"translation-en2fa_en2fa_sahife_bleu":0.0240302635,"translation-en2fa_en2fa_nahj_bleu":0.0149718554,"translation-en2fa_en2fa_tep_bleu":0.0146400693,"nlg_score":0.0823387318}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.0281307344,"translation-en2fa_en2fa_epoque_bleu":0.0581118569,"translation-en2fa_en2fa_mizan_bleu":0.0252951725,"translation-en2fa_en2fa_quran_bleu":0.0299637537,"translation-en2fa_en2fa_sahife_bleu":0.0215091569,"translation-en2fa_en2fa_nahj_bleu":0.0081971706,"translation-en2fa_en2fa_tep_bleu":0.0063339652,"nlg_score":0.1151518212}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1799534349,"translation-en2fa_en2fa_epoque_bleu":0.4004213933,"translation-en2fa_en2fa_mizan_bleu":0.1703393716,"translation-en2fa_en2fa_quran_bleu":0.1225698669,"translation-en2fa_en2fa_sahife_bleu":0.0832764011,"translation-en2fa_en2fa_nahj_bleu":0.0439108113,"translation-en2fa_en2fa_tep_bleu":0.0595417592,"nlg_score":0.1779340777}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0366912467,"translation-en2fa_en2fa_epoque_bleu":0.0623359898,"translation-en2fa_en2fa_mizan_bleu":0.0442763597,"translation-en2fa_en2fa_quran_bleu":0.0309309044,"translation-en2fa_en2fa_sahife_bleu":0.0330663757,"translation-en2fa_en2fa_nahj_bleu":0.0124767847,"translation-en2fa_en2fa_tep_bleu":0.0116612774,"nlg_score":0.0949943578}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2018158808,"translation-en2fa_en2fa_epoque_bleu":0.4332944681,"translation-en2fa_en2fa_mizan_bleu":0.1925182751,"translation-en2fa_en2fa_quran_bleu":0.1530925462,"translation-en2fa_en2fa_sahife_bleu":0.1026499453,"translation-en2fa_en2fa_nahj_bleu":0.051968827,"translation-en2fa_en2fa_tep_bleu":0.0708487287,"nlg_score":0.1880477876}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0600629844,"translation-en2fa_en2fa_epoque_bleu":0.1538216141,"translation-en2fa_en2fa_mizan_bleu":0.0548001026,"translation-en2fa_en2fa_quran_bleu":0.0217436833,"translation-en2fa_en2fa_sahife_bleu":0.0205645274,"translation-en2fa_en2fa_nahj_bleu":0.0172427415,"translation-en2fa_en2fa_tep_bleu":0.0093260061,"nlg_score":0.0940241349}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1974288311,"translation-en2fa_en2fa_epoque_bleu":0.4102902123,"translation-en2fa_en2fa_mizan_bleu":0.1898606624,"translation-en2fa_en2fa_quran_bleu":0.1638084791,"translation-en2fa_en2fa_sahife_bleu":0.1095493859,"translation-en2fa_en2fa_nahj_bleu":0.0487097316,"translation-en2fa_en2fa_tep_bleu":0.0737497745,"nlg_score":0.194675133}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":null,"translation-en2fa_en2fa_epoque_bleu":null,"translation-en2fa_en2fa_mizan_bleu":null,"translation-en2fa_en2fa_quran_bleu":null,"translation-en2fa_en2fa_sahife_bleu":null,"translation-en2fa_en2fa_nahj_bleu":null,"translation-en2fa_en2fa_tep_bleu":null,"nlg_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.08817485,"translation-en2fa_en2fa_epoque_bleu":0.1886801725,"translation-en2fa_en2fa_mizan_bleu":0.0879987558,"translation-en2fa_en2fa_quran_bleu":0.0657922023,"translation-en2fa_en2fa_sahife_bleu":0.0296141618,"translation-en2fa_en2fa_nahj_bleu":0.0192266597,"translation-en2fa_en2fa_tep_bleu":0.0366296874,"nlg_score":0.1196804312}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0439502467,"translation-en2fa_en2fa_epoque_bleu":0.0932804064,"translation-en2fa_en2fa_mizan_bleu":0.0446467932,"translation-en2fa_en2fa_quran_bleu":0.0435800727,"translation-en2fa_en2fa_sahife_bleu":0.0197005921,"translation-en2fa_en2fa_nahj_bleu":0.0132822652,"translation-en2fa_en2fa_tep_bleu":0.0087342692,"nlg_score":0.1067134448}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1535253787,"translation-en2fa_en2fa_epoque_bleu":0.3553678809,"translation-en2fa_en2fa_mizan_bleu":0.1285441922,"translation-en2fa_en2fa_quran_bleu":0.0857809616,"translation-en2fa_en2fa_sahife_bleu":0.0787025343,"translation-en2fa_en2fa_nahj_bleu":0.0404850935,"translation-en2fa_en2fa_tep_bleu":0.0586129062,"nlg_score":0.16056333}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1623218856,"translation-en2fa_en2fa_epoque_bleu":0.3677879105,"translation-en2fa_en2fa_mizan_bleu":0.147599732,"translation-en2fa_en2fa_quran_bleu":0.0938457658,"translation-en2fa_en2fa_sahife_bleu":0.0698903005,"translation-en2fa_en2fa_nahj_bleu":0.0435129812,"translation-en2fa_en2fa_tep_bleu":0.0620337306,"nlg_score":0.1679338638}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.056370201,"translation-en2fa_en2fa_epoque_bleu":0.1154392548,"translation-en2fa_en2fa_mizan_bleu":0.0484324583,"translation-en2fa_en2fa_quran_bleu":0.0612465488,"translation-en2fa_en2fa_sahife_bleu":0.0466818991,"translation-en2fa_en2fa_nahj_bleu":0.0218444477,"translation-en2fa_en2fa_tep_bleu":0.0118186665,"nlg_score":0.1089333827}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1458447378,"translation-en2fa_en2fa_epoque_bleu":0.3541508677,"translation-en2fa_en2fa_mizan_bleu":0.1259468635,"translation-en2fa_en2fa_quran_bleu":0.0887225632,"translation-en2fa_en2fa_sahife_bleu":0.0672732746,"translation-en2fa_en2fa_nahj_bleu":0.0407327793,"translation-en2fa_en2fa_tep_bleu":0.0293172873,"nlg_score":0.1319091735}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.111865825,"translation-en2fa_en2fa_epoque_bleu":0.2732051575,"translation-en2fa_en2fa_mizan_bleu":0.0868500698,"translation-en2fa_en2fa_quran_bleu":0.0529852068,"translation-en2fa_en2fa_sahife_bleu":0.0502293343,"translation-en2fa_en2fa_nahj_bleu":0.0322064942,"translation-en2fa_en2fa_tep_bleu":0.0404507778,"nlg_score":0.112015688}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0438887609,"translation-en2fa_en2fa_epoque_bleu":0.0714603918,"translation-en2fa_en2fa_mizan_bleu":0.0595250407,"translation-en2fa_en2fa_quran_bleu":0.0428487402,"translation-en2fa_en2fa_sahife_bleu":0.0258372032,"translation-en2fa_en2fa_nahj_bleu":0.0133722454,"translation-en2fa_en2fa_tep_bleu":0.0142899909,"nlg_score":0.0934094344}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1217211215,"translation-en2fa_en2fa_epoque_bleu":0.2916268514,"translation-en2fa_en2fa_mizan_bleu":0.091925603,"translation-en2fa_en2fa_quran_bleu":0.065498518,"translation-en2fa_en2fa_sahife_bleu":0.0612237455,"translation-en2fa_en2fa_nahj_bleu":0.0385824628,"translation-en2fa_en2fa_tep_bleu":0.0453883692,"nlg_score":0.1389297212}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0460704645,"translation-en2fa_en2fa_epoque_bleu":0.1309820272,"translation-en2fa_en2fa_mizan_bleu":0.0316650952,"translation-en2fa_en2fa_quran_bleu":0.0134401079,"translation-en2fa_en2fa_sahife_bleu":0.0141114981,"translation-en2fa_en2fa_nahj_bleu":0.0127654414,"translation-en2fa_en2fa_tep_bleu":0.0065463218,"nlg_score":0.0682994522}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0951102411,"translation-en2fa_en2fa_epoque_bleu":0.2204131973,"translation-en2fa_en2fa_mizan_bleu":0.0772021612,"translation-en2fa_en2fa_quran_bleu":0.0914129011,"translation-en2fa_en2fa_sahife_bleu":0.0555605793,"translation-en2fa_en2fa_nahj_bleu":0.0296371925,"translation-en2fa_en2fa_tep_bleu":0.0145962694,"nlg_score":0.1196400535}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.2019260724,"translation-en2fa_en2fa_epoque_bleu":0.4752747269,"translation-en2fa_en2fa_mizan_bleu":0.165706346,"translation-en2fa_en2fa_quran_bleu":0.1194336982,"translation-en2fa_en2fa_sahife_bleu":0.0819129449,"translation-en2fa_en2fa_nahj_bleu":0.0545857968,"translation-en2fa_en2fa_tep_bleu":0.0782996247,"nlg_score":0.2010896964}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1979467916,"translation-en2fa_en2fa_epoque_bleu":0.4460981632,"translation-en2fa_en2fa_mizan_bleu":0.1745376389,"translation-en2fa_en2fa_quran_bleu":0.137406774,"translation-en2fa_en2fa_sahife_bleu":0.091586235,"translation-en2fa_en2fa_nahj_bleu":0.0490159552,"translation-en2fa_en2fa_tep_bleu":0.072776086,"nlg_score":0.1901206806}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1745395927,"translation-en2fa_en2fa_epoque_bleu":0.3494300897,"translation-en2fa_en2fa_mizan_bleu":0.1701139909,"translation-en2fa_en2fa_quran_bleu":0.1446863433,"translation-en2fa_en2fa_sahife_bleu":0.0993817781,"translation-en2fa_en2fa_nahj_bleu":0.0486286065,"translation-en2fa_en2fa_tep_bleu":0.0750157717,"nlg_score":0.1764906292}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.2014428857,"translation-en2fa_en2fa_epoque_bleu":0.4710672433,"translation-en2fa_en2fa_mizan_bleu":0.1830885263,"translation-en2fa_en2fa_quran_bleu":0.1141518863,"translation-en2fa_en2fa_sahife_bleu":0.0806159411,"translation-en2fa_en2fa_nahj_bleu":0.0504089542,"translation-en2fa_en2fa_tep_bleu":0.0648627292,"nlg_score":0.1810678527}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0434570993,"translation-en2fa_en2fa_epoque_bleu":0.0864683098,"translation-en2fa_en2fa_mizan_bleu":0.0372126642,"translation-en2fa_en2fa_quran_bleu":0.0580568111,"translation-en2fa_en2fa_sahife_bleu":0.0311184796,"translation-en2fa_en2fa_nahj_bleu":0.0145031404,"translation-en2fa_en2fa_tep_bleu":0.0105327687,"nlg_score":0.1137933652}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1718324934,"translation-en2fa_en2fa_epoque_bleu":0.364783925,"translation-en2fa_en2fa_mizan_bleu":0.1532613543,"translation-en2fa_en2fa_quran_bleu":0.1620975016,"translation-en2fa_en2fa_sahife_bleu":0.0967871625,"translation-en2fa_en2fa_nahj_bleu":0.0457580774,"translation-en2fa_en2fa_tep_bleu":0.05756103,"nlg_score":0.178231145}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.0650794176,"translation-en2fa_en2fa_epoque_bleu":0.1194375779,"translation-en2fa_en2fa_mizan_bleu":0.0640649978,"translation-en2fa_en2fa_quran_bleu":0.0926514743,"translation-en2fa_en2fa_sahife_bleu":0.0392464347,"translation-en2fa_en2fa_nahj_bleu":0.022322883,"translation-en2fa_en2fa_tep_bleu":0.0184227674,"nlg_score":0.1368740087}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1750457687,"translation-en2fa_en2fa_epoque_bleu":0.3740903807,"translation-en2fa_en2fa_mizan_bleu":0.1593083308,"translation-en2fa_en2fa_quran_bleu":0.1325582833,"translation-en2fa_en2fa_sahife_bleu":0.1002994879,"translation-en2fa_en2fa_nahj_bleu":0.0501235873,"translation-en2fa_en2fa_tep_bleu":0.0652393013,"nlg_score":0.1659339021}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1506934102,"translation-en2fa_en2fa_epoque_bleu":0.2951032905,"translation-en2fa_en2fa_mizan_bleu":0.1500681264,"translation-en2fa_en2fa_quran_bleu":0.1104277702,"translation-en2fa_en2fa_sahife_bleu":0.092222972,"translation-en2fa_en2fa_nahj_bleu":0.0497623005,"translation-en2fa_en2fa_tep_bleu":0.0692905167,"nlg_score":0.1641995602}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.1831593088,"translation-en2fa_en2fa_epoque_bleu":0.4052150706,"translation-en2fa_en2fa_mizan_bleu":0.1692823494,"translation-en2fa_en2fa_quran_bleu":0.1400476579,"translation-en2fa_en2fa_sahife_bleu":0.0812805634,"translation-en2fa_en2fa_nahj_bleu":0.048146149,"translation-en2fa_en2fa_tep_bleu":0.0610881446,"nlg_score":0.1665903777}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1550276898,"translation-en2fa_en2fa_epoque_bleu":0.3721582216,"translation-en2fa_en2fa_mizan_bleu":0.1231599039,"translation-en2fa_en2fa_quran_bleu":0.0882213453,"translation-en2fa_en2fa_sahife_bleu":0.0725213197,"translation-en2fa_en2fa_nahj_bleu":0.0424186358,"translation-en2fa_en2fa_tep_bleu":0.0528718634,"nlg_score":0.1557270864}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0472831089,"translation-en2fa_en2fa_epoque_bleu":0.0950858392,"translation-en2fa_en2fa_mizan_bleu":0.0348348322,"translation-en2fa_en2fa_quran_bleu":0.0417444578,"translation-en2fa_en2fa_sahife_bleu":0.044168541,"translation-en2fa_en2fa_nahj_bleu":0.0239185439,"translation-en2fa_en2fa_tep_bleu":0.0188699837,"nlg_score":0.0944140383}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.2099911906,"translation-en2fa_en2fa_epoque_bleu":0.4805793807,"translation-en2fa_en2fa_mizan_bleu":0.1904867707,"translation-en2fa_en2fa_quran_bleu":0.1412389522,"translation-en2fa_en2fa_sahife_bleu":0.0861059288,"translation-en2fa_en2fa_nahj_bleu":0.0528683421,"translation-en2fa_en2fa_tep_bleu":0.0688528109,"nlg_score":0.18964968}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0409401575,"translation-en2fa_en2fa_epoque_bleu":0.0902479461,"translation-en2fa_en2fa_mizan_bleu":0.0327725294,"translation-en2fa_en2fa_quran_bleu":0.0443958388,"translation-en2fa_en2fa_sahife_bleu":0.0278897851,"translation-en2fa_en2fa_nahj_bleu":0.0148027555,"translation-en2fa_en2fa_tep_bleu":0.0071499459,"nlg_score":0.0880621978}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1617787549,"translation-en2fa_en2fa_epoque_bleu":0.3821529147,"translation-en2fa_en2fa_mizan_bleu":0.1337537913,"translation-en2fa_en2fa_quran_bleu":0.0860909143,"translation-en2fa_en2fa_sahife_bleu":0.0770506908,"translation-en2fa_en2fa_nahj_bleu":0.0441728515,"translation-en2fa_en2fa_tep_bleu":0.0587014819,"nlg_score":0.164118288}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.1074044673,"translation-en2fa_en2fa_epoque_bleu":0.2689676347,"translation-en2fa_en2fa_mizan_bleu":0.0784179406,"translation-en2fa_en2fa_quran_bleu":0.0573255404,"translation-en2fa_en2fa_sahife_bleu":0.0534655564,"translation-en2fa_en2fa_nahj_bleu":0.0373749355,"translation-en2fa_en2fa_tep_bleu":0.0279497965,"nlg_score":0.1129755187}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-en2fa_en2fa_bleu":0.0373710986,"translation-en2fa_en2fa_epoque_bleu":0.0773774592,"translation-en2fa_en2fa_mizan_bleu":0.034234366,"translation-en2fa_en2fa_quran_bleu":0.0258474786,"translation-en2fa_en2fa_sahife_bleu":0.0240302635,"translation-en2fa_en2fa_nahj_bleu":0.0149718554,"translation-en2fa_en2fa_tep_bleu":0.0146400693,"nlg_score":0.0823387318}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-en2fa_en2fa_bleu":0.0281307344,"translation-en2fa_en2fa_epoque_bleu":0.0581118569,"translation-en2fa_en2fa_mizan_bleu":0.0252951725,"translation-en2fa_en2fa_quran_bleu":0.0299637537,"translation-en2fa_en2fa_sahife_bleu":0.0215091569,"translation-en2fa_en2fa_nahj_bleu":0.0081971706,"translation-en2fa_en2fa_tep_bleu":0.0063339652,"nlg_score":0.1151518212}
|
leaderboard/boards_data/translation-fa2ar_fa2ar.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0761269879,"translation-fa2ar_fa2ar_nahj_bleu":0.0321440801,"translation-fa2ar_fa2ar_sahife_bleu":0.0613632957,"translation-fa2ar_fa2ar_quran_bleu":0.134873588,"nlg_score":0.1779340777}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0067928767,"translation-fa2ar_fa2ar_nahj_bleu":0.0056689454,"translation-fa2ar_fa2ar_sahife_bleu":0.009024465,"translation-fa2ar_fa2ar_quran_bleu":0.0056852198,"nlg_score":0.0949943578}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0922998074,"translation-fa2ar_fa2ar_nahj_bleu":0.0511154919,"translation-fa2ar_fa2ar_sahife_bleu":0.0589808221,"translation-fa2ar_fa2ar_quran_bleu":0.1668031083,"nlg_score":0.1880477876}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0086214593,"translation-fa2ar_fa2ar_nahj_bleu":0.006894051,"translation-fa2ar_fa2ar_sahife_bleu":0.009695506,"translation-fa2ar_fa2ar_quran_bleu":0.0092748209,"nlg_score":0.0940241349}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0827618418,"translation-fa2ar_fa2ar_nahj_bleu":0.038434531,"translation-fa2ar_fa2ar_sahife_bleu":0.0781455938,"translation-fa2ar_fa2ar_quran_bleu":0.1317054007,"nlg_score":0.194675133}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":null,"translation-fa2ar_fa2ar_nahj_bleu":null,"translation-fa2ar_fa2ar_sahife_bleu":null,"translation-fa2ar_fa2ar_quran_bleu":null,"nlg_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0098333287,"translation-fa2ar_fa2ar_nahj_bleu":0.0072190824,"translation-fa2ar_fa2ar_sahife_bleu":0.0110570977,"translation-fa2ar_fa2ar_quran_bleu":0.0112238061,"nlg_score":0.1196804312}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0100630648,"translation-fa2ar_fa2ar_nahj_bleu":0.0071647909,"translation-fa2ar_fa2ar_sahife_bleu":0.0101185743,"translation-fa2ar_fa2ar_quran_bleu":0.0129058292,"nlg_score":0.1067134448}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0494411806,"translation-fa2ar_fa2ar_nahj_bleu":0.0369805868,"translation-fa2ar_fa2ar_sahife_bleu":0.0567654991,"translation-fa2ar_fa2ar_quran_bleu":0.0545774559,"nlg_score":0.16056333}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0507003531,"translation-fa2ar_fa2ar_nahj_bleu":0.0316047659,"translation-fa2ar_fa2ar_sahife_bleu":0.0534488007,"translation-fa2ar_fa2ar_quran_bleu":0.0670474926,"nlg_score":0.1679338638}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0331262129,"translation-fa2ar_fa2ar_nahj_bleu":0.0202107323,"translation-fa2ar_fa2ar_sahife_bleu":0.0280883311,"translation-fa2ar_fa2ar_quran_bleu":0.0510795752,"nlg_score":0.1089333827}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0192357288,"translation-fa2ar_fa2ar_nahj_bleu":0.0151369319,"translation-fa2ar_fa2ar_sahife_bleu":0.0245784397,"translation-fa2ar_fa2ar_quran_bleu":0.0179918148,"nlg_score":0.1319091735}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0058610145,"translation-fa2ar_fa2ar_nahj_bleu":0.0063998692,"translation-fa2ar_fa2ar_sahife_bleu":0.0068172489,"translation-fa2ar_fa2ar_quran_bleu":0.0043275898,"nlg_score":0.112015688}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0198691873,"translation-fa2ar_fa2ar_nahj_bleu":0.0113771734,"translation-fa2ar_fa2ar_sahife_bleu":0.0154846482,"translation-fa2ar_fa2ar_quran_bleu":0.0327457404,"nlg_score":0.0934094344}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0351351131,"translation-fa2ar_fa2ar_nahj_bleu":0.0313503027,"translation-fa2ar_fa2ar_sahife_bleu":0.042075565,"translation-fa2ar_fa2ar_quran_bleu":0.0319794715,"nlg_score":0.1389297212}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0045158138,"translation-fa2ar_fa2ar_nahj_bleu":0.004600061,"translation-fa2ar_fa2ar_sahife_bleu":0.0052362431,"translation-fa2ar_fa2ar_quran_bleu":0.0037111373,"nlg_score":0.0682994522}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0318976507,"translation-fa2ar_fa2ar_nahj_bleu":0.0222927973,"translation-fa2ar_fa2ar_sahife_bleu":0.0296757253,"translation-fa2ar_fa2ar_quran_bleu":0.0437244293,"nlg_score":0.1196400535}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0860361971,"translation-fa2ar_fa2ar_nahj_bleu":0.0440530096,"translation-fa2ar_fa2ar_sahife_bleu":0.0833828112,"translation-fa2ar_fa2ar_quran_bleu":0.1306727704,"nlg_score":0.2010896964}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0689994984,"translation-fa2ar_fa2ar_nahj_bleu":0.0397020785,"translation-fa2ar_fa2ar_sahife_bleu":0.0751264317,"translation-fa2ar_fa2ar_quran_bleu":0.092169985,"nlg_score":0.1901206806}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0848646543,"translation-fa2ar_fa2ar_nahj_bleu":0.0378800509,"translation-fa2ar_fa2ar_sahife_bleu":0.0810757988,"translation-fa2ar_fa2ar_quran_bleu":0.1356381134,"nlg_score":0.1764906292}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0652599666,"translation-fa2ar_fa2ar_nahj_bleu":0.0373134355,"translation-fa2ar_fa2ar_sahife_bleu":0.0688517527,"translation-fa2ar_fa2ar_quran_bleu":0.0896147118,"nlg_score":0.1810678527}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0235629204,"translation-fa2ar_fa2ar_nahj_bleu":0.0123939624,"translation-fa2ar_fa2ar_sahife_bleu":0.0198538447,"translation-fa2ar_fa2ar_quran_bleu":0.0384409541,"nlg_score":0.1137933652}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.079257203,"translation-fa2ar_fa2ar_nahj_bleu":0.0338415847,"translation-fa2ar_fa2ar_sahife_bleu":0.0570744002,"translation-fa2ar_fa2ar_quran_bleu":0.146855624,"nlg_score":0.178231145}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0382543341,"translation-fa2ar_fa2ar_nahj_bleu":0.032191006,"translation-fa2ar_fa2ar_sahife_bleu":0.028980881,"translation-fa2ar_fa2ar_quran_bleu":0.0535911152,"nlg_score":0.1368740087}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0341529902,"translation-fa2ar_fa2ar_nahj_bleu":0.0198079243,"translation-fa2ar_fa2ar_sahife_bleu":0.041930434,"translation-fa2ar_fa2ar_quran_bleu":0.0407206123,"nlg_score":0.1659339021}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0852951832,"translation-fa2ar_fa2ar_nahj_bleu":0.0464072569,"translation-fa2ar_fa2ar_sahife_bleu":0.0713426227,"translation-fa2ar_fa2ar_quran_bleu":0.1381356701,"nlg_score":0.1641995602}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0568324844,"translation-fa2ar_fa2ar_nahj_bleu":0.03267488,"translation-fa2ar_fa2ar_sahife_bleu":0.0579381183,"translation-fa2ar_fa2ar_quran_bleu":0.0798844549,"nlg_score":0.1665903777}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0423318046,"translation-fa2ar_fa2ar_nahj_bleu":0.0329089717,"translation-fa2ar_fa2ar_sahife_bleu":0.0445101244,"translation-fa2ar_fa2ar_quran_bleu":0.0495763178,"nlg_score":0.1557270864}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0167121698,"translation-fa2ar_fa2ar_nahj_bleu":0.0182214992,"translation-fa2ar_fa2ar_sahife_bleu":0.0203567578,"translation-fa2ar_fa2ar_quran_bleu":0.0115582526,"nlg_score":0.0944140383}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0656699851,"translation-fa2ar_fa2ar_nahj_bleu":0.0347167128,"translation-fa2ar_fa2ar_sahife_bleu":0.0732417084,"translation-fa2ar_fa2ar_quran_bleu":0.0890515341,"nlg_score":0.18964968}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0198485582,"translation-fa2ar_fa2ar_nahj_bleu":0.0111873845,"translation-fa2ar_fa2ar_sahife_bleu":0.015856468,"translation-fa2ar_fa2ar_quran_bleu":0.032501822,"nlg_score":0.0880621978}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0483297895,"translation-fa2ar_fa2ar_nahj_bleu":0.0310247441,"translation-fa2ar_fa2ar_sahife_bleu":0.0512375201,"translation-fa2ar_fa2ar_quran_bleu":0.0627271043,"nlg_score":0.164118288}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0136530376,"translation-fa2ar_fa2ar_nahj_bleu":0.0110489285,"translation-fa2ar_fa2ar_sahife_bleu":0.0135009036,"translation-fa2ar_fa2ar_quran_bleu":0.0164092807,"nlg_score":0.1129755187}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0164489799,"translation-fa2ar_fa2ar_nahj_bleu":0.0152537955,"translation-fa2ar_fa2ar_sahife_bleu":0.0220286512,"translation-fa2ar_fa2ar_quran_bleu":0.012064493,"nlg_score":0.0823387318}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0179244872,"translation-fa2ar_fa2ar_nahj_bleu":0.0097189051,"translation-fa2ar_fa2ar_sahife_bleu":0.0109662672,"translation-fa2ar_fa2ar_quran_bleu":0.0330882891,"nlg_score":0.1151518212}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0761269879,"translation-fa2ar_fa2ar_nahj_bleu":0.0321440801,"translation-fa2ar_fa2ar_sahife_bleu":0.0613632957,"translation-fa2ar_fa2ar_quran_bleu":0.134873588,"nlg_score":0.1779340777}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0067928767,"translation-fa2ar_fa2ar_nahj_bleu":0.0056689454,"translation-fa2ar_fa2ar_sahife_bleu":0.009024465,"translation-fa2ar_fa2ar_quran_bleu":0.0056852198,"nlg_score":0.0949943578}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0922998074,"translation-fa2ar_fa2ar_nahj_bleu":0.0511154919,"translation-fa2ar_fa2ar_sahife_bleu":0.0589808221,"translation-fa2ar_fa2ar_quran_bleu":0.1668031083,"nlg_score":0.1880477876}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0086214593,"translation-fa2ar_fa2ar_nahj_bleu":0.006894051,"translation-fa2ar_fa2ar_sahife_bleu":0.009695506,"translation-fa2ar_fa2ar_quran_bleu":0.0092748209,"nlg_score":0.0940241349}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0827618418,"translation-fa2ar_fa2ar_nahj_bleu":0.038434531,"translation-fa2ar_fa2ar_sahife_bleu":0.0781455938,"translation-fa2ar_fa2ar_quran_bleu":0.1317054007,"nlg_score":0.194675133}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":null,"translation-fa2ar_fa2ar_nahj_bleu":null,"translation-fa2ar_fa2ar_sahife_bleu":null,"translation-fa2ar_fa2ar_quran_bleu":null,"nlg_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0098333287,"translation-fa2ar_fa2ar_nahj_bleu":0.0072190824,"translation-fa2ar_fa2ar_sahife_bleu":0.0110570977,"translation-fa2ar_fa2ar_quran_bleu":0.0112238061,"nlg_score":0.1196804312}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0100630648,"translation-fa2ar_fa2ar_nahj_bleu":0.0071647909,"translation-fa2ar_fa2ar_sahife_bleu":0.0101185743,"translation-fa2ar_fa2ar_quran_bleu":0.0129058292,"nlg_score":0.1067134448}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0494411806,"translation-fa2ar_fa2ar_nahj_bleu":0.0369805868,"translation-fa2ar_fa2ar_sahife_bleu":0.0567654991,"translation-fa2ar_fa2ar_quran_bleu":0.0545774559,"nlg_score":0.16056333}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0507003531,"translation-fa2ar_fa2ar_nahj_bleu":0.0316047659,"translation-fa2ar_fa2ar_sahife_bleu":0.0534488007,"translation-fa2ar_fa2ar_quran_bleu":0.0670474926,"nlg_score":0.1679338638}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0331262129,"translation-fa2ar_fa2ar_nahj_bleu":0.0202107323,"translation-fa2ar_fa2ar_sahife_bleu":0.0280883311,"translation-fa2ar_fa2ar_quran_bleu":0.0510795752,"nlg_score":0.1089333827}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0192357288,"translation-fa2ar_fa2ar_nahj_bleu":0.0151369319,"translation-fa2ar_fa2ar_sahife_bleu":0.0245784397,"translation-fa2ar_fa2ar_quran_bleu":0.0179918148,"nlg_score":0.1319091735}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0058610145,"translation-fa2ar_fa2ar_nahj_bleu":0.0063998692,"translation-fa2ar_fa2ar_sahife_bleu":0.0068172489,"translation-fa2ar_fa2ar_quran_bleu":0.0043275898,"nlg_score":0.112015688}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0198691873,"translation-fa2ar_fa2ar_nahj_bleu":0.0113771734,"translation-fa2ar_fa2ar_sahife_bleu":0.0154846482,"translation-fa2ar_fa2ar_quran_bleu":0.0327457404,"nlg_score":0.0934094344}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0351351131,"translation-fa2ar_fa2ar_nahj_bleu":0.0313503027,"translation-fa2ar_fa2ar_sahife_bleu":0.042075565,"translation-fa2ar_fa2ar_quran_bleu":0.0319794715,"nlg_score":0.1389297212}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0045158138,"translation-fa2ar_fa2ar_nahj_bleu":0.004600061,"translation-fa2ar_fa2ar_sahife_bleu":0.0052362431,"translation-fa2ar_fa2ar_quran_bleu":0.0037111373,"nlg_score":0.0682994522}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0318976507,"translation-fa2ar_fa2ar_nahj_bleu":0.0222927973,"translation-fa2ar_fa2ar_sahife_bleu":0.0296757253,"translation-fa2ar_fa2ar_quran_bleu":0.0437244293,"nlg_score":0.1196400535}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0860361971,"translation-fa2ar_fa2ar_nahj_bleu":0.0440530096,"translation-fa2ar_fa2ar_sahife_bleu":0.0833828112,"translation-fa2ar_fa2ar_quran_bleu":0.1306727704,"nlg_score":0.2010896964}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0689994984,"translation-fa2ar_fa2ar_nahj_bleu":0.0397020785,"translation-fa2ar_fa2ar_sahife_bleu":0.0751264317,"translation-fa2ar_fa2ar_quran_bleu":0.092169985,"nlg_score":0.1901206806}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0848646543,"translation-fa2ar_fa2ar_nahj_bleu":0.0378800509,"translation-fa2ar_fa2ar_sahife_bleu":0.0810757988,"translation-fa2ar_fa2ar_quran_bleu":0.1356381134,"nlg_score":0.1764906292}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0652599666,"translation-fa2ar_fa2ar_nahj_bleu":0.0373134355,"translation-fa2ar_fa2ar_sahife_bleu":0.0688517527,"translation-fa2ar_fa2ar_quran_bleu":0.0896147118,"nlg_score":0.1810678527}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0235629204,"translation-fa2ar_fa2ar_nahj_bleu":0.0123939624,"translation-fa2ar_fa2ar_sahife_bleu":0.0198538447,"translation-fa2ar_fa2ar_quran_bleu":0.0384409541,"nlg_score":0.1137933652}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.079257203,"translation-fa2ar_fa2ar_nahj_bleu":0.0338415847,"translation-fa2ar_fa2ar_sahife_bleu":0.0570744002,"translation-fa2ar_fa2ar_quran_bleu":0.146855624,"nlg_score":0.178231145}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0382543341,"translation-fa2ar_fa2ar_nahj_bleu":0.032191006,"translation-fa2ar_fa2ar_sahife_bleu":0.028980881,"translation-fa2ar_fa2ar_quran_bleu":0.0535911152,"nlg_score":0.1368740087}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0341529902,"translation-fa2ar_fa2ar_nahj_bleu":0.0198079243,"translation-fa2ar_fa2ar_sahife_bleu":0.041930434,"translation-fa2ar_fa2ar_quran_bleu":0.0407206123,"nlg_score":0.1659339021}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0852951832,"translation-fa2ar_fa2ar_nahj_bleu":0.0464072569,"translation-fa2ar_fa2ar_sahife_bleu":0.0713426227,"translation-fa2ar_fa2ar_quran_bleu":0.1381356701,"nlg_score":0.1641995602}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0568324844,"translation-fa2ar_fa2ar_nahj_bleu":0.03267488,"translation-fa2ar_fa2ar_sahife_bleu":0.0579381183,"translation-fa2ar_fa2ar_quran_bleu":0.0798844549,"nlg_score":0.1665903777}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0423318046,"translation-fa2ar_fa2ar_nahj_bleu":0.0329089717,"translation-fa2ar_fa2ar_sahife_bleu":0.0445101244,"translation-fa2ar_fa2ar_quran_bleu":0.0495763178,"nlg_score":0.1557270864}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0167121698,"translation-fa2ar_fa2ar_nahj_bleu":0.0182214992,"translation-fa2ar_fa2ar_sahife_bleu":0.0203567578,"translation-fa2ar_fa2ar_quran_bleu":0.0115582526,"nlg_score":0.0944140383}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0656699851,"translation-fa2ar_fa2ar_nahj_bleu":0.0347167128,"translation-fa2ar_fa2ar_sahife_bleu":0.0732417084,"translation-fa2ar_fa2ar_quran_bleu":0.0890515341,"nlg_score":0.18964968}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0198485582,"translation-fa2ar_fa2ar_nahj_bleu":0.0111873845,"translation-fa2ar_fa2ar_sahife_bleu":0.015856468,"translation-fa2ar_fa2ar_quran_bleu":0.032501822,"nlg_score":0.0880621978}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0483297895,"translation-fa2ar_fa2ar_nahj_bleu":0.0310247441,"translation-fa2ar_fa2ar_sahife_bleu":0.0512375201,"translation-fa2ar_fa2ar_quran_bleu":0.0627271043,"nlg_score":0.164118288}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0136530376,"translation-fa2ar_fa2ar_nahj_bleu":0.0110489285,"translation-fa2ar_fa2ar_sahife_bleu":0.0135009036,"translation-fa2ar_fa2ar_quran_bleu":0.0164092807,"nlg_score":0.1129755187}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-fa2ar_fa2ar_bleu":0.0164489799,"translation-fa2ar_fa2ar_nahj_bleu":0.0152537955,"translation-fa2ar_fa2ar_sahife_bleu":0.0220286512,"translation-fa2ar_fa2ar_quran_bleu":0.012064493,"nlg_score":0.0823387318}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2ar_fa2ar_bleu":0.0179244872,"translation-fa2ar_fa2ar_nahj_bleu":0.0097189051,"translation-fa2ar_fa2ar_sahife_bleu":0.0109662672,"translation-fa2ar_fa2ar_quran_bleu":0.0330882891,"nlg_score":0.1151518212}
|
leaderboard/boards_data/translation-fa2en_fa2en.jsonl
CHANGED
|
@@ -1,35 +1,35 @@
|
|
| 1 |
-
{"Model Name":"claude-3-7-sonnet-20250219","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2247897554,"translation-fa2en_fa2en_tep_bleu":0.1341840946,"translation-fa2en_fa2en_mizan_bleu":0.1909021288,"translation-fa2en_fa2en_quran_bleu":0.1740971535,"translation-fa2en_fa2en_epoque_bleu":0.4544315204,"translation-fa2en_fa2en_nahj_bleu":0.0877235615,"translation-fa2en_fa2en_sahife_bleu":0.0975791022,"nlg_score":0.1779340777}
|
| 2 |
-
{"Model Name":"gemma-3-4b-it","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.016856047,"translation-fa2en_fa2en_tep_bleu":0.0085125001,"translation-fa2en_fa2en_mizan_bleu":0.013661635,"translation-fa2en_fa2en_quran_bleu":0.0181666202,"translation-fa2en_fa2en_epoque_bleu":0.0301282339,"translation-fa2en_fa2en_nahj_bleu":0.0122360126,"translation-fa2en_fa2en_sahife_bleu":0.0110323989,"nlg_score":0.0949943578}
|
| 3 |
-
{"Model Name":"c4ai-command-r-plus","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2337569687,"translation-fa2en_fa2en_tep_bleu":0.1386371644,"translation-fa2en_fa2en_mizan_bleu":0.2129637469,"translation-fa2en_fa2en_quran_bleu":0.1702102457,"translation-fa2en_fa2en_epoque_bleu":0.478211182,"translation-fa2en_fa2en_nahj_bleu":0.083013513,"translation-fa2en_fa2en_sahife_bleu":0.072000292,"nlg_score":0.1880477876}
|
| 4 |
-
{"Model Name":"gemma-3n-E4B-it","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0196181945,"translation-fa2en_fa2en_tep_bleu":0.0101636027,"translation-fa2en_fa2en_mizan_bleu":0.0153753718,"translation-fa2en_fa2en_quran_bleu":0.0231110679,"translation-fa2en_fa2en_epoque_bleu":0.0359429205,"translation-fa2en_fa2en_nahj_bleu":0.0119451943,"translation-fa2en_fa2en_sahife_bleu":0.0117936527,"nlg_score":0.0940241349}
|
| 5 |
-
{"Model Name":"gpt-4.1","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2307102128,"translation-fa2en_fa2en_tep_bleu":0.1527807458,"translation-fa2en_fa2en_mizan_bleu":0.1927067243,"translation-fa2en_fa2en_quran_bleu":0.1628198329,"translation-fa2en_fa2en_epoque_bleu":0.4676472481,"translation-fa2en_fa2en_nahj_bleu":0.0810494281,"translation-fa2en_fa2en_sahife_bleu":0.1009417344,"nlg_score":0.194675133}
|
| 6 |
-
{"Model Name":"o4-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":null,"translation-fa2en_fa2en_tep_bleu":null,"translation-fa2en_fa2en_mizan_bleu":null,"translation-fa2en_fa2en_quran_bleu":null,"translation-fa2en_fa2en_epoque_bleu":null,"translation-fa2en_fa2en_nahj_bleu":null,"translation-fa2en_fa2en_sahife_bleu":null,"nlg_score":null}
|
| 7 |
-
{"Model Name":"gemma-3-12b-it","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0257184881,"translation-fa2en_fa2en_tep_bleu":0.011593122,"translation-fa2en_fa2en_mizan_bleu":0.0215328963,"translation-fa2en_fa2en_quran_bleu":0.0262056878,"translation-fa2en_fa2en_epoque_bleu":0.047221295,"translation-fa2en_fa2en_nahj_bleu":0.0178557856,"translation-fa2en_fa2en_sahife_bleu":0.0169922826,"nlg_score":0.1196804312}
|
| 8 |
-
{"Model Name":"gemma-3-27b-it","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0199585579,"translation-fa2en_fa2en_tep_bleu":0.0097804397,"translation-fa2en_fa2en_mizan_bleu":0.0144809896,"translation-fa2en_fa2en_quran_bleu":0.0259691427,"translation-fa2en_fa2en_epoque_bleu":0.0345304173,"translation-fa2en_fa2en_nahj_bleu":0.0150589625,"translation-fa2en_fa2en_sahife_bleu":0.0157047184,"nlg_score":0.1067134448}
|
| 9 |
-
{"Model Name":"Qwen3-14B","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2145488085,"translation-fa2en_fa2en_tep_bleu":0.1307272464,"translation-fa2en_fa2en_mizan_bleu":0.1697754862,"translation-fa2en_fa2en_quran_bleu":0.1552415558,"translation-fa2en_fa2en_epoque_bleu":0.4513682579,"translation-fa2en_fa2en_nahj_bleu":0.0842673472,"translation-fa2en_fa2en_sahife_bleu":0.0853787118,"nlg_score":0.16056333}
|
| 10 |
-
{"Model Name":"Qwen3-32B","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.217991447,"translation-fa2en_fa2en_tep_bleu":0.1271542443,"translation-fa2en_fa2en_mizan_bleu":0.1728081337,"translation-fa2en_fa2en_quran_bleu":0.158860515,"translation-fa2en_fa2en_epoque_bleu":0.4572670962,"translation-fa2en_fa2en_nahj_bleu":0.0902445729,"translation-fa2en_fa2en_sahife_bleu":0.0945000287,"nlg_score":0.1679338638}
|
| 11 |
-
{"Model Name":"claude-3-5-haiku-20241022","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0691353117,"translation-fa2en_fa2en_tep_bleu":0.0320908261,"translation-fa2en_fa2en_mizan_bleu":0.0535229905,"translation-fa2en_fa2en_quran_bleu":0.0800143919,"translation-fa2en_fa2en_epoque_bleu":0.133977443,"translation-fa2en_fa2en_nahj_bleu":0.0362958954,"translation-fa2en_fa2en_sahife_bleu":0.0393317574,"nlg_score":0.1089333827}
|
| 12 |
-
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1451163884,"translation-fa2en_fa2en_tep_bleu":0.0393307601,"translation-fa2en_fa2en_mizan_bleu":0.1009347025,"translation-fa2en_fa2en_quran_bleu":0.0929688918,"translation-fa2en_fa2en_epoque_bleu":0.3660914464,"translation-fa2en_fa2en_nahj_bleu":0.0536507876,"translation-fa2en_fa2en_sahife_bleu":0.05038339,"nlg_score":0.1319091735}
|
| 13 |
-
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1249013271,"translation-fa2en_fa2en_tep_bleu":0.0564543942,"translation-fa2en_fa2en_mizan_bleu":0.0739643668,"translation-fa2en_fa2en_quran_bleu":0.0677317381,"translation-fa2en_fa2en_epoque_bleu":0.3111968032,"translation-fa2en_fa2en_nahj_bleu":0.0523541092,"translation-fa2en_fa2en_sahife_bleu":0.0479821907,"nlg_score":0.112015688}
|
| 14 |
-
{"Model Name":"deepseek-chat","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0450244679,"translation-fa2en_fa2en_tep_bleu":0.0166138322,"translation-fa2en_fa2en_mizan_bleu":0.0478141187,"translation-fa2en_fa2en_quran_bleu":0.0426202225,"translation-fa2en_fa2en_epoque_bleu":0.0802277942,"translation-fa2en_fa2en_nahj_bleu":0.0252662094,"translation-fa2en_fa2en_sahife_bleu":0.0268950031,"nlg_score":0.0934094344}
|
| 15 |
-
{"Model Name":"Qwen3-4B","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1840809218,"translation-fa2en_fa2en_tep_bleu":0.1011436783,"translation-fa2en_fa2en_mizan_bleu":0.149157222,"translation-fa2en_fa2en_quran_bleu":0.1377761662,"translation-fa2en_fa2en_epoque_bleu":0.3802946233,"translation-fa2en_fa2en_nahj_bleu":0.0851756367,"translation-fa2en_fa2en_sahife_bleu":0.0857201524,"nlg_score":0.1389297212}
|
| 16 |
-
{"Model Name":"gemma-3-1b-it","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0146059874,"translation-fa2en_fa2en_tep_bleu":0.0065306354,"translation-fa2en_fa2en_mizan_bleu":0.0119363121,"translation-fa2en_fa2en_quran_bleu":0.0152281808,"translation-fa2en_fa2en_epoque_bleu":0.0274143056,"translation-fa2en_fa2en_nahj_bleu":0.0094070307,"translation-fa2en_fa2en_sahife_bleu":0.0093811964,"nlg_score":0.0682994522}
|
| 17 |
-
{"Model Name":"aya-expanse-32b","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.14443262,"translation-fa2en_fa2en_tep_bleu":0.0636878051,"translation-fa2en_fa2en_mizan_bleu":0.1045784226,"translation-fa2en_fa2en_quran_bleu":0.1065169191,"translation-fa2en_fa2en_epoque_bleu":0.3331896819,"translation-fa2en_fa2en_nahj_bleu":0.0573420672,"translation-fa2en_fa2en_sahife_bleu":0.0526154809,"nlg_score":0.1196400535}
|
| 18 |
-
{"Model Name":"Llama-3.3-70B-Instruct","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2559078555,"translation-fa2en_fa2en_tep_bleu":0.1687480056,"translation-fa2en_fa2en_mizan_bleu":0.2113676707,"translation-fa2en_fa2en_quran_bleu":0.2008290856,"translation-fa2en_fa2en_epoque_bleu":0.5099219192,"translation-fa2en_fa2en_nahj_bleu":0.0984185664,"translation-fa2en_fa2en_sahife_bleu":0.1125739279,"nlg_score":0.2010896964}
|
| 19 |
-
{"Model Name":"gpt-4.1-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2389011537,"translation-fa2en_fa2en_tep_bleu":0.1431825698,"translation-fa2en_fa2en_mizan_bleu":0.2056729072,"translation-fa2en_fa2en_quran_bleu":0.1776018574,"translation-fa2en_fa2en_epoque_bleu":0.4842161688,"translation-fa2en_fa2en_nahj_bleu":0.0886384727,"translation-fa2en_fa2en_sahife_bleu":0.1045044839,"nlg_score":0.1901206806}
|
| 20 |
-
{"Model Name":"o3","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2370270006,"translation-fa2en_fa2en_tep_bleu":0.1858400323,"translation-fa2en_fa2en_mizan_bleu":0.2008132758,"translation-fa2en_fa2en_quran_bleu":0.1727292787,"translation-fa2en_fa2en_epoque_bleu":0.4507197199,"translation-fa2en_fa2en_nahj_bleu":0.0893284136,"translation-fa2en_fa2en_sahife_bleu":0.1038607373,"nlg_score":0.1764906292}
|
| 21 |
-
{"Model Name":"gpt-4o-mini","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2332592983,"translation-fa2en_fa2en_tep_bleu":0.1497847918,"translation-fa2en_fa2en_mizan_bleu":0.1972270386,"translation-fa2en_fa2en_quran_bleu":0.1725699648,"translation-fa2en_fa2en_epoque_bleu":0.4678973942,"translation-fa2en_fa2en_nahj_bleu":0.090543674,"translation-fa2en_fa2en_sahife_bleu":0.1008380909,"nlg_score":0.1810678527}
|
| 22 |
-
{"Model Name":"c4ai-command-a-03-2025","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0486479291,"translation-fa2en_fa2en_tep_bleu":0.023223206,"translation-fa2en_fa2en_mizan_bleu":0.0397123038,"translation-fa2en_fa2en_quran_bleu":0.0471874873,"translation-fa2en_fa2en_epoque_bleu":0.0972392875,"translation-fa2en_fa2en_nahj_bleu":0.0246695639,"translation-fa2en_fa2en_sahife_bleu":0.0238899949,"nlg_score":0.1137933652}
|
| 23 |
-
{"Model Name":"gemini-2.0-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0757086487,"translation-fa2en_fa2en_tep_bleu":0.0316922994,"translation-fa2en_fa2en_mizan_bleu":0.0530331645,"translation-fa2en_fa2en_quran_bleu":0.1028139165,"translation-fa2en_fa2en_epoque_bleu":0.157367237,"translation-fa2en_fa2en_nahj_bleu":0.0336372263,"translation-fa2en_fa2en_sahife_bleu":0.0279485156,"nlg_score":0.178231145}
|
| 24 |
-
{"Model Name":"gemini-2.5-flash","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0559484689,"translation-fa2en_fa2en_tep_bleu":0.0190401646,"translation-fa2en_fa2en_mizan_bleu":0.0369126121,"translation-fa2en_fa2en_quran_bleu":0.0401048971,"translation-fa2en_fa2en_epoque_bleu":0.1381975553,"translation-fa2en_fa2en_nahj_bleu":0.0232788817,"translation-fa2en_fa2en_sahife_bleu":0.017477039,"nlg_score":0.1368740087}
|
| 25 |
-
{"Model Name":"gemini-2.0-flash-lite","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0858473556,"translation-fa2en_fa2en_tep_bleu":0.0381837678,"translation-fa2en_fa2en_mizan_bleu":0.0750320212,"translation-fa2en_fa2en_quran_bleu":0.0986486354,"translation-fa2en_fa2en_epoque_bleu":0.1513689047,"translation-fa2en_fa2en_nahj_bleu":0.0568182224,"translation-fa2en_fa2en_sahife_bleu":0.0570620784,"nlg_score":0.1659339021}
|
| 26 |
-
{"Model Name":"c4ai-command-r-v01","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1892370035,"translation-fa2en_fa2en_tep_bleu":0.1290684643,"translation-fa2en_fa2en_mizan_bleu":0.1721408901,"translation-fa2en_fa2en_quran_bleu":0.1736791408,"translation-fa2en_fa2en_epoque_bleu":0.346100597,"translation-fa2en_fa2en_nahj_bleu":0.0776400174,"translation-fa2en_fa2en_sahife_bleu":0.08279759,"nlg_score":0.1641995602}
|
| 27 |
-
{"Model Name":"gpt-4.1-nano","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2165819036,"translation-fa2en_fa2en_tep_bleu":0.13491043,"translation-fa2en_fa2en_mizan_bleu":0.1810957829,"translation-fa2en_fa2en_quran_bleu":0.164168601,"translation-fa2en_fa2en_epoque_bleu":0.4383628208,"translation-fa2en_fa2en_nahj_bleu":0.0942939662,"translation-fa2en_fa2en_sahife_bleu":0.0827637394,"nlg_score":0.1665903777}
|
| 28 |
-
{"Model Name":"Qwen3-8B","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2024225184,"translation-fa2en_fa2en_tep_bleu":0.1163127945,"translation-fa2en_fa2en_mizan_bleu":0.1649009947,"translation-fa2en_fa2en_quran_bleu":0.1513328968,"translation-fa2en_fa2en_epoque_bleu":0.4171232399,"translation-fa2en_fa2en_nahj_bleu":0.0857999462,"translation-fa2en_fa2en_sahife_bleu":0.0929479364,"nlg_score":0.1557270864}
|
| 29 |
-
{"Model Name":"Mistral-7B-Instruct-v0.3","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0901939948,"translation-fa2en_fa2en_tep_bleu":0.0521908916,"translation-fa2en_fa2en_mizan_bleu":0.0828690879,"translation-fa2en_fa2en_quran_bleu":0.0756298248,"translation-fa2en_fa2en_epoque_bleu":0.1645619674,"translation-fa2en_fa2en_nahj_bleu":0.048616237,"translation-fa2en_fa2en_sahife_bleu":0.0518842318,"nlg_score":0.0944140383}
|
| 30 |
-
{"Model Name":"gpt-4o","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.234039473,"translation-fa2en_fa2en_tep_bleu":0.1597644653,"translation-fa2en_fa2en_mizan_bleu":0.1946759365,"translation-fa2en_fa2en_quran_bleu":0.1638938233,"translation-fa2en_fa2en_epoque_bleu":0.474760879,"translation-fa2en_fa2en_nahj_bleu":0.0825458621,"translation-fa2en_fa2en_sahife_bleu":0.0952634494,"nlg_score":0.18964968}
|
| 31 |
-
{"Model Name":"deepseek-reasoner","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0414094379,"translation-fa2en_fa2en_tep_bleu":0.019539618,"translation-fa2en_fa2en_mizan_bleu":0.0346087447,"translation-fa2en_fa2en_quran_bleu":0.0396858881,"translation-fa2en_fa2en_epoque_bleu":0.0798341141,"translation-fa2en_fa2en_nahj_bleu":0.0244191809,"translation-fa2en_fa2en_sahife_bleu":0.0231626908,"nlg_score":0.0880621978}
|
| 32 |
-
{"Model Name":"Qwen3-30B-A3B","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2177785793,"translation-fa2en_fa2en_tep_bleu":0.1189948472,"translation-fa2en_fa2en_mizan_bleu":0.1793626928,"translation-fa2en_fa2en_quran_bleu":0.1718006478,"translation-fa2en_fa2en_epoque_bleu":0.4500382308,"translation-fa2en_fa2en_nahj_bleu":0.0836776138,"translation-fa2en_fa2en_sahife_bleu":0.1034067477,"nlg_score":0.164118288}
|
| 33 |
-
{"Model Name":"Llama-3.2-3B-Instruct","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0862123314,"translation-fa2en_fa2en_tep_bleu":0.0334491641,"translation-fa2en_fa2en_mizan_bleu":0.0758837027,"translation-fa2en_fa2en_quran_bleu":0.0892296624,"translation-fa2en_fa2en_epoque_bleu":0.1688644918,"translation-fa2en_fa2en_nahj_bleu":0.042819328,"translation-fa2en_fa2en_sahife_bleu":0.0473482715,"nlg_score":0.1129755187}
|
| 34 |
-
{"Model Name":"Llama-3.2-1B-Instruct","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0423299736,"translation-fa2en_fa2en_tep_bleu":0.0124774953,"translation-fa2en_fa2en_mizan_bleu":0.0314077643,"translation-fa2en_fa2en_quran_bleu":0.0294898862,"translation-fa2en_fa2en_epoque_bleu":0.1006673489,"translation-fa2en_fa2en_nahj_bleu":0.0117672852,"translation-fa2en_fa2en_sahife_bleu":0.0246608556,"nlg_score":0.0823387318}
|
| 35 |
-
{"Model Name":"gemini-2.5-pro","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0349431803,"translation-fa2en_fa2en_tep_bleu":0.017140489,"translation-fa2en_fa2en_mizan_bleu":0.0284546797,"translation-fa2en_fa2en_quran_bleu":0.0300397279,"translation-fa2en_fa2en_epoque_bleu":0.0720425155,"translation-fa2en_fa2en_nahj_bleu":0.0166649152,"translation-fa2en_fa2en_sahife_bleu":0.0158679919,"nlg_score":0.1151518212}
|
|
|
|
| 1 |
+
{"Model Name":"claude-3-7-sonnet-20250219","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2247897554,"translation-fa2en_fa2en_tep_bleu":0.1341840946,"translation-fa2en_fa2en_mizan_bleu":0.1909021288,"translation-fa2en_fa2en_quran_bleu":0.1740971535,"translation-fa2en_fa2en_epoque_bleu":0.4544315204,"translation-fa2en_fa2en_nahj_bleu":0.0877235615,"translation-fa2en_fa2en_sahife_bleu":0.0975791022,"nlg_score":0.1779340777}
|
| 2 |
+
{"Model Name":"gemma-3-4b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4300000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.016856047,"translation-fa2en_fa2en_tep_bleu":0.0085125001,"translation-fa2en_fa2en_mizan_bleu":0.013661635,"translation-fa2en_fa2en_quran_bleu":0.0181666202,"translation-fa2en_fa2en_epoque_bleu":0.0301282339,"translation-fa2en_fa2en_nahj_bleu":0.0122360126,"translation-fa2en_fa2en_sahife_bleu":0.0110323989,"nlg_score":0.0949943578}
|
| 3 |
+
{"Model Name":"c4ai-command-r-plus","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"104000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2337569687,"translation-fa2en_fa2en_tep_bleu":0.1386371644,"translation-fa2en_fa2en_mizan_bleu":0.2129637469,"translation-fa2en_fa2en_quran_bleu":0.1702102457,"translation-fa2en_fa2en_epoque_bleu":0.478211182,"translation-fa2en_fa2en_nahj_bleu":0.083013513,"translation-fa2en_fa2en_sahife_bleu":0.072000292,"nlg_score":0.1880477876}
|
| 4 |
+
{"Model Name":"gemma-3n-E4B-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7850000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0196181945,"translation-fa2en_fa2en_tep_bleu":0.0101636027,"translation-fa2en_fa2en_mizan_bleu":0.0153753718,"translation-fa2en_fa2en_quran_bleu":0.0231110679,"translation-fa2en_fa2en_epoque_bleu":0.0359429205,"translation-fa2en_fa2en_nahj_bleu":0.0119451943,"translation-fa2en_fa2en_sahife_bleu":0.0117936527,"nlg_score":0.0940241349}
|
| 5 |
+
{"Model Name":"gpt-4.1","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2307102128,"translation-fa2en_fa2en_tep_bleu":0.1527807458,"translation-fa2en_fa2en_mizan_bleu":0.1927067243,"translation-fa2en_fa2en_quran_bleu":0.1628198329,"translation-fa2en_fa2en_epoque_bleu":0.4676472481,"translation-fa2en_fa2en_nahj_bleu":0.0810494281,"translation-fa2en_fa2en_sahife_bleu":0.1009417344,"nlg_score":0.194675133}
|
| 6 |
+
{"Model Name":"o4-mini","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":null,"translation-fa2en_fa2en_tep_bleu":null,"translation-fa2en_fa2en_mizan_bleu":null,"translation-fa2en_fa2en_quran_bleu":null,"translation-fa2en_fa2en_epoque_bleu":null,"translation-fa2en_fa2en_nahj_bleu":null,"translation-fa2en_fa2en_sahife_bleu":null,"nlg_score":null}
|
| 7 |
+
{"Model Name":"gemma-3-12b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"12200000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0257184881,"translation-fa2en_fa2en_tep_bleu":0.011593122,"translation-fa2en_fa2en_mizan_bleu":0.0215328963,"translation-fa2en_fa2en_quran_bleu":0.0262056878,"translation-fa2en_fa2en_epoque_bleu":0.047221295,"translation-fa2en_fa2en_nahj_bleu":0.0178557856,"translation-fa2en_fa2en_sahife_bleu":0.0169922826,"nlg_score":0.1196804312}
|
| 8 |
+
{"Model Name":"gemma-3-27b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"27400000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0199585579,"translation-fa2en_fa2en_tep_bleu":0.0097804397,"translation-fa2en_fa2en_mizan_bleu":0.0144809896,"translation-fa2en_fa2en_quran_bleu":0.0259691427,"translation-fa2en_fa2en_epoque_bleu":0.0345304173,"translation-fa2en_fa2en_nahj_bleu":0.0150589625,"translation-fa2en_fa2en_sahife_bleu":0.0157047184,"nlg_score":0.1067134448}
|
| 9 |
+
{"Model Name":"Qwen3-14B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"14800000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2145488085,"translation-fa2en_fa2en_tep_bleu":0.1307272464,"translation-fa2en_fa2en_mizan_bleu":0.1697754862,"translation-fa2en_fa2en_quran_bleu":0.1552415558,"translation-fa2en_fa2en_epoque_bleu":0.4513682579,"translation-fa2en_fa2en_nahj_bleu":0.0842673472,"translation-fa2en_fa2en_sahife_bleu":0.0853787118,"nlg_score":0.16056333}
|
| 10 |
+
{"Model Name":"Qwen3-32B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32800000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.217991447,"translation-fa2en_fa2en_tep_bleu":0.1271542443,"translation-fa2en_fa2en_mizan_bleu":0.1728081337,"translation-fa2en_fa2en_quran_bleu":0.158860515,"translation-fa2en_fa2en_epoque_bleu":0.4572670962,"translation-fa2en_fa2en_nahj_bleu":0.0902445729,"translation-fa2en_fa2en_sahife_bleu":0.0945000287,"nlg_score":0.1679338638}
|
| 11 |
+
{"Model Name":"claude-3-5-haiku-20241022","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0691353117,"translation-fa2en_fa2en_tep_bleu":0.0320908261,"translation-fa2en_fa2en_mizan_bleu":0.0535229905,"translation-fa2en_fa2en_quran_bleu":0.0800143919,"translation-fa2en_fa2en_epoque_bleu":0.133977443,"translation-fa2en_fa2en_nahj_bleu":0.0362958954,"translation-fa2en_fa2en_sahife_bleu":0.0393317574,"nlg_score":0.1089333827}
|
| 12 |
+
{"Model Name":"Mistral-Small-3.1-24B-Instruct-2503","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"24000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1451163884,"translation-fa2en_fa2en_tep_bleu":0.0393307601,"translation-fa2en_fa2en_mizan_bleu":0.1009347025,"translation-fa2en_fa2en_quran_bleu":0.0929688918,"translation-fa2en_fa2en_epoque_bleu":0.3660914464,"translation-fa2en_fa2en_nahj_bleu":0.0536507876,"translation-fa2en_fa2en_sahife_bleu":0.05038339,"nlg_score":0.1319091735}
|
| 13 |
+
{"Model Name":"DeepSeek-R1-0528-Qwen3-8B","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1249013271,"translation-fa2en_fa2en_tep_bleu":0.0564543942,"translation-fa2en_fa2en_mizan_bleu":0.0739643668,"translation-fa2en_fa2en_quran_bleu":0.0677317381,"translation-fa2en_fa2en_epoque_bleu":0.3111968032,"translation-fa2en_fa2en_nahj_bleu":0.0523541092,"translation-fa2en_fa2en_sahife_bleu":0.0479821907,"nlg_score":0.112015688}
|
| 14 |
+
{"Model Name":"deepseek-chat","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0450244679,"translation-fa2en_fa2en_tep_bleu":0.0166138322,"translation-fa2en_fa2en_mizan_bleu":0.0478141187,"translation-fa2en_fa2en_quran_bleu":0.0426202225,"translation-fa2en_fa2en_epoque_bleu":0.0802277942,"translation-fa2en_fa2en_nahj_bleu":0.0252662094,"translation-fa2en_fa2en_sahife_bleu":0.0268950031,"nlg_score":0.0934094344}
|
| 15 |
+
{"Model Name":"Qwen3-4B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"4020000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1840809218,"translation-fa2en_fa2en_tep_bleu":0.1011436783,"translation-fa2en_fa2en_mizan_bleu":0.149157222,"translation-fa2en_fa2en_quran_bleu":0.1377761662,"translation-fa2en_fa2en_epoque_bleu":0.3802946233,"translation-fa2en_fa2en_nahj_bleu":0.0851756367,"translation-fa2en_fa2en_sahife_bleu":0.0857201524,"nlg_score":0.1389297212}
|
| 16 |
+
{"Model Name":"gemma-3-1b-it","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0146059874,"translation-fa2en_fa2en_tep_bleu":0.0065306354,"translation-fa2en_fa2en_mizan_bleu":0.0119363121,"translation-fa2en_fa2en_quran_bleu":0.0152281808,"translation-fa2en_fa2en_epoque_bleu":0.0274143056,"translation-fa2en_fa2en_nahj_bleu":0.0094070307,"translation-fa2en_fa2en_sahife_bleu":0.0093811964,"nlg_score":0.0682994522}
|
| 17 |
+
{"Model Name":"aya-expanse-32b","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"32300000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.14443262,"translation-fa2en_fa2en_tep_bleu":0.0636878051,"translation-fa2en_fa2en_mizan_bleu":0.1045784226,"translation-fa2en_fa2en_quran_bleu":0.1065169191,"translation-fa2en_fa2en_epoque_bleu":0.3331896819,"translation-fa2en_fa2en_nahj_bleu":0.0573420672,"translation-fa2en_fa2en_sahife_bleu":0.0526154809,"nlg_score":0.1196400535}
|
| 18 |
+
{"Model Name":"Llama-3.3-70B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"70600000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2559078555,"translation-fa2en_fa2en_tep_bleu":0.1687480056,"translation-fa2en_fa2en_mizan_bleu":0.2113676707,"translation-fa2en_fa2en_quran_bleu":0.2008290856,"translation-fa2en_fa2en_epoque_bleu":0.5099219192,"translation-fa2en_fa2en_nahj_bleu":0.0984185664,"translation-fa2en_fa2en_sahife_bleu":0.1125739279,"nlg_score":0.2010896964}
|
| 19 |
+
{"Model Name":"gpt-4.1-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2389011537,"translation-fa2en_fa2en_tep_bleu":0.1431825698,"translation-fa2en_fa2en_mizan_bleu":0.2056729072,"translation-fa2en_fa2en_quran_bleu":0.1776018574,"translation-fa2en_fa2en_epoque_bleu":0.4842161688,"translation-fa2en_fa2en_nahj_bleu":0.0886384727,"translation-fa2en_fa2en_sahife_bleu":0.1045044839,"nlg_score":0.1901206806}
|
| 20 |
+
{"Model Name":"o3","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2370270006,"translation-fa2en_fa2en_tep_bleu":0.1858400323,"translation-fa2en_fa2en_mizan_bleu":0.2008132758,"translation-fa2en_fa2en_quran_bleu":0.1727292787,"translation-fa2en_fa2en_epoque_bleu":0.4507197199,"translation-fa2en_fa2en_nahj_bleu":0.0893284136,"translation-fa2en_fa2en_sahife_bleu":0.1038607373,"nlg_score":0.1764906292}
|
| 21 |
+
{"Model Name":"gpt-4o-mini","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2332592983,"translation-fa2en_fa2en_tep_bleu":0.1497847918,"translation-fa2en_fa2en_mizan_bleu":0.1972270386,"translation-fa2en_fa2en_quran_bleu":0.1725699648,"translation-fa2en_fa2en_epoque_bleu":0.4678973942,"translation-fa2en_fa2en_nahj_bleu":0.090543674,"translation-fa2en_fa2en_sahife_bleu":0.1008380909,"nlg_score":0.1810678527}
|
| 22 |
+
{"Model Name":"c4ai-command-a-03-2025","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"111000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0486479291,"translation-fa2en_fa2en_tep_bleu":0.023223206,"translation-fa2en_fa2en_mizan_bleu":0.0397123038,"translation-fa2en_fa2en_quran_bleu":0.0471874873,"translation-fa2en_fa2en_epoque_bleu":0.0972392875,"translation-fa2en_fa2en_nahj_bleu":0.0246695639,"translation-fa2en_fa2en_sahife_bleu":0.0238899949,"nlg_score":0.1137933652}
|
| 23 |
+
{"Model Name":"gemini-2.0-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0757086487,"translation-fa2en_fa2en_tep_bleu":0.0316922994,"translation-fa2en_fa2en_mizan_bleu":0.0530331645,"translation-fa2en_fa2en_quran_bleu":0.1028139165,"translation-fa2en_fa2en_epoque_bleu":0.157367237,"translation-fa2en_fa2en_nahj_bleu":0.0336372263,"translation-fa2en_fa2en_sahife_bleu":0.0279485156,"nlg_score":0.178231145}
|
| 24 |
+
{"Model Name":"gemini-2.5-flash","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0559484689,"translation-fa2en_fa2en_tep_bleu":0.0190401646,"translation-fa2en_fa2en_mizan_bleu":0.0369126121,"translation-fa2en_fa2en_quran_bleu":0.0401048971,"translation-fa2en_fa2en_epoque_bleu":0.1381975553,"translation-fa2en_fa2en_nahj_bleu":0.0232788817,"translation-fa2en_fa2en_sahife_bleu":0.017477039,"nlg_score":0.1368740087}
|
| 25 |
+
{"Model Name":"gemini-2.0-flash-lite","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0858473556,"translation-fa2en_fa2en_tep_bleu":0.0381837678,"translation-fa2en_fa2en_mizan_bleu":0.0750320212,"translation-fa2en_fa2en_quran_bleu":0.0986486354,"translation-fa2en_fa2en_epoque_bleu":0.1513689047,"translation-fa2en_fa2en_nahj_bleu":0.0568182224,"translation-fa2en_fa2en_sahife_bleu":0.0570620784,"nlg_score":0.1659339021}
|
| 26 |
+
{"Model Name":"c4ai-command-r-v01","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"35000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.1892370035,"translation-fa2en_fa2en_tep_bleu":0.1290684643,"translation-fa2en_fa2en_mizan_bleu":0.1721408901,"translation-fa2en_fa2en_quran_bleu":0.1736791408,"translation-fa2en_fa2en_epoque_bleu":0.346100597,"translation-fa2en_fa2en_nahj_bleu":0.0776400174,"translation-fa2en_fa2en_sahife_bleu":0.08279759,"nlg_score":0.1641995602}
|
| 27 |
+
{"Model Name":"gpt-4.1-nano","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.2165819036,"translation-fa2en_fa2en_tep_bleu":0.13491043,"translation-fa2en_fa2en_mizan_bleu":0.1810957829,"translation-fa2en_fa2en_quran_bleu":0.164168601,"translation-fa2en_fa2en_epoque_bleu":0.4383628208,"translation-fa2en_fa2en_nahj_bleu":0.0942939662,"translation-fa2en_fa2en_sahife_bleu":0.0827637394,"nlg_score":0.1665903777}
|
| 28 |
+
{"Model Name":"Qwen3-8B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"8190000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2024225184,"translation-fa2en_fa2en_tep_bleu":0.1163127945,"translation-fa2en_fa2en_mizan_bleu":0.1649009947,"translation-fa2en_fa2en_quran_bleu":0.1513328968,"translation-fa2en_fa2en_epoque_bleu":0.4171232399,"translation-fa2en_fa2en_nahj_bleu":0.0857999462,"translation-fa2en_fa2en_sahife_bleu":0.0929479364,"nlg_score":0.1557270864}
|
| 29 |
+
{"Model Name":"Mistral-7B-Instruct-v0.3","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"7250000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0901939948,"translation-fa2en_fa2en_tep_bleu":0.0521908916,"translation-fa2en_fa2en_mizan_bleu":0.0828690879,"translation-fa2en_fa2en_quran_bleu":0.0756298248,"translation-fa2en_fa2en_epoque_bleu":0.1645619674,"translation-fa2en_fa2en_nahj_bleu":0.048616237,"translation-fa2en_fa2en_sahife_bleu":0.0518842318,"nlg_score":0.0944140383}
|
| 30 |
+
{"Model Name":"gpt-4o","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.234039473,"translation-fa2en_fa2en_tep_bleu":0.1597644653,"translation-fa2en_fa2en_mizan_bleu":0.1946759365,"translation-fa2en_fa2en_quran_bleu":0.1638938233,"translation-fa2en_fa2en_epoque_bleu":0.474760879,"translation-fa2en_fa2en_nahj_bleu":0.0825458621,"translation-fa2en_fa2en_sahife_bleu":0.0952634494,"nlg_score":0.18964968}
|
| 31 |
+
{"Model Name":"deepseek-reasoner","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"671000000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0414094379,"translation-fa2en_fa2en_tep_bleu":0.019539618,"translation-fa2en_fa2en_mizan_bleu":0.0346087447,"translation-fa2en_fa2en_quran_bleu":0.0396858881,"translation-fa2en_fa2en_epoque_bleu":0.0798341141,"translation-fa2en_fa2en_nahj_bleu":0.0244191809,"translation-fa2en_fa2en_sahife_bleu":0.0231626908,"nlg_score":0.0880621978}
|
| 32 |
+
{"Model Name":"Qwen3-30B-A3B","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"30500000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.2177785793,"translation-fa2en_fa2en_tep_bleu":0.1189948472,"translation-fa2en_fa2en_mizan_bleu":0.1793626928,"translation-fa2en_fa2en_quran_bleu":0.1718006478,"translation-fa2en_fa2en_epoque_bleu":0.4500382308,"translation-fa2en_fa2en_nahj_bleu":0.0836776138,"translation-fa2en_fa2en_sahife_bleu":0.1034067477,"nlg_score":0.164118288}
|
| 33 |
+
{"Model Name":"Llama-3.2-3B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"3210000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0862123314,"translation-fa2en_fa2en_tep_bleu":0.0334491641,"translation-fa2en_fa2en_mizan_bleu":0.0758837027,"translation-fa2en_fa2en_quran_bleu":0.0892296624,"translation-fa2en_fa2en_epoque_bleu":0.1688644918,"translation-fa2en_fa2en_nahj_bleu":0.042819328,"translation-fa2en_fa2en_sahife_bleu":0.0473482715,"nlg_score":0.1129755187}
|
| 34 |
+
{"Model Name":"Llama-3.2-1B-Instruct","thinking_method":"❌","model_url":"https:\/\/google.com","parameters_count":"1240000000","source_type":"Open-Source","translation-fa2en_fa2en_bleu":0.0423299736,"translation-fa2en_fa2en_tep_bleu":0.0124774953,"translation-fa2en_fa2en_mizan_bleu":0.0314077643,"translation-fa2en_fa2en_quran_bleu":0.0294898862,"translation-fa2en_fa2en_epoque_bleu":0.1006673489,"translation-fa2en_fa2en_nahj_bleu":0.0117672852,"translation-fa2en_fa2en_sahife_bleu":0.0246608556,"nlg_score":0.0823387318}
|
| 35 |
+
{"Model Name":"gemini-2.5-pro","thinking_method":"✔️","model_url":"https:\/\/google.com","parameters_count":"None","source_type":"Closed-Source","translation-fa2en_fa2en_bleu":0.0349431803,"translation-fa2en_fa2en_tep_bleu":0.017140489,"translation-fa2en_fa2en_mizan_bleu":0.0284546797,"translation-fa2en_fa2en_quran_bleu":0.0300397279,"translation-fa2en_fa2en_epoque_bleu":0.0720425155,"translation-fa2en_fa2en_nahj_bleu":0.0166649152,"translation-fa2en_fa2en_sahife_bleu":0.0158679919,"nlg_score":0.1151518212}
|
leaderboard/leaderboard.py
CHANGED
|
@@ -83,6 +83,7 @@ class LeaderboardApp:
|
|
| 83 |
self._load_global_settings()
|
| 84 |
self._load_model_display_configs()
|
| 85 |
|
|
|
|
| 86 |
def _load_global_settings(self) -> None:
|
| 87 |
if self.config_path and self.config_path.exists():
|
| 88 |
try:
|
|
@@ -593,6 +594,24 @@ class LeaderboardApp:
|
|
| 593 |
if columns_to_drop_existing:
|
| 594 |
processed_df = processed_df.drop(columns=columns_to_drop_existing, errors='ignore')
|
| 595 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 596 |
if "Rank" in processed_df.columns:
|
| 597 |
# Define the function to replace ranks with medal emojis
|
| 598 |
def format_rank_with_medals(rank_value):
|
|
@@ -612,10 +631,7 @@ class LeaderboardApp:
|
|
| 612 |
|
| 613 |
# Apply the new formatting function to the "Rank" column
|
| 614 |
processed_df["Rank"] = processed_df["Rank"].apply(format_rank_with_medals)
|
| 615 |
-
|
| 616 |
-
cols_order = ["Rank"] + [col for col in processed_df.columns if col != "Rank"]
|
| 617 |
-
processed_df = processed_df[cols_order]
|
| 618 |
-
|
| 619 |
processed_df = processed_df.fillna("")
|
| 620 |
return processed_df
|
| 621 |
|
|
|
|
| 83 |
self._load_global_settings()
|
| 84 |
self._load_model_display_configs()
|
| 85 |
|
| 86 |
+
|
| 87 |
def _load_global_settings(self) -> None:
|
| 88 |
if self.config_path and self.config_path.exists():
|
| 89 |
try:
|
|
|
|
| 594 |
if columns_to_drop_existing:
|
| 595 |
processed_df = processed_df.drop(columns=columns_to_drop_existing, errors='ignore')
|
| 596 |
|
| 597 |
+
# Reorder columns to ensure consistent layout
|
| 598 |
+
cols_order = []
|
| 599 |
+
if "Rank" in processed_df.columns:
|
| 600 |
+
cols_order.append("Rank")
|
| 601 |
+
|
| 602 |
+
model_col = self.model_identifier_column
|
| 603 |
+
if model_col in processed_df.columns:
|
| 604 |
+
cols_order.append(model_col)
|
| 605 |
+
|
| 606 |
+
thinking_col = 'thinking_method'
|
| 607 |
+
if thinking_col in processed_df.columns:
|
| 608 |
+
cols_order.append(thinking_col)
|
| 609 |
+
|
| 610 |
+
remaining_cols = [col for col in processed_df.columns if col not in cols_order]
|
| 611 |
+
cols_order.extend(remaining_cols)
|
| 612 |
+
processed_df = processed_df[cols_order]
|
| 613 |
+
|
| 614 |
+
|
| 615 |
if "Rank" in processed_df.columns:
|
| 616 |
# Define the function to replace ranks with medal emojis
|
| 617 |
def format_rank_with_medals(rank_value):
|
|
|
|
| 631 |
|
| 632 |
# Apply the new formatting function to the "Rank" column
|
| 633 |
processed_df["Rank"] = processed_df["Rank"].apply(format_rank_with_medals)
|
| 634 |
+
|
|
|
|
|
|
|
|
|
|
| 635 |
processed_df = processed_df.fillna("")
|
| 636 |
return processed_df
|
| 637 |
|
leaderboard/leaderboard_config.yaml
CHANGED
|
@@ -6,6 +6,7 @@
|
|
| 6 |
column_names:
|
| 7 |
# Columns added by the updated refresh.py
|
| 8 |
"Model Name": "Model" # This 'Model Name' is the canonical ID from refresh.py. Its display will be further customized by 'model_display_configs'.
|
|
|
|
| 9 |
"model_url": "Link"
|
| 10 |
"parameters_count": "Parameters"
|
| 11 |
"source_type": "Source Type"
|
|
@@ -68,142 +69,177 @@ model_display_configs:
|
|
| 68 |
"claude-3-7-sonnet-20250219":
|
| 69 |
display_name: "Claude 3.7 Sonnet"
|
| 70 |
url: "https://www.anthropic.com/news/claude-3-7-sonnet"
|
|
|
|
| 71 |
|
| 72 |
"gpt-4.1":
|
| 73 |
display_name: "GPT-4.1"
|
| 74 |
url: "https://openai.com/index/gpt-4-1/"
|
|
|
|
| 75 |
|
| 76 |
"gpt-4o":
|
| 77 |
display_name: "GPT-4o"
|
| 78 |
url: "https://openai.com/index/hello-gpt-4o/"
|
|
|
|
| 79 |
|
| 80 |
"gpt-4.1-mini":
|
| 81 |
display_name: "GPT-4.1 Mini"
|
| 82 |
url: "https://openai.com/index/gpt-4-1/"
|
|
|
|
| 83 |
|
| 84 |
"deepseek-chat":
|
| 85 |
display_name: "DeepSeek-V3"
|
| 86 |
url: "https://api-docs.deepseek.com/"
|
|
|
|
| 87 |
|
| 88 |
"gemma-3-27b-it":
|
| 89 |
display_name: "Gemma 3 27B IT"
|
| 90 |
url: "https://huggingface.co/google/gemma-3-27b-it"
|
|
|
|
| 91 |
|
| 92 |
"gpt-4o-mini":
|
| 93 |
display_name: "GPT-4o Mini"
|
| 94 |
url: "https://openai.com/index/hello-gpt-4o/"
|
|
|
|
| 95 |
|
| 96 |
"Qwen3-32B":
|
| 97 |
display_name: "Qwen3-32B"
|
| 98 |
url: "https://huggingface.co/Qwen/Qwen3-32B"
|
|
|
|
| 99 |
|
| 100 |
"Llama-3.3-70B-Instruct":
|
| 101 |
display_name: "Llama 3.3 70B Instruct"
|
| 102 |
url: "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct"
|
|
|
|
| 103 |
|
| 104 |
"gemma-3-12b-it":
|
| 105 |
display_name: "Gemma 3 12B IT"
|
| 106 |
url: "https://huggingface.co/google/gemma-3-12b-it"
|
|
|
|
| 107 |
|
| 108 |
"Qwen3-14B":
|
| 109 |
display_name: "Qwen3-14B"
|
| 110 |
url: "https://huggingface.co/Qwen/Qwen3-14B"
|
|
|
|
| 111 |
|
| 112 |
"Mistral-Small-3.1-24B-Instruct-2503":
|
| 113 |
display_name: "Mistral Small 3.1 24B Instruct"
|
| 114 |
url: "https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503"
|
|
|
|
| 115 |
|
| 116 |
"claude-3-5-haiku-20241022":
|
| 117 |
display_name: "Claude 3.5 Haiku"
|
| 118 |
url: "https://www.anthropic.com/claude/haiku"
|
|
|
|
| 119 |
|
| 120 |
"gpt-4.1-nano":
|
| 121 |
display_name: "GPT-4.1 Nano"
|
| 122 |
url: "https://openai.com/index/gpt-4-1/"
|
|
|
|
| 123 |
|
| 124 |
"Qwen3-8B":
|
| 125 |
display_name: "Qwen3-8B"
|
| 126 |
url: "https://huggingface.co/Qwen/Qwen3-8B"
|
|
|
|
| 127 |
|
| 128 |
"gemma-3-4b-it":
|
| 129 |
display_name: "Gemma 3 4B IT"
|
| 130 |
url: "https://huggingface.co/google/gemma-3-4b-it"
|
|
|
|
| 131 |
|
| 132 |
"aya-expanse-32b":
|
| 133 |
display_name: "Aya Expanse 32B"
|
| 134 |
url: "https://huggingface.co/CohereLabs/aya-expanse-32b"
|
|
|
|
| 135 |
|
| 136 |
"Qwen3-4B":
|
| 137 |
display_name: "Qwen3-4B"
|
| 138 |
url: "https://huggingface.co/Qwen/Qwen3-4B"
|
|
|
|
| 139 |
|
| 140 |
"gemma-3-1b-it":
|
| 141 |
display_name: "Gemma 3 1B IT"
|
| 142 |
url: "https://huggingface.co/google/gemma-3-1b-it"
|
|
|
|
| 143 |
|
| 144 |
"Mistral-7B-Instruct-v0.3":
|
| 145 |
display_name: "Mistral 7B Instruct v0.3"
|
| 146 |
url: "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3"
|
|
|
|
| 147 |
|
| 148 |
"Llama-3.2-3B-Instruct":
|
| 149 |
display_name: "Llama 3.2 3B Instruct"
|
| 150 |
url: "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct"
|
|
|
|
| 151 |
|
| 152 |
"Llama-3.2-1B-Instruct":
|
| 153 |
display_name: "Llama 3.2 1B Instruct"
|
| 154 |
url: "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct"
|
|
|
|
| 155 |
|
| 156 |
"o4-mini":
|
| 157 |
display_name: "OpenAI o4-mini"
|
| 158 |
url: "https://openai.com/index/introducing-o3-and-o4-mini/"
|
|
|
|
| 159 |
|
| 160 |
"deepseek-reasoner":
|
| 161 |
display_name: "DeepSeek-R1"
|
| 162 |
url: "https://api-docs.deepseek.com/guides/reasoning_model"
|
|
|
|
| 163 |
|
| 164 |
"gemini-2.0-flash":
|
| 165 |
display_name: "Gemini 2.0 Flash"
|
| 166 |
url: "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-0-flash"
|
|
|
|
| 167 |
|
| 168 |
"gemini-2.5-flash":
|
| 169 |
display_name: "Gemini 2.5 Flash"
|
| 170 |
url: "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash"
|
|
|
|
| 171 |
|
| 172 |
"Qwen3-30B-A3B":
|
| 173 |
display_name: "Qwen3-30B-A3B"
|
| 174 |
url: "https://huggingface.co/Qwen/Qwen3-30B-A3B"
|
|
|
|
| 175 |
|
| 176 |
"c4ai-command-r-plus":
|
| 177 |
display_name: "Command R Plus"
|
| 178 |
url: "https://huggingface.co/CohereLabs/c4ai-command-r-plus"
|
|
|
|
| 179 |
|
| 180 |
"c4ai-command-r-v01":
|
| 181 |
display_name: "Command R v01"
|
| 182 |
url: "https://huggingface.co/CohereLabs/c4ai-command-r-v01"
|
|
|
|
| 183 |
|
| 184 |
"c4ai-command-a-03-2025":
|
| 185 |
display_name: "Command A"
|
| 186 |
url: "https://huggingface.co/CohereLabs/c4ai-command-a-03-2025"
|
|
|
|
| 187 |
|
| 188 |
"gemini-2.0-flash-lite":
|
| 189 |
display_name: "Gemini 2.0 Flash-Lite"
|
| 190 |
url: "https://deepmind.google/models/gemini/flash-lite/"
|
|
|
|
| 191 |
|
| 192 |
"o3":
|
| 193 |
display_name: "OpenAI o3"
|
| 194 |
url: "https://openai.com/index/introducing-o3-and-o4-mini/"
|
|
|
|
| 195 |
|
| 196 |
"gemma-3n-E4B-it":
|
| 197 |
display_name: "Gemma 3n E4B IT"
|
| 198 |
url: "https://huggingface.co/google/gemma-3n-E4B-it"
|
|
|
|
| 199 |
|
| 200 |
"gemini-2.5-pro":
|
| 201 |
display_name: "Gemini 2.5 Pro"
|
| 202 |
url: "https://deepmind.google/models/gemini/pro/"
|
|
|
|
| 203 |
|
| 204 |
"DeepSeek-R1-0528-Qwen3-8B":
|
| 205 |
display_name: "DeepSeek-R1 Qwen3-8B"
|
| 206 |
url: "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"
|
|
|
|
| 207 |
|
| 208 |
# Add one entry for each model whose display name or URL you want to customize.
|
| 209 |
# If a model ID from your data is not listed here, its raw ID will be used as its name.
|
|
|
|
| 6 |
column_names:
|
| 7 |
# Columns added by the updated refresh.py
|
| 8 |
"Model Name": "Model" # This 'Model Name' is the canonical ID from refresh.py. Its display will be further customized by 'model_display_configs'.
|
| 9 |
+
"thinking_method": "🧠 Thinking"
|
| 10 |
"model_url": "Link"
|
| 11 |
"parameters_count": "Parameters"
|
| 12 |
"source_type": "Source Type"
|
|
|
|
| 69 |
"claude-3-7-sonnet-20250219":
|
| 70 |
display_name: "Claude 3.7 Sonnet"
|
| 71 |
url: "https://www.anthropic.com/news/claude-3-7-sonnet"
|
| 72 |
+
thinking: "❌" # Sure
|
| 73 |
|
| 74 |
"gpt-4.1":
|
| 75 |
display_name: "GPT-4.1"
|
| 76 |
url: "https://openai.com/index/gpt-4-1/"
|
| 77 |
+
thinking: "❌" # Sure
|
| 78 |
|
| 79 |
"gpt-4o":
|
| 80 |
display_name: "GPT-4o"
|
| 81 |
url: "https://openai.com/index/hello-gpt-4o/"
|
| 82 |
+
thinking: "❌" # Sure
|
| 83 |
|
| 84 |
"gpt-4.1-mini":
|
| 85 |
display_name: "GPT-4.1 Mini"
|
| 86 |
url: "https://openai.com/index/gpt-4-1/"
|
| 87 |
+
thinking: "❌" # Sure
|
| 88 |
|
| 89 |
"deepseek-chat":
|
| 90 |
display_name: "DeepSeek-V3"
|
| 91 |
url: "https://api-docs.deepseek.com/"
|
| 92 |
+
thinking: "❌" # Sure
|
| 93 |
|
| 94 |
"gemma-3-27b-it":
|
| 95 |
display_name: "Gemma 3 27B IT"
|
| 96 |
url: "https://huggingface.co/google/gemma-3-27b-it"
|
| 97 |
+
thinking: "❌" # Sure
|
| 98 |
|
| 99 |
"gpt-4o-mini":
|
| 100 |
display_name: "GPT-4o Mini"
|
| 101 |
url: "https://openai.com/index/hello-gpt-4o/"
|
| 102 |
+
thinking: "❌" # Sure
|
| 103 |
|
| 104 |
"Qwen3-32B":
|
| 105 |
display_name: "Qwen3-32B"
|
| 106 |
url: "https://huggingface.co/Qwen/Qwen3-32B"
|
| 107 |
+
thinking: "❌" # Sure
|
| 108 |
|
| 109 |
"Llama-3.3-70B-Instruct":
|
| 110 |
display_name: "Llama 3.3 70B Instruct"
|
| 111 |
url: "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct"
|
| 112 |
+
thinking: "❌" # Sure
|
| 113 |
|
| 114 |
"gemma-3-12b-it":
|
| 115 |
display_name: "Gemma 3 12B IT"
|
| 116 |
url: "https://huggingface.co/google/gemma-3-12b-it"
|
| 117 |
+
thinking: "❌" # Sure
|
| 118 |
|
| 119 |
"Qwen3-14B":
|
| 120 |
display_name: "Qwen3-14B"
|
| 121 |
url: "https://huggingface.co/Qwen/Qwen3-14B"
|
| 122 |
+
thinking: "❌" # Sure
|
| 123 |
|
| 124 |
"Mistral-Small-3.1-24B-Instruct-2503":
|
| 125 |
display_name: "Mistral Small 3.1 24B Instruct"
|
| 126 |
url: "https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503"
|
| 127 |
+
thinking: "❌" # Sure
|
| 128 |
|
| 129 |
"claude-3-5-haiku-20241022":
|
| 130 |
display_name: "Claude 3.5 Haiku"
|
| 131 |
url: "https://www.anthropic.com/claude/haiku"
|
| 132 |
+
thinking: "❌" # Sure
|
| 133 |
|
| 134 |
"gpt-4.1-nano":
|
| 135 |
display_name: "GPT-4.1 Nano"
|
| 136 |
url: "https://openai.com/index/gpt-4-1/"
|
| 137 |
+
thinking: "❌" # Sure
|
| 138 |
|
| 139 |
"Qwen3-8B":
|
| 140 |
display_name: "Qwen3-8B"
|
| 141 |
url: "https://huggingface.co/Qwen/Qwen3-8B"
|
| 142 |
+
thinking: "❌" # Sure
|
| 143 |
|
| 144 |
"gemma-3-4b-it":
|
| 145 |
display_name: "Gemma 3 4B IT"
|
| 146 |
url: "https://huggingface.co/google/gemma-3-4b-it"
|
| 147 |
+
thinking: "❌" # Sure
|
| 148 |
|
| 149 |
"aya-expanse-32b":
|
| 150 |
display_name: "Aya Expanse 32B"
|
| 151 |
url: "https://huggingface.co/CohereLabs/aya-expanse-32b"
|
| 152 |
+
thinking: "❌" # Sure
|
| 153 |
|
| 154 |
"Qwen3-4B":
|
| 155 |
display_name: "Qwen3-4B"
|
| 156 |
url: "https://huggingface.co/Qwen/Qwen3-4B"
|
| 157 |
+
thinking: "❌" # Sure
|
| 158 |
|
| 159 |
"gemma-3-1b-it":
|
| 160 |
display_name: "Gemma 3 1B IT"
|
| 161 |
url: "https://huggingface.co/google/gemma-3-1b-it"
|
| 162 |
+
thinking: "❌" # Sure
|
| 163 |
|
| 164 |
"Mistral-7B-Instruct-v0.3":
|
| 165 |
display_name: "Mistral 7B Instruct v0.3"
|
| 166 |
url: "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3"
|
| 167 |
+
thinking: "❌" # Sure
|
| 168 |
|
| 169 |
"Llama-3.2-3B-Instruct":
|
| 170 |
display_name: "Llama 3.2 3B Instruct"
|
| 171 |
url: "https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct"
|
| 172 |
+
thinking: "❌" # Sure
|
| 173 |
|
| 174 |
"Llama-3.2-1B-Instruct":
|
| 175 |
display_name: "Llama 3.2 1B Instruct"
|
| 176 |
url: "https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct"
|
| 177 |
+
thinking: "❌" # Sure
|
| 178 |
|
| 179 |
"o4-mini":
|
| 180 |
display_name: "OpenAI o4-mini"
|
| 181 |
url: "https://openai.com/index/introducing-o3-and-o4-mini/"
|
| 182 |
+
thinking: "✔️" # Sure
|
| 183 |
|
| 184 |
"deepseek-reasoner":
|
| 185 |
display_name: "DeepSeek-R1"
|
| 186 |
url: "https://api-docs.deepseek.com/guides/reasoning_model"
|
| 187 |
+
thinking: "✔️" # Sure
|
| 188 |
|
| 189 |
"gemini-2.0-flash":
|
| 190 |
display_name: "Gemini 2.0 Flash"
|
| 191 |
url: "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-0-flash"
|
| 192 |
+
thinking: "✔️" # Sure
|
| 193 |
|
| 194 |
"gemini-2.5-flash":
|
| 195 |
display_name: "Gemini 2.5 Flash"
|
| 196 |
url: "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash"
|
| 197 |
+
thinking: "✔️" # Sure
|
| 198 |
|
| 199 |
"Qwen3-30B-A3B":
|
| 200 |
display_name: "Qwen3-30B-A3B"
|
| 201 |
url: "https://huggingface.co/Qwen/Qwen3-30B-A3B"
|
| 202 |
+
thinking: "❌" # Sure
|
| 203 |
|
| 204 |
"c4ai-command-r-plus":
|
| 205 |
display_name: "Command R Plus"
|
| 206 |
url: "https://huggingface.co/CohereLabs/c4ai-command-r-plus"
|
| 207 |
+
thinking: "❌" # Sure
|
| 208 |
|
| 209 |
"c4ai-command-r-v01":
|
| 210 |
display_name: "Command R v01"
|
| 211 |
url: "https://huggingface.co/CohereLabs/c4ai-command-r-v01"
|
| 212 |
+
thinking: "❌" # Sure
|
| 213 |
|
| 214 |
"c4ai-command-a-03-2025":
|
| 215 |
display_name: "Command A"
|
| 216 |
url: "https://huggingface.co/CohereLabs/c4ai-command-a-03-2025"
|
| 217 |
+
thinking: "❌"
|
| 218 |
|
| 219 |
"gemini-2.0-flash-lite":
|
| 220 |
display_name: "Gemini 2.0 Flash-Lite"
|
| 221 |
url: "https://deepmind.google/models/gemini/flash-lite/"
|
| 222 |
+
thinking: "❌" # Sure
|
| 223 |
|
| 224 |
"o3":
|
| 225 |
display_name: "OpenAI o3"
|
| 226 |
url: "https://openai.com/index/introducing-o3-and-o4-mini/"
|
| 227 |
+
thinking: "✔️" # Sure
|
| 228 |
|
| 229 |
"gemma-3n-E4B-it":
|
| 230 |
display_name: "Gemma 3n E4B IT"
|
| 231 |
url: "https://huggingface.co/google/gemma-3n-E4B-it"
|
| 232 |
+
thinking: "❌" # Sure
|
| 233 |
|
| 234 |
"gemini-2.5-pro":
|
| 235 |
display_name: "Gemini 2.5 Pro"
|
| 236 |
url: "https://deepmind.google/models/gemini/pro/"
|
| 237 |
+
thinking: "✔️" # Sure
|
| 238 |
|
| 239 |
"DeepSeek-R1-0528-Qwen3-8B":
|
| 240 |
display_name: "DeepSeek-R1 Qwen3-8B"
|
| 241 |
url: "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"
|
| 242 |
+
thinking: "✔️" # Sure
|
| 243 |
|
| 244 |
# Add one entry for each model whose display name or URL you want to customize.
|
| 245 |
# If a model ID from your data is not listed here, its raw ID will be used as its name.
|
leaderboard/refresh.py
CHANGED
|
@@ -29,7 +29,7 @@ TEMPLATE_FOLDER = SCRIPT_DIR / "template_jsons"
|
|
| 29 |
NLU_NLG_TASK_KEYS = ["persian_nlu", "persian_nlg"]
|
| 30 |
|
| 31 |
ALL_LEADERBOARD_COLUMNS = [
|
| 32 |
-
'Model Name', 'model_url', 'parameters_count', 'source_type', 'Average',
|
| 33 |
'Persian IFEval', 'Persian MT-Bench', "PerMMLU",
|
| 34 |
"PerCoR", "Persian NLU", "Persian NLG"
|
| 35 |
]
|
|
@@ -74,6 +74,14 @@ class ModelEvaluationProcessor:
|
|
| 74 |
if not self.tasks_config:
|
| 75 |
logger.error("Tasks config is empty. Processing might be affected.")
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
self.main_scores_map = {
|
| 78 |
"ifeval": "strict_instruction_accuracy",
|
| 79 |
"mt_bench": "score_mean",
|
|
@@ -102,9 +110,6 @@ class ModelEvaluationProcessor:
|
|
| 102 |
merged[k] = self._deep_override(v_base, override[k])
|
| 103 |
else:
|
| 104 |
merged[k] = v_base
|
| 105 |
-
# for k, v_override in override.items():
|
| 106 |
-
# if k not in merged:
|
| 107 |
-
# merged[k] = v_override
|
| 108 |
return merged
|
| 109 |
elif override is not None and override != -1:
|
| 110 |
return override
|
|
@@ -188,7 +193,7 @@ class ModelEvaluationProcessor:
|
|
| 188 |
if subtask_output_path.exists():
|
| 189 |
existing_df = pd.read_json(subtask_output_path, lines=True)
|
| 190 |
if not existing_df.empty and 'Model Name' in existing_df.columns:
|
| 191 |
-
|
| 192 |
|
| 193 |
current_entries.append(row_data)
|
| 194 |
updated_df = pd.DataFrame(current_entries)
|
|
@@ -198,8 +203,12 @@ class ModelEvaluationProcessor:
|
|
| 198 |
logger.error(f"Error updating subtask file {subtask_output_path} for parent {parent_task_key_for_log}: {e}")
|
| 199 |
def process_nlu_nlg_subtasks(self, model_details: Dict[str, Any], model_folder_name: str, canonical_model_name: str) -> None:
|
| 200 |
|
|
|
|
|
|
|
|
|
|
| 201 |
common_subtask_model_info = {
|
| 202 |
"Model Name": canonical_model_name,
|
|
|
|
| 203 |
"model_url": model_details.get('model_url', model_details.get('link', model_details.get('homepage', 'https://google.com'))),
|
| 204 |
"parameters_count": str(model_details.get('n_parameters', "N/A")),
|
| 205 |
"source_type": "Closed-Source" # Default, will be refined
|
|
@@ -255,8 +264,8 @@ class ModelEvaluationProcessor:
|
|
| 255 |
model_details = json.load(f)
|
| 256 |
|
| 257 |
canonical_model_name = model_details.get('name_for_leaderboard',
|
| 258 |
-
|
| 259 |
-
|
| 260 |
model_url = model_details.get('model_url', model_details.get('link', model_details.get('homepage', 'https_google.com')))
|
| 261 |
if not model_url: model_url = 'https_google.com'
|
| 262 |
|
|
@@ -269,9 +278,12 @@ class ModelEvaluationProcessor:
|
|
| 269 |
if isinstance(parameters_count_raw, (int, float)) and parameters_count_raw > 0:
|
| 270 |
is_open_source_candidate = True
|
| 271 |
elif isinstance(parameters_count_raw, str) and \
|
| 272 |
-
|
| 273 |
is_open_source_candidate = True
|
| 274 |
source_type = "Open-Source" if is_open_source_candidate else "Closed-Source"
|
|
|
|
|
|
|
|
|
|
| 275 |
|
| 276 |
except Exception as e:
|
| 277 |
logger.error(f"Error loading/parsing model info from {model_info_file}: {e}. Skipping '{model_folder_name}'.")
|
|
@@ -281,6 +293,7 @@ class ModelEvaluationProcessor:
|
|
| 281 |
|
| 282 |
current_model_scores_for_summary: Dict[str, Any] = {
|
| 283 |
"Model Name": canonical_model_name,
|
|
|
|
| 284 |
"model_url": model_url,
|
| 285 |
"parameters_count": parameters_count_display,
|
| 286 |
"source_type": source_type
|
|
@@ -291,6 +304,7 @@ class ModelEvaluationProcessor:
|
|
| 291 |
main_score_metric_name = self.main_scores_map.get(task_key)
|
| 292 |
task_data_entry_for_specific_jsonl: Dict[str, Any] = {
|
| 293 |
"Model Name": canonical_model_name,
|
|
|
|
| 294 |
"model_url": model_url,
|
| 295 |
"parameters_count": parameters_count_display,
|
| 296 |
"source_type": source_type
|
|
@@ -303,11 +317,11 @@ class ModelEvaluationProcessor:
|
|
| 303 |
if main_score_metric_name and main_score_metric_name in task_specific_results:
|
| 304 |
score_value = task_specific_results[main_score_metric_name]
|
| 305 |
if task_key == "mt_bench" and score_value is not None:
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
current_model_scores_for_summary[task_display_name] = score_value
|
| 312 |
elif main_score_metric_name:
|
| 313 |
logger.warning(f"Main score metric '{main_score_metric_name}' for task '{task_key}' (Display: {task_display_name}) not found for model '{canonical_model_name}'. Will be NA.")
|
|
@@ -331,13 +345,12 @@ class ModelEvaluationProcessor:
|
|
| 331 |
if not df.empty and main_score_col and main_score_col in df.columns:
|
| 332 |
try:
|
| 333 |
df[main_score_col] = pd.to_numeric(df[main_score_col], errors='coerce')
|
| 334 |
-
# Sort by main score (NaNs will go last or first depending on na_position, default is last)
|
| 335 |
df = df.sort_values(by=main_score_col, ascending=False, na_position='last')
|
| 336 |
except Exception as e:
|
| 337 |
logger.warning(f"Could not sort dataframe for task {task_key} by score {main_score_col}: {e}")
|
| 338 |
final_dataframes[task_key] = df
|
| 339 |
if df.empty:
|
| 340 |
-
|
| 341 |
|
| 342 |
if all_models_summary_data:
|
| 343 |
all_df = pd.DataFrame(all_models_summary_data)
|
|
@@ -353,9 +366,7 @@ class ModelEvaluationProcessor:
|
|
| 353 |
logger.warning(f"Column '{task_display_name_for_avg}' for averaging in 'all' table is not numeric or all NaN. Excluding from average calculation and setting to NA.")
|
| 354 |
if score_cols_for_average:
|
| 355 |
try:
|
| 356 |
-
# Calculate mean; it will be NaN if any constituent score for a row is NaN.
|
| 357 |
all_df["Average"] = all_df[score_cols_for_average].mean(axis=1, skipna=False)
|
| 358 |
-
# Round only non-NaN averages
|
| 359 |
all_df.loc[all_df["Average"].notna(), "Average"] = all_df.loc[all_df["Average"].notna(), "Average"].round(4)
|
| 360 |
except Exception as e:
|
| 361 |
logger.error(f"Error calculating 'Average' for 'all' table: {e}. Average column might be NA or incorrect.")
|
|
@@ -364,9 +375,7 @@ class ModelEvaluationProcessor:
|
|
| 364 |
logger.warning("No valid numeric score columns found to calculate 'Average' for 'all' table.")
|
| 365 |
all_df["Average"] = pd.NA # Assign pd.NA if no columns to average
|
| 366 |
|
| 367 |
-
# Sort 'all' table by Average (NaNs will be placed last by default with ascending=False)
|
| 368 |
if "Average" in all_df.columns: # Check if 'Average' column exists
|
| 369 |
-
# NaNs are typically sorted to the end by default when ascending=False or na_position='last'
|
| 370 |
all_df = all_df.sort_values(by="Average", ascending=False, na_position='last')
|
| 371 |
|
| 372 |
|
|
|
|
| 29 |
NLU_NLG_TASK_KEYS = ["persian_nlu", "persian_nlg"]
|
| 30 |
|
| 31 |
ALL_LEADERBOARD_COLUMNS = [
|
| 32 |
+
'Model Name', 'thinking_method', 'model_url', 'parameters_count', 'source_type', 'Average',
|
| 33 |
'Persian IFEval', 'Persian MT-Bench', "PerMMLU",
|
| 34 |
"PerCoR", "Persian NLU", "Persian NLG"
|
| 35 |
]
|
|
|
|
| 74 |
if not self.tasks_config:
|
| 75 |
logger.error("Tasks config is empty. Processing might be affected.")
|
| 76 |
|
| 77 |
+
self.model_display_configs: Dict[str, Dict[str, Any]] = {}
|
| 78 |
+
try:
|
| 79 |
+
with open(CONFIG_FILE_PATH, 'r', encoding='utf-8') as f:
|
| 80 |
+
config_data = yaml.safe_load(f)
|
| 81 |
+
self.model_display_configs = config_data.get('model_display_configs', {})
|
| 82 |
+
except Exception as e:
|
| 83 |
+
logger.error(f"Error loading model_display_configs from {CONFIG_FILE_PATH}: {e}")
|
| 84 |
+
|
| 85 |
self.main_scores_map = {
|
| 86 |
"ifeval": "strict_instruction_accuracy",
|
| 87 |
"mt_bench": "score_mean",
|
|
|
|
| 110 |
merged[k] = self._deep_override(v_base, override[k])
|
| 111 |
else:
|
| 112 |
merged[k] = v_base
|
|
|
|
|
|
|
|
|
|
| 113 |
return merged
|
| 114 |
elif override is not None and override != -1:
|
| 115 |
return override
|
|
|
|
| 193 |
if subtask_output_path.exists():
|
| 194 |
existing_df = pd.read_json(subtask_output_path, lines=True)
|
| 195 |
if not existing_df.empty and 'Model Name' in existing_df.columns:
|
| 196 |
+
current_entries = existing_df[existing_df['Model Name'] != row_data['Model Name']].to_dict(orient='records')
|
| 197 |
|
| 198 |
current_entries.append(row_data)
|
| 199 |
updated_df = pd.DataFrame(current_entries)
|
|
|
|
| 203 |
logger.error(f"Error updating subtask file {subtask_output_path} for parent {parent_task_key_for_log}: {e}")
|
| 204 |
def process_nlu_nlg_subtasks(self, model_details: Dict[str, Any], model_folder_name: str, canonical_model_name: str) -> None:
|
| 205 |
|
| 206 |
+
model_config = self.model_display_configs.get(canonical_model_name, {})
|
| 207 |
+
thinking_method = model_config.get('thinking', 'N/A')
|
| 208 |
+
|
| 209 |
common_subtask_model_info = {
|
| 210 |
"Model Name": canonical_model_name,
|
| 211 |
+
"thinking_method": thinking_method,
|
| 212 |
"model_url": model_details.get('model_url', model_details.get('link', model_details.get('homepage', 'https://google.com'))),
|
| 213 |
"parameters_count": str(model_details.get('n_parameters', "N/A")),
|
| 214 |
"source_type": "Closed-Source" # Default, will be refined
|
|
|
|
| 264 |
model_details = json.load(f)
|
| 265 |
|
| 266 |
canonical_model_name = model_details.get('name_for_leaderboard',
|
| 267 |
+
model_details.get('model_hf_id',
|
| 268 |
+
model_details.get('name', model_folder_name)))
|
| 269 |
model_url = model_details.get('model_url', model_details.get('link', model_details.get('homepage', 'https_google.com')))
|
| 270 |
if not model_url: model_url = 'https_google.com'
|
| 271 |
|
|
|
|
| 278 |
if isinstance(parameters_count_raw, (int, float)) and parameters_count_raw > 0:
|
| 279 |
is_open_source_candidate = True
|
| 280 |
elif isinstance(parameters_count_raw, str) and \
|
| 281 |
+
str(parameters_count_raw).strip().lower() not in ["", "n/a", "unknown", "private", "confidential", "tbd", "null", "closed"]:
|
| 282 |
is_open_source_candidate = True
|
| 283 |
source_type = "Open-Source" if is_open_source_candidate else "Closed-Source"
|
| 284 |
+
|
| 285 |
+
model_config = self.model_display_configs.get(canonical_model_name, {})
|
| 286 |
+
thinking_method = model_config.get('thinking', 'N/A')
|
| 287 |
|
| 288 |
except Exception as e:
|
| 289 |
logger.error(f"Error loading/parsing model info from {model_info_file}: {e}. Skipping '{model_folder_name}'.")
|
|
|
|
| 293 |
|
| 294 |
current_model_scores_for_summary: Dict[str, Any] = {
|
| 295 |
"Model Name": canonical_model_name,
|
| 296 |
+
"thinking_method": thinking_method,
|
| 297 |
"model_url": model_url,
|
| 298 |
"parameters_count": parameters_count_display,
|
| 299 |
"source_type": source_type
|
|
|
|
| 304 |
main_score_metric_name = self.main_scores_map.get(task_key)
|
| 305 |
task_data_entry_for_specific_jsonl: Dict[str, Any] = {
|
| 306 |
"Model Name": canonical_model_name,
|
| 307 |
+
"thinking_method": thinking_method,
|
| 308 |
"model_url": model_url,
|
| 309 |
"parameters_count": parameters_count_display,
|
| 310 |
"source_type": source_type
|
|
|
|
| 317 |
if main_score_metric_name and main_score_metric_name in task_specific_results:
|
| 318 |
score_value = task_specific_results[main_score_metric_name]
|
| 319 |
if task_key == "mt_bench" and score_value is not None:
|
| 320 |
+
try:
|
| 321 |
+
score_value = float(score_value) / 10.0
|
| 322 |
+
except (ValueError, TypeError):
|
| 323 |
+
logger.warning(f"Could not convert mt_bench score '{score_value}' to float for division for model {canonical_model_name}")
|
| 324 |
+
score_value = pd.NA
|
| 325 |
current_model_scores_for_summary[task_display_name] = score_value
|
| 326 |
elif main_score_metric_name:
|
| 327 |
logger.warning(f"Main score metric '{main_score_metric_name}' for task '{task_key}' (Display: {task_display_name}) not found for model '{canonical_model_name}'. Will be NA.")
|
|
|
|
| 345 |
if not df.empty and main_score_col and main_score_col in df.columns:
|
| 346 |
try:
|
| 347 |
df[main_score_col] = pd.to_numeric(df[main_score_col], errors='coerce')
|
|
|
|
| 348 |
df = df.sort_values(by=main_score_col, ascending=False, na_position='last')
|
| 349 |
except Exception as e:
|
| 350 |
logger.warning(f"Could not sort dataframe for task {task_key} by score {main_score_col}: {e}")
|
| 351 |
final_dataframes[task_key] = df
|
| 352 |
if df.empty:
|
| 353 |
+
logger.warning(f"No data processed for task '{task_key}'. Resulting DataFrame is empty.")
|
| 354 |
|
| 355 |
if all_models_summary_data:
|
| 356 |
all_df = pd.DataFrame(all_models_summary_data)
|
|
|
|
| 366 |
logger.warning(f"Column '{task_display_name_for_avg}' for averaging in 'all' table is not numeric or all NaN. Excluding from average calculation and setting to NA.")
|
| 367 |
if score_cols_for_average:
|
| 368 |
try:
|
|
|
|
| 369 |
all_df["Average"] = all_df[score_cols_for_average].mean(axis=1, skipna=False)
|
|
|
|
| 370 |
all_df.loc[all_df["Average"].notna(), "Average"] = all_df.loc[all_df["Average"].notna(), "Average"].round(4)
|
| 371 |
except Exception as e:
|
| 372 |
logger.error(f"Error calculating 'Average' for 'all' table: {e}. Average column might be NA or incorrect.")
|
|
|
|
| 375 |
logger.warning("No valid numeric score columns found to calculate 'Average' for 'all' table.")
|
| 376 |
all_df["Average"] = pd.NA # Assign pd.NA if no columns to average
|
| 377 |
|
|
|
|
| 378 |
if "Average" in all_df.columns: # Check if 'Average' column exists
|
|
|
|
| 379 |
all_df = all_df.sort_values(by="Average", ascending=False, na_position='last')
|
| 380 |
|
| 381 |
|