bluebench

Running

jbnayahu committed on Jun 11

Commit

ab870dd

unverified ·

1 Parent(s): d83a397

Updated list of bluebench tasks (with contributions by [email protected])

Files changed (1) hide show

src/about.py CHANGED Viewed

@@ -12,8 +12,22 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("anli_r1", "acc", "ANLI")
-    task1 = Task("logiqa", "acc_norm", "LogiQA")
 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------

 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+#    task0 = Task("anli_r1", "acc", "ANLI")
+#    task1 = Task("logiqa", "acc_norm", "LogiQA")
+    task0 = Task("bias", "score", "Bias")
+    task1 = Task("chatbot_abilities", "score", "Chatbot Abilities")
+    task2 = Task("entity_extraction", "score", "Entity Extraction")
+    task3 = Task("knowledge", "score", "Knowledge")
+    task4 = Task("legal", "score", "Legal")
+    task5 = Task("news_classification", "score", "News Classification")
+    task6 = Task("product_help", "score", "Product Help")
+    task7 = Task("qa_finance", "score", "QA Finance")
+    task8 = Task("rag_general", "score", "RAG General")
+    task9 = Task("reasoning", "score", "Reasoning")
+    task10 = Task("safety", "score", "Safety")
+    task11 = Task("summarization", "score", "Summarization")
+    task12 = Task("translation", "score", "Translation")
 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------