Sean Cho committed

Commit 8a7d698 • 1 parent: 7e4da4b

Remove model size restriction

Files changed (2):
  1. app.py +2 -16
  2. src/assets/text_content.py +0 -1
app.py CHANGED

@@ -120,16 +120,6 @@ leaderboard_df = original_df.copy()
 ) = get_evaluation_queue_df(eval_queue, eval_queue_private, EVAL_REQUESTS_PATH, EVAL_COLS)
 
 ## INTERACTION FUNCTIONS
-def is_model_acceptable(model: str) -> bool:
-    # regex for something like '13b'
-    pattern = r'(\d+)[bB]'
-    values = re.findall(pattern, model)
-    for val in values:
-        if int(val) <= 13:
-            return True
-
-    return values == []
-
 def add_new_eval(
     model: str,
     base_model: str,
@@ -139,9 +129,6 @@ def add_new_eval(
     weight_type: str,
     model_type: str,
 ):
-    if not is_model_acceptable(model):
-        return styled_error("Please submit a model with less than 13B parameters.")
-
     precision = precision.split(" ")[0]
     current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
 
@@ -261,8 +248,8 @@ NUMERIC_INTERVALS = {
     "~3B": pd.Interval(1.5, 5, closed="right"),
     "~7B": pd.Interval(6, 11, closed="right"),
     "~13B": pd.Interval(12, 15, closed="right"),
-    # "~35B": pd.Interval(16, 55, closed="right"),
-    # "60B+": pd.Interval(55, 10000, closed="right"),
+    "~35B": pd.Interval(16, 55, closed="right"),
+    "60B+": pd.Interval(55, 10000, closed="right"),
 }
 
 def filter_models(
@@ -571,7 +558,6 @@ with demo:
     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
 
     submit_button = gr.Button("Submit Evalulation!")
-    gr.Markdown("_We accept models with less than or equal to 13B parameters now._")
     submission_result = gr.Markdown()
     submit_button.click(
         add_new_eval,
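For context, the deleted gate inferred a parameter count purely from the model name. A minimal, self-contained sketch reproducing the removed is_model_acceptable check, run on a few hypothetical model names:

import re

def is_model_acceptable(model: str) -> bool:
    # Collect size tags such as '13b' or '70B' embedded in the name.
    pattern = r'(\d+)[bB]'
    values = re.findall(pattern, model)
    for val in values:
        # Accept as soon as any tag is 13B or smaller.
        if int(val) <= 13:
            return True
    # Names carrying no size tag at all were also accepted.
    return values == []

print(is_model_acceptable("some-model-13b"))  # True:  13 <= 13
print(is_model_acceptable("some-model-70b"))  # False: 70 > 13
print(is_model_acceptable("my-finetune"))     # True:  no size tag

Note the check was name-based only: an upload whose name carried no size tag passed regardless of its true size.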
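The two re-enabled entries extend the leaderboard's size filter to large models. A minimal sketch of how pd.Interval membership maps a parameter count (in billions) to a bucket label; the bucket_for helper is hypothetical, for illustration only, and not the leaderboard's actual filter_models implementation:

import pandas as pd

NUMERIC_INTERVALS = {
    "~3B": pd.Interval(1.5, 5, closed="right"),
    "~7B": pd.Interval(6, 11, closed="right"),
    "~13B": pd.Interval(12, 15, closed="right"),
    "~35B": pd.Interval(16, 55, closed="right"),    # re-enabled by this commit
    "60B+": pd.Interval(55, 10000, closed="right"),  # re-enabled by this commit
}

def bucket_for(params_billions: float) -> str:
    # pd.Interval supports `in`, testing (left, right] membership for closed="right".
    for label, interval in NUMERIC_INTERVALS.items():
        if params_billions in interval:
            return label
    return "unknown"

print(bucket_for(70))  # '60B+' -- only reachable now that the entry is uncommented
print(bucket_for(13))  # '~13B'

Note the intervals are non-contiguous (e.g. a 5.5B model falls in the gap between "~3B" and "~7B"), so a fallback label is needed for sizes outside every bucket.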
src/assets/text_content.py CHANGED

@@ -7,7 +7,6 @@ INTRODUCTION_TEXT = f"""
 🚀 The Open Ko-LLM Leaderboard 🇰🇷 objectively evaluates the performance of Korean Large Language Model (LLM).
 
 When you submit a model on the "Submit here!" page, it is automatically evaluated. The GPU used for evaluation is operated with the support of KT.
-We accept models with less than or equal to 13B parameters now.
 The data used for evaluation consists of datasets to assess reasoning, language understanding, hallucination, and commonsense.
 The evaluation dataset is exclusively private and only available for evaluation process.
 More detailed information about the benchmark dataset is provided on the “About” page.