k-mktr committed on
Commit 335d363 · verified · 1 Parent(s): 6eaadf7

Update app.py

Files changed (1)
  1. app.py +49 -59
app.py CHANGED
@@ -20,7 +20,6 @@ import openai
 import threading
 import time
 from collections import Counter
-from model_suggestions import add_suggestion, get_suggestions_html
 from release_notes import get_release_notes_html
 
 
@@ -85,7 +84,7 @@ def call_ollama_api(model, prompt):
     )
 
     try:
-        logger.info("Starting API call")
+        logger.info(f"Starting API call for model: {model}")
         response = client.chat.completions.create(
             model=model,
             messages=[
@@ -100,10 +99,10 @@ def call_ollama_api(model, prompt):
             ],
             timeout=180
         )
-        logger.info("Received response")
+        logger.info(f"Received response for model: {model}")
 
         if not response or not response.choices:
-            logger.error("Empty response received")
+            logger.error(f"Empty response received for model: {model}")
             return [
                 {"role": "user", "content": prompt},
                 {"role": "assistant", "content": "Error: Empty response from the model"}
@@ -111,7 +110,7 @@ def call_ollama_api(model, prompt):
 
         content = response.choices[0].message.content
         if not content:
-            logger.error("Empty content received")
+            logger.error(f"Empty content received for model: {model}")
             return [
                 {"role": "user", "content": prompt},
                 {"role": "assistant", "content": "Error: Empty content from the model"}
@@ -124,30 +123,37 @@ def call_ollama_api(model, prompt):
             thinking_content = thinking_match.group(1).strip()
             main_content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()
 
-            logger.info("Found thinking content, creating structured response")
+            logger.info(f"Found thinking content for model: {model}")
             return [
                 {"role": "user", "content": prompt},
                 {"role": "assistant", "content": f"{main_content}\n\n<details><summary>🤔 View thinking process</summary>\n\n{thinking_content}\n\n</details>"}
             ]
 
         # If no thinking tags, return normal content
-        logger.info("No thinking tags found, returning normal content")
+        logger.info(f"No thinking tags found for model: {model}")
        return [
             {"role": "user", "content": prompt},
             {"role": "assistant", "content": content.strip()}
         ]
 
     except requests.exceptions.Timeout:
-        logger.error("Timeout error after 180 seconds")
+        logger.error(f"Timeout error after 180 seconds for model: {model}")
         return [
             {"role": "user", "content": prompt},
             {"role": "assistant", "content": "Error: Model response timed out after 180 seconds"}
         ]
+    except openai.BadRequestError as e:
+        error_msg = str(e)
+        logger.error(f"Bad request error for model: {model}. Error: {error_msg}")
+        return [
+            {"role": "user", "content": prompt},
+            {"role": "assistant", "content": "Error: Unable to get response from the model"}
+        ]
     except Exception as e:
-        logger.error(f"Error calling Ollama API: {str(e)}", exc_info=True)
+        logger.error(f"Error calling Ollama API for model: {model}. Error: {str(e)}", exc_info=True)
         return [
             {"role": "user", "content": prompt},
-            {"role": "assistant", "content": f"Error: Unable to get response from the model. Error: {str(e)}"}
+            {"role": "assistant", "content": "Error: Unable to get response from the model"}
         ]
 
 # Generate responses using two randomly selected models
@@ -278,12 +284,11 @@ def record_vote(prompt, left_response, right_response, left_model, right_model,
     return (
         gr.update(value=result_message, visible=True),  # Show result as Markdown
         get_leaderboard(),  # Update leaderboard
-        get_elo_leaderboard(),  # Add this line
+        get_elo_leaderboard(),  # Update ELO leaderboard
         gr.update(interactive=False),  # Disable left vote button
         gr.update(interactive=False),  # Disable right vote button
         gr.update(interactive=False),  # Disable tie button
-        gr.update(visible=True),  # Show model names
-        get_leaderboard_chart()  # Update leaderboard chart
+        gr.update(visible=True)  # Show model names
     )
 
 def get_leaderboard_chart():
@@ -426,7 +431,20 @@ with gr.Blocks(css="""
 
     # Leaderboard Tab (now first)
     with gr.Tab("Leaderboard"):
-        leaderboard = gr.HTML(label="Leaderboard")
+        gr.Markdown("""
+        ### Main Leaderboard
+        This leaderboard uses a scoring system that balances win rate and total battles. The score is calculated using the formula:
+        **Score = Win Rate * (1 - 1 / (Total Battles + 1))**
+
+        This formula rewards models with higher win rates and more battles. As the number of battles increases, the score approaches the win rate.
+        """)
+        leaderboard = gr.Dataframe(
+            headers=["Model", "Score", "Wins", "Losses", "Total Battles", "Win Rate"],
+            row_count=10,
+            col_count=6,
+            interactive=False,
+            label="Leaderboard"
+        )
 
     # Battle Arena Tab (now second)
     with gr.Tab("Battle Arena"):
@@ -473,50 +491,23 @@ with gr.Blocks(css="""
 
         new_battle_btn = gr.Button("New Battle")
 
-    # Performance Chart Tab
-    with gr.Tab("Performance Chart"):
-        leaderboard_chart = gr.Plot(label="Model Performance Chart")
-
     # ELO Leaderboard Tab
     with gr.Tab("ELO Leaderboard"):
-        elo_leaderboard = gr.HTML(label="ELO Leaderboard")
-
-    # Add this new tab
-    with gr.Tab("Suggest Models"):
-        with gr.Row():
-            model_url_input = gr.Textbox(
-                label="Model URL",
-                placeholder="hf.co/username/model-name-GGUF:Q4_K_M",
-                scale=4
-            )
-            submit_suggestion_btn = gr.Button("Submit Suggestion", scale=1, variant="primary")
-
-        suggestion_status = gr.Markdown("Submit a model to see it listed below!")
-        suggestions_list = gr.HTML(get_suggestions_html())
-        refresh_suggestions_btn = gr.Button("Refresh List")
-
-        # Update button click handlers
-        submit_suggestion_btn.click(
-            add_suggestion,
-            inputs=[model_url_input],
-            outputs=[suggestion_status]
-        ).then(
-            lambda: (
-                get_suggestions_html(),  # Update suggestions list
-                ""  # Clear model URL input
-            ),
-            outputs=[
-                suggestions_list,
-                model_url_input
-            ]
-        )
-
-        refresh_suggestions_btn.click(
-            get_suggestions_html,
-            outputs=[suggestions_list]
+        gr.Markdown("""
+        ### ELO Rating System
+        This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
+        Initial ratings are based on model size, with larger models starting at higher ratings.
+        The ELO rating is calculated based on wins and losses, with adjustments made based on the relative strengths of opponents.
+        """)
+        elo_leaderboard = gr.Dataframe(
+            headers=["Model", "ELO Rating", "Wins", "Losses", "Total Battles", "Win Rate"],
+            row_count=10,
+            col_count=6,
+            interactive=False,
+            label="ELO Leaderboard"
         )
 
-    # Add this new tab
+    # Latest Updates Tab
     with gr.Tab("Latest Updates"):
         release_notes = gr.HTML(get_release_notes_html())
         refresh_notes_btn = gr.Button("Refresh Updates")
@@ -541,14 +532,14 @@ with gr.Blocks(css="""
         lambda *args: record_vote(*args, "Left is better"),
         inputs=[prompt_input, left_output, right_output, left_model, right_model],
         outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
-                 right_vote_btn, tie_btn, model_names_row, leaderboard_chart]
+                 right_vote_btn, tie_btn, model_names_row]
    )
 
     right_vote_btn.click(
         lambda *args: record_vote(*args, "Right is better"),
         inputs=[prompt_input, left_output, right_output, left_model, right_model],
         outputs=[result, leaderboard, elo_leaderboard, left_vote_btn,
-                 right_vote_btn, tie_btn, model_names_row, leaderboard_chart]
+                 right_vote_btn, tie_btn, model_names_row]
     )
 
     tie_btn.click(
@@ -561,13 +552,12 @@ with gr.Blocks(css="""
         new_battle,
         outputs=[prompt_input, left_output, right_output, left_model,
                  right_model, left_vote_btn, right_vote_btn, tie_btn,
-                 result, leaderboard, model_names_row, leaderboard_chart, tie_count]
+                 result, leaderboard, model_names_row, tie_count]
     )
 
-    # Update leaderboard and chart on launch
+    # Update leaderboard on launch
     demo.load(get_leaderboard, outputs=leaderboard)
     demo.load(get_elo_leaderboard, outputs=elo_leaderboard)
-    demo.load(get_leaderboard_chart, outputs=leaderboard_chart)
 
 if __name__ == "__main__":
  # Initialize ELO ratings before launching the app
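
A note on the `<think>` handling that several of the reworded log lines sit next to: the model's reasoning is stripped out of the reply and folded into a collapsible `<details>` block. Below is a standalone sketch of that split, assuming the same `<think>...</think>` convention; the search pattern itself sits above this hunk and is not shown in the diff, and the helper name is illustrative:

```python
import re

def split_thinking(content: str) -> tuple[str, str | None]:
    """Separate <think>...</think> reasoning from the visible answer."""
    match = re.search(r'<think>(.*?)</think>', content, flags=re.DOTALL)
    main = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()
    return main, (match.group(1).strip() if match else None)

answer, thinking = split_thinking("<think>2 + 2 is 4</think>The answer is 4.")
print(answer)    # The answer is 4.
print(thinking)  # 2 + 2 is 4
```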
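
The scoring formula quoted in the new Leaderboard tab, Score = Win Rate * (1 - 1 / (Total Battles + 1)), is easy to sanity-check in isolation. A minimal sketch follows; the helper name `compute_score` is illustrative rather than taken from app.py, and it assumes ties are excluded from the win rate:

```python
def compute_score(wins: int, losses: int) -> float:
    """Score = win_rate * (1 - 1 / (total_battles + 1)).

    High win rates are rewarded, but models with few battles are
    discounted; as the battle count grows, the score approaches
    the raw win rate.
    """
    total = wins + losses
    if total == 0:
        return 0.0
    win_rate = wins / total
    return win_rate * (1 - 1 / (total + 1))

# 8 wins / 2 losses outscores 1 win / 0 losses despite the lower win rate.
print(round(compute_score(8, 2), 3))  # 0.727
print(round(compute_score(1, 0), 3))  # 0.5
```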
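
The ELO Leaderboard tab describes a modified ELO system with size-based starting ratings, but the rating code itself is outside this diff. The sketch below shows only the standard ELO update such a system is typically built on; the function names, the K-factor of 32, and the example starting ratings are assumptions, not values from app.py:

```python
def expected_score(rating_a: float, rating_b: float) -> float:
    """Probability that A beats B under the standard ELO model."""
    return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))

def update_elo(rating_a: float, rating_b: float, a_wins: bool,
               k: float = 32.0) -> tuple[float, float]:
    """Shift both ratings by K times how surprising the result was."""
    exp_a = expected_score(rating_a, rating_b)
    score_a = 1.0 if a_wins else 0.0
    new_a = rating_a + k * (score_a - exp_a)
    new_b = rating_b + k * ((1.0 - score_a) - (1.0 - exp_a))
    return new_a, new_b

# A smaller model (lower assumed starting rating) upsets a larger one,
# so both ratings move sharply because the result was unexpected.
small, large = update_elo(1000.0, 1200.0, a_wins=True)
print(round(small), round(large))  # 1024 1176
```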