sudipta26889 commited on
Commit
f9d9584
·
1 Parent(s): 635f991

Improve fallback logic with persistent model selection and reset button

Browse files
Files changed (1) hide show
  1. app.py +31 -13
app.py CHANGED
@@ -168,33 +168,44 @@ async def call_inference_api(messages: List[Dict[str, Any]], model_id: str) -> s
168
  except requests.exceptions.RequestException as e:
169
  raise Exception(f"Request failed: {str(e)}")
170
 
 
 
 
 
 
 
171
  async def call_model_with_fallback(messages: List[Dict[str, Any]]) -> Tuple[str, str]:
172
  """Call model with automatic fallback to smaller models."""
173
  global _current_model
174
 
175
- # Try primary model first
176
  try:
177
- print(f"🔄 Trying model: {_current_model}")
178
  result = await call_inference_api(messages, _current_model)
179
  return result, _current_model
180
  except Exception as e:
181
  error_msg = str(e)
182
  print(f"❌ {_current_model} failed: {error_msg}")
183
 
184
- # If primary model fails, try fallback models
185
- for fallback_model in FALLBACK_MODELS:
 
 
 
 
 
186
  try:
187
- print(f"🔄 Trying fallback model: {fallback_model}")
188
- result = await call_inference_api(messages, fallback_model)
189
- _current_model = fallback_model # Update current model
190
- print(f"✅ Using fallback model: {fallback_model}")
191
- return result, fallback_model
192
- except Exception as fallback_error:
193
- print(f"❌ {fallback_model} failed: {str(fallback_error)}")
194
  continue
195
 
196
- # If all models fail, raise the original error
197
- raise e
198
 
199
  async def ensure_mcp_init(model_id: str, provider: str, api_key: Optional[str]):
200
  """Initialize MCP server connection."""
@@ -459,6 +470,7 @@ with gr.Blocks(fill_height=True) as demo:
459
  f"**Provider:** `{PROVIDER}` \n"
460
  "_(Auto-fallback to smaller models if primary is paused)_"
461
  )
 
462
 
463
  with gr.Accordion("🛠 Tool Activity (live)", open=True):
464
  tool_log_md = gr.Markdown("_No tool activity yet._")
@@ -481,9 +493,15 @@ with gr.Blocks(fill_height=True) as demo:
481
  history_msgs[-1]["content"] += delta
482
  yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))
483
 
 
 
 
 
 
484
  # Wire up event handlers
485
  msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
486
  send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
 
487
 
488
  # ----------------------------
489
  # Launch App
 
168
  except requests.exceptions.RequestException as e:
169
  raise Exception(f"Request failed: {str(e)}")
170
 
171
+ def reset_to_primary_model():
172
+ """Reset to use the primary model on next request."""
173
+ global _current_model
174
+ _current_model = PRIMARY_MODEL
175
+ print(f"🔄 Reset to primary model: {PRIMARY_MODEL}")
176
+
177
  async def call_model_with_fallback(messages: List[Dict[str, Any]]) -> Tuple[str, str]:
178
  """Call model with automatic fallback to smaller models."""
179
  global _current_model
180
 
181
+ # Try current model first (could be primary or a previously successful fallback)
182
  try:
183
+ print(f"🔄 Trying current model: {_current_model}")
184
  result = await call_inference_api(messages, _current_model)
185
  return result, _current_model
186
  except Exception as e:
187
  error_msg = str(e)
188
  print(f"❌ {_current_model} failed: {error_msg}")
189
 
190
+ # If current model fails, try all models in order (primary + fallbacks)
191
+ all_models = [PRIMARY_MODEL] + FALLBACK_MODELS
192
+
193
+ for model in all_models:
194
+ if model == _current_model: # Skip the one we just tried
195
+ continue
196
+
197
  try:
198
+ print(f"🔄 Trying model: {model}")
199
+ result = await call_inference_api(messages, model)
200
+ _current_model = model # Update current model
201
+ print(f"✅ Successfully using model: {model}")
202
+ return result, model
203
+ except Exception as model_error:
204
+ print(f"❌ {model} failed: {str(model_error)}")
205
  continue
206
 
207
+ # If all models fail, provide a helpful error message
208
+ raise Exception(f"All models failed. Primary model ({PRIMARY_MODEL}) and fallback models are unavailable. Please try again later.")
209
 
210
  async def ensure_mcp_init(model_id: str, provider: str, api_key: Optional[str]):
211
  """Initialize MCP server connection."""
 
470
  f"**Provider:** `{PROVIDER}` \n"
471
  "_(Auto-fallback to smaller models if primary is paused)_"
472
  )
473
+ reset_model_btn = gr.Button("🔄 Reset to Primary Model", variant="secondary", size="sm")
474
 
475
  with gr.Accordion("🛠 Tool Activity (live)", open=True):
476
  tool_log_md = gr.Markdown("_No tool activity yet._")
 
493
  history_msgs[-1]["content"] += delta
494
  yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))
495
 
496
+ def on_reset_model():
497
+ """Reset to primary model and update UI."""
498
+ reset_to_primary_model()
499
+ return gr.update(value=f"**Primary Model:** `{PRIMARY_MODEL}` \n**Current Model:** `{_current_model}` \n**Provider:** `{PROVIDER}` \n_(Auto-fallback to smaller models if primary is paused)_")
500
+
501
  # Wire up event handlers
502
  msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
503
  send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
504
+ reset_model_btn.click(on_reset_model, outputs=[model_info])
505
 
506
  # ----------------------------
507
  # Launch App