Spaces:
Sleeping
Sleeping
Commit
·
f9d9584
1
Parent(s):
635f991
Improve fallback logic with persistent model selection and reset button
Browse files
app.py
CHANGED
@@ -168,33 +168,44 @@ async def call_inference_api(messages: List[Dict[str, Any]], model_id: str) -> s
|
|
168 |
except requests.exceptions.RequestException as e:
|
169 |
raise Exception(f"Request failed: {str(e)}")
|
170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
async def call_model_with_fallback(messages: List[Dict[str, Any]]) -> Tuple[str, str]:
|
172 |
"""Call model with automatic fallback to smaller models."""
|
173 |
global _current_model
|
174 |
|
175 |
-
# Try
|
176 |
try:
|
177 |
-
print(f"π Trying model: {_current_model}")
|
178 |
result = await call_inference_api(messages, _current_model)
|
179 |
return result, _current_model
|
180 |
except Exception as e:
|
181 |
error_msg = str(e)
|
182 |
print(f"❌ {_current_model} failed: {error_msg}")
|
183 |
|
184 |
-
# If
|
185 |
-
|
|
|
|
|
|
|
|
|
|
|
186 |
try:
|
187 |
-
print(f"π Trying
|
188 |
-
result = await call_inference_api(messages,
|
189 |
-
_current_model =
|
190 |
-
print(f"β
|
191 |
-
return result,
|
192 |
-
except Exception as
|
193 |
-
print(f"β {
|
194 |
continue
|
195 |
|
196 |
-
# If all models fail,
|
197 |
-
raise
|
198 |
|
199 |
async def ensure_mcp_init(model_id: str, provider: str, api_key: Optional[str]):
|
200 |
"""Initialize MCP server connection."""
|
@@ -459,6 +470,7 @@ with gr.Blocks(fill_height=True) as demo:
|
|
459 |
f"**Provider:** `{PROVIDER}` \n"
|
460 |
"_(Auto-fallback to smaller models if primary is paused)_"
|
461 |
)
|
|
|
462 |
|
463 |
with gr.Accordion("π Tool Activity (live)", open=True):
|
464 |
tool_log_md = gr.Markdown("_No tool activity yet._")
|
@@ -481,9 +493,15 @@ with gr.Blocks(fill_height=True) as demo:
|
|
481 |
history_msgs[-1]["content"] += delta
|
482 |
yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))
|
483 |
|
|
|
|
|
|
|
|
|
|
|
484 |
# Wire up event handlers
|
485 |
msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
|
486 |
send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
|
|
|
487 |
|
488 |
# ----------------------------
|
489 |
# Launch App
|
|
|
168 |
except requests.exceptions.RequestException as e:
|
169 |
raise Exception(f"Request failed: {str(e)}")
|
170 |
|
171 |
+
def reset_to_primary_model():
|
172 |
+
"""Reset to use the primary model on next request."""
|
173 |
+
global _current_model
|
174 |
+
_current_model = PRIMARY_MODEL
|
175 |
+
print(f"π Reset to primary model: {PRIMARY_MODEL}")
|
176 |
+
|
177 |
async def call_model_with_fallback(messages: List[Dict[str, Any]]) -> Tuple[str, str]:
|
178 |
"""Call model with automatic fallback to smaller models."""
|
179 |
global _current_model
|
180 |
|
181 |
+
# Try current model first (could be primary or a previously successful fallback)
|
182 |
try:
|
183 |
+
print(f"π Trying current model: {_current_model}")
|
184 |
result = await call_inference_api(messages, _current_model)
|
185 |
return result, _current_model
|
186 |
except Exception as e:
|
187 |
error_msg = str(e)
|
188 |
print(f"❌ {_current_model} failed: {error_msg}")
|
189 |
|
190 |
+
# If current model fails, try all models in order (primary + fallbacks)
|
191 |
+
all_models = [PRIMARY_MODEL] + FALLBACK_MODELS
|
192 |
+
|
193 |
+
for model in all_models:
|
194 |
+
if model == _current_model: # Skip the one we just tried
|
195 |
+
continue
|
196 |
+
|
197 |
try:
|
198 |
+
print(f"π Trying model: {model}")
|
199 |
+
result = await call_inference_api(messages, model)
|
200 |
+
_current_model = model # Update current model
|
201 |
+
print(f"✅ Successfully using model: {model}")
|
202 |
+
return result, model
|
203 |
+
except Exception as model_error:
|
204 |
+
print(f"❌ {model} failed: {str(model_error)}")
|
205 |
continue
|
206 |
|
207 |
+
# If all models fail, provide a helpful error message
|
208 |
+
raise Exception(f"All models failed. Primary model ({PRIMARY_MODEL}) and fallback models are unavailable. Please try again later.")
|
209 |
|
210 |
async def ensure_mcp_init(model_id: str, provider: str, api_key: Optional[str]):
|
211 |
"""Initialize MCP server connection."""
|
|
|
470 |
f"**Provider:** `{PROVIDER}` \n"
|
471 |
"_(Auto-fallback to smaller models if primary is paused)_"
|
472 |
)
|
473 |
+
reset_model_btn = gr.Button("π Reset to Primary Model", variant="secondary", size="sm")
|
474 |
|
475 |
with gr.Accordion("π Tool Activity (live)", open=True):
|
476 |
tool_log_md = gr.Markdown("_No tool activity yet._")
|
|
|
493 |
history_msgs[-1]["content"] += delta
|
494 |
yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))
|
495 |
|
496 |
+
def on_reset_model():
|
497 |
+
"""Reset to primary model and update UI."""
|
498 |
+
reset_to_primary_model()
|
499 |
+
return gr.update(value=f"**Primary Model:** `{PRIMARY_MODEL}` \n**Current Model:** `{_current_model}` \n**Provider:** `{PROVIDER}` \n_(Auto-fallback to smaller models if primary is paused)_")
|
500 |
+
|
501 |
# Wire up event handlers
|
502 |
msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
|
503 |
send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
|
504 |
+
reset_model_btn.click(on_reset_model, outputs=[model_info])
|
505 |
|
506 |
# ----------------------------
|
507 |
# Launch App
|