Spaces:

sudipta26889
/

gradio-doc

Sleeping

App Files Community

sudipta26889 committed on 14 days ago

Commit

f9d9584

1 Parent(s): 635f991

Improve fallback logic with persistent model selection and reset button

Browse files

Files changed (1) hide show

app.py +31 -13

app.py CHANGED Viewed

@@ -168,33 +168,44 @@ async def call_inference_api(messages: List[Dict[str, Any]], model_id: str) -> s
     except requests.exceptions.RequestException as e:
         raise Exception(f"Request failed: {str(e)}")
 async def call_model_with_fallback(messages: List[Dict[str, Any]]) -> Tuple[str, str]:
     """Call model with automatic fallback to smaller models."""
     global _current_model
-    # Try primary model first
     try:
-        print(f"🔄 Trying model: {_current_model}")
         result = await call_inference_api(messages, _current_model)
         return result, _current_model
     except Exception as e:
         error_msg = str(e)
         print(f"❌ {_current_model} failed: {error_msg}")
-        # If primary model fails, try fallback models
-        for fallback_model in FALLBACK_MODELS:
             try:
-                print(f"🔄 Trying fallback model: {fallback_model}")
-                result = await call_inference_api(messages, fallback_model)
-                _current_model = fallback_model  # Update current model
-                print(f"✅ Using fallback model: {fallback_model}")
-                return result, fallback_model
-            except Exception as fallback_error:
-                print(f"❌ {fallback_model} failed: {str(fallback_error)}")
                 continue
-        # If all models fail, raise the original error
-        raise e
 async def ensure_mcp_init(model_id: str, provider: str, api_key: Optional[str]):
     """Initialize MCP server connection."""
@@ -459,6 +470,7 @@ with gr.Blocks(fill_height=True) as demo:
                     f"**Provider:** `{PROVIDER}`  \n"
                     "_(Auto-fallback to smaller models if primary is paused)_"
                 )
             with gr.Accordion("🛠 Tool Activity (live)", open=True):
                 tool_log_md = gr.Markdown("_No tool activity yet._")
@@ -481,9 +493,15 @@ with gr.Blocks(fill_height=True) as demo:
                 history_msgs[-1]["content"] += delta
             yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))
     # Wire up event handlers
     msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
     send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
 # ----------------------------
 # Launch App

     except requests.exceptions.RequestException as e:
         raise Exception(f"Request failed: {str(e)}")
+def reset_to_primary_model():
+    """Reset to use the primary model on next request."""
+    global _current_model
+    _current_model = PRIMARY_MODEL
+    print(f"🔄 Reset to primary model: {PRIMARY_MODEL}")
 async def call_model_with_fallback(messages: List[Dict[str, Any]]) -> Tuple[str, str]:
     """Call model with automatic fallback to smaller models."""
     global _current_model
+    # Try current model first (could be primary or a previously successful fallback)
     try:
+        print(f"🔄 Trying current model: {_current_model}")
         result = await call_inference_api(messages, _current_model)
         return result, _current_model
     except Exception as e:
         error_msg = str(e)
         print(f"❌ {_current_model} failed: {error_msg}")
+        # If current model fails, try all models in order (primary + fallbacks)
+        all_models = [PRIMARY_MODEL] + FALLBACK_MODELS
+        for model in all_models:
+            if model == _current_model:  # Skip the one we just tried
+                continue
             try:
+                print(f"🔄 Trying model: {model}")
+                result = await call_inference_api(messages, model)
+                _current_model = model  # Update current model
+                print(f"✅ Successfully using model: {model}")
+                return result, model
+            except Exception as model_error:
+                print(f"❌ {model} failed: {str(model_error)}")
                 continue
+        # If all models fail, provide a helpful error message
+        raise Exception(f"All models failed. Primary model ({PRIMARY_MODEL}) and fallback models are unavailable. Please try again later.")
 async def ensure_mcp_init(model_id: str, provider: str, api_key: Optional[str]):
     """Initialize MCP server connection."""
                     f"**Provider:** `{PROVIDER}`  \n"
                     "_(Auto-fallback to smaller models if primary is paused)_"
                 )
+                reset_model_btn = gr.Button("🔄 Reset to Primary Model", variant="secondary", size="sm")
             with gr.Accordion("🛠 Tool Activity (live)", open=True):
                 tool_log_md = gr.Markdown("_No tool activity yet._")
                 history_msgs[-1]["content"] += delta
             yield history_msgs, gr.update(value=chunk.get("tool_log", "")), gr.update(value=chunk.get("citations", ""))
+    def on_reset_model():
+        """Reset to primary model and update UI."""
+        reset_to_primary_model()
+        return gr.update(value=f"**Primary Model:** `{PRIMARY_MODEL}`  \n**Current Model:** `{_current_model}`  \n**Provider:** `{PROVIDER}`  \n_(Auto-fallback to smaller models if primary is paused)_")
     # Wire up event handlers
     msg.submit(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
     send_btn.click(on_submit, inputs=[msg, chat, style], outputs=[chat, tool_log_md, citations_md], queue=True)
+    reset_model_btn.click(on_reset_model, outputs=[model_info])
 # ----------------------------
 # Launch App