Spaces:

SmokeyBandit
/

SletcherSystems

Sleeping

App Files Files Community

SmokeyBandit committed on Jan 7

Commit

6f01f1b

verified ·

1 Parent(s): c766e70

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -28

app.py CHANGED Viewed

@@ -2,9 +2,29 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 import json
 from typing import Dict, List, Any
 # Initialize the client
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def load_site_content() -> Dict[str, Any]:
     """Load the site content from JSON file."""
@@ -72,7 +92,7 @@ Company Information:
     return "\n".join(context_parts)
-def respond(
     message: str,
     history: List[tuple[str, str]],
     system_message: str,
@@ -80,8 +100,18 @@ def respond(
     temperature: float,
     top_p: float,
 ) -> str:
     # Load content
     content = load_site_content()
     # Get relevant context
     context = get_relevant_context(message, content)
@@ -99,33 +129,35 @@ STRICT INSTRUCTIONS:
 4. NEVER invent services or capabilities not listed
 5. Be accurate about our AI and educational technology focus
 6. Acknowledge our cryptocurrency acceptance when relevant
-7. Use exact statistics when they're provided in the context
-Remember: You are representing a proudly South African technology company specializing in AI, educational technology, and enterprise solutions."""
-    # Format conversation history
-    messages = [{"role": "system", "content": enhanced_system_message}]
-    for user_msg, assistant_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-    messages.append({"role": "user", "content": message})
-    # Stream the response
-    response = ""
-    for msg in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = msg.choices[0].delta.content
-        response += token
-        yield response
 # Create the Gradio interface
 demo = gr.ChatInterface(

 from huggingface_hub import InferenceClient
 import json
 from typing import Dict, List, Any
+import time
+# Initialize the client with retries
+# Default number of attempts create_client() makes before giving up.
+MAX_RETRIES = 3
+# Pause between failed attempts, in seconds (passed to time.sleep).
+RETRY_DELAY = 2
+def create_client(retries=MAX_RETRIES):
+    """Create inference client with retry logic.
+
+    Makes up to ``retries`` attempts to construct an ``InferenceClient`` for
+    the "HuggingFaceH4/zephyr-7b-beta" model with ``timeout=30``.
+
+    Args:
+        retries: Maximum number of construction attempts; defaults to
+            ``MAX_RETRIES``.
+
+    Returns:
+        The ``InferenceClient`` from the first attempt that does not raise,
+        or ``None`` when the final attempt fails (also ``None`` when
+        ``retries`` is 0, because the loop body never runs).
+    """
+    for attempt in range(retries):
+        try:
+            return InferenceClient(
+                "HuggingFaceH4/zephyr-7b-beta",
+                timeout=30
+            )
+        except Exception as e:
+            # Last attempt: report the failure and give up with None
+            # instead of raising to the caller.
+            if attempt == retries - 1:
+                print(f"Failed to create client after {retries} attempts: {e}")
+                return None
+            # Otherwise wait RETRY_DELAY seconds before the next attempt.
+            print(f"Attempt {attempt + 1} failed, retrying in {RETRY_DELAY} seconds...")
+            time.sleep(RETRY_DELAY)
 # Initialize the client
+client = create_client()
 def load_site_content() -> Dict[str, Any]:
     """Load the site content from JSON file."""
     return "\n".join(context_parts)
+async def respond(
     message: str,
     history: List[tuple[str, str]],
     system_message: str,
     temperature: float,
     top_p: float,
 ) -> str:
+    global client
+    # Ensure client is available
+    if client is None:
+        client = create_client()
+        if client is None:
+            return "I apologize, but I'm having trouble connecting to the language model. Please try again in a moment."
     # Load content
     content = load_site_content()
+    if not content:
+        return "I apologize, but I'm having trouble accessing the company information. Please try again in a moment."
     # Get relevant context
     context = get_relevant_context(message, content)
 4. NEVER invent services or capabilities not listed
 5. Be accurate about our AI and educational technology focus
 6. Acknowledge our cryptocurrency acceptance when relevant
+7. Use exact statistics when they're provided in the context"""
+    try:
+        # Format conversation history
+        messages = [{"role": "system", "content": enhanced_system_message}]
+        for user_msg, assistant_msg in history:
+            if user_msg:
+                messages.append({"role": "user", "content": user_msg})
+            if assistant_msg:
+                messages.append({"role": "assistant", "content": assistant_msg})
+        messages.append({"role": "user", "content": message})
+        # Stream the response
+        response = ""
+        for msg in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            token = msg.choices[0].delta.content
+            response += token
+            yield response
+    except Exception as e:
+        print(f"Error in chat completion: {e}")
+        # Try to recreate client on error
+        client = create_client()
+        yield "I apologize, but I encountered an error. Please try your question again."
 # Create the Gradio interface
 demo = gr.ChatInterface(