SmokeyBandit committed on
Commit
6f01f1b
·
verified ·
1 Parent(s): c766e70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -28
app.py CHANGED
@@ -2,9 +2,29 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  import json
4
  from typing import Dict, List, Any
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  # Initialize the client
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
9
  def load_site_content() -> Dict[str, Any]:
10
  """Load the site content from JSON file."""
@@ -72,7 +92,7 @@ Company Information:
72
 
73
  return "\n".join(context_parts)
74
 
75
- def respond(
76
  message: str,
77
  history: List[tuple[str, str]],
78
  system_message: str,
@@ -80,8 +100,18 @@ def respond(
80
  temperature: float,
81
  top_p: float,
82
  ) -> str:
 
 
 
 
 
 
 
 
83
  # Load content
84
  content = load_site_content()
 
 
85
 
86
  # Get relevant context
87
  context = get_relevant_context(message, content)
@@ -99,33 +129,35 @@ STRICT INSTRUCTIONS:
99
  4. NEVER invent services or capabilities not listed
100
  5. Be accurate about our AI and educational technology focus
101
  6. Acknowledge our cryptocurrency acceptance when relevant
102
- 7. Use exact statistics when they're provided in the context
103
 
104
- Remember: You are representing a proudly South African technology company specializing in AI, educational technology, and enterprise solutions."""
105
-
106
- # Format conversation history
107
- messages = [{"role": "system", "content": enhanced_system_message}]
108
-
109
- for user_msg, assistant_msg in history:
110
- if user_msg:
111
- messages.append({"role": "user", "content": user_msg})
112
- if assistant_msg:
113
- messages.append({"role": "assistant", "content": assistant_msg})
114
-
115
- messages.append({"role": "user", "content": message})
116
-
117
- # Stream the response
118
- response = ""
119
- for msg in client.chat_completion(
120
- messages,
121
- max_tokens=max_tokens,
122
- stream=True,
123
- temperature=temperature,
124
- top_p=top_p,
125
- ):
126
- token = msg.choices[0].delta.content
127
- response += token
128
- yield response
 
 
129
 
130
  # Create the Gradio interface
131
  demo = gr.ChatInterface(
 
2
  from huggingface_hub import InferenceClient
3
  import json
4
  from typing import Dict, List, Any
5
+ import time
6
+
7
+ # Initialize the client with retries
8
+ MAX_RETRIES = 3
9
+ RETRY_DELAY = 2
10
+
11
def create_client(retries=MAX_RETRIES):
    """Build the Hugging Face InferenceClient, retrying on failure.

    Makes up to *retries* attempts to construct the client for the
    zephyr-7b-beta model. Waits RETRY_DELAY seconds between attempts
    and returns None if every attempt raises.
    """
    attempt = 0
    while attempt < retries:
        try:
            # 30 s request timeout so a stalled endpoint can't hang the app.
            return InferenceClient(
                "HuggingFaceH4/zephyr-7b-beta",
                timeout=30
            )
        except Exception as e:
            attempt += 1
            if attempt == retries:
                # Out of attempts — caller is expected to handle a None client.
                print(f"Failed to create client after {retries} attempts: {e}")
                return None
            print(f"Attempt {attempt} failed, retrying in {RETRY_DELAY} seconds...")
            time.sleep(RETRY_DELAY)
25
 
26
  # Initialize the client
27
+ client = create_client()
28
 
29
  def load_site_content() -> Dict[str, Any]:
30
  """Load the site content from JSON file."""
 
92
 
93
  return "\n".join(context_parts)
94
 
95
+ async def respond(
96
  message: str,
97
  history: List[tuple[str, str]],
98
  system_message: str,
 
100
  temperature: float,
101
  top_p: float,
102
  ) -> str:
103
+ global client
104
+
105
+ # Ensure client is available
106
+ if client is None:
107
+ client = create_client()
108
+ if client is None:
109
+ return "I apologize, but I'm having trouble connecting to the language model. Please try again in a moment."
110
+
111
  # Load content
112
  content = load_site_content()
113
+ if not content:
114
+ return "I apologize, but I'm having trouble accessing the company information. Please try again in a moment."
115
 
116
  # Get relevant context
117
  context = get_relevant_context(message, content)
 
129
  4. NEVER invent services or capabilities not listed
130
  5. Be accurate about our AI and educational technology focus
131
  6. Acknowledge our cryptocurrency acceptance when relevant
132
+ 7. Use exact statistics when they're provided in the context"""
133
 
134
+ try:
135
+ # Format conversation history
136
+ messages = [{"role": "system", "content": enhanced_system_message}]
137
+ for user_msg, assistant_msg in history:
138
+ if user_msg:
139
+ messages.append({"role": "user", "content": user_msg})
140
+ if assistant_msg:
141
+ messages.append({"role": "assistant", "content": assistant_msg})
142
+ messages.append({"role": "user", "content": message})
143
+
144
+ # Stream the response
145
+ response = ""
146
+ for msg in client.chat_completion(
147
+ messages,
148
+ max_tokens=max_tokens,
149
+ stream=True,
150
+ temperature=temperature,
151
+ top_p=top_p,
152
+ ):
153
+ token = msg.choices[0].delta.content
154
+ response += token
155
+ yield response
156
+ except Exception as e:
157
+ print(f"Error in chat completion: {e}")
158
+ # Try to recreate client on error
159
+ client = create_client()
160
+ yield "I apologize, but I encountered an error. Please try your question again."
161
 
162
  # Create the Gradio interface
163
  demo = gr.ChatInterface(