aiqcamp committed
Commit b519183 · verified · 1 Parent(s): 534bc69

Update app.py
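Replaces the Phi-4-reasoning-plus chatbot (local transformers inference under `@spaces.GPU`) with a Fireworks-AI-hosted Llama-4-Maverick chat app. Responses now stream from the Fireworks chat completions API, and an optional "Deep Research" checkbox first asks the model to extract search keywords, runs them against the SerpHouse search API, and injects the formatted results into the conversation as a system message.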

Files changed (1): app.py (+378, -166)
app.py CHANGED
@@ -1,179 +1,391 @@
  import gradio as gr
- import spaces
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
- import torch
- from threading import Thread

- phi4_model_path = "microsoft/Phi-4-reasoning-plus"

- device = "cuda:0" if torch.cuda.is_available() else "cpu"
-
- phi4_model = AutoModelForCausalLM.from_pretrained(phi4_model_path, device_map="auto", torch_dtype="auto")
- phi4_tokenizer = AutoTokenizer.from_pretrained(phi4_model_path)
-
- @spaces.GPU(duration=60)
- def generate_response(user_message, max_tokens, temperature, top_k, top_p, repetition_penalty, history_state):
-     if not user_message.strip():
-         return history_state, history_state

-     # Phi-4 model settings
-     model = phi4_model
-     tokenizer = phi4_tokenizer
-     start_tag = "<|im_start|>"
-     sep_tag = "<|im_sep|>"
-     end_tag = "<|im_end|>"
-
-     # Recommended prompt settings by Microsoft
-     system_message = "Your role as an assistant involves thoroughly exploring questions through a systematic thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution using the specified format: <think> {Thought section} </think> {Solution section}. In the Thought section, detail your reasoning process in steps. Each step should include detailed considerations such as analysing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The Solution section should be logical, accurate, and concise and detail necessary steps needed to reach the conclusion. Now, try to solve the following question through the above guidelines:"
-     prompt = f"{start_tag}system{sep_tag}{system_message}{end_tag}"
-     for message in history_state:
-         if message["role"] == "user":
-             prompt += f"{start_tag}user{sep_tag}{message['content']}{end_tag}"
-         elif message["role"] == "assistant" and message["content"]:
-             prompt += f"{start_tag}assistant{sep_tag}{message['content']}{end_tag}"
-     prompt += f"{start_tag}user{sep_tag}{user_message}{end_tag}{start_tag}assistant{sep_tag}"
-
-     inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-     do_sample = not (temperature == 1.0 and top_k >= 100 and top_p == 1.0)
-
-     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
-
-     # sampling techniques
-     generation_kwargs = {
-         "input_ids": inputs["input_ids"],
-         "attention_mask": inputs["attention_mask"],
-         "max_new_tokens": int(max_tokens),
-         "do_sample": True,
-         "temperature": 0.8,
-         "top_k": int(top_k),
-         "top_p": 0.95,
-         "repetition_penalty": repetition_penalty,
-         "streamer": streamer,
-     }
-
-     thread = Thread(target=model.generate, kwargs=generation_kwargs)
-     thread.start()
-
-     # Stream the response
-     assistant_response = ""
-     new_history = history_state + [
-         {"role": "user", "content": user_message},
-         {"role": "assistant", "content": ""}
-     ]
-     for new_token in streamer:
-         cleaned_token = new_token.replace("<|im_start|>", "").replace("<|im_sep|>", "").replace("<|im_end|>", "")
-         assistant_response += cleaned_token
-         new_history[-1]["content"] = assistant_response.strip()
-         yield new_history, new_history
-
-     yield new_history, new_history

- example_messages = {
-     "Math reasoning": "If a rectangular prism has a length of 6 cm, a width of 4 cm, and a height of 5 cm, what is the length of the longest line segment that can be drawn from one vertex to another?",
-     "Logic puzzle": "Four people (Alex, Blake, Casey, and Dana) each have a different favorite color (red, blue, green, yellow) and a different favorite fruit (apple, banana, cherry, date). Given the following clues: 1) The person who likes red doesn't like dates. 2) Alex likes yellow. 3) The person who likes blue likes cherries. 4) Blake doesn't like apples or bananas. 5) Casey doesn't like yellow or green. Who likes what color and what fruit?",
-     "Physics problem": "A ball is thrown upward with an initial velocity of 15 m/s from a height of 2 meters above the ground. Assuming the acceleration due to gravity is 9.8 m/s², determine: 1) The maximum height the ball reaches. 2) The total time the ball is in the air before hitting the ground. 3) The velocity with which the ball hits the ground."
- }

- with gr.Blocks(theme=gr.themes.Soft()) as demo:
-     gr.Markdown(
-         """
-         # Phi-4-reasoning-plus Chatbot
-         Welcome to the Phi-4-reasoning-plus Chatbot! This model excels at multi-step reasoning tasks in mathematics, logic, and science.

-         The model will provide responses with two sections:
-         1. **<think>**: A detailed step-by-step reasoning process showing its work
-         2. **Solution**: A concise, accurate final answer based on the reasoning

-         Try the example problems below to see how the model breaks down complex reasoning problems.
-         """
-     )

-     history_state = gr.State([])
-
-     with gr.Row():
-         with gr.Column(scale=1):
-             gr.Markdown("### Settings")
-             max_tokens_slider = gr.Slider(
-                 minimum=64,
-                 maximum=32768,
-                 step=1024,
-                 value=4096,
-                 label="Max Tokens"
-             )
-             with gr.Accordion("Advanced Settings", open=False):
-                 temperature_slider = gr.Slider(
-                     minimum=0.1,
-                     maximum=2.0,
-                     value=0.8,
-                     label="Temperature"
-                 )
-                 top_k_slider = gr.Slider(
-                     minimum=1,
-                     maximum=100,
-                     step=1,
-                     value=50,
-                     label="Top-k"
-                 )
-                 top_p_slider = gr.Slider(
-                     minimum=0.1,
-                     maximum=1.0,
-                     value=0.95,
-                     label="Top-p"
                  )
-                 repetition_penalty_slider = gr.Slider(
-                     minimum=1.0,
-                     maximum=2.0,
-                     value=1.0,
-                     label="Repetition Penalty"
                  )

-         with gr.Column(scale=4):
-             chatbot = gr.Chatbot(label="Chat", type="messages")
-             with gr.Row():
-                 user_input = gr.Textbox(
-                     label="Your message",
-                     placeholder="Type your message here...",
-                     scale=3
-                 )
-                 submit_button = gr.Button("Send", variant="primary", scale=1)
-                 clear_button = gr.Button("Clear", scale=1)
-             gr.Markdown("**Try these examples:**")
-             with gr.Row():
-                 example1_button = gr.Button("Math reasoning")
-                 example2_button = gr.Button("Logic puzzle")
-                 example3_button = gr.Button("Physics problem")
-
-     submit_button.click(
-         fn=generate_response,
-         inputs=[user_input, max_tokens_slider, temperature_slider, top_k_slider, top_p_slider, repetition_penalty_slider, history_state],
-         outputs=[chatbot, history_state]
-     ).then(
-         fn=lambda: gr.update(value=""),
-         inputs=None,
-         outputs=user_input
-     )
-
-     clear_button.click(
-         fn=lambda: ([], []),
-         inputs=None,
-         outputs=[chatbot, history_state]
-     )
-
-     example1_button.click(
-         fn=lambda: gr.update(value=example_messages["Math reasoning"]),
-         inputs=None,
-         outputs=user_input
-     )
-     example2_button.click(
-         fn=lambda: gr.update(value=example_messages["Logic puzzle"]),
-         inputs=None,
-         outputs=user_input
-     )
-     example3_button.click(
-         fn=lambda: gr.update(value=example_messages["Physics problem"]),
-         inputs=None,
-         outputs=user_input
-     )

- demo.launch(ssr_mode=False)
  import gradio as gr
+ import os
+ import requests
+ import json
+ import time
+ from dotenv import load_dotenv

+ # Load .env file (if it exists)
+ load_dotenv()

+ def create_deepseek_interface():
+     # Get API keys from environment variables
+     api_key = os.getenv("FW_API_KEY")
+     serphouse_api_key = os.getenv("SERPHOUSE_API_KEY")
+
+     if not api_key:
+         print("Warning: FW_API_KEY environment variable is not set.")
+     if not serphouse_api_key:
+         print("Warning: SERPHOUSE_API_KEY environment variable is not set.")
+
+     # Keyword extraction function (LLM-based)
+     def extract_keywords_with_llm(query):
+         if not api_key:
+             return "FW_API_KEY not set for LLM keyword extraction.", query

+         # Extract keywords using the Fireworks-hosted Llama 4 model
+         url = "https://api.fireworks.ai/inference/v1/chat/completions"
+         payload = {
+             "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+             "max_tokens": 200,
+             "temperature": 0.1,  # Low temperature for consistent results
+             "messages": [
+                 {
+                     "role": "system",
+                     "content": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem. Extract key search terms from the user's question that would be effective for web searches. Provide these as a search query with words separated by spaces only, without commas. For example: 'Prime Minister Han Duck-soo impeachment results'"
+                 },
+                 {
+                     "role": "user",
+                     "content": query
+                 }
+             ]
+         }
+         headers = {
+             "Accept": "application/json",
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {api_key}"
+         }
+
+         try:
+             response = requests.post(url, headers=headers, json=payload)
+             response.raise_for_status()
+             result = response.json()
+
+             # Extract keywords from response
+             keywords = result["choices"][0]["message"]["content"].strip()
+
+             # Use original query if keywords are too long or improperly formatted
+             if len(keywords) > 100:
+                 return f"Extracted keywords: {keywords}", query
+
+             return f"Extracted keywords: {keywords}", keywords
+
+         except Exception as e:
+             print(f"Error during keyword extraction: {str(e)}")
+             return f"Error during keyword extraction: {str(e)}", query
+
+     # Search function using SerpHouse API
+     def search_with_serphouse(query):
+         if not serphouse_api_key:
+             return "SERPHOUSE_API_KEY is not set."
+
+         try:
+             # Extract keywords
+             extraction_result, search_query = extract_keywords_with_llm(query)
+             print(f"Original query: {query}")
+             print(extraction_result)
+
+             # Basic GET method seems best after analyzing documentation
+             url = "https://api.serphouse.com/serp/live"
+
+             # Check if query is in Korean (Hangul syllables block)
+             is_korean = any('\uAC00' <= c <= '\uD7A3' for c in search_query)
+
+             # Simplified parameters
+             params = {
+                 "q": search_query,
+                 "domain": "google.com",
+                 "serp_type": "web",  # Changed to basic web search
+                 "device": "desktop",
+                 "lang": "ko" if is_korean else "en"
+             }
+
+             headers = {
+                 "Authorization": f"Bearer {serphouse_api_key}"
+             }
+
+             print("Calling SerpHouse API with basic GET method...")
+             print(f"Search term: {search_query}")
+             print(f"Request URL: {url} - Parameters: {params}")
+
+             # Execute GET request
+             response = requests.get(url, headers=headers, params=params)
+             response.raise_for_status()
+
+             print(f"SerpHouse API response status code: {response.status_code}")
+             search_results = response.json()
+
+             # Check response structure
+             print(f"Response structure: {list(search_results.keys()) if isinstance(search_results, dict) else 'Not a dictionary'}")
+
+             # Parse and format search results (in Markdown)
+             formatted_results = []
+             formatted_results.append(f"## Search term: {search_query}\n\n")
+
+             # Handle various possible response structures
+             organic_results = None
+
+             # Possible response structure 1
+             if "results" in search_results and "organic" in search_results["results"]:
+                 organic_results = search_results["results"]["organic"]
+
+             # Possible response structure 2
+             elif "organic" in search_results:
+                 organic_results = search_results["organic"]
+
+             # Possible response structure 3 (nested results)
+             elif "results" in search_results and "results" in search_results["results"]:
+                 if "organic" in search_results["results"]["results"]:
+                     organic_results = search_results["results"]["results"]["organic"]
+
+             # Process organic results if available
+             if organic_results and len(organic_results) > 0:
+                 # Output response structure
+                 print(f"First organic result structure: {organic_results[0].keys() if len(organic_results) > 0 else 'empty'}")
+
+                 for i, result in enumerate(organic_results[:5], 1):  # Show only top 5 results
+                     title = result.get("title", "No title")
+                     snippet = result.get("snippet", "No content")
+                     link = result.get("link", "#")
+                     displayed_link = result.get("displayed_link", link)
+
+                     # Format in Markdown (including number and link)
+                     formatted_results.append(
+                         f"### {i}. [{title}]({link})\n\n"
+                         f"{snippet}\n\n"
+                         f"**Source**: [{displayed_link}]({link})\n\n"
+                         f"---\n\n"
+                     )
+
+                 print(f"Found {len(organic_results)} search results")
+                 return "".join(formatted_results)
+
+             # Handle case with no results or unexpected structure
+             print("No search results or unexpected response structure")
+             print(f"Detailed response structure: {search_results.keys() if hasattr(search_results, 'keys') else 'Unclear structure'}")
+
+             # Find error messages in response
+             error_msg = "No search results found or response format is different than expected"
+             if "error" in search_results:
+                 error_msg = search_results["error"]
+             elif "message" in search_results:
+                 error_msg = search_results["message"]
+
+             return f"## Results for '{search_query}'\n\n{error_msg}"
+
+         except Exception as e:
+             error_msg = f"Error during search: {str(e)}"
+             print(error_msg)
+             import traceback
+             print(traceback.format_exc())
+
+             # Add API request details for debugging (in Markdown)
+             return "## Error Occurred\n\n" + \
+                    f"An error occurred during search: **{str(e)}**\n\n" + \
+                    "### API Request Details:\n" + \
+                    f"- **URL**: {url}\n" + \
+                    f"- **Search Term**: {search_query}\n" + \
+                    f"- **Parameters**: {params}\n"
+
+     # Function to call the Fireworks chat completions API with streaming
+     def query_deepseek_streaming(message, history, use_deep_research):
+         if not api_key:
+             yield history, "Environment variable FW_API_KEY is not set. Please check the environment variables on the server."
+             return
+
+         search_context = ""
+         search_info = ""
+         if use_deep_research:
+             try:
+                 # Start search (first message)
+                 yield history + [(message, "🔍 Extracting optimal keywords and searching the web...")], ""
+
+                 # Execute search - add logs for debugging
+                 print(f"Deep Research activated: Starting search for '{message}'")
+                 search_results = search_with_serphouse(message)
+                 print(f"Search results received: {search_results[:100]}...")  # Output first part of results
+
+                 if not search_results.startswith("Error during search") and not search_results.startswith("SERPHOUSE_API_KEY"):
+                     search_context = f"""
+ Here are recent search results related to the user's question. Use this information to provide an accurate response with the latest information:

+ {search_results}

+ Based on the above search results, answer the user's question. If you cannot find a clear answer in the search results, use your knowledge to provide the best answer.
+ When citing search results, mention the source, and ensure your answer reflects the latest information.
+ """
+                     search_info = "🔍 Deep Research feature activated: Generating response based on relevant web search results..."
+                 else:
+                     print(f"Search failed or no results: {search_results}")
+             except Exception as e:
+                 print(f"Exception occurred during Deep Research: {str(e)}")
+                 search_info = f"🔍 Deep Research feature error: {str(e)}"
+
+         # Prepare conversation history for API request
+         messages = []
+         for user, assistant in history:
+             messages.append({"role": "user", "content": user})
+             messages.append({"role": "assistant", "content": assistant})
+
+         # Add system message with search context if available
+         if search_context:
+             # The chat completions API accepts a system message
+             messages.insert(0, {"role": "system", "content": search_context})

+         # Add new user message
+         messages.append({"role": "user", "content": message})

+         # Prepare API request
+         url = "https://api.fireworks.ai/inference/v1/chat/completions"
+         payload = {
+             "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
+             "max_tokens": 20480,
+             "top_p": 1,
+             "top_k": 40,
+             "presence_penalty": 0,
+             "frequency_penalty": 0,
+             "temperature": 0.6,
+             "messages": messages,
+             "stream": True  # Enable streaming
+         }
+         headers = {
+             "Accept": "application/json",
+             "Content-Type": "application/json",
+             "Authorization": f"Bearer {api_key}"
+         }
+
+         try:
+             # Request streaming response
+             response = requests.post(url, headers=headers, json=payload, stream=True)
+             response.raise_for_status()  # Raise exception for HTTP errors
+
+             # Add message and start with initial response
+             new_history = history.copy()
+
+             # Include search_info in starting message if available
+             start_msg = search_info if search_info else ""
+             new_history.append((message, start_msg))
+
+             # Full response text
+             full_response = start_msg
+
+             # Process streaming response
+             for line in response.iter_lines():
+                 if line:
+                     line_text = line.decode('utf-8')
+
+                     # Remove 'data: ' prefix
+                     if line_text.startswith("data: "):
+                         line_text = line_text[6:]
+
+                     # Check for stream end message
+                     if line_text == "[DONE]":
+                         break
+
+                     try:
+                         # Parse JSON
+                         chunk = json.loads(line_text)
+                         chunk_content = chunk.get("choices", [{}])[0].get("delta", {}).get("content", "")
+
+                         if chunk_content:
+                             full_response += chunk_content
+                             # Update chat history
+                             new_history[-1] = (message, full_response)
+                             yield new_history, ""
+                     except json.JSONDecodeError:
+                         continue
+
+             # Return final response
+             yield new_history, ""
+
+         except requests.exceptions.RequestException as e:
+             error_msg = f"API error: {str(e)}"
+             # Note: a Response with an error status is falsy, so compare against None
+             if getattr(e, 'response', None) is not None and e.response.status_code == 401:
+                 error_msg = "Authentication failed. Please check your FW_API_KEY environment variable."
+             yield history, error_msg

+ # Create Gradio interface
298
+ with gr.Blocks(theme="soft", fill_height=True) as demo:
299
+ # Header section
300
+ gr.Markdown(
301
+ """
302
+ # 🤖 Llama-4-Maverick-17B + Research
303
+ ### Llama-4-Maverick-17B Model + Real-time 'Deep Research' Agentic AI System @ https://discord.gg/openfreeai
304
+ """
305
+ )
306
+
307
+ # Main layout
308
+ with gr.Row():
309
+ # Main content area
310
+ with gr.Column():
311
+ # Chat interface
312
+ chatbot = gr.Chatbot(
313
+ height=500,
314
+ show_label=False,
315
+ container=True
 
 
 
 
 
 
 
 
 
 
 
 
316
  )
317
+
318
+ # Add Deep Research toggle and status display
319
+ with gr.Row():
320
+ with gr.Column(scale=3):
321
+ use_deep_research = gr.Checkbox(
322
+ label="Enable Deep Research",
323
+ info="Utilize optimal keyword extraction and web search for latest information",
324
+ value=False
325
+ )
326
+ with gr.Column(scale=1):
327
+ api_status = gr.Markdown("API Status: Ready")
328
+
329
+ # Check and display API key status
330
+ if not serphouse_api_key:
331
+ api_status.value = "⚠️ SERPHOUSE_API_KEY is not set"
332
+ if not api_key:
333
+ api_status.value = "⚠️ FW_API_KEY is not set"
334
+ if api_key and serphouse_api_key:
335
+ api_status.value = "✅ API keys configured"
336
+
337
+ # Input area
338
+ with gr.Row():
339
+ msg = gr.Textbox(
340
+ label="Message",
341
+ placeholder="Enter your prompt here...",
342
+ show_label=False,
343
+ scale=9
344
+ )
345
+ submit = gr.Button("Send", variant="primary", scale=1)
346
+
347
+ # Clear conversation button
348
+ with gr.Row():
349
+ clear = gr.ClearButton([msg, chatbot], value="🧹 Clear Conversation")
350
+
351
+ # Example queries
352
+ gr.Examples(
353
+ examples=[
354
+ "Explain the difference between Transformers and RNNs in deep learning.",
355
+ "Write a Python function to find prime numbers within a specific range.",
356
+ "Summarize the key concepts of reinforcement learning."
357
+ ],
358
+ inputs=msg
359
  )
360
+
361
+ # Error message display
362
+ error_box = gr.Markdown("")
363
 
364
+ # Connect buttons to functions
365
+ submit.click(
366
+ query_deepseek_streaming,
367
+ inputs=[msg, chatbot, use_deep_research],
368
+ outputs=[chatbot, error_box]
369
+ ).then(
370
+ lambda: "",
371
+ None,
372
+ [msg]
373
+ )
374
+
375
+ # Allow Enter key submission
376
+ msg.submit(
377
+ query_deepseek_streaming,
378
+ inputs=[msg, chatbot, use_deep_research],
379
+ outputs=[chatbot, error_box]
380
+ ).then(
381
+ lambda: "",
382
+ None,
383
+ [msg]
384
+ )
385
+
386
+ return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
+ # Run interface
389
+ if __name__ == "__main__":
390
+ demo = create_deepseek_interface()
391
+ demo.launch(debug=True)
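For reference, a minimal standalone sketch of the streaming pattern `query_deepseek_streaming` relies on: an OpenAI-compatible chat completions endpoint consumed as server-sent events, where each `data:` frame carries a JSON chunk and `[DONE]` marks the end of the stream. The prompt and `max_tokens` values below are illustrative, not values from this commit:

```python
import json
import os

import requests

url = "https://api.fireworks.ai/inference/v1/chat/completions"
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {os.environ['FW_API_KEY']}",
}
payload = {
    "model": "accounts/fireworks/models/llama4-maverick-instruct-basic",
    "messages": [{"role": "user", "content": "Say hello."}],  # illustrative prompt
    "max_tokens": 64,
    "stream": True,
}

with requests.post(url, headers=headers, json=payload, stream=True) as response:
    response.raise_for_status()
    for raw_line in response.iter_lines():
        if not raw_line:
            continue
        line = raw_line.decode("utf-8")
        if line.startswith("data: "):   # SSE frames are prefixed with "data: "
            line = line[len("data: "):]
        if line == "[DONE]":            # sentinel marking the end of the stream
            break
        try:
            chunk = json.loads(line)
        except json.JSONDecodeError:
            continue
        delta = chunk.get("choices", [{}])[0].get("delta", {})
        print(delta.get("content", ""), end="", flush=True)
```

Similarly, `search_with_serphouse` probes three possible shapes for the organic results in the SerpHouse response; the same fallback chain can be written as one small recursive helper. This is a sketch under the same assumptions, and the sample payloads in the asserts are made up for illustration:

```python
def find_organic(payload):
    """Cover the commit's fallbacks: organic, results.organic, results.results.organic."""
    if not isinstance(payload, dict):
        return []
    if isinstance(payload.get("organic"), list):
        return payload["organic"]
    inner = payload.get("results")
    return find_organic(inner) if isinstance(inner, dict) else []

# Hypothetical response shapes, for illustration only:
assert find_organic({"organic": [1]}) == [1]
assert find_organic({"results": {"organic": [2]}}) == [2]
assert find_organic({"results": {"results": {"organic": [3]}}}) == [3]
assert find_organic({"results": []}) == []
```

Running the app locally then only needs a `.env` file (picked up by `load_dotenv()`) that defines `FW_API_KEY` and `SERPHOUSE_API_KEY` before `python app.py`.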