Daemontatox committed on
Commit 4fa7ac8 · verified · 1 Parent(s): 2ea23a7

Update app.py

Files changed (1): app.py +189 -122
app.py CHANGED
@@ -8,12 +8,12 @@ from PIL import Image
 import gradio as gr
 from openai import OpenAI  # Use the OpenAI client that supports multimodal messages
 
-# Load API key from environment variable
+# Load API key from environment variable (secrets)
 HF_API_KEY = os.getenv("OPENAI_TOKEN")
 if not HF_API_KEY:
-    raise ValueError("OPENAI_TOKEN environment variable not set")
+    raise ValueError("HF_API_KEY environment variable not set")
 
-# Create the client pointing to the inference endpoint (e.g., OpenRouter)
+# Create the client pointing to the Hugging Face Inference endpoint
 client = OpenAI(
     base_url="https://openrouter.ai/api/v1",
     api_key=HF_API_KEY
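The client above simply points the standard OpenAI SDK at OpenRouter's OpenAI-compatible endpoint. For reference, a minimal sketch of exercising a client configured this way (the model id is the one used elsewhere in this file; the prompt string is a made-up placeholder):

```python
import os

from openai import OpenAI

# Assumes OPENAI_TOKEN holds an OpenRouter API key, as in the code above.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENAI_TOKEN"],
)

# Hypothetical smoke test: a single plain-text completion.
resp = client.chat.completions.create(
    model="qwen/qwen-vl-plus:free",
    messages=[{"role": "user", "content": "Reply with the word 'ok'."}],
    max_tokens=10,
)
print(resp.choices[0].message.content)
```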
@@ -50,13 +50,15 @@ def process_pdf_file(file_path):
         page = doc[page_num]
         page_text = page.get_text("text")
         if page_text.strip():
-            text += f"Page {page_num+1}:\n{page_text}\n\n"
+            text += f"Page {page_num + 1}:\n{page_text}\n\n"
+
         # Render page as an image with a zoom factor
         zoom = 3
         mat = fitz.Matrix(zoom, zoom)
         pix = page.get_pixmap(matrix=mat, alpha=False)
         img_data = pix.tobytes("png")
         img = Image.open(io.BytesIO(img_data)).convert("RGB")
+
         # Resize if image is too large
         max_size = 1600
         if max(img.size) > max_size:
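The rendering loop touched above follows the standard PyMuPDF pattern: rasterize each page with a zoom matrix, round-trip the PNG bytes through PIL, then downscale. A self-contained sketch of the same idea, assuming `pymupdf` and `Pillow` are installed and `sample.pdf` is a placeholder path:

```python
import io

import fitz  # PyMuPDF
from PIL import Image

doc = fitz.open("sample.pdf")  # placeholder path
pages = []
for page_num in range(len(doc)):
    page = doc[page_num]
    # A 3x zoom matrix renders at roughly 216 DPI, matching the zoom used in app.py.
    pix = page.get_pixmap(matrix=fitz.Matrix(3, 3), alpha=False)
    img = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
    # Cap the longest side at 1600 px, as the app does before sending images to the model.
    img.thumbnail((1600, 1600), Image.Resampling.LANCZOS)
    pages.append(img)
```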
@@ -82,7 +84,7 @@ def process_uploaded_file(file):
         if file is None:
             return "No file uploaded. Please upload a file."
 
-        # Gradio may pass a dict or a file-like object
+        # Get the file path from the Gradio upload (may be a dict or file-like object)
         if isinstance(file, dict):
             file_path = file["name"]
         else:
@@ -94,7 +96,7 @@ def process_uploaded_file(file):
             doc_state.doc_type = 'pdf'
             try:
                 doc_state.current_doc_images, doc_state.current_doc_text = process_pdf_file(file_path)
-                return f"PDF processed successfully. Total pages: {len(doc_state.current_doc_images)}. You can now chat with the bot."
+                return f"PDF processed successfully. Total pages: {len(doc_state.current_doc_images)}. You can now ask questions about the content."
             except Exception as e:
                 return f"Error processing PDF: {str(e)}. Please try a different PDF file."
         elif file_ext in image_extensions:
@@ -107,7 +109,7 @@ def process_uploaded_file(file):
                     new_size = tuple(int(dim * ratio) for dim in img.size)
                     img = img.resize(new_size, Image.Resampling.LANCZOS)
                 doc_state.current_doc_images = [img]
-                return "Image loaded successfully. You can now chat with the bot."
+                return "Image loaded successfully. You can now ask questions about the content."
             except Exception as e:
                 return f"Error processing image: {str(e)}. Please try a different image file."
         else:
@@ -116,109 +118,196 @@ def process_uploaded_file(file):
         logger.error(f"Error in process_uploaded_file: {str(e)}")
         return "An error occurred while processing the file. Please try again."
 
-def clear_context():
-    """Clear the current document context and chat history."""
-    doc_state.clear()
-    return "Document context cleared. You can upload a new document.", []
-
 # -------------------------------
-# Predetermined Prompts
+# Bot Streaming Function Using the Multimodal API
 # -------------------------------
-predetermined_prompts = {
-    "NOC Timesheet": (
-        "Extract structured information from the provided timesheet. The extracted details should include:\n"
-        "Name, Position Title, Work Location, Contractor, NOC ID, Month and Year, Regular Service Days, "
-        "Standby Days, Offshore Days, Extended Hitch Days, and approvals. Format the output as valid JSON."
-    ),
-    "Aramco Full structured": (
-        "You are a document parsing assistant designed to extract structured data from various documents such as "
-        "invoices, timesheets, purchase orders, and travel bookings. Return only valid JSON with no extra text."
-    ),
-    "Aramco Timesheet only": (
-        "Extract time tracking, work details, and approvals. Return a JSON object following the specified structure."
-    ),
-    "NOC Invoice": (
-        "You are a highly accurate data extraction system. Analyze the provided invoice image and extract all data "
-        "into the following JSON format:\n"
-        "{\n 'invoiceDetails': { ... },\n 'from': { ... },\n 'to': { ... },\n 'services': [ ... ],\n "
-        "'totals': { ... },\n 'bankDetails': { ... }\n}"
-    ),
-    "Software Tester": (
-        "Act as a software tester. Analyze the uploaded image of a software interface and generate comprehensive "
-        "test cases for its features. For each feature, provide test steps, expected results, and any necessary "
-        "preconditions. Be as detailed as possible."
-    )
-}
-
-# -------------------------------
-# Chat Function (Non-streaming Version)
-# -------------------------------
-def chat_respond(user_message, history, prompt_option):
+def bot_streaming(prompt_option, max_new_tokens=500):
     """
-    Append the user message to the conversation history, call the API,
-    and return the full response.
-
-    Each message passed to the API is now a dictionary with a string value for 'content'.
-    If an image was uploaded, its data URI is appended to the first user message.
-    The conversation history is a list of [user_text, assistant_text] pairs.
+    Build a multimodal message payload and call the inference API.
+    The payload includes:
+      - A text segment (the selected prompt and any document context).
+      - If available, an image as a data URI (using a base64-encoded PNG).
     """
-    # On the first message, if none is provided, use the predetermined prompt.
-    if history == []:
-        if not user_message.strip():
-            user_message = predetermined_prompts.get(prompt_option, "Hello")
-        else:
-            user_message = predetermined_prompts.get(prompt_option, "") + "\n" + user_message
+    try:
+        # Predetermined prompts (you can adjust these as needed)
+        prompts = {
+            "Software Tester": (
+                """
+You are TestCraft AI, a specialized large language model designed to be the ultimate software testing expert. Your primary function is to generate comprehensive, effective, and insightful test cases based on provided input, primarily in the form of images (screenshots, UI mockups, diagrams) and PDF documents (requirements specifications, user stories, design documents). You are not a general-purpose chatbot; your focus is exclusively on software testing.
+
+**Your Capabilities:**
+
+* **Input Interpretation:** You can accurately interpret the content of images and PDFs. This includes:
+    * **OCR (Optical Character Recognition):** Extract text from images and PDFs.
+    * **Object Detection:** Identify UI elements (buttons, text fields, dropdowns, checkboxes, images, tables, etc.) in images.
+    * **Layout Analysis:** Understand the structure and relationships between elements in images and documents (e.g., hierarchical relationships, proximity, alignment).
+    * **Document Structure Understanding:** Identify sections, headings, paragraphs, lists, tables, and figures within PDFs.
+    * **Requirement Extraction:** Identify explicit and implicit requirements, user stories, and acceptance criteria from textual content.
+    * **Diagram Interpretation:** If the image or PDF contains diagrams (flowcharts, state diagrams, etc.), understand their logic and transitions.
+
+* **Test Case Generation:** You can generate a wide variety of test cases, including but not limited to:
+    * **Functional Tests:** Verify that features work as expected based on the requirements and UI.
+    * **UI/UX Tests:** Assess the usability, accessibility, and visual correctness of the user interface.
+    * **Boundary Value Tests:** Test input fields with values at the minimum, maximum, and just inside/outside the valid range.
+    * **Equivalence Partitioning Tests:** Group similar inputs and test one representative value from each group.
+    * **Error Handling Tests:** Verify how the application handles invalid input, unexpected conditions, and errors.
+    * **Accessibility Tests:** Check compliance with accessibility guidelines (e.g., WCAG) regarding text alternatives, keyboard navigation, color contrast, etc.
+    * **Performance Tests (Basic):** Generate basic performance-related test ideas (e.g., "Verify response time for button click is less than 2 seconds"). *Note: You cannot execute performance tests, only suggest them.*
+    * **Security Tests (Basic):** Generate basic security-related test ideas (e.g., "Verify input fields are sanitized against XSS attacks"). *Note: You cannot execute security tests, only suggest them.*
+    * **Compatibility Tests (Basic):** Generate basic compatibility testing ideas, if information about target platforms is available (e.g., browsers, OS).
+
+* **Test Case Format:** Output test cases in a clear, structured, and consistent format. Each test case MUST include:
+    * **Test Case ID:** A unique identifier (e.g., TC-001, TC-002).
+    * **Test Case Title:** A brief, descriptive name for the test case.
+    * **Test Steps:** A numbered sequence of actions to perform. Be precise and unambiguous. Use user-centric language (e.g., "Click the 'Submit' button," not "Interact with element ID XYZ").
+    * **Expected Result:** The anticipated outcome of each step and the overall test case. Be specific.
+    * **Test Data (if applicable):** Specific input values or data to be used.
+    * **Priority (Optional):** High, Medium, or Low, based on your assessment of the criticality of the feature being tested.
+    * **Type (Optional):** Functional, UI, Accessibility, Performance, etc.
+    * **Requirement/User Story Reference (if applicable):** Link the test case back to a specific requirement or user story extracted from the input.
+
+* **Prioritization and Rationale:** You should be able to prioritize test cases based on risk, importance, and likelihood of finding defects. Explain *why* you assigned a particular priority. If you make any assumptions, state them clearly.
+
+* **Contextual Understanding:** You strive to understand the *purpose* of the software being tested. If the input provides clues about the application's domain (e.g., e-commerce, banking, healthcare), tailor your test cases accordingly.
+
+* **Continuous Learning (Hypothetical):** *While you cannot truly learn in the traditional sense, state that you are designed to improve your test case generation over time based on feedback and new information.* This sets the expectation of ongoing refinement.
+
+**Instructions for Interaction:**
+
+1. **Provide Input:** The user will provide one or more images (PNG, JPG, etc.) or PDF documents.
+2. **Specify Test Scope (Optional):** The user may optionally specify the scope of testing (e.g., "Focus on the login functionality," "Generate UI tests only," "Test accessibility"). If no scope is provided, generate a comprehensive set of test cases.
+3. **Generate Test Cases:** You will generate test cases based on the input and any specified scope.
+4. **Provide Explanations:** Explain your reasoning behind the generated test cases, including any assumptions made, prioritization logic, and references to the input.
+5. **Handle Ambiguity:** If the input is ambiguous or incomplete, you will:
+    * **Make Reasonable Assumptions:** State your assumptions clearly.
+    * **Ask Clarifying Questions:** Present the user with specific, concise questions to resolve ambiguities. *Format these as a separate section labeled "Clarifying Questions."* Do *not* proceed with test case generation until the questions are answered.
+6. **Error Handling:** If you encounter an error (e.g., unable to process an image), provide a clear and informative error message.
+
+**Example Output (Illustrative):**
+
+**(Assuming input is a screenshot of a login form)**
+
+**Test Cases:**
+
+| Test Case ID | Test Case Title | Test Steps | Expected Result | Test Data | Priority | Type | Requirement Reference |
+|--------------|--------------------------|-----------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|----------------------|----------|-------------|-----------------------|
+| TC-001 | Valid Login | 1. Enter valid username. 2. Enter valid password. 3. Click the 'Login' button. | User is successfully logged in and redirected to the dashboard. | Username: testuser | High | Functional | Login-001 |
+| | | | | Password: password123 | | | |
+| TC-002 | Invalid Username | 1. Enter invalid username. 2. Enter valid password. 3. Click the 'Login' button. | Error message displayed: "Invalid username or password." User remains on the login page. | Username: invaliduser | High | Functional | Login-001 |
+| | | | | Password: password123 | | | |
+| TC-003 | Empty Username Field | 1. Leave the username field blank. 2. Enter valid password. 3. Click 'Login'. | Error message displayed: "Username is required." User remains on the login page. | Password: password123 | High | Functional | Login-001 |
+| TC-004 | Password Field Masking | 1. Enter characters into the password field. | Characters are masked (e.g., displayed as dots or asterisks). | Any characters | Medium | UI | Login-002 |
+| TC-005 | Forgot Password Link | 1. Click the "Forgot Password" link. | User is redirected to the "Forgot Password" page. | N/A | Medium | Functional | Login-003 |
+| TC-006 | Check color contrast | 1. Inspect the text and background colors. | Text meets WCAG AA standard for color contrast. | N/A | High | Accessibility | Login-004 |
+
+**Assumptions:**
+
+* The dashboard is the expected landing page after successful login.
+* The "Forgot Password" link exists (it might be present in the provided image).
+* The system is using the most current WCAG standards.
+
+**Rationale:**
+
+* TC-001 and TC-002 are high priority because they test the core login functionality.
+* TC-003 checks for required field validation.
+* TC-004 is a UI test to ensure password security.
+* TC-006 ensures that the text is readable by users.
 
-    history = history + [[user_message, ""]]
+**Clarifying Questions:**
 
-    messages = []
-    # Build the messages list with each message as a dictionary containing role and a string content.
-    for i, (user_msg, assistant_msg) in enumerate(history):
-        # For the very first user message, attach the image (if available) by appending its data URI.
-        if i == 0 and doc_state.current_doc_images:
+* None at this time.
+
+---
+
+**Key Design Choices and Explanations:**
+
+* **TestCraft AI Persona:** Giving the model a specific name and role helps to reinforce its purpose and limit its responses to the testing domain.
+* **Comprehensive Capabilities:** The prompt explicitly lists the required skills (OCR, object detection, etc.) to ensure the model is capable of handling the input.
+* **Structured Output:** The required test case format is clearly defined, promoting consistency and readability.
+* **Prioritization and Rationale:** The model is explicitly instructed to prioritize and explain its reasoning, making the output more useful and insightful.
+* **Contextual Understanding:** The model is encouraged to understand the *purpose* of the software, leading to more relevant test cases.
+* **Ambiguity Handling:** The model is instructed to handle incomplete or ambiguous input gracefully by making assumptions and asking clarifying questions.
+* **Optional Fields:** Priority and type fields are added in the test case structure.
+* **Basic Testing Types:** Includes basic Performance and Security Testing.
+
+**Potential Limitations and Mitigation Strategies:**
+
+* **Limited "Real-World" Interaction:** The model cannot interact with a live application. It can only generate test cases based on static input. *Mitigation:* Clearly state this limitation.
+* **Performance and Security Testing:** The model's capabilities in these areas are limited to generating basic test *ideas*. It cannot execute these tests. *Mitigation:* Explicitly state this limitation.
+* **OCR and Object Detection Accuracy:** The accuracy of OCR and object detection may vary depending on the quality of the input images. *Mitigation:* Provide clear error messages if processing fails. Encourage users to provide high-quality images.
+* **Complex Logic:** Interpreting complex business logic from images and PDFs may be challenging. *Mitigation:* The model should ask clarifying questions when necessary. Focus on clear and well-structured input documents.
+* **"Hallucination":** Like all LLMs, there's a risk of the model generating incorrect or nonsensical information. *Mitigation:* Thorough testing and validation of the model's output are crucial. Encourage user feedback to identify and correct errors.
+
+This comprehensive system prompt provides a strong foundation for building a powerful and effective software testing model. Remember to thoroughly test and refine the model's output based on real-world usage and feedback.
+
+                """
+            )
+        }
+
+        # Select the appropriate prompt
+        selected_prompt = prompts.get(prompt_option, "Invalid prompt selected.")
+        context = ""
+        if doc_state.current_doc_images and doc_state.current_doc_text:
+            context = "\nDocument context:\n" + doc_state.current_doc_text
+        full_prompt = selected_prompt + context
+
+        # Build the message payload in the expected format.
+        # The content field is a list of objects—one for text, and (if an image is available) one for the image.
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "text": full_prompt
+                    }
+                ]
+            }
+        ]
+
+        # If an image is available, encode it as a data URI and append it as an image_url message.
+        if doc_state.current_doc_images:
             buffered = io.BytesIO()
             doc_state.current_doc_images[0].save(buffered, format="PNG")
             img_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+            # Create a data URI (many APIs accept this format in place of a public URL)
             data_uri = f"data:image/png;base64,{img_b64}"
-            text_to_send = user_msg + "\n[Attached Image: " + data_uri + "]"
-        else:
-            text_to_send = user_msg
-        messages.append({"role": "user", "content": text_to_send})
-        if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
-
-    try:
-        # Call the API without streaming. The messages are now standard dictionaries.
-        response = client.chat.completions.create(
-            model="qwen/qwen-vl-plus:free",
+            messages[0]["content"].append({
+                "type": "image_url",
+                "image_url": {"url": data_uri}
+            })
+
+        # Call the inference API with streaming enabled.
+        stream = client.chat.completions.create(
+            model="google/gemini-2.0-pro-exp-02-05:free",
             messages=messages,
-            max_tokens=500
+            max_tokens=max_new_tokens,
+            stream=True
         )
-    except Exception as e:
-        logger.error(f"Error calling the API: {str(e)}")
-        history[-1][1] = "An error occurred while processing your request. Please check your API credentials."
-        return history, history
+
+        buffer = ""
+        for chunk in stream:
+            # Each chunk carries a delta; guard against None content (e.g., on the final chunk).
+            delta = chunk.choices[0].delta.content or ""
+            buffer += delta
+            time.sleep(0.01)
+            yield buffer
 
-    # Assuming the API returns a standard completion response, extract the assistant's reply.
-    try:
-        full_response = response.choices[0].message["content"]
     except Exception as e:
-        logger.error(f"Error extracting API response: {str(e)}")
-        full_response = "An error occurred while processing the API response."
+        logger.error(f"Error in bot_streaming: {str(e)}")
+        yield "An error occurred while processing your request. Please try again."
 
-    history[-1][1] = full_response
-    return history, history
+def clear_context():
+    """Clear the current document context."""
+    doc_state.clear()
+    return "Document context cleared. You can upload a new document."
 
 # -------------------------------
 # Create the Gradio Interface
 # -------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# Document Analyzer & Software Testing Chatbot")
-    gr.Markdown(
-        "Upload a PDF or an image (PNG, JPG, JPEG, GIF, BMP, WEBP). Then choose a prompt from the dropdown. "
-        "For example, select **Software Tester** to have the bot analyze an image of a software interface "
-        "and generate test cases. You can also chat with the model—the conversation history is preserved."
-    )
+    gr.Markdown("# Document Analyzer with Predetermined Prompts")
+    gr.Markdown("Upload a PDF or image (PNG, JPG, JPEG, GIF, BMP, WEBP) and select a prompt to analyze its contents.")
 
     with gr.Row():
         file_upload = gr.File(
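The core of the new `bot_streaming` above is an OpenAI-style multimodal payload (text part plus a base64 data-URI image part) consumed as a stream. A stripped-down sketch of that same pattern, assuming the `client` defined earlier and a hypothetical `pil_image`; note that `delta.content` can be `None` on some chunks, hence the guard:

```python
import base64
import io

def stream_reply(client, prompt_text, pil_image=None,
                 model="google/gemini-2.0-pro-exp-02-05:free"):
    """Yield the growing reply text, mirroring the pattern used in bot_streaming."""
    content = [{"type": "text", "text": prompt_text}]
    if pil_image is not None:
        buf = io.BytesIO()
        pil_image.save(buf, format="PNG")
        b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
        # Inline data URI instead of a public image URL.
        content.append({"type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{b64}"}})

    stream = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": content}],
        max_tokens=500,
        stream=True,
    )
    buffer = ""
    for chunk in stream:
        buffer += chunk.choices[0].delta.content or ""  # delta may be None
        yield buffer
```

Iterating `stream_reply(...)` yields progressively longer strings, which is exactly the shape a streaming Gradio callback expects.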
@@ -230,38 +319,16 @@ with gr.Blocks() as demo:
     with gr.Row():
         prompt_dropdown = gr.Dropdown(
             label="Select Prompt",
-            choices=[
-                "NOC Timesheet",
-                "Aramco Full structured",
-                "Aramco Timesheet only",
-                "NOC Invoice",
-                "Software Tester"
-            ],
-            value="Software Tester"
+            choices=["Software Tester"],
+            value="Software Tester"
         )
-        clear_btn = gr.Button("Clear Document Context & Chat History")
-
-        # Set type='messages' to avoid deprecation warnings.
-        chatbot = gr.Chatbot(label="Chat History", type="messages", elem_id="chatbot")
+        generate_btn = gr.Button("Generate")
 
-    with gr.Row():
-        user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...", show_label=False)
-        send_btn = gr.Button("Send")
-
-    # State to hold the conversation history
-    chat_state = gr.State([])
-
-    # When a file is uploaded, process it.
-    file_upload.change(fn=process_uploaded_file, inputs=file_upload, outputs=upload_status)
+    clear_btn = gr.Button("Clear Document Context")
+    output_text = gr.Textbox(label="Output", interactive=False)
 
-    # Clear document context and chat history.
-    clear_btn.click(fn=clear_context, outputs=[upload_status, chat_state])
-
-    # When the user clicks Send, process the message and update the chat.
-    send_btn.click(
-        fn=chat_respond,
-        inputs=[user_input, chat_state, prompt_dropdown],
-        outputs=[chatbot, chat_state]
-    )
-
-demo.launch(debug=True)
+    file_upload.change(fn=process_uploaded_file, inputs=[file_upload], outputs=[upload_status])
+    generate_btn.click(fn=bot_streaming, inputs=[prompt_dropdown], outputs=[output_text])
+    clear_btn.click(fn=clear_context, outputs=[upload_status])
+
+demo.launch(debug=True)
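On the Gradio side, wiring a generator function to a button is what makes the output stream: Gradio re-renders the output component on every `yield`. A minimal sketch of that wiring, with a hypothetical `fake_stream` standing in for `bot_streaming`:

```python
import time

import gradio as gr

def fake_stream(prompt_option):
    # Stand-in for bot_streaming: yields a growing string.
    text = f"Running prompt: {prompt_option}"
    for i in range(1, len(text) + 1):
        time.sleep(0.02)
        yield text[:i]

with gr.Blocks() as demo:
    dropdown = gr.Dropdown(label="Select Prompt",
                           choices=["Software Tester"], value="Software Tester")
    btn = gr.Button("Generate")
    out = gr.Textbox(label="Output", interactive=False)
    # Because fake_stream is a generator, the textbox updates on each yield.
    btn.click(fn=fake_stream, inputs=[dropdown], outputs=[out])

demo.launch()
```

Note that the dropdown's default must be one of its `choices`; the commit's `value="NOC Timesheet"` against `choices=["Software Tester"]` would trigger a Gradio warning, which is why the hunk above normalizes it.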
 