ReallyFloppyPenguin committed
Commit 230e3e0 · verified · 1 Parent(s): bffeb3e

Update app.py

Files changed (1): app.py +66 -79
app.py CHANGED
@@ -3,7 +3,7 @@ import json
 import tempfile
 import os
 import re # For parsing conversation
-from typing import Union, Optional # Add Optional
+from typing import Union, Optional, Dict # Import Dict
 # Import the actual functions from synthgen
 from synthgen import (
     generate_synthetic_text,
@@ -154,25 +154,39 @@ def generate_prompts_ui(
 
 # --- Modified Generation Wrappers ---
 
-# Wrapper for text generation + JSON preparation
+# Wrapper for text generation + JSON preparation - RETURNS DICT
 def run_generation_and_prepare_json(
     prompt: str,
     model: str,
     num_samples: int,
-    temperature: float, # Add settings
+    temperature: float,
     top_p: float,
     max_tokens: int
-):
+) -> Dict[gr.Textbox, str]: # Return type hint (optional but good practice)
     """Generates text samples and prepares a JSON file for download."""
-    # Handle optional settings (Gradio might pass default if not interacted with)
-    temp_val = temperature if temperature > 0 else None # Allow 0 but treat as None if needed? OpenRouter usually uses >0. Let's map 0 to None.
-    top_p_val = top_p if 0 < top_p <= 1 else None # top_p must be > 0 and <= 1
-    max_tokens_val = max_tokens if max_tokens > 0 else None # Max tokens should be positive
+    # Handle optional settings
+    temp_val = temperature if temperature > 0 else None
+    top_p_val = top_p if 0 < top_p <= 1 else None
+    max_tokens_val = max_tokens if max_tokens > 0 else None
+
+    # Define component objects used in return dict keys - MUST MATCH OUTPUTS
+    # This requires the components to be defined *before* this function,
+    # which isn't the case. So we cannot use component objects as keys here.
+    # Gradio handles mapping if the keys are strings matching component labels
+    # OR if we return gr.update targeting components.
+    # Let's return explicit gr.update for clarity and robustness.
 
     if not prompt:
-        return "Error: Please enter a prompt.", None
+        # Return updates for both outputs
+        return {
+            output_text: gr.update(value="Error: Please enter a prompt."),
+            download_file_text: gr.update(value=None) # Clear file output
+        }
     if num_samples <= 0:
-        return "Error: Number of samples must be positive.", None
+        return {
+            output_text: gr.update(value="Error: Number of samples must be positive."),
+            download_file_text: gr.update(value=None)
+        }
 
     output_str = f"Generating {num_samples} samples using model '{model}'...\n"
     output_str += f"(Settings: Temp={temp_val}, Top-P={top_p_val}, MaxTokens={max_tokens_val})\n"
@@ -180,48 +194,58 @@ def run_generation_and_prepare_json(
     results_list = []
 
     for i in range(num_samples):
-        # Pass settings to the backend function
         generated_text = generate_synthetic_text(
-            prompt,
-            model,
-            temperature=temp_val,
-            top_p=top_p_val,
-            max_tokens=max_tokens_val
+            prompt, model, temperature=temp_val, top_p=top_p_val, max_tokens=max_tokens_val
         )
         output_str += f"--- Sample {i+1} ---\n"
         output_str += generated_text + "\n\n"
         if not generated_text.startswith("Error:"):
             results_list.append(generated_text)
-        else:
-            pass
 
     output_str += "="*20 + "\nGeneration complete (check results above for errors)."
     json_filepath = create_json_file(results_list, "text_samples.json")
-    return output_str, json_filepath
+
+    # Return dictionary mapping components to updates
+    return {
+        output_text: gr.update(value=output_str),
+        download_file_text: gr.update(value=json_filepath) # Update file path
+    }
 
 
-# Wrapper for conversation generation + JSON preparation
+# Wrapper for conversation generation + JSON preparation - RETURNS DICT
 def run_conversation_generation_and_prepare_json(
     system_prompts_text: str,
     model: str,
     num_turns: int,
-    temperature: float, # Add settings
+    temperature: float,
     top_p: float,
     max_tokens: int
-):
+) -> Dict[gr.Textbox, str]: # Return type hint (optional)
     """Generates conversations and prepares a JSON file for download."""
     temp_val = temperature if temperature > 0 else None
     top_p_val = top_p if 0 < top_p <= 1 else None
     max_tokens_val = max_tokens if max_tokens > 0 else None
 
+    # Define component objects used in return dict keys - requires components defined first.
+    # Using explicit gr.update instead.
+
     if not system_prompts_text:
-        return "Error: Please enter or generate at least one system prompt/topic.", None
+        return {
+            output_conv: gr.update(value="Error: Please enter or generate at least one system prompt/topic."),
+            download_file_conv: gr.update(value=None)
+        }
     if num_turns <= 0:
-        return "Error: Number of turns must be positive.", None
+        return {
+            output_conv: gr.update(value="Error: Number of turns must be positive."),
+            download_file_conv: gr.update(value=None)
+        }
 
     prompts = [p.strip() for p in system_prompts_text.strip().split('\n') if p.strip()]
     if not prompts:
-        return "Error: No valid prompts found in the input.", None
+        return {
+            output_conv: gr.update(value="Error: No valid prompts found in the input."),
+            download_file_conv: gr.update(value=None)
+        }
 
     output_str = f"Generating {len(prompts)} conversations ({num_turns} turns each) using model '{model}'...\n"
     output_str += f"(Settings: Temp={temp_val}, Top-P={top_p_val}, MaxTokens={max_tokens_val})\n"
@@ -229,70 +253,33 @@ def run_conversation_generation_and_prepare_json(
     results_list_structured = []
 
     for i, prompt in enumerate(prompts):
-        # Pass settings to the backend function
         conversation_text = generate_synthetic_conversation(
-            prompt,
-            model,
-            num_turns,
-            temperature=temp_val,
-            top_p=top_p_val,
-            max_tokens=max_tokens_val
+            prompt, model, num_turns, temperature=temp_val, top_p=top_p_val, max_tokens=max_tokens_val
         )
-
         output_str += f"--- Conversation {i+1}/{len(prompts)} ---\n"
         output_str += conversation_text + "\n\n"
-
-        # Parse the generated text block for JSON structure
-        # Note: generate_synthetic_conversation includes a title like "Generated conversation for..."
-        # We might want to remove that before parsing or adjust the parser.
-        # Let's assume the core conversation starts after the first line break if a title exists.
+        # --- Parsing Logic ---
         core_conversation_text = conversation_text
-        if "\n\n" in conversation_text:
-            # Split only if the separator is present and the text doesn't start with Error:
-            if not conversation_text.startswith("Error:"):
-                parts = conversation_text.split("\n\n", 1)
-                if len(parts) > 1:
-                    core_conversation_text = parts[1]
-                else: # Handle case where title might not have double newline
-                    core_conversation_text = conversation_text # Fallback to full text
-            else:
-                core_conversation_text = None # Don't try to parse errors
-        elif conversation_text.startswith("Error:"):
-            core_conversation_text = None # Don't try to parse errors
-        # Else: No double newline, assume the whole text is the conversation (or error)
-
+        if conversation_text.startswith("Error:"): core_conversation_text = None
+        elif "\n\n" in conversation_text:
+            parts = conversation_text.split("\n\n", 1)
+            core_conversation_text = parts[1] if len(parts) > 1 else conversation_text
         if core_conversation_text:
             messages = parse_conversation_string(core_conversation_text)
-            if messages: # Add only if parsing was successful
-                results_list_structured.append({
-                    "prompt": prompt,
-                    "messages": messages
-                })
-            else: # Parsing failed, optionally add raw text or error placeholder
-                results_list_structured.append({
-                    "prompt": prompt,
-                    "error": "Failed to parse conversation structure.",
-                    "raw_text": core_conversation_text # Include raw text if parsing failed
-                })
-        elif conversation_text.startswith("Error:"):
-            results_list_structured.append({
-                "prompt": prompt,
-                "error": conversation_text # Include the error message from generation
-            })
-        else: # Handle case where core_conversation_text became None unexpectedly or original text was just a title
-            results_list_structured.append({
-                "prompt": prompt,
-                "error": "Could not extract conversation content for parsing.",
-                "raw_text": conversation_text
-            })
-
+            if messages: results_list_structured.append({"prompt": prompt, "messages": messages})
+            else: results_list_structured.append({"prompt": prompt, "error": "Failed to parse structure.", "raw_text": core_conversation_text})
+        elif conversation_text.startswith("Error:"): results_list_structured.append({"prompt": prompt, "error": conversation_text})
+        else: results_list_structured.append({"prompt": prompt, "error": "Could not extract content.", "raw_text": conversation_text})
+        # --- End Parsing Logic ---
 
     output_str += "="*40 + "\nGeneration complete (check results above for errors)."
-
-    # Create JSON file from the structured list
     json_filepath = create_json_file(results_list_structured, "conversations.json")
 
-    return output_str, json_filepath
+    # Return dictionary mapping components to updates
+    return {
+        output_conv: gr.update(value=output_str),
+        download_file_conv: gr.update(value=json_filepath)
+    }
 
 
 # --- Gradio Interface Definition ---
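
The structured output above leans on `parse_conversation_string`, which lives outside this hunk. A minimal sketch of what such a parser could look like, assuming the model labels turns as `System:` / `User:` / `Assistant:` at line starts (that label format is an assumption, not taken from this diff):

```python
import re

# Hypothetical stand-in for the app's parse_conversation_string.
ROLE_RE = re.compile(r"^(system|user|assistant)\s*:\s*(.*)$", re.IGNORECASE)

def parse_conversation_sketch(text: str) -> list[dict]:
    messages = []
    for line in text.splitlines():
        m = ROLE_RE.match(line.strip())
        if m:
            messages.append({"role": m.group(1).lower(), "content": m.group(2).strip()})
        elif messages and line.strip():
            # Unlabelled line: treat it as a continuation of the previous turn.
            messages[-1]["content"] += "\n" + line.strip()
    return messages  # an empty list signals a parse failure to the caller
```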
@@ -419,4 +406,4 @@ if __name__ == "__main__":
     print("Launching Gradio App...")
     print("Make sure the OPENROUTER_API_KEY environment variable is set.")
     # Use share=True for temporary public link if running locally and need to test
-    demo.launch(share=True) # share=True
+    demo.launch() # share=True
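
Both wrappers hand their results to `create_json_file`, also outside this diff. Given the `json`, `tempfile`, and `os` imports visible in the first hunk, a plausible sketch (the implementation details are assumptions):

```python
import json
import os
import tempfile

# Hypothetical sketch of create_json_file: write the results to a file in a
# fresh temp directory and return its path for the gr.File component.
def create_json_file_sketch(data, filename: str) -> str:
    path = os.path.join(tempfile.mkdtemp(), filename)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    return path
```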