# Spaces: Sleeping
import json
import os
import random
import sys
import time
from pathlib import Path

from src.generation.medical_generator import MedicalTextGenerator
# Script: generate a batch of synthetic medical records of randomly chosen
# types and save them as a single JSON array under data/synthetic/.

# Refuse to run without a Gemini API key.
# NOTE(review): the generation call below passes use_gemini=False, so this key
# may not be needed for this run -- confirm against MedicalTextGenerator.
if not os.getenv('GEMINI_API_KEY'):
    print("Please set the GEMINI_API_KEY environment variable:")
    print("Windows PowerShell: $env:GEMINI_API_KEY='your-api-key-here'")
    print("Windows CMD: set GEMINI_API_KEY=your-api-key-here")
    # sys.exit() rather than the bare exit() helper: exit() is injected by the
    # site module for interactive use and is missing under `python -S`.
    sys.exit(1)

# Number of generation attempts, and the pause between consecutive calls.
NUM_RECORDS = 100
SECONDS_BETWEEN_CALLS = 4

# Ensure the output directory exists
output_dir = Path("data/synthetic")
output_dir.mkdir(parents=True, exist_ok=True)

# Initialize the generator
generator = MedicalTextGenerator()

# Define supported record types (using the keys from the generator's templates)
record_types = ["clinical_note", "discharge_summary", "lab_report"]

# Generate the mixed batch; failed attempts are reported and skipped, so the
# final list may hold fewer than NUM_RECORDS entries.
records = []
for record_id in range(1, NUM_RECORDS + 1):
    # Respect rate limits: pause between consecutive calls. Doing it here
    # (instead of after a successful call) also throttles the request that
    # follows a failure and avoids a pointless wait after the last record.
    if record_id > 1:
        time.sleep(SECONDS_BETWEEN_CALLS)

    # Randomly select record type
    record_type = random.choice(record_types)

    # Generate record using Hugging Face (use_gemini=False)
    try:
        record = generator.generate_record(record_type, use_gemini=False)
    except Exception as e:
        # Best-effort batch: report the failure and move on to the next id.
        print(f"Error generating record {record_id}: {e}")
        continue

    print(f"Generated record {record_id}/{NUM_RECORDS}: {record_type}")

    # Append record details
    records.append({
        "id": record_id,
        "type": record_type,
        "content": record,
        "generator": "Hugging Face",
        "generated_at": time.strftime("%Y-%m-%d %H:%M:%S"),
    })

# Save records to a JSON file (explicit encoding so the result does not depend
# on the platform's default locale encoding).
output_file = output_dir / "synthetic_records.json"
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(records, f, indent=2)

print(f"\nGenerated {len(records)} records and saved to {output_file}")