#!/usr/bin/env python3
# Source: pdf_explainer/api/demo.py (uploaded by spagestic, commit d1c4aa1, "api updated")
"""
Quick demonstration script for the Enhanced Chatterbox TTS API
Shows how to use the new full-text endpoints for processing long documents
"""
import requests
import os
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
def demo_full_text_processing():
    """Send a sample document to the full-text TTS endpoint and save the audio.

    The endpoint URL is read from the ``FULL_TEXT_TTS_ENDPOINT`` environment
    variable. On an HTTP 200 response the body is written to
    ``demo_output/ai_document_speech.wav`` and the ``X-Audio-Duration``,
    ``X-Chunks-Processed`` and ``X-Total-Characters`` response headers are
    printed together with the size of the saved file.

    Returns:
        bool: ``True`` if audio was received and saved. ``False`` if the
        endpoint is not configured, the server answers with a non-200
        status, the request times out, or any other error is raised.
    """
    # Sample long text (like from a PDF), one paragraph per entry.
    paragraphs = (
        "Artificial Intelligence has revolutionized numerous industries and continues to shape our world in unprecedented ways. From healthcare to transportation, AI systems are becoming increasingly sophisticated and capable of performing complex tasks that were once thought to be exclusively human domains.",
        "In healthcare, AI-powered diagnostic systems can now identify diseases with remarkable accuracy, sometimes surpassing human doctors in specific areas. Machine learning algorithms analyze medical images, predict patient outcomes, and assist in drug discovery processes. This technological advancement has the potential to make healthcare more accessible and effective globally.",
        "The transportation sector has also witnessed significant AI integration. Autonomous vehicles use computer vision, sensor fusion, and deep learning to navigate complex environments safely. These systems process vast amounts of real-time data to make split-second decisions, potentially reducing traffic accidents and improving transportation efficiency.",
        "However, with these advancements come important ethical considerations. Issues of privacy, job displacement, and algorithmic bias must be carefully addressed as AI systems become more prevalent in society. It is crucial that we develop AI responsibly, ensuring that these powerful technologies benefit humanity while minimizing potential risks.",
        "The future of AI holds immense promise, but it requires thoughtful implementation and continuous oversight to ensure that its development aligns with human values and societal needs.",
    )
    # Build the payload text once so the reported length matches what is sent.
    document = "\n".join(paragraphs)

    endpoint = os.getenv("FULL_TEXT_TTS_ENDPOINT")
    if not endpoint:
        print("❌ FULL_TEXT_TTS_ENDPOINT not configured")
        print("Please set the environment variable or update your .env file")
        return False

    print("🎙️ Enhanced Chatterbox TTS API Demo")
    print("=" * 50)
    print(f"Processing document ({len(document)} characters)...")

    try:
        # Send request to full-text endpoint
        response = requests.post(
            endpoint,
            json={
                "text": document,
                "max_chunk_size": 600,  # Smaller chunks for better processing
                "silence_duration": 0.6,  # Slightly longer pause between chunks
                "fade_duration": 0.2,  # Smooth transitions
                "overlap_sentences": 1,  # Overlap for better continuity
            },
            timeout=180,  # Allow time for processing
        )

        if response.status_code != 200:
            print(f"❌ Request failed with status {response.status_code}")
            print(f"Response: {response.text}")
            return False

        # Save the generated audio
        output_dir = Path("demo_output")
        output_dir.mkdir(exist_ok=True)
        output_file = output_dir / "ai_document_speech.wav"
        output_file.write_bytes(response.content)

        # Extract processing information from headers
        duration = response.headers.get('X-Audio-Duration', 'unknown')
        chunks = response.headers.get('X-Chunks-Processed', 'unknown')
        characters = response.headers.get('X-Total-Characters', 'unknown')

        print("✅ Success! Audio generated and saved")
        # as_posix() keeps the forward-slash form on every platform.
        print(f"📁 File: {output_file.as_posix()}")
        print(f"⏱️ Duration: {duration} seconds")
        print(f"🧩 Chunks processed: {chunks}")
        print(f"📝 Characters: {characters}")
        print(f"💾 File size: {output_file.stat().st_size / 1024:.1f} KB")
        return True
    except requests.exceptions.Timeout:
        print("⏰ Request timed out - the document might be too long")
        return False
    except Exception as e:
        # Demo boundary: report any other failure (network, disk) instead of
        # crashing, so the caller can carry on with the remaining demos.
        print(f"❌ Error: {e}")
        return False
def demo_comparison():
    """Time one request to the standard endpoint and one to the full-text endpoint.

    The standard endpoint (``GENERATE_AUDIO_ENDPOINT``) is sent a short text
    and the full-text endpoint (``FULL_TEXT_TTS_ENDPOINT``) a medium-length
    text. The elapsed wall-clock time of each request is printed, followed by
    the ``X-Chunks-Processed`` header when the full-text request succeeds.

    Returns:
        bool: ``True`` only if both endpoints answered with HTTP 200.
        ``False`` if either endpoint is not configured, either response has
        a non-200 status, or a request raises.
    """
    import time

    short_text = "This is a short text for comparison."
    medium_text = "\n".join((
        "This is a medium-length text that demonstrates the difference between",
        "standard and full-text processing endpoints. The full-text endpoint",
        "provides better handling for longer content with intelligent chunking",
        "and server-side concatenation.",
    ))

    standard_endpoint = os.getenv("GENERATE_AUDIO_ENDPOINT")
    fulltext_endpoint = os.getenv("FULL_TEXT_TTS_ENDPOINT")
    if not (standard_endpoint and fulltext_endpoint):
        print("⚠️ Missing endpoint configuration for comparison")
        return False

    print("\n🔍 Comparison Demo")
    print("=" * 30)

    try:
        # Test standard endpoint
        print("Testing standard endpoint...")
        # perf_counter() is monotonic, so system clock adjustments cannot
        # distort the measured interval.
        started = time.perf_counter()
        response1 = requests.post(
            standard_endpoint,
            json={"text": short_text},
            timeout=30,
        )
        standard_time = time.perf_counter() - started

        # Test full-text endpoint
        print("Testing full-text endpoint...")
        started = time.perf_counter()
        response2 = requests.post(
            fulltext_endpoint,
            json={"text": medium_text, "max_chunk_size": 400},
            timeout=60,
        )
        fulltext_time = time.perf_counter() - started
    except Exception as e:
        # Demo boundary: report the failure rather than crash the script.
        print(f"❌ Comparison error: {e}")
        return False

    print("\n📊 Results:")
    print(f"Standard endpoint: {standard_time:.2f}s (short text)")
    print(f"Full-text endpoint: {fulltext_time:.2f}s (medium text)")
    if response2.status_code == 200:
        chunks = response2.headers.get('X-Chunks-Processed', 'unknown')
        print(f"Full-text chunks processed: {chunks}")

    # A timing is only meaningful if the request actually succeeded, so
    # surface every non-200 status and report the comparison as failed.
    failures = [
        (label, resp.status_code)
        for label, resp in (("Standard", response1), ("Full-text", response2))
        if resp.status_code != 200
    ]
    for label, status in failures:
        print(f"❌ {label} endpoint failed with status {status}")
    return not failures
def main():
    """Run the demonstration.

    If no ``.env`` file exists in the current working directory, a sample
    one with placeholder endpoint URLs is written and the function returns
    without running any demo. Otherwise both demos are run and a closing
    message reflecting their outcome is printed.
    """
    print("🚀 Enhanced Chatterbox TTS API Demonstration")
    print("This demo showcases the new full-text processing capabilities")
    print()

    # Check if .env file exists
    env_file = Path(".env")
    if not env_file.exists():
        print("📝 Creating sample .env file...")
        print("Please update it with your actual Modal endpoint URLs")
        env_content = (
            "# Enhanced Chatterbox TTS API Endpoints\n"
            "FULL_TEXT_TTS_ENDPOINT=https://YOUR-MODAL-ENDPOINT.modal.run/generate_full_text_audio\n"
            "GENERATE_AUDIO_ENDPOINT=https://YOUR-MODAL-ENDPOINT.modal.run/generate_audio\n"
        )
        with open(env_file, "w", encoding="utf-8") as f:
            f.write(env_content)
        print("✅ Sample .env file created")
        return

    # Run demonstrations; each returns True on success and False otherwise.
    full_text_ok = demo_full_text_processing()
    comparison_ok = demo_comparison()

    if full_text_ok and comparison_ok:
        print("\n🎉 Demo complete!")
    else:
        print("\n⚠️ Demo finished with errors - see the messages above")
    # Only the full-text demo writes audio, so only point at the output
    # directory when that demo actually produced a file.
    if full_text_ok:
        print("Check the demo_output/ directory for generated audio files")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()