Spaces:

Agents-MCP-Hackathon
/

pdf_explainer

Sleeping

App Files Files Community

pdf_explainer / api /demo.py

spagestic

api updated

d1c4aa1 about 1 month ago

raw

history blame

7.35 kB

	#!/usr/bin/env python3
	"""
	Quick demonstration script for the Enhanced Chatterbox TTS API
	Shows how to use the new full-text endpoints for processing long documents
	"""

	import requests
	import os
	from pathlib import Path
	from dotenv import load_dotenv

	# Load environment variables (python-dotenv reads them from a .env file) so
	# the os.getenv() lookups in the demo functions can see the endpoint URLs.
	load_dotenv()

	def demo_full_text_processing():
	    """Demonstrate full-text processing with a sample document.

	    Reads the endpoint URL from the ``FULL_TEXT_TTS_ENDPOINT`` environment
	    variable, POSTs a sample multi-paragraph document to it as JSON and, on
	    an HTTP 200 response, writes the response body to
	    ``demo_output/ai_document_speech.wav``. It then prints the values of the
	    ``X-Audio-Duration``, ``X-Chunks-Processed`` and ``X-Total-Characters``
	    response headers together with the size of the saved file.

	    Returns:
	        bool: ``True`` if the audio was received and saved. ``False`` if the
	        endpoint is not configured, the server answered with a non-200
	        status, the request timed out or failed, or the file could not be
	        written.
	    """

	    # Sample long text (like from a PDF)
	    sample_document = """
	Artificial Intelligence has revolutionized numerous industries and continues to shape our world in unprecedented ways. From healthcare to transportation, AI systems are becoming increasingly sophisticated and capable of performing complex tasks that were once thought to be exclusively human domains.

	In healthcare, AI-powered diagnostic systems can now identify diseases with remarkable accuracy, sometimes surpassing human doctors in specific areas. Machine learning algorithms analyze medical images, predict patient outcomes, and assist in drug discovery processes. This technological advancement has the potential to make healthcare more accessible and effective globally.

	The transportation sector has also witnessed significant AI integration. Autonomous vehicles use computer vision, sensor fusion, and deep learning to navigate complex environments safely. These systems process vast amounts of real-time data to make split-second decisions, potentially reducing traffic accidents and improving transportation efficiency.

	However, with these advancements come important ethical considerations. Issues of privacy, job displacement, and algorithmic bias must be carefully addressed as AI systems become more prevalent in society. It is crucial that we develop AI responsibly, ensuring that these powerful technologies benefit humanity while minimizing potential risks.

	The future of AI holds immense promise, but it requires thoughtful implementation and continuous oversight to ensure that its development aligns with human values and societal needs.
	"""

	    endpoint = os.getenv("FULL_TEXT_TTS_ENDPOINT")
	    if not endpoint:
	        print("❌ FULL_TEXT_TTS_ENDPOINT not configured")
	        print("Please set the environment variable or update your .env file")
	        return False

	    # Strip once so the size reported below is the size of the text that is
	    # actually sent, not of the literal with its surrounding whitespace.
	    document_text = sample_document.strip()

	    print("🎙️ Enhanced Chatterbox TTS API Demo")
	    print("=" * 50)
	    print(f"Processing document ({len(document_text)} characters)...")

	    # Only the network call lives in this try block, so the handlers below
	    # cannot mask unrelated programming errors.
	    try:
	        # Send request to full-text endpoint
	        response = requests.post(
	            endpoint,
	            json={
	                "text": document_text,
	                "max_chunk_size": 600,  # Smaller chunks for better processing
	                "silence_duration": 0.6,  # Slightly longer pause between chunks
	                "fade_duration": 0.2,  # Smooth transitions
	                "overlap_sentences": 1,  # Overlap for better continuity
	            },
	            timeout=180,  # Allow time for processing
	        )
	    except requests.exceptions.Timeout:
	        print("⏰ Request timed out - the document might be too long")
	        return False
	    except requests.exceptions.RequestException as e:
	        print(f"❌ Error: {e}")
	        return False

	    if response.status_code != 200:
	        print(f"❌ Request failed with status {response.status_code}")
	        print(f"Response: {response.text}")
	        return False

	    # Save the generated audio
	    output_file = "demo_output/ai_document_speech.wav"
	    try:
	        Path("demo_output").mkdir(exist_ok=True)
	        with open(output_file, "wb") as f:
	            f.write(response.content)
	        size_kb = Path(output_file).stat().st_size / 1024
	    except OSError as e:
	        print(f"❌ Error: {e}")
	        return False

	    # Extract processing information from headers
	    duration = response.headers.get('X-Audio-Duration', 'unknown')
	    chunks = response.headers.get('X-Chunks-Processed', 'unknown')
	    characters = response.headers.get('X-Total-Characters', 'unknown')

	    print("✅ Success! Audio generated and saved")
	    print(f"📁 File: {output_file}")
	    print(f"⏱️ Duration: {duration} seconds")
	    print(f"🧩 Chunks processed: {chunks}")
	    print(f"📝 Characters: {characters}")
	    print(f"💾 File size: {size_kb:.1f} KB")

	    return True


	def demo_comparison():
	    """Compare standard vs full-text processing.

	    Times one POST of a short text to the URL in ``GENERATE_AUDIO_ENDPOINT``
	    and one POST of a medium-length text to the URL in
	    ``FULL_TEXT_TTS_ENDPOINT``, then prints both elapsed times. For a
	    successful full-text response it also prints the value of the
	    ``X-Chunks-Processed`` header.

	    Returns:
	        bool: ``True`` only if both endpoints are configured and both
	        answered with HTTP 200. ``False`` if configuration is missing, a
	        request raised an error, or either response was not 200.
	    """
	    # Local import: time is only needed by this comparison.
	    import time

	    short_text = "This is a short text for comparison."
	    medium_text = """
	This is a medium-length text that demonstrates the difference between
	standard and full-text processing endpoints. The full-text endpoint
	provides better handling for longer content with intelligent chunking
	and server-side concatenation.
	"""

	    standard_endpoint = os.getenv("GENERATE_AUDIO_ENDPOINT")
	    fulltext_endpoint = os.getenv("FULL_TEXT_TTS_ENDPOINT")

	    if not (standard_endpoint and fulltext_endpoint):
	        print("⚠️ Missing endpoint configuration for comparison")
	        return False

	    print("\n🔍 Comparison Demo")
	    print("=" * 30)

	    try:
	        # Test standard endpoint
	        print("Testing standard endpoint...")
	        # perf_counter() is monotonic, so the measured interval cannot be
	        # distorted by system clock adjustments.
	        start_time = time.perf_counter()
	        response1 = requests.post(
	            standard_endpoint,
	            json={"text": short_text},
	            timeout=30,
	        )
	        standard_time = time.perf_counter() - start_time

	        # Test full-text endpoint
	        print("Testing full-text endpoint...")
	        start_time = time.perf_counter()
	        response2 = requests.post(
	            fulltext_endpoint,
	            json={"text": medium_text.strip(), "max_chunk_size": 400},
	            timeout=60,
	        )
	        fulltext_time = time.perf_counter() - start_time
	    except requests.exceptions.RequestException as e:
	        print(f"❌ Comparison error: {e}")
	        return False

	    print("\n📊 Results:")
	    print(f"Standard endpoint: {standard_time:.2f}s (short text)")
	    print(f"Full-text endpoint: {fulltext_time:.2f}s (medium text)")

	    # A timing is only meaningful if the request succeeded, so report
	    # failures explicitly instead of silently returning True.
	    succeeded = True
	    if response1.status_code != 200:
	        print(f"❌ Standard endpoint failed with status {response1.status_code}")
	        succeeded = False

	    if response2.status_code == 200:
	        chunks = response2.headers.get('X-Chunks-Processed', 'unknown')
	        print(f"Full-text chunks processed: {chunks}")
	    else:
	        print(f"❌ Full-text endpoint failed with status {response2.status_code}")
	        succeeded = False

	    return succeeded


	def main():
	    """Run the demonstration.

	    If no ``.env`` file exists in the current working directory, write a
	    sample one with placeholder endpoint URLs and return without running
	    the demos. Otherwise run the full-text demo followed by the comparison
	    demo.
	    """
	    print("🚀 Enhanced Chatterbox TTS API Demonstration")
	    print("This demo showcases the new full-text processing capabilities")
	    print()

	    # Check if .env file exists
	    env_path = Path(".env")
	    if not env_path.exists():
	        print("📝 Creating sample .env file...")
	        print("Please update it with your actual Modal endpoint URLs")

	        # Built from explicit lines so the written file never picks up
	        # leading whitespace from this function's source indentation.
	        env_lines = (
	            "# Enhanced Chatterbox TTS API Endpoints",
	            "FULL_TEXT_TTS_ENDPOINT=https://YOUR-MODAL-ENDPOINT.modal.run/generate_full_text_audio",
	            "GENERATE_AUDIO_ENDPOINT=https://YOUR-MODAL-ENDPOINT.modal.run/generate_audio",
	        )
	        with open(env_path, "w", encoding="utf-8") as f:
	            f.write("\n".join(env_lines) + "\n")
	        print("✅ Sample .env file created")
	        return

	    # Run demonstrations
	    full_text_ok = demo_full_text_processing()
	    demo_comparison()

	    print("\n🎉 Demo complete!")
	    # Only point at the output directory when the full-text demo actually
	    # produced an audio file there.
	    if full_text_ok:
	        print("Check the demo_output/ directory for generated audio files")


	# Run the demonstration only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()