Commit d462da9
Committed by GitHub Actions
1 parent: 3da69a8

Sync from GitHub repo

Files changed:
- .gitignore (+3 -1)
- app.py (+247 -22)
- templates/arena.html (+52 -9)
.gitignore CHANGED

@@ -47,4 +47,6 @@ instance/
 Thumbs.db
 
 # Uploads
-static/temp_audio
+static/temp_audio
+
+votes/
app.py CHANGED

@@ -3,6 +3,7 @@ from huggingface_hub import HfApi, hf_hub_download
 from apscheduler.schedulers.background import BackgroundScheduler
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime
+import threading  # Added for locking
 
 year = datetime.now().year
 month = datetime.now().month
@@ -111,9 +112,20 @@ limiter = Limiter(
     storage_uri="memory://",
 )
 
-#
+# TTS Cache Configuration - Read from environment
+TTS_CACHE_SIZE = int(os.getenv("TTS_CACHE_SIZE", "10"))
+CACHE_AUDIO_SUBDIR = "cache"
+tts_cache = {}  # sentence -> {model_a, model_b, audio_a, audio_b, created_at}
+tts_cache_lock = threading.Lock()
+cache_executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix='CacheReplacer')
+all_harvard_sentences = []  # Keep the full list available
+
+# Create temp directories
 TEMP_AUDIO_DIR = os.path.join(tempfile.gettempdir(), "tts_arena_audio")
+CACHE_AUDIO_DIR = os.path.join(TEMP_AUDIO_DIR, CACHE_AUDIO_SUBDIR)
 os.makedirs(TEMP_AUDIO_DIR, exist_ok=True)
+os.makedirs(CACHE_AUDIO_DIR, exist_ok=True)  # Ensure cache subdir exists
+
 
 # Store active TTS sessions
 app.tts_sessions = {}
@@ -275,12 +287,15 @@ def verify_turnstile():
         return redirect(url_for("turnstile_page", redirect_url=redirect_url))
 
 with open("harvard_sentences.txt", "r") as f:
-
-
+    # Store all sentences and clean them up
+    all_harvard_sentences = [line.strip() for line in f.readlines() if line.strip()]
+    # Shuffle for initial random selection if needed, but main list remains ordered
+    initial_sentences = random.sample(all_harvard_sentences, min(len(all_harvard_sentences), 500))  # Limit initial pass for template
 
 @app.route("/")
 def arena():
-
+    # Pass a subset of sentences for the random button fallback
+    return render_template("arena.html", harvard_sentences=json.dumps(initial_sentences))
 
 
 @app.route("/leaderboard")
@@ -357,20 +372,188 @@ def about():
     return render_template("about.html")
 
 
+# --- TTS Caching Functions ---
+
+def generate_and_save_tts(text, model_id, output_dir):
+    """Generates TTS and saves it to a specific directory, returning the full path."""
+    temp_audio_path = None  # Initialize to None
+    try:
+        app.logger.debug(f"[TTS Gen {model_id}] Starting generation for: '{text[:30]}...'")
+        # If predict_tts saves file itself and returns path:
+        temp_audio_path = predict_tts(text, model_id)
+        app.logger.debug(f"[TTS Gen {model_id}] predict_tts returned: {temp_audio_path}")
+
+        if not temp_audio_path or not os.path.exists(temp_audio_path):
+            app.logger.warning(f"[TTS Gen {model_id}] predict_tts failed or returned invalid path: {temp_audio_path}")
+            raise ValueError("predict_tts did not return a valid path or file does not exist")
+
+        file_uuid = str(uuid.uuid4())
+        dest_path = os.path.join(output_dir, f"{file_uuid}.wav")
+        app.logger.debug(f"[TTS Gen {model_id}] Moving {temp_audio_path} to {dest_path}")
+        # Move the file generated by predict_tts to the target cache directory
+        shutil.move(temp_audio_path, dest_path)
+        app.logger.debug(f"[TTS Gen {model_id}] Move successful. Returning {dest_path}")
+        return dest_path
+
+    except Exception as e:
+        app.logger.error(f"Error generating/saving TTS for model {model_id} and text '{text[:30]}...': {str(e)}")
+        # Ensure temporary file from predict_tts (if any) is cleaned up
+        if temp_audio_path and os.path.exists(temp_audio_path):
+            try:
+                app.logger.debug(f"[TTS Gen {model_id}] Cleaning up temporary file {temp_audio_path} after error.")
+                os.remove(temp_audio_path)
+            except OSError:
+                pass  # Ignore error if file couldn't be removed
+        return None
+
+
+def _generate_cache_entry_task(sentence):
+    """Task function to generate audio for a sentence and add to cache."""
+    # Wrap the entire task in an application context
+    with app.app_context():
+        if not sentence:
+            # Select a new sentence if not provided (for replacement)
+            with tts_cache_lock:
+                cached_keys = set(tts_cache.keys())
+            available_sentences = [s for s in all_harvard_sentences if s not in cached_keys]
+            if not available_sentences:
+                app.logger.warning("No more unique Harvard sentences available for caching.")
+                return
+            sentence = random.choice(available_sentences)
+
+        # app.logger.info removed duplicate log
+        print(f"[Cache Task] Querying models for: '{sentence[:50]}...'")
+        available_models = Model.query.filter_by(
+            model_type=ModelType.TTS, is_active=True
+        ).all()
+
+        if len(available_models) < 2:
+            app.logger.error("Not enough active TTS models to generate cache entry.")
+            return
+
+        try:
+            models = random.sample(available_models, 2)
+            model_a_id = models[0].id
+            model_b_id = models[1].id
+
+            # Generate audio concurrently using a local executor for clarity within the task
+            with ThreadPoolExecutor(max_workers=2, thread_name_prefix='AudioGen') as audio_executor:
+                future_a = audio_executor.submit(generate_and_save_tts, sentence, model_a_id, CACHE_AUDIO_DIR)
+                future_b = audio_executor.submit(generate_and_save_tts, sentence, model_b_id, CACHE_AUDIO_DIR)
+
+                timeout_seconds = 120
+                audio_a_path = future_a.result(timeout=timeout_seconds)
+                audio_b_path = future_b.result(timeout=timeout_seconds)
+
+            if audio_a_path and audio_b_path:
+                with tts_cache_lock:
+                    # Only add if the sentence isn't already back in the cache
+                    # And ensure cache size doesn't exceed limit
+                    if sentence not in tts_cache and len(tts_cache) < TTS_CACHE_SIZE:
+                        tts_cache[sentence] = {
+                            "model_a": model_a_id,
+                            "model_b": model_b_id,
+                            "audio_a": audio_a_path,
+                            "audio_b": audio_b_path,
+                            "created_at": datetime.utcnow(),
+                        }
+                        app.logger.info(f"Successfully cached entry for: '{sentence[:50]}...'")
+                    elif sentence in tts_cache:
+                        app.logger.warning(f"Sentence '{sentence[:50]}...' already re-cached. Discarding new generation.")
+                        # Clean up the newly generated files if not added
+                        if os.path.exists(audio_a_path): os.remove(audio_a_path)
+                        if os.path.exists(audio_b_path): os.remove(audio_b_path)
+                    else:  # Cache is full
+                        app.logger.warning(f"Cache is full ({len(tts_cache)} entries). Discarding new generation for '{sentence[:50]}...'.")
+                        # Clean up the newly generated files if not added
+                        if os.path.exists(audio_a_path): os.remove(audio_a_path)
+                        if os.path.exists(audio_b_path): os.remove(audio_b_path)
+
+            else:
+                app.logger.error(f"Failed to generate one or both audio files for cache: '{sentence[:50]}...'")
+                # Clean up whichever file might have been created
+                if audio_a_path and os.path.exists(audio_a_path): os.remove(audio_a_path)
+                if audio_b_path and os.path.exists(audio_b_path): os.remove(audio_b_path)
+
+        except Exception as e:
+            # Log the exception within the app context
+            app.logger.error(f"Exception in _generate_cache_entry_task for '{sentence[:50]}...': {str(e)}", exc_info=True)
+
+
+def initialize_tts_cache():
+    print("Initializing TTS cache")
+    """Selects initial sentences and starts generation tasks."""
+    with app.app_context():  # Ensure access to models
+        if not all_harvard_sentences:
+            app.logger.error("Harvard sentences not loaded. Cannot initialize cache.")
+            return
+
+        initial_selection = random.sample(all_harvard_sentences, min(len(all_harvard_sentences), TTS_CACHE_SIZE))
+        app.logger.info(f"Initializing TTS cache with {len(initial_selection)} sentences...")
+
+        for sentence in initial_selection:
+            # Use the main cache_executor for initial population too
+            cache_executor.submit(_generate_cache_entry_task, sentence)
+        app.logger.info("Submitted initial cache generation tasks.")
+
+# --- End TTS Caching Functions ---
+
+
 @app.route("/api/tts/generate", methods=["POST"])
-@limiter.limit("10 per minute")
+@limiter.limit("10 per minute")  # Keep limit, cached responses are still requests
 def generate_tts():
     # If verification not setup, handle it first
     if app.config["TURNSTILE_ENABLED"] and not session.get("turnstile_verified"):
         return jsonify({"error": "Turnstile verification required"}), 403
 
     data = request.json
-    text = data.get("text")
+    text = data.get("text", "").strip()  # Ensure text is stripped
 
     if not text or len(text) > 1000:
         return jsonify({"error": "Invalid or too long text"}), 400
 
-    #
+    # --- Cache Check ---
+    cache_hit = False
+    session_data_from_cache = None
+    with tts_cache_lock:
+        if text in tts_cache:
+            cache_hit = True
+            cached_entry = tts_cache.pop(text)  # Remove from cache immediately
+            app.logger.info(f"TTS Cache HIT for: '{text[:50]}...'")
+
+            # Prepare session data using cached info
+            session_id = str(uuid.uuid4())
+            session_data_from_cache = {
+                "model_a": cached_entry["model_a"],
+                "model_b": cached_entry["model_b"],
+                "audio_a": cached_entry["audio_a"],  # Paths are now from cache_dir
+                "audio_b": cached_entry["audio_b"],
+                "text": text,
+                "created_at": datetime.utcnow(),
+                "expires_at": datetime.utcnow() + timedelta(minutes=30),
+                "voted": False,
+            }
+            app.tts_sessions[session_id] = session_data_from_cache
+
+            # Trigger background task to replace the used cache entry
+            cache_executor.submit(_generate_cache_entry_task, None)  # Pass None to signal replacement
+
+    if cache_hit and session_data_from_cache:
+        # Return response using cached data
+        # Note: The files are now managed by the session lifecycle (cleanup_session)
+        return jsonify(
+            {
+                "session_id": session_id,
+                "audio_a": f"/api/tts/audio/{session_id}/a",
+                "audio_b": f"/api/tts/audio/{session_id}/b",
+                "expires_in": 1800,  # 30 minutes in seconds
+                "cache_hit": True,
+            }
+        )
+    # --- End Cache Check ---
+
+    # --- Cache Miss: Generate on the fly ---
+    app.logger.info(f"TTS Cache MISS for: '{text[:50]}...'. Generating on the fly.")
     available_models = Model.query.filter_by(
         model_type=ModelType.TTS, is_active=True
     ).all()
@@ -380,25 +563,28 @@ def generate_tts():
     selected_models = random.sample(available_models, 2)
 
     try:
-        # Generate TTS for both models concurrently
         audio_files = []
         model_ids = []
 
-        # Function to process a single model
-        def
-
-
+        # Function to process a single model (generate directly to TEMP_AUDIO_DIR, not cache subdir)
+        def process_model_on_the_fly(model):
+            # Generate and save directly to the main temp dir
+            # Assume predict_tts handles saving temporary files
+            temp_audio_path = predict_tts(text, model.id)
+            if not temp_audio_path or not os.path.exists(temp_audio_path):
+                raise ValueError(f"predict_tts failed for model {model.id}")
 
-
-
-
-
+            # Create a unique name in the main TEMP_AUDIO_DIR for the session
+            file_uuid = str(uuid.uuid4())
+            dest_path = os.path.join(TEMP_AUDIO_DIR, f"{file_uuid}.wav")
+            shutil.move(temp_audio_path, dest_path)  # Move from predict_tts's temp location
+
+            return {"model_id": model.id, "audio_path": dest_path}
 
-            return {"model_id": model.id, "audio_path": dest_path}
 
         # Use ThreadPoolExecutor to process models concurrently
        with ThreadPoolExecutor(max_workers=2) as executor:
-            results = list(executor.map(
+            results = list(executor.map(process_model_on_the_fly, selected_models))
 
         # Extract results
         for result in results:
@@ -410,7 +596,7 @@ def generate_tts():
         app.tts_sessions[session_id] = {
             "model_a": model_ids[0],
             "model_b": model_ids[1],
-            "audio_a": audio_files[0],
+            "audio_a": audio_files[0],  # Paths are now from TEMP_AUDIO_DIR directly
             "audio_b": audio_files[1],
             "text": text,
             "created_at": datetime.utcnow(),
@@ -424,13 +610,23 @@ def generate_tts():
                 "session_id": session_id,
                 "audio_a": f"/api/tts/audio/{session_id}/a",
                 "audio_b": f"/api/tts/audio/{session_id}/b",
-                "expires_in": 1800,
+                "expires_in": 1800,
+                "cache_hit": False,
             }
         )
 
     except Exception as e:
-        app.logger.error(f"TTS generation error: {str(e)}")
+        app.logger.error(f"TTS on-the-fly generation error: {str(e)}", exc_info=True)
+        # Cleanup any files potentially created during the failed attempt
+        if 'results' in locals():
+            for res in results:
+                if 'audio_path' in res and os.path.exists(res['audio_path']):
+                    try:
+                        os.remove(res['audio_path'])
+                    except OSError:
+                        pass
         return jsonify({"error": "Failed to generate TTS"}), 500
+    # --- End Cache Miss ---
 
 
 @app.route("/api/tts/audio/<session_id>/<model_key>")
@@ -865,9 +1061,12 @@ def setup_cleanup():
             cleanup_conversational_session(sid)
         app.logger.info(f"Cleaned up {len(expired_tts_sessions)} TTS and {len(expired_conv_sessions)} conversational sessions.")
 
+        # Also cleanup potentially expired cache entries (e.g., > 1 hour old)
+        # This prevents stale cache entries if generation is slow or failing
+        # cleanup_stale_cache_entries()
 
     # Run cleanup every 15 minutes
-    scheduler = BackgroundScheduler()
+    scheduler = BackgroundScheduler(daemon=True)  # Run scheduler as daemon thread
     scheduler.add_job(cleanup_expired_sessions, "interval", minutes=15)
     scheduler.start()
     print("Cleanup scheduler started")  # Use print for startup messages
@@ -1000,11 +1199,36 @@ def toggle_leaderboard_visibility():
     })
 
 
+@app.route("/api/tts/cached-sentences")
+def get_cached_sentences():
+    """Returns a list of sentences currently available in the TTS cache."""
+    with tts_cache_lock:
+        cached_keys = list(tts_cache.keys())
+    return jsonify(cached_keys)
+
+
 if __name__ == "__main__":
     with app.app_context():
         # Ensure ./instance and ./votes directories exist
         os.makedirs("instance", exist_ok=True)
         os.makedirs("./votes", exist_ok=True)  # Create votes directory if it doesn't exist
+        os.makedirs(CACHE_AUDIO_DIR, exist_ok=True)  # Ensure cache audio dir exists
+
+        # Clean up old cache audio files on startup
+        try:
+            app.logger.info(f"Clearing old cache audio files from {CACHE_AUDIO_DIR}")
+            for filename in os.listdir(CACHE_AUDIO_DIR):
+                file_path = os.path.join(CACHE_AUDIO_DIR, filename)
+                try:
+                    if os.path.isfile(file_path) or os.path.islink(file_path):
+                        os.unlink(file_path)
+                    elif os.path.isdir(file_path):
+                        shutil.rmtree(file_path)
+                except Exception as e:
+                    app.logger.error(f'Failed to delete {file_path}. Reason: {e}')
+        except Exception as e:
+            app.logger.error(f"Error clearing cache directory {CACHE_AUDIO_DIR}: {e}")
+
 
         # Download database if it doesn't exist (only on initial space start)
         if IS_SPACES and not os.path.exists(app.config["SQLALCHEMY_DATABASE_URI"].replace("sqlite:///", "")):
@@ -1025,6 +1249,7 @@ if __name__ == "__main__":
         db.create_all()  # Create tables if they don't exist
         insert_initial_models()
         # Setup background tasks
+        initialize_tts_cache()  # Start populating the cache
         setup_cleanup()
         setup_periodic_tasks()  # Renamed function call
 
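Note that the cleanup hunk above only references `cleanup_stale_cache_entries()` as a commented-out call; the commit does not define it. For illustration only, a minimal sketch of what such a helper could look like if it were added to app.py, reusing the module-level `tts_cache`, `tts_cache_lock`, and `app.logger` introduced in this diff; the one-hour threshold and the entire body are assumptions, not code from the commit.

# Hypothetical helper (not part of this commit): evict TTS cache entries older than
# max_age_minutes and delete their audio files. Assumes the module-level names
# defined earlier in app.py (tts_cache, tts_cache_lock, app).
def cleanup_stale_cache_entries(max_age_minutes=60):
    cutoff = datetime.utcnow() - timedelta(minutes=max_age_minutes)
    removed = 0
    with tts_cache_lock:
        stale_sentences = [s for s, entry in tts_cache.items() if entry["created_at"] < cutoff]
        for sentence in stale_sentences:
            entry = tts_cache.pop(sentence)
            for key in ("audio_a", "audio_b"):
                path = entry.get(key)
                if path and os.path.exists(path):
                    try:
                        os.remove(path)
                    except OSError:
                        pass  # ignore failures while deleting stale audio files
            removed += 1
    if removed:
        app.logger.info(f"Removed {removed} stale TTS cache entries.")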
templates/arena.html CHANGED

@@ -188,6 +188,10 @@
 </div>
 </div>
 </div>
+
+<!-- Hidden element to store fallback sentences data -->
+<div id="fallback-sentences-data" data-sentences="{{ harvard_sentences | tojson | safe }}" style="display: none;"></div>
+
 {% endblock %}
 
 {% block extra_head %}
@@ -915,10 +919,6 @@
     border-color: var(--border-color);
 }
 
-.random-script-btn:hover {
-    background-color: rgba(255, 255, 255, 0.1);
-}
-
 .line-input {
     background-color: var(--light-gray);
     color: var(--text-color);
@@ -1015,6 +1015,7 @@
     let currentSessionId = null;
     let modelNames = { a: '', b: '' };
     let wavePlayers = { a: null, b: null };
+    let cachedSentences = []; // To store sentences available in cache
 
     // Initialize WavePlayers with mobile settings
     wavePlayerContainers.forEach(container => {
@@ -1026,8 +1027,33 @@
         });
     });
 
-    //
-
+    // Fallback random text options (if cache is unavailable)
+    let fallbackRandomTexts = [];
+    try {
+        const dataElement = document.getElementById('fallback-sentences-data');
+        if (dataElement && dataElement.dataset.sentences) {
+            fallbackRandomTexts = JSON.parse(dataElement.dataset.sentences);
+        } else {
+            console.error("Fallback sentences data element not found or empty.");
+        }
+    } catch (e) {
+        console.error("Error parsing fallback sentences from data attribute:", e);
+        // fallbackRandomTexts remains an empty array if parsing fails
+    }
+
+    // Fetch cached sentences on load
+    function fetchCachedSentences() {
+        fetch('/api/tts/cached-sentences')
+            .then(response => response.ok ? response.json() : Promise.reject('Failed to fetch cached sentences'))
+            .then(data => {
+                cachedSentences = data;
+                console.log(`Fetched ${cachedSentences.length} cached sentences.`);
+            })
+            .catch(error => {
+                console.error('Error fetching cached sentences:', error);
+                // Keep cachedSentences as empty array, fallback will be used
+            });
+    }
 
     // Check URL hash for direct tab access
     function checkHashAndSetTab() {
@@ -1284,9 +1310,23 @@
     }
 
     function handleRandom() {
-
-
-
+        let selectedText = '';
+        if (cachedSentences && cachedSentences.length > 0) {
+            // Select a random text from the cache
+            selectedText = cachedSentences[Math.floor(Math.random() * cachedSentences.length)];
+            console.log("Using random sentence from cache.");
+        } else {
+            // Fallback to the initial list if cache is empty or failed to load
+            console.log("Cache empty or unavailable, using random sentence from fallback list.");
+            if (fallbackRandomTexts && fallbackRandomTexts.length > 0) {
+                selectedText = fallbackRandomTexts[Math.floor(Math.random() * fallbackRandomTexts.length)];
+            } else {
+                // Absolute fallback if both cache and initial list fail
+                openToast("No random sentences available.", "warning");
+                return;
+            }
+        }
+        textInput.value = selectedText;
         textInput.focus();
     }
 
@@ -1366,6 +1406,9 @@
 
     // Add event listener for next round button
     nextRoundBtn.addEventListener('click', resetToInitialState);
+
+    // Fetch cached sentences when the DOM is ready
+    fetchCachedSentences();
 });
 </script>
 
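For a quick end-to-end check of the cache flow this commit introduces, the sketch below lists the server-side cache via the new /api/tts/cached-sentences endpoint and submits one of those sentences to /api/tts/generate, which should answer with cache_hit: true. The base URL, the `requests` dependency, and Turnstile being disabled are assumptions about a local dev setup, not part of the commit.

# Illustrative client sketch (assumes a local dev server with Turnstile disabled
# and the `requests` package installed; port 5000 is Flask's default).
import requests

BASE_URL = "http://localhost:5000"  # assumed dev address

# Sentences currently pre-generated in the server-side TTS cache
cached = requests.get(f"{BASE_URL}/api/tts/cached-sentences").json()

if cached:
    # A cached sentence should be served from the cache (cache_hit: true)
    resp = requests.post(f"{BASE_URL}/api/tts/generate", json={"text": cached[0]})
    data = resp.json()
    print(data["cache_hit"], data["audio_a"], data["audio_b"])
else:
    # Empty cache: the server falls back to on-the-fly generation (cache_hit: false)
    resp = requests.post(f"{BASE_URL}/api/tts/generate",
                         json={"text": "The birch canoe slid on the smooth planks."})
    print(resp.json().get("cache_hit"))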