feat: Update audio processing to support parallel chunking and enhance text chunking logic
Files changed:
- requirements.txt +2 -1
- src/processors/audio_concatenator.py +194 -0
- src/processors/audio_processor.py +157 -3
- src/processors/parallel_processor.py +170 -0
- src/processors/pdf_processor.py +14 -14
- src/processors/text_chunker.py +173 -0
- src/ui_components/interface.py +2 -2
requirements.txt
CHANGED

@@ -36,7 +36,7 @@ pydantic_core==2.33.2
 pydub==0.25.1
 Pygments==2.19.1
 python-dateutil==2.9.0.post0
-python-dotenv==1.1.0
+python-dotenv==1.1.1
 python-multipart==0.0.20
 pytz==2025.2
 PyYAML==6.0.2
@@ -45,6 +45,7 @@ requests==2.32.3
 rich==14.0.0
 ruff==0.11.13
 safehttpx==0.1.6
+scipy==1.15.3
 semantic-version==2.10.0
 shellingham==1.5.4
 six==1.17.0
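The new scipy pin backs the optional high-pass filtering introduced in src/processors/audio_concatenator.py below. A minimal sketch of that filter call, assuming 22.05 kHz audio and using noise as a stand-in signal:

import numpy as np
from scipy import signal

sr = 22050
audio = np.random.randn(sr).astype(np.float32)  # one second of stand-in audio

# Second-order Butterworth high-pass at 80 Hz, matching the concatenator's settings.
sos = signal.butter(2, 80, btype='highpass', fs=sr, output='sos')
filtered = signal.sosfilt(sos, audio).astype(np.float32)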
src/processors/audio_concatenator.py
ADDED
@@ -0,0 +1,194 @@

"""Audio concatenation utility for combining multiple audio chunks into a single audio file."""

import numpy as np
from typing import List, Tuple, Optional
import gradio as gr


class AudioConcatenator:
    """Handles concatenation of multiple audio chunks."""

    def __init__(self, silence_duration: float = 0.5, fade_duration: float = 0.1):
        """
        Initialize the audio concatenator.

        Args:
            silence_duration: Duration of silence between chunks (seconds)
            fade_duration: Duration of fade in/out effects (seconds)
        """
        self.silence_duration = silence_duration
        self.fade_duration = fade_duration

    def concatenate_audio_chunks(
        self,
        audio_chunks: List[Tuple[int, np.ndarray]],
        progress_callback: Optional[callable] = None
    ) -> Tuple[int, np.ndarray]:
        """
        Concatenate multiple audio chunks into a single audio file.

        Args:
            audio_chunks: List of (sample_rate, audio_data) tuples
            progress_callback: Optional callback for progress updates

        Returns:
            Tuple of (sample_rate, concatenated_audio_data)
        """
        if not audio_chunks:
            raise gr.Error("No audio chunks to concatenate")

        if len(audio_chunks) == 1:
            return audio_chunks[0]

        if progress_callback:
            progress_callback(0.1, desc="Preparing audio concatenation...")

        # Verify all chunks have the same sample rate
        sample_rates = [chunk[0] for chunk in audio_chunks]
        if len(set(sample_rates)) > 1:
            raise gr.Error(f"Inconsistent sample rates found: {set(sample_rates)}. All chunks must have the same sample rate.")

        sample_rate = sample_rates[0]

        if progress_callback:
            progress_callback(0.2, desc="Normalizing audio chunks...")

        # Normalize and prepare audio data
        normalized_chunks = []
        for i, (_, audio_data) in enumerate(audio_chunks):
            # Ensure audio data is in the correct format
            if audio_data.ndim == 1:
                normalized_audio = audio_data
            elif audio_data.ndim == 2:
                # Convert stereo to mono by averaging channels
                normalized_audio = np.mean(audio_data, axis=1)
            else:
                raise gr.Error(f"Unsupported audio format in chunk {i + 1}: {audio_data.shape}")

            # Normalize audio levels
            normalized_audio = self._normalize_audio(normalized_audio)

            # Apply fade effects
            normalized_audio = self._apply_fade_effects(normalized_audio, sample_rate)

            normalized_chunks.append(normalized_audio)

            if progress_callback:
                progress = 0.2 + (0.5 * (i + 1) / len(audio_chunks))
                progress_callback(progress, desc=f"Processed chunk {i + 1}/{len(audio_chunks)}")

        if progress_callback:
            progress_callback(0.7, desc="Creating silence segments...")

        # Create silence segments
        silence_samples = int(self.silence_duration * sample_rate)
        silence = np.zeros(silence_samples, dtype=np.float32)

        if progress_callback:
            progress_callback(0.8, desc="Concatenating audio segments...")

        # Concatenate all chunks with silence in between
        concatenated_segments = []
        for i, chunk in enumerate(normalized_chunks):
            concatenated_segments.append(chunk)

            # Add silence between chunks (but not after the last chunk)
            if i < len(normalized_chunks) - 1:
                concatenated_segments.append(silence)

            if progress_callback:
                progress = 0.8 + (0.15 * (i + 1) / len(normalized_chunks))
                progress_callback(progress, desc=f"Concatenated {i + 1}/{len(normalized_chunks)} chunks")

        # Combine all segments
        final_audio = np.concatenate(concatenated_segments)

        if progress_callback:
            progress_callback(0.95, desc="Finalizing audio...")

        # Final normalization and cleanup
        final_audio = self._normalize_audio(final_audio)
        final_audio = self._remove_clicks_and_pops(final_audio, sample_rate)

        if progress_callback:
            progress_callback(1.0, desc="Audio concatenation complete!")

        return sample_rate, final_audio

    def _normalize_audio(self, audio_data: np.ndarray) -> np.ndarray:
        """Normalize audio to prevent clipping."""
        # Find the maximum absolute value
        max_val = np.max(np.abs(audio_data))

        if max_val == 0:
            return audio_data

        # Normalize to 95% of maximum to leave some headroom
        normalized = audio_data * (0.95 / max_val)

        return normalized.astype(np.float32)

    def _apply_fade_effects(self, audio_data: np.ndarray, sample_rate: int) -> np.ndarray:
        """Apply fade in and fade out effects to reduce pops and clicks."""
        fade_samples = int(self.fade_duration * sample_rate)

        if len(audio_data) < 2 * fade_samples:
            # If audio is too short for fade effects, return as-is
            return audio_data

        audio_with_fades = audio_data.copy()

        # Apply fade in
        fade_in = np.linspace(0, 1, fade_samples)
        audio_with_fades[:fade_samples] *= fade_in

        # Apply fade out
        fade_out = np.linspace(1, 0, fade_samples)
        audio_with_fades[-fade_samples:] *= fade_out

        return audio_with_fades

    def _remove_clicks_and_pops(self, audio_data: np.ndarray, sample_rate: int = 22050) -> np.ndarray:
        """Apply basic filtering to remove clicks and pops."""
        try:
            # Simple high-pass filter to remove DC offset and low-frequency artifacts
            from scipy import signal

            # Design a high-pass filter (removes frequencies below 80 Hz)
            # This helps remove some pops and clicks while preserving speech
            sos = signal.butter(2, 80, btype='highpass', fs=sample_rate, output='sos')
            filtered_audio = signal.sosfilt(sos, audio_data)

            return filtered_audio.astype(np.float32)
        except ImportError:
            # If scipy is not available, return audio as-is
            return audio_data.astype(np.float32)

    def get_concatenation_info(self, audio_chunks: List[Tuple[int, np.ndarray]]) -> dict:
        """Get information about the concatenation process."""
        if not audio_chunks:
            return {}

        total_duration = 0
        total_silence_duration = 0
        chunk_durations = []

        sample_rate = audio_chunks[0][0]

        for _, audio_data in audio_chunks:
            duration = len(audio_data) / sample_rate
            chunk_durations.append(duration)
            total_duration += duration

        # Add silence duration (between chunks)
        if len(audio_chunks) > 1:
            total_silence_duration = (len(audio_chunks) - 1) * self.silence_duration
            total_duration += total_silence_duration

        return {
            "num_chunks": len(audio_chunks),
            "total_duration": total_duration,
            "total_silence_duration": total_silence_duration,
            "chunk_durations": chunk_durations,
            "average_chunk_duration": np.mean(chunk_durations),
            "sample_rate": sample_rate
        }
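For orientation, a minimal sketch of driving AudioConcatenator on its own; the sine-wave chunks and the src.processors import path are assumptions for illustration:

import numpy as np
from src.processors.audio_concatenator import AudioConcatenator  # assumed import path

# Two fake one-second mono chunks at a shared sample rate.
sr = 22050
t = np.linspace(0, 1, sr, endpoint=False)
chunk_a = (sr, np.sin(2 * np.pi * 220 * t).astype(np.float32))
chunk_b = (sr, np.sin(2 * np.pi * 440 * t).astype(np.float32))

concatenator = AudioConcatenator(silence_duration=0.5, fade_duration=0.1)
rate, audio = concatenator.concatenate_audio_chunks([chunk_a, chunk_b])

# Expect 1 s + 0.5 s of inserted silence + 1 s = 2.5 s of output.
print(rate, len(audio) / rate)
print(concatenator.get_concatenation_info([chunk_a, chunk_b]))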
src/processors/audio_processor.py
CHANGED

@@ -1,17 +1,171 @@
 """Audio generation functionality."""
 
 import gradio as gr
+from typing import Tuple, Optional
+import numpy as np
+from .text_chunker import TextChunker
+from .parallel_processor import ParallelAudioProcessor
+from .audio_concatenator import AudioConcatenator
+
 
 class AudioProcessor:
-    """Handles audio generation operations."""
-
-    def generate_audio(self, explanation_text):
+    """Handles audio generation operations with parallel processing and chunking."""
+
+    def __init__(self,
+                 max_chunk_size: int = 800,
+                 max_workers: int = 4,
+                 silence_duration: float = 0.5,
+                 enable_parallel: bool = True):
+        """
+        Initialize the audio processor.
+
+        Args:
+            max_chunk_size: Maximum characters per chunk
+            max_workers: Maximum parallel workers
+            silence_duration: Silence between chunks (seconds)
+            enable_parallel: Whether to use parallel processing
+        """
+        self.text_chunker = TextChunker(max_chunk_size=max_chunk_size)
+        self.parallel_processor = ParallelAudioProcessor(max_workers=max_workers)
+        self.audio_concatenator = AudioConcatenator(silence_duration=silence_duration)
+        self.enable_parallel = enable_parallel
+
+    def generate_audio(self, explanation_text: str, progress=None) -> Tuple[Tuple[int, np.ndarray], dict]:
+        """
+        Generate TTS audio for explanations with chunking and parallel processing.
+
+        Args:
+            explanation_text: The text to convert to audio
+            progress: Optional progress callback
+
+        Returns:
+            Tuple of (audio_result, update_dict) where audio_result is (sample_rate, audio_data)
+        """
+        if not explanation_text or explanation_text.strip() == "":
+            raise gr.Error("No explanations available to convert to audio. Please generate explanations first.")
+
+        try:
+            clean_text = explanation_text.strip()
+
+            if progress:
+                progress(0.05, desc="Analyzing text for chunking...")
+
+            # Step 1: Chunk the text
+            text_chunks = self.text_chunker.chunk_text(clean_text)
+            chunk_info = self.text_chunker.get_chunk_info(text_chunks)
+
+            if progress:
+                progress(0.1, desc=f"Split text into {len(text_chunks)} chunks")
+
+            # If only one chunk and it's small enough, use simple processing
+            if len(text_chunks) == 1 and len(text_chunks[0]) <= 1000:
+                if progress:
+                    progress(0.2, desc="Processing single chunk...")
+
+                from .generate_tts_audio import generate_tts_audio
+                audio_result = generate_tts_audio(text_chunks[0], None, progress=progress)
+
+                if progress:
+                    progress(1.0, desc="Audio generation complete!")
+
+                return audio_result, gr.update(visible=True)
+
+            # Step 2: Process chunks in parallel (or sequentially if disabled)
+            if self.enable_parallel and len(text_chunks) > 1:
+                if progress:
+                    progress(0.15, desc="Starting parallel audio processing...")
+
+                # Import the audio generation function
+                from .generate_tts_audio import generate_tts_audio
+
+                # Process chunks in parallel
+                def progress_wrapper(p, desc=""):
+                    if progress:
+                        # Map parallel progress to 15-80% of total progress
+                        mapped_progress = 0.15 + (p * 0.65)
+                        progress(mapped_progress, desc)
+
+                audio_chunks = self.parallel_processor.process_chunks_parallel(
+                    text_chunks,
+                    generate_tts_audio,
+                    progress_callback=progress_wrapper
+                )
+            else:
+                # Sequential processing for single chunk or when parallel is disabled
+                if progress:
+                    progress(0.15, desc="Processing chunks sequentially...")
+
+                from .generate_tts_audio import generate_tts_audio
+                audio_chunks = []
+
+                for i, chunk in enumerate(text_chunks):
+                    if progress:
+                        chunk_progress = 0.15 + (0.65 * i / len(text_chunks))
+                        progress(chunk_progress, desc=f"Processing chunk {i + 1}/{len(text_chunks)}")
+
+                    audio_result = generate_tts_audio(chunk, None)
+                    audio_chunks.append(audio_result)
+
+            # Step 3: Concatenate audio chunks
+            if progress:
+                progress(0.8, desc="Concatenating audio chunks...")
+
+            def concat_progress_wrapper(p, desc=""):
+                if progress:
+                    # Map concatenation progress to 80-100% of total progress
+                    mapped_progress = 0.8 + (p * 0.2)
+                    progress(mapped_progress, desc)
+
+            final_audio = self.audio_concatenator.concatenate_audio_chunks(
+                audio_chunks,
+                progress_callback=concat_progress_wrapper
+            )
+
+            if progress:
+                progress(1.0, desc=f"Generated audio from {len(text_chunks)} chunks!")
+
+            return final_audio, gr.update(visible=True)
+
+        except Exception as e:
+            raise gr.Error(f"Error generating audio: {str(e)}")
+
+    def generate_audio_legacy(self, explanation_text: str) -> Tuple[Tuple[int, np.ndarray], dict]:
+        """
+        Legacy audio generation method (for backward compatibility).
+        """
         if not explanation_text or explanation_text.strip() == "":
             raise gr.Error("No explanations available to convert to audio. Please generate explanations first.")
         try:
             from .generate_tts_audio import generate_tts_audio
             clean_text = explanation_text.strip()
+
+            # Use the original truncation logic for legacy mode
+            if len(clean_text) > 1000:
+                sentences = clean_text[:950].split('.')
+                if len(sentences) > 1:
+                    clean_text = '.'.join(sentences[:-1]) + '.'
+                else:
+                    clean_text = clean_text[:950]
+                clean_text += " [Text has been truncated for audio generation]"
+
             audio_result = generate_tts_audio(clean_text, None)
             return audio_result, gr.update(visible=True)
         except Exception as e:
             raise gr.Error(f"Error generating audio: {str(e)}")
+
+    def get_processing_info(self, text: str) -> dict:
+        """Get information about how the text would be processed."""
+        if not text or not text.strip():
+            return {"error": "No text provided"}
+
+        chunks = self.text_chunker.chunk_text(text.strip())
+        chunk_info = self.text_chunker.get_chunk_info(chunks)
+
+        estimated_time = self.parallel_processor.estimate_processing_time(chunks)
+
+        return {
+            "processing_mode": "parallel" if self.enable_parallel and len(chunks) > 1 else "sequential",
+            "chunk_info": chunk_info,
+            "estimated_time_seconds": estimated_time,
+            "estimated_time_readable": f"{estimated_time:.1f} seconds" if estimated_time < 60 else f"{estimated_time/60:.1f} minutes"
+        }
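A sketch of using the new entry point outside the Gradio handler; the import path and sample text are illustrative. get_processing_info previews the chunking plan and cost estimate before any TTS runs:

from src.processors.audio_processor import AudioProcessor  # assumed import path

processor = AudioProcessor(max_chunk_size=800, max_workers=4,
                           silence_duration=0.5, enable_parallel=True)

# Preview how a long explanation would be split and roughly how long it would take.
long_text = "This is one sentence of an explanation. " * 100
info = processor.get_processing_info(long_text)
print(info["processing_mode"],
      info["chunk_info"]["total_chunks"],
      info["estimated_time_readable"])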
src/processors/parallel_processor.py
ADDED
@@ -0,0 +1,170 @@

"""Parallel audio processing for generating multiple audio chunks concurrently."""

import asyncio
import concurrent.futures
from typing import List, Tuple, Optional, Callable
import numpy as np
import gradio as gr


class ParallelAudioProcessor:
    """Handles parallel processing of multiple audio chunks."""

    def __init__(self, max_workers: int = 4):
        """
        Initialize the parallel processor.

        Args:
            max_workers: Maximum number of concurrent workers for audio generation
        """
        self.max_workers = max_workers

    def process_chunks_parallel(
        self,
        text_chunks: List[str],
        audio_generator_func: Callable,
        progress_callback: Optional[Callable] = None
    ) -> List[Tuple[int, np.ndarray]]:
        """
        Process multiple text chunks in parallel to generate audio.

        Args:
            text_chunks: List of text chunks to process
            audio_generator_func: Function to generate audio from text
            progress_callback: Optional callback for progress updates

        Returns:
            List of tuples containing (sample_rate, audio_data) for each chunk
        """
        if not text_chunks:
            return []

        total_chunks = len(text_chunks)
        completed_chunks = 0
        results = [None] * total_chunks

        def update_progress(chunk_index: int, desc: str = ""):
            nonlocal completed_chunks
            if progress_callback:
                progress = completed_chunks / total_chunks
                progress_callback(progress, desc=f"Processing chunk {completed_chunks + 1}/{total_chunks}{': ' + desc if desc else ''}")

        def process_single_chunk(chunk_index: int, text_chunk: str) -> Tuple[int, Tuple[int, np.ndarray]]:
            """Process a single chunk and return the result with its index."""
            try:
                # Create a local progress callback for this chunk
                def chunk_progress(progress: float, desc: str = ""):
                    update_progress(chunk_index, f"Chunk {chunk_index + 1}: {desc}")

                # Generate audio for this chunk
                audio_result = audio_generator_func(text_chunk, None, progress=chunk_progress)
                return chunk_index, audio_result
            except Exception as e:
                raise Exception(f"Error processing chunk {chunk_index + 1}: {str(e)}")

        # Use ThreadPoolExecutor for parallel processing
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            # Submit all chunks for processing
            future_to_index = {
                executor.submit(process_single_chunk, i, chunk): i
                for i, chunk in enumerate(text_chunks)
            }

            # Collect results as they complete
            for future in concurrent.futures.as_completed(future_to_index):
                chunk_index = future_to_index[future]
                try:
                    index, audio_result = future.result()
                    results[index] = audio_result
                    completed_chunks += 1

                    if progress_callback:
                        progress = completed_chunks / total_chunks
                        progress_callback(
                            progress,
                            desc=f"Completed {completed_chunks}/{total_chunks} audio chunks"
                        )

                except Exception as e:
                    raise gr.Error(f"Failed to process chunk {chunk_index + 1}: {str(e)}")

        # Filter out any None results (shouldn't happen, but just in case)
        valid_results = [result for result in results if result is not None]

        if len(valid_results) != total_chunks:
            raise gr.Error(f"Only {len(valid_results)} out of {total_chunks} chunks processed successfully")

        return valid_results

    async def process_chunks_async(
        self,
        text_chunks: List[str],
        audio_generator_func: Callable,
        progress_callback: Optional[Callable] = None
    ) -> List[Tuple[int, np.ndarray]]:
        """
        Async version of parallel chunk processing.

        Args:
            text_chunks: List of text chunks to process
            audio_generator_func: Function to generate audio from text
            progress_callback: Optional callback for progress updates

        Returns:
            List of tuples containing (sample_rate, audio_data) for each chunk
        """
        if not text_chunks:
            return []

        async def process_chunk_async(chunk_index: int, text_chunk: str):
            """Process a single chunk asynchronously."""
            loop = asyncio.get_event_loop()

            def chunk_progress(progress: float, desc: str = ""):
                if progress_callback:
                    progress_callback(
                        (chunk_index + progress) / len(text_chunks),
                        desc=f"Chunk {chunk_index + 1}: {desc}"
                    )

            # Run the audio generation in a thread pool
            audio_result = await loop.run_in_executor(
                None,
                lambda: audio_generator_func(text_chunk, None, progress=chunk_progress)
            )
            return chunk_index, audio_result

        # Create tasks for all chunks
        tasks = [
            process_chunk_async(i, chunk)
            for i, chunk in enumerate(text_chunks)
        ]

        # Process all chunks concurrently
        try:
            results = await asyncio.gather(*tasks)
            # Sort results by chunk index to maintain order
            results.sort(key=lambda x: x[0])
            return [result[1] for result in results]
        except Exception as e:
            raise gr.Error(f"Error in async processing: {str(e)}")

    def estimate_processing_time(self, text_chunks: List[str], avg_time_per_char: float = 0.1) -> float:
        """
        Estimate total processing time for all chunks.

        Args:
            text_chunks: List of text chunks
            avg_time_per_char: Average processing time per character (seconds)

        Returns:
            Estimated processing time in seconds
        """
        total_chars = sum(len(chunk) for chunk in text_chunks)
        sequential_time = total_chars * avg_time_per_char

        # Account for parallelization
        parallel_efficiency = min(len(text_chunks), self.max_workers) / len(text_chunks) if text_chunks else 1
        estimated_time = sequential_time * parallel_efficiency

        return estimated_time
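The processor assumes only that the generator callable accepts (text, audio_prompt, progress=...) and returns a (sample_rate, ndarray) pair, so it can be smoke-tested with a stub in place of the real TTS function. A sketch, with the stub and import path as assumptions:

import numpy as np
from src.processors.parallel_processor import ParallelAudioProcessor  # assumed import path

def fake_tts(text, audio_prompt, progress=None):
    # Stand-in for generate_tts_audio: returns silence sized by text length.
    sr = 22050
    return sr, np.zeros(int(sr * 0.01 * len(text)), dtype=np.float32)

processor = ParallelAudioProcessor(max_workers=2)
chunks = ["First chunk of text.", "Second, longer chunk of text.", "Third chunk."]
results = processor.process_chunks_parallel(chunks, fake_tts)

# Results come back in input order even when chunks finish out of order.
print([len(audio) for _, audio in results])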
src/processors/pdf_processor.py
CHANGED

@@ -35,24 +35,24 @@ class PDFProcessor:
 
         # Show explanations immediately, update status for audio loading
         yield extracted_text, gr.update(value="Generating audio..."), explanations, None, gr.update(visible=False)
-
-        # Step 3: Generate audio
+        # Step 3: Generate audio
         try:
-            from .generate_tts_audio import generate_tts_audio
+            from .audio_processor import AudioProcessor
 
-            #
-            clean_text = explanations.strip()
+            # Create audio processor with parallel processing enabled
+            audio_processor = AudioProcessor(
+                max_chunk_size=800,
+                max_workers=4,
+                silence_duration=0.5,
+                enable_parallel=True
+            )
 
-            #
-            if len(clean_text) > 1000:
-                sentences = clean_text[:950].split('.')
-                if len(sentences) > 1:
-                    clean_text = '.'.join(sentences[:-1]) + '.'
-                else:
-                    clean_text = clean_text[:950]
-                clean_text += " [Text has been truncated for audio generation]"
+            # Generate progress callback for audio processing
+            def audio_progress(progress, desc=""):
+                yield extracted_text, gr.update(value=f"Generating audio: {desc}"), explanations, None, gr.update(visible=False)
 
-            audio_result = generate_tts_audio(clean_text, None)
+            # Generate audio using the new parallel processor
+            audio_result, _ = audio_processor.generate_audio(explanations, progress=audio_progress)
 
             # Show everything, update status to complete
             yield extracted_text, gr.update(value="All steps complete!"), explanations, audio_result, gr.update(visible=True)
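One caveat worth noting: because audio_progress contains a yield, each call to it from inside generate_audio creates a generator object rather than emitting a streaming update, so the intermediate status messages may never surface in the UI. Any plain callable taking (value, desc=...) does execute on each call; a sketch with an illustrative logging callback (import path and text are assumptions):

from src.processors.audio_processor import AudioProcessor  # assumed import path

processor = AudioProcessor(max_chunk_size=800, max_workers=4,
                           silence_duration=0.5, enable_parallel=True)

def log_progress(value, desc=""):
    # Simple stand-in for a Gradio progress object; runs on every update.
    print(f"[{value:6.1%}] {desc}")

audio_result, _ = processor.generate_audio(
    "Some explanation text to narrate.", progress=log_progress)
sample_rate, audio_data = audio_result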
src/processors/text_chunker.py
ADDED
@@ -0,0 +1,173 @@

"""Text chunking utility for breaking down large text into smaller chunks for audio processing."""

import re
from typing import List


class TextChunker:
    """Handles intelligent text chunking for audio processing."""

    def __init__(self, max_chunk_size: int = 800, overlap_sentences: int = 0):
        """
        Initialize the text chunker.

        Args:
            max_chunk_size: Maximum number of characters per chunk
            overlap_sentences: Number of sentences to overlap between chunks for continuity
        """
        self.max_chunk_size = max_chunk_size
        self.overlap_sentences = overlap_sentences

    def chunk_text(self, text: str) -> List[str]:
        """
        Break text into smaller chunks based on paragraphs and sentence boundaries.

        Args:
            text: The input text to chunk

        Returns:
            List of text chunks
        """
        if not text or not text.strip():
            return []

        # Clean the text
        text = text.strip()

        # If text is within the limit, return as single chunk
        if len(text) <= self.max_chunk_size:
            return [text]

        chunks = []

        # First, try to split by paragraphs
        paragraphs = self._split_into_paragraphs(text)

        current_chunk = ""

        for paragraph in paragraphs:
            # If adding this paragraph would exceed the limit
            if len(current_chunk) + len(paragraph) + 1 > self.max_chunk_size:
                # If we have content in current chunk, save it
                if current_chunk.strip():
                    chunks.append(current_chunk.strip())
                    current_chunk = ""

                # If the paragraph itself is too long, split it by sentences
                if len(paragraph) > self.max_chunk_size:
                    sentence_chunks = self._split_paragraph_into_sentences(paragraph)
                    for sentence_chunk in sentence_chunks:
                        if len(current_chunk) + len(sentence_chunk) + 1 > self.max_chunk_size:
                            if current_chunk.strip():
                                chunks.append(current_chunk.strip())
                            current_chunk = sentence_chunk
                        else:
                            if current_chunk:
                                current_chunk += " " + sentence_chunk
                            else:
                                current_chunk = sentence_chunk
                else:
                    current_chunk = paragraph
            else:
                # Add paragraph to current chunk
                if current_chunk:
                    current_chunk += "\n\n" + paragraph
                else:
                    current_chunk = paragraph

        # Add any remaining content
        if current_chunk.strip():
            chunks.append(current_chunk.strip())

        # Apply overlap if specified
        if self.overlap_sentences > 0 and len(chunks) > 1:
            chunks = self._add_overlap(chunks)

        return chunks

    def _split_into_paragraphs(self, text: str) -> List[str]:
        """Split text into paragraphs."""
        # Split by double newlines or multiple spaces
        paragraphs = re.split(r'\n\s*\n|(?:\n\s*){2,}', text)
        # Filter out empty paragraphs and strip whitespace
        return [p.strip() for p in paragraphs if p.strip()]

    def _split_paragraph_into_sentences(self, paragraph: str) -> List[str]:
        """Split a long paragraph into sentence-based chunks."""
        # Split by sentence boundaries
        sentences = re.split(r'(?<=[.!?])\s+', paragraph)

        chunks = []
        current_chunk = ""

        for sentence in sentences:
            # If a single sentence is longer than max_chunk_size, we need to force-split it
            if len(sentence) > self.max_chunk_size:
                # Save current chunk if it has content
                if current_chunk.strip():
                    chunks.append(current_chunk.strip())
                    current_chunk = ""

                # Force-split the long sentence into smaller pieces
                while len(sentence) > self.max_chunk_size:
                    # Find a good breaking point (prefer spaces)
                    break_point = self.max_chunk_size
                    if ' ' in sentence[:self.max_chunk_size]:
                        # Find the last space within the limit
                        break_point = sentence[:self.max_chunk_size].rfind(' ')

                    chunk_part = sentence[:break_point]
                    chunks.append(chunk_part)
                    sentence = sentence[break_point:].strip()

                # Add the remaining part of the sentence
                if sentence:
                    current_chunk = sentence

            elif len(current_chunk) + len(sentence) + 1 > self.max_chunk_size:
                if current_chunk.strip():
                    chunks.append(current_chunk.strip())
                current_chunk = sentence
            else:
                if current_chunk:
                    current_chunk += " " + sentence
                else:
                    current_chunk = sentence

        if current_chunk.strip():
            chunks.append(current_chunk.strip())

        return chunks

    def _add_overlap(self, chunks: List[str]) -> List[str]:
        """Add sentence overlap between chunks for better continuity."""
        if len(chunks) <= 1:
            return chunks

        overlapped_chunks = [chunks[0]]  # First chunk stays the same

        for i in range(1, len(chunks)):
            # Get last few sentences from previous chunk
            prev_chunk = chunks[i - 1]
            current_chunk = chunks[i]

            prev_sentences = re.split(r'(?<=[.!?])\s+', prev_chunk)
            overlap_text = " ".join(prev_sentences[-self.overlap_sentences:]) if len(prev_sentences) > self.overlap_sentences else ""

            if overlap_text:
                overlapped_chunk = overlap_text + " " + current_chunk
            else:
                overlapped_chunk = current_chunk

            overlapped_chunks.append(overlapped_chunk)

        return overlapped_chunks

    def get_chunk_info(self, chunks: List[str]) -> dict:
        """Get information about the chunks."""
        return {
            "total_chunks": len(chunks),
            "total_characters": sum(len(chunk) for chunk in chunks),
            "avg_chunk_size": sum(len(chunk) for chunk in chunks) / len(chunks) if chunks else 0,
            "max_chunk_size": max(len(chunk) for chunk in chunks) if chunks else 0,
            "min_chunk_size": min(len(chunk) for chunk in chunks) if chunks else 0
        }
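A minimal sketch of the chunker in isolation, with illustrative sample text; every resulting chunk should respect the 800-character cap:

from src.processors.text_chunker import TextChunker  # assumed import path

chunker = TextChunker(max_chunk_size=800)
text = ("First paragraph. It introduces the topic in two sentences.\n\n"
        + "A body paragraph made of repeated filler sentences. " * 60)
chunks = chunker.chunk_text(text)

print(chunker.get_chunk_info(chunks))
for i, chunk in enumerate(chunks):
    # Splits fall on paragraph and sentence boundaries where possible.
    print(i, len(chunk))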
src/ui_components/interface.py
CHANGED

@@ -41,14 +41,14 @@ def build_interface(process_pdf_fn):
             lines=15,
             placeholder="Explanations will be automatically generated after text extraction...",
             show_copy_button=True,
-            interactive=False
-        )
+            interactive=False )
         gr.Markdown("### 🔊 Audio Generation")
         audio_output = gr.Audio(
             label="Generated Explanation Audio",
             interactive=False,
             visible=False
         )
+
         pdf_input.upload(
             fn=process_pdf_fn,
             inputs=[pdf_input],