Spaces:

yalrashed
/

ScriptLLM

Sleeping

App Files Files Community

yalrashed committed on Dec 5, 2024

Commit

760bc4d

verified ·

1 Parent(s): b7ab371

Update src/analysis/coverage_generator.py

Browse files

Files changed (1) hide show

src/analysis/coverage_generator.py +37 -123

src/analysis/coverage_generator.py CHANGED Viewed

@@ -1,33 +1,19 @@
 import os
 import google.generativeai as genai
 from pathlib import Path
-from tqdm import tqdm
 import logging
-# Set up logging
-logging.basicConfig(level=logging.DEBUG,
-                   format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 class CoverageGenerator:
     def __init__(self):
-        # Initialize Gemini
         api_key = os.getenv("GOOGLE_API_KEY")
         if not api_key:
             raise ValueError("GOOGLE_API_KEY not found")
         genai.configure(api_key=api_key)
         self.model = genai.GenerativeModel('gemini-pro')
-        # Add token tracking
-        self.token_usage = {
-            'prompt_tokens': 0,
-            'completion_tokens': 0,
-            'total_tokens': 0
-        }
-        # Set chunk size (in estimated tokens)
-        self.chunk_size = 8000  # Conservative size to avoid issues
     def count_tokens(self, text: str) -> int:
         """Estimate token count using simple word-based estimation"""
@@ -38,177 +24,105 @@ class CoverageGenerator:
         """Split screenplay into chunks with overlap for context"""
         logger.info("Chunking screenplay...")
-        # Split into scenes (looking for standard screenplay headers)
         scenes = text.split("\n\n")
         chunks = []
         current_chunk = []
         current_size = 0
-        overlap_scenes = 2  # Number of scenes to overlap
         for i, scene in enumerate(scenes):
             scene_size = self.count_tokens(scene)
             if current_size + scene_size > self.chunk_size and current_chunk:
-                # Get overlap scenes from the end of current chunk
                 overlap = current_chunk[-overlap_scenes:] if len(current_chunk) > overlap_scenes else current_chunk
-                # Join current chunk and add to chunks
                 chunks.append("\n\n".join(current_chunk))
-                # Start new chunk with overlap for context
                 current_chunk = overlap + [scene]
                 current_size = sum(self.count_tokens(s) for s in current_chunk)
             else:
                 current_chunk.append(scene)
                 current_size += scene_size
-        # Add the last chunk if it exists
         if current_chunk:
             chunks.append("\n\n".join(current_chunk))
         logger.info(f"Split screenplay into {len(chunks)} chunks with context overlap")
         return chunks
-    def read_screenplay(self, filepath: Path) -> str:
-        """Read the cleaned screenplay file"""
-        try:
-            logger.info(f"Reading screenplay from: {filepath}")
-            with open(filepath, 'r', encoding='utf-8') as file:
-                text = file.read()
-                tokens = self.count_tokens(text)
-                logger.info(f"Successfully read screenplay. Length: {tokens} tokens (estimated)")
-                return text
-        except Exception as e:
-            logger.error(f"Error reading screenplay: {e}")
-            logger.error(f"Tried to read from: {filepath}")
-            return None
     def generate_synopsis(self, chunk: str, chunk_num: int = 1, total_chunks: int = 1) -> str:
         """Generate synopsis for a single chunk"""
         prompt = f"""As an experienced script analyst, analyze this section ({chunk_num}/{total_chunks}) of the screenplay.
-        Important: This section may overlap with others to maintain context. Focus on:
-        - Key plot developments and their implications for the larger story
-        - Character appearances and development
-        - How this section connects to the ongoing narrative
-        - Major themes or motifs that emerge
-        Provide a summary that captures both the specific events and their significance to the larger narrative.
         Screenplay section:
         {chunk}"""
         try:
-            prompt_tokens = self.count_tokens(prompt)
-            logger.debug(f"Chunk {chunk_num} prompt length: {prompt_tokens} tokens")
-            with tqdm(total=1, desc=f"Processing chunk {chunk_num}/{total_chunks}") as pbar:
-                response = self.model.generate_content(prompt)
-                completion_tokens = self.count_tokens(response.text)
-                pbar.update(1)
-            self.token_usage['prompt_tokens'] += prompt_tokens
-            self.token_usage['completion_tokens'] += completion_tokens
-            self.token_usage['total_tokens'] += (prompt_tokens + completion_tokens)
             return response.text
         except Exception as e:
             logger.error(f"Error processing chunk {chunk_num}: {str(e)}")
-            logger.error("Full error details:", exc_info=True)
             return None
     def generate_final_synopsis(self, chunk_synopses: list) -> str:
-        """Combine chunk synopses into a final, coherent synopsis with strong narrative focus"""
         combined_text = "\n\n".join([f"Section {i+1}:\n{synopsis}"
                                    for i, synopsis in enumerate(chunk_synopses)])
-        prompt = f"""As an experienced script analyst, synthesize these section summaries into a comprehensive,
-        narratively cohesive synopsis of the entire screenplay.
-        You should have distinct sections on:
-        1. The complete narrative arc from beginning to end
-        2. Character development across the full story
-        3. Major themes and how they evolve
-        4. Key turning points and their impact
-        5. The core conflict and its resolution
-        Ensure the synopsis flows naturally and captures the full story without revealing the seams between sections.
         Section summaries:
         {combined_text}"""
         try:
-            logger.info("Generating final synopsis")
-            with tqdm(total=1, desc="Creating final synopsis") as pbar:
-                response = self.model.generate_content(prompt)
-                pbar.update(1)
             return response.text
         except Exception as e:
             logger.error(f"Error generating final synopsis: {str(e)}")
             return None
     def generate_coverage(self, screenplay_path: Path) -> bool:
-        """Main method to generate full coverage document"""
         logger.info("Starting coverage generation")
-        self.token_usage = {
-            'prompt_tokens': 0,
-            'completion_tokens': 0,
-            'total_tokens': 0
-        }
-        with tqdm(total=4, desc="Generating coverage") as pbar:
-            # Read screenplay
-            screenplay_text = self.read_screenplay(screenplay_path)
-            if not screenplay_text:
-                return False
-            pbar.update(1)
-            # Split into chunks
             chunks = self.chunk_screenplay(screenplay_text)
-            pbar.update(1)
-            # Process each chunk
             chunk_synopses = []
             for i, chunk in enumerate(chunks, 1):
                 synopsis = self.generate_synopsis(chunk, i, len(chunks))
                 if synopsis:
                     chunk_synopses.append(synopsis)
                 else:
                     logger.error(f"Failed to process chunk {i}")
                     return False
-            pbar.update(1)
-            # Generate final synopsis
             final_synopsis = self.generate_final_synopsis(chunk_synopses)
             if not final_synopsis:
                 return False
-            # Save coverage
-            output_dir = screenplay_path.parent
-            output_path = output_dir / "coverage.txt"
-            try:
-                with open(output_path, 'w', encoding='utf-8') as f:
-                    f.write("SCREENPLAY COVERAGE\n\n")
-                    f.write("### SYNOPSIS ###\n\n")
-                    f.write(final_synopsis)
-                    # Add token usage summary
-                    f.write("\n\n### TOKEN USAGE SUMMARY ###\n")
-                    f.write(f"Prompt Tokens: {self.token_usage['prompt_tokens']}\n")
-                    f.write(f"Completion Tokens: {self.token_usage['completion_tokens']}\n")
-                    f.write(f"Total Tokens: {self.token_usage['total_tokens']}\n")
-                logger.info("\nFinal Token Usage Summary:")
-                logger.info(f"Prompt Tokens: {self.token_usage['prompt_tokens']}")
-                logger.info(f"Completion Tokens: {self.token_usage['completion_tokens']}")
-                logger.info(f"Total Tokens: {self.token_usage['total_tokens']}")
-                pbar.update(1)
-                return True
-            except Exception as e:
-                logger.error(f"Error saving coverage: {str(e)}")
-                logger.error("Full error details:", exc_info=True)
-                return False

 import os
 import google.generativeai as genai
 from pathlib import Path
 import logging
 logger = logging.getLogger(__name__)
 class CoverageGenerator:
     def __init__(self, *, model_name: str = 'gemini-pro', chunk_size: int = 8000):
         """Configure the Gemini client and the chunking budget.

         Args:
             model_name: Model identifier handed to ``genai.GenerativeModel``.
                 Defaults to the previously hard-coded ``'gemini-pro'``.
             chunk_size: Size limit, in estimated tokens, that the chunking
                 code compares the running chunk size against. Defaults to
                 the previously hard-coded ``8000``.

         Raises:
             ValueError: If ``GOOGLE_API_KEY`` is unset or empty, or if
                 ``chunk_size`` is not positive.
         """
         if chunk_size <= 0:
             raise ValueError("chunk_size must be a positive integer")
         api_key = os.getenv("GOOGLE_API_KEY")
         # An empty string is treated the same as a missing variable.
         if not api_key:
             raise ValueError("GOOGLE_API_KEY not found")
         genai.configure(api_key=api_key)
         self.model = genai.GenerativeModel(model_name)
         self.chunk_size = chunk_size
     def count_tokens(self, text: str) -> int:
         """Estimate token count using simple word-based estimation"""
         """Split screenplay into chunks with overlap for context"""
         logger.info("Chunking screenplay...")
         scenes = text.split("\n\n")
         chunks = []
         current_chunk = []
         current_size = 0
+        overlap_scenes = 2
         for i, scene in enumerate(scenes):
             scene_size = self.count_tokens(scene)
             if current_size + scene_size > self.chunk_size and current_chunk:
                 overlap = current_chunk[-overlap_scenes:] if len(current_chunk) > overlap_scenes else current_chunk
                 chunks.append("\n\n".join(current_chunk))
                 current_chunk = overlap + [scene]
                 current_size = sum(self.count_tokens(s) for s in current_chunk)
             else:
                 current_chunk.append(scene)
                 current_size += scene_size
         if current_chunk:
             chunks.append("\n\n".join(current_chunk))
         logger.info(f"Split screenplay into {len(chunks)} chunks with context overlap")
         return chunks
     def generate_synopsis(self, chunk: str, chunk_num: int = 1, total_chunks: int = 1) -> str:
         """Ask the model for a synopsis of one screenplay chunk.

         Args:
             chunk: Text of the screenplay section to analyze.
             chunk_num: 1-based position of this chunk, used in the prompt
                 and in log messages.
             total_chunks: Total number of chunks, used in the prompt.

         Returns:
             The text of the model response, or ``None`` when the request
             (or reading the response text) raises.
         """
         position = f"{chunk_num}/{total_chunks}"
         logger.debug(f"Generating synopsis for chunk {position}")
         prompt = f"""As an experienced script analyst, analyze this section ({position}) of the screenplay.
         Focus on: plot developments, character development, narrative connections, themes
         Screenplay section:
         {chunk}"""
         synopsis = None
         try:
             reply = self.model.generate_content(prompt)
             logger.debug(f"Generated synopsis for chunk {chunk_num}")
             # Accessing .text stays inside the try: a failure here is
             # reported the same way as a failed request.
             synopsis = reply.text
         except Exception as exc:
             logger.error(f"Error processing chunk {chunk_num}: {exc!s}")
         return synopsis
     def generate_final_synopsis(self, chunk_synopses: list) -> str:
         """Merge the per-chunk synopses into one coverage text.

         Args:
             chunk_synopses: Synopsis strings in screenplay order; each one is
                 labelled ``Section N`` (1-based) in the prompt.

         Returns:
             The text of the model response, or ``None`` when the request
             (or reading the response text) raises.
         """
         logger.info("Generating final synopsis")
         labelled_sections = []
         for number, section_text in enumerate(chunk_synopses, start=1):
             labelled_sections.append(f"Section {number}:\n{section_text}")
         section_block = "\n\n".join(labelled_sections)
         prompt = f"""Synthesize these section summaries into a comprehensive coverage document with:
         1. Complete narrative arc
         2. Character development
         3. Major themes
         4. Key turning points
         5. Core conflict and resolution
         Section summaries:
         {section_block}"""
         final_text = None
         try:
             reply = self.model.generate_content(prompt)
             logger.info("Final synopsis generated")
             final_text = reply.text
         except Exception as exc:
             logger.error(f"Error generating final synopsis: {exc!s}")
         return final_text
     def generate_coverage(self, screenplay_path: Path) -> bool:
         """Generate a coverage document for one screenplay file.

         Reads the screenplay, splits it with ``chunk_screenplay``, requests a
         synopsis for every chunk, merges those with
         ``generate_final_synopsis`` and writes the result to ``coverage.txt``
         in the same directory as the screenplay.

         Args:
             screenplay_path: Location of the UTF-8 screenplay text file. A
                 plain string path is accepted as well.

         Returns:
             ``True`` when ``coverage.txt`` was written; ``False`` when the
             screenplay is empty, any chunk or the final synopsis fails, or
             any step raises.
         """
         logger.info("Starting coverage generation")
         try:
             # Callers may pass a str; ``.parent`` below requires a Path.
             screenplay_path = Path(screenplay_path)
             screenplay_text = screenplay_path.read_text(encoding='utf-8')
             # Without this guard an empty file would still be split into one
             # empty chunk and sent to the model.
             if not screenplay_text.strip():
                 logger.error("Screenplay is empty; nothing to analyze")
                 return False
             chunks = self.chunk_screenplay(screenplay_text)
             chunk_synopses = []
             for i, chunk in enumerate(chunks, 1):
                 logger.info(f"Processing chunk {i}/{len(chunks)}")
                 synopsis = self.generate_synopsis(chunk, i, len(chunks))
                 if not synopsis:
                     # One missing section would leave a hole in the final
                     # synopsis, so the whole run is abandoned.
                     logger.error(f"Failed to process chunk {i}")
                     return False
                 chunk_synopses.append(synopsis)
             final_synopsis = self.generate_final_synopsis(chunk_synopses)
             if not final_synopsis:
                 return False
             output_path = screenplay_path.parent / "coverage.txt"
             output_path.write_text("SCREENPLAY COVERAGE\n\n" + final_synopsis,
                                    encoding='utf-8')
             logger.info("Coverage generation complete")
             return True
         except Exception as e:
             # Top-level boundary: callers only get a success flag.
             logger.error(f"Error in coverage generation: {str(e)}")
             return False