Spaces:

yalrashed
/

ScriptLLM

Sleeping

App Files Files Community

yalrashed committed on Dec 5, 2024

Commit

e2cc090

verified ·

1 Parent(s): a6e1129

Upload analysis_post_processor.py

Browse files

Files changed (1) hide show

src/analysis/analysis_post_processor.py +101 -0

src/analysis/analysis_post_processor.py ADDED Viewed

	@@ -0,0 +1,101 @@

import argparse
import logging
import os
import sys
from pathlib import Path

import google.generativeai as genai
# Module-level logger used by everything in this file.
logger = logging.getLogger(__name__)
# Install a default root handler at INFO level when the module is imported,
# so the progress messages emitted below are visible when run as a script.
logging.basicConfig(level=logging.INFO)
class AnalysisPostProcessor:
    """Post-process a sectioned screenplay analysis with a Gemini model.

    The input file is expected to contain sections introduced by header
    lines of the form ``### Title ###``. Each section is sent to the model
    with a prompt asking it to remove redundancy and improve flow, and the
    cleaned sections are written back out using the same header format.
    """

    def __init__(self, model_name: str = 'gemini-pro'):
        """Configure the Gemini client from the environment.

        Args:
            model_name: Name passed to ``genai.GenerativeModel``. Defaults to
                ``'gemini-pro'``, the value that used to be hard-coded, so
                existing ``AnalysisPostProcessor()`` callers are unaffected.

        Raises:
            ValueError: If the ``GOOGLE_API_KEY`` environment variable is
                unset or empty.
        """
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY not found")
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)

    @staticmethod
    def _store_section(sections: dict, title, lines: list) -> None:
        """Record one parsed section, merging it into any same-titled one."""
        if not title:
            # Text before the first header (or under an empty title) is not
            # part of any section and is dropped.
            return
        body = '\n'.join(lines)
        if title in sections:
            # A repeated header must not overwrite the earlier text,
            # otherwise those observations would be lost silently.
            sections[title] = sections[title] + '\n' + body
        else:
            sections[title] = body

    def read_sections(self, filepath: str) -> dict:
        """Read the analysis file and split it into titled sections.

        A section starts at a line of the form ``### Title ###`` (trailing
        whitespace on the header line is ignored) and runs until the next
        header or the end of the file. Text before the first header is
        discarded. When the same title occurs more than once, the bodies are
        concatenated in file order, separated by a newline.

        Args:
            filepath: Path of the UTF-8 text file to read.

        Returns:
            A dict mapping each section title to its text, in order of first
            appearance.

        Raises:
            OSError: If the file cannot be opened or read.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
        sections = {}
        current_section = None
        current_content = []
        for line in content.split('\n'):
            header = line.rstrip()
            if header.startswith('### ') and header.endswith(' ###'):
                self._store_section(sections, current_section, current_content)
                current_section = header.strip('#').strip()
                current_content = []
            else:
                current_content.append(line)
        self._store_section(sections, current_section, current_content)
        return sections

    def clean_section(self, title: str, content: str) -> str:
        """Ask the model for a de-duplicated, coherent version of a section.

        This is best-effort: on any failure the original text is returned so
        that no analysis content is lost.

        Args:
            title: Section title, interpolated into the prompt.
            content: Original section text.

        Returns:
            The model's rewritten text, or ``content`` unchanged when the
            section is blank, the request raises, or the reply is empty.
        """
        if not content.strip():
            # Nothing to clean; avoid spending an API call on a blank section.
            return content
        prompt = f"""You are processing a section of screenplay analysis titled "{title}".
        The original analysis was generated by analyzing chunks of the screenplay,
        which may have led to some redundancy and discontinuity.
        Your task:
        1. Remove any redundant observations
        2. Stitch together related insights that may be separated
        3. Ensure the analysis flows naturally from beginning to end
        4. Preserve ALL unique insights and specific examples
        5. Maintain the analytical depth while making it more coherent
        Original {title} section:
        {content}
        Provide the cleaned and coherent version maintaining the same analytical depth."""
        try:
            response = self.model.generate_content(prompt)
            # Accessing ``.text`` may itself raise (for example when the
            # reply carries no usable text), so keep it inside the ``try``.
            cleaned = response.text
        except Exception as e:
            logger.error("Error cleaning %s: %s", title, e)
            return content
        if not cleaned or not cleaned.strip():
            # An empty reply must not wipe out the original section.
            logger.warning("Empty response while cleaning %s; keeping original", title)
            return content
        return cleaned

    def process_analysis(self, input_path: str, output_path: str):
        """Clean every section of ``input_path`` and write ``output_path``.

        Args:
            input_path: Path of the sectioned analysis file to read.
            output_path: Path the cleaned analysis is written to (UTF-8).

        Returns:
            ``True`` when the cleaned file was written. ``False`` when no
            sections were found in the input or any error occurred; the
            error is logged rather than raised.
        """
        try:
            # Read and separate sections
            sections = self.read_sections(input_path)
            if not sections:
                # Writing a header-only file and reporting success would hide
                # the fact that nothing was processed.
                logger.error("No '### Title ###' sections found in %s", input_path)
                return False
            # Process each section
            cleaned_sections = {}
            for title, content in sections.items():
                logger.info("Processing %s", title)
                cleaned_sections[title] = self.clean_section(title, content)
            # Combine sections
            parts = ["SCREENPLAY CREATIVE ANALYSIS\n\n"]
            parts.extend(
                f"### {title} ###\n\n{content}\n\n"
                for title, content in cleaned_sections.items()
            )
            final_analysis = "".join(parts)
            # Save result using the same encoding read_sections expects
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(final_analysis)
            logger.info("Cleaned analysis saved to: %s", output_path)
            return True
        except Exception as e:
            # Top-level boundary: report failure through the return value,
            # keeping the traceback in the log for diagnosis.
            logger.exception("Error in post-processing: %s", e)
            return False
def main(argv=None):
    """Run the post-processor as a command-line tool.

    Args:
        argv: Optional list of command-line arguments without the program
            name: ``[input_file [output_file]]``. When ``None`` (a plain
            ``main()`` call) no arguments are parsed and the built-in
            placeholder paths are used.

    Returns:
        The result of ``AnalysisPostProcessor.process_analysis`` for the
        chosen paths, so the caller can turn it into an exit status.
    """
    parser = argparse.ArgumentParser(
        description="Clean a sectioned screenplay analysis file with Gemini."
    )
    parser.add_argument(
        "input_file",
        nargs="?",
        default="path/to/creative_analysis.txt",
        help="analysis file containing '### Title ###' sections",
    )
    parser.add_argument(
        "output_file",
        nargs="?",
        default="path/to/cleaned_creative_analysis.txt",
        help="where to write the cleaned analysis",
    )
    # Parse an empty list (not sys.argv) when argv is None so programmatic
    # callers are never affected by the host process's command line.
    args = parser.parse_args([] if argv is None else argv)
    processor = AnalysisPostProcessor()
    return processor.process_analysis(args.input_file, args.output_file)


if __name__ == "__main__":
    # Report failure to the shell instead of always exiting with status 0.
    sys.exit(0 if main(sys.argv[1:]) else 1)