Spaces:

yalrashed
/

pdf-to-podcast-test

Paused

App Files Community

yalrashed committed on Nov 17, 2024

Commit

b5c2af5

verified ·

1 Parent(s): b475860

Update src/dramatize_podcast.py

Browse files

Files changed (1) hide show

src/dramatize_podcast.py +25 -39

src/dramatize_podcast.py CHANGED Viewed

@@ -103,7 +103,7 @@ def query(payload):
             raise
 def clean_generated_text(text: str):
-    """Clean and validate the generated text"""
     try:
         # Find and extract the list content
         start_idx = text.find('[')
@@ -117,49 +117,35 @@ def clean_generated_text(text: str):
         # Remove any <|im_end|> markers
         list_text = list_text.split('<|im_end|>')[0].strip()
-        # Replace curly quotes with straight quotes
-        list_text = list_text.replace('"', '"')
-        list_text = list_text.replace('"', '"')
-        list_text = list_text.replace(''', "'")
-        list_text = list_text.replace(''', "'")
-        # Clean up formatting
-        list_text = list_text.replace('\n    ', '\n')
-        list_text = list_text.replace('  ', ' ')
-        # Try to parse
-        try:
-            dialogue_tuples = ast.literal_eval(list_text)
-        except SyntaxError as e:
-            print(f"Parse error: {str(e)}")
-            print("Attempting cleanup...")
-            # Try additional cleanup
-            list_text = list_text.strip()
-            dialogue_tuples = ast.literal_eval(list_text)
-        if not isinstance(dialogue_tuples, list):
-            raise Exception("Not a valid list of tuples")
-        # Validate and clean tuples
-        cleaned_tuples = []
-        for item in dialogue_tuples:
-            if not isinstance(item, tuple) or len(item) != 2:
-                continue
-            if item[0] not in ["Speaker 1", "Speaker 2"]:
-                continue
-            if not isinstance(item[1], str):
                 continue
-            # Clean up the text content
-            text = item[1].strip()
-            text = text.replace('  ', ' ')
-            cleaned_tuples.append((item[0], text))
-        if not cleaned_tuples:
             raise Exception("No valid dialogue tuples found")
-        return cleaned_tuples
     except Exception as e:
         print(f"Error parsing generated text: {str(e)}")

             raise
 def clean_generated_text(text: str):
+    """Clean and validate the generated text using string manipulation"""
     try:
         # Find and extract the list content
         start_idx = text.find('[')
         # Remove any <|im_end|> markers
         list_text = list_text.split('<|im_end|>')[0].strip()
+        # Split into individual tuples
+        lines = list_text.split('\n')
+        dialogue_tuples = []
+        for line in lines:
+            line = line.strip()
+            if not line or line in ['[', ']']:
                 continue
+            # Extract speaker and text
+            if line.startswith('("Speaker'):
+                # Remove leading ( and trailing ),
+                line = line.rstrip(',').rstrip(')').lstrip('(')
+                # Split into speaker and text
+                try:
+                    speaker, text = line.split('", ', 1)
+                    speaker = speaker.strip('"')
+                    text = text.strip().strip('"')
+                    if speaker in ["Speaker 1", "Speaker 2"]:
+                        dialogue_tuples.append((speaker, text))
+                except ValueError:
+                    continue
+        if not dialogue_tuples:
             raise Exception("No valid dialogue tuples found")
+        return dialogue_tuples
     except Exception as e:
         print(f"Error parsing generated text: {str(e)}")