Update src/dramatize_podcast.py
Browse files - src/dramatize_podcast.py +25 -39
src/dramatize_podcast.py
CHANGED
|
@@ -103,7 +103,7 @@ def query(payload):
|
|
| 103 |
raise
|
| 104 |
|
| 105 |
def clean_generated_text(text: str):
|
| 106 |
-
"""Clean and validate the generated text"""
|
| 107 |
try:
|
| 108 |
# Find and extract the list content
|
| 109 |
start_idx = text.find('[')
|
|
@@ -117,49 +117,35 @@ def clean_generated_text(text: str):
|
|
| 117 |
# Remove any <|im_end|> markers
|
| 118 |
list_text = list_text.split('<|im_end|>')[0].strip()
|
| 119 |
|
| 120 |
-
#
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
list_text = list_text.replace('‘', "'")
|
| 124 |
-
list_text = list_text.replace('’', "'")
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
# Try to parse
|
| 131 |
-
try:
|
| 132 |
-
dialogue_tuples = ast.literal_eval(list_text)
|
| 133 |
-
except SyntaxError as e:
|
| 134 |
-
print(f"Parse error: {str(e)}")
|
| 135 |
-
print("Attempting cleanup...")
|
| 136 |
-
# Try additional cleanup
|
| 137 |
-
list_text = list_text.strip()
|
| 138 |
-
dialogue_tuples = ast.literal_eval(list_text)
|
| 139 |
-
|
| 140 |
-
if not isinstance(dialogue_tuples, list):
|
| 141 |
-
raise Exception("Not a valid list of tuples")
|
| 142 |
-
|
| 143 |
-
# Validate and clean tuples
|
| 144 |
-
cleaned_tuples = []
|
| 145 |
-
for item in dialogue_tuples:
|
| 146 |
-
if not isinstance(item, tuple) or len(item) != 2:
|
| 147 |
-
continue
|
| 148 |
-
if item[0] not in ["Speaker 1", "Speaker 2"]:
|
| 149 |
-
continue
|
| 150 |
-
if not isinstance(item[1], str):
|
| 151 |
continue
|
| 152 |
|
| 153 |
-
#
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
raise Exception("No valid dialogue tuples found")
|
| 161 |
|
| 162 |
-
return
|
| 163 |
|
| 164 |
except Exception as e:
|
| 165 |
print(f"Error parsing generated text: {str(e)}")
|
|
|
|
| 103 |
raise
|
| 104 |
|
| 105 |
def clean_generated_text(text: str):
|
| 106 |
+
"""Clean and validate the generated text using string manipulation"""
|
| 107 |
try:
|
| 108 |
# Find and extract the list content
|
| 109 |
start_idx = text.find('[')
|
|
|
|
| 117 |
# Remove any <|im_end|> markers
|
| 118 |
list_text = list_text.split('<|im_end|>')[0].strip()
|
| 119 |
|
| 120 |
+
# Split into individual tuples
|
| 121 |
+
lines = list_text.split('\n')
|
| 122 |
+
dialogue_tuples = []
|
|
|
|
|
|
|
| 123 |
|
| 124 |
+
for line in lines:
|
| 125 |
+
line = line.strip()
|
| 126 |
+
if not line or line in ['[', ']']:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
continue
|
| 128 |
|
| 129 |
+
# Extract speaker and text
|
| 130 |
+
if line.startswith('("Speaker'):
|
| 131 |
+
# Remove leading ( and trailing ),
|
| 132 |
+
line = line.rstrip(',').rstrip(')').lstrip('(')
|
| 133 |
+
|
| 134 |
+
# Split into speaker and text
|
| 135 |
+
try:
|
| 136 |
+
speaker, text = line.split('", ', 1)
|
| 137 |
+
speaker = speaker.strip('"')
|
| 138 |
+
text = text.strip().strip('"')
|
| 139 |
+
|
| 140 |
+
if speaker in ["Speaker 1", "Speaker 2"]:
|
| 141 |
+
dialogue_tuples.append((speaker, text))
|
| 142 |
+
except ValueError:
|
| 143 |
+
continue
|
| 144 |
+
|
| 145 |
+
if not dialogue_tuples:
|
| 146 |
raise Exception("No valid dialogue tuples found")
|
| 147 |
|
| 148 |
+
return dialogue_tuples
|
| 149 |
|
| 150 |
except Exception as e:
|
| 151 |
print(f"Error parsing generated text: {str(e)}")
|