yalrashed committed on
Commit
b5c2af5
·
verified ·
1 Parent(s): b475860

Update src/dramatize_podcast.py

Browse files
Files changed (1) hide show
  1. src/dramatize_podcast.py +25 -39
src/dramatize_podcast.py CHANGED
@@ -103,7 +103,7 @@ def query(payload):
103
  raise
104
 
105
  def clean_generated_text(text: str):
106
- """Clean and validate the generated text"""
107
  try:
108
  # Find and extract the list content
109
  start_idx = text.find('[')
@@ -117,49 +117,35 @@ def clean_generated_text(text: str):
117
  # Remove any <|im_end|> markers
118
  list_text = list_text.split('<|im_end|>')[0].strip()
119
 
120
- # Replace curly quotes with straight quotes
121
- list_text = list_text.replace('“', '"')
122
- list_text = list_text.replace('”', '"')
123
- list_text = list_text.replace('‘', "'")
124
- list_text = list_text.replace('’', "'")
125
 
126
- # Clean up formatting
127
- list_text = list_text.replace('\n ', '\n')
128
- list_text = list_text.replace('  ', ' ')
129
-
130
- # Try to parse
131
- try:
132
- dialogue_tuples = ast.literal_eval(list_text)
133
- except SyntaxError as e:
134
- print(f"Parse error: {str(e)}")
135
- print("Attempting cleanup...")
136
- # Try additional cleanup
137
- list_text = list_text.strip()
138
- dialogue_tuples = ast.literal_eval(list_text)
139
-
140
- if not isinstance(dialogue_tuples, list):
141
- raise Exception("Not a valid list of tuples")
142
-
143
- # Validate and clean tuples
144
- cleaned_tuples = []
145
- for item in dialogue_tuples:
146
- if not isinstance(item, tuple) or len(item) != 2:
147
- continue
148
- if item[0] not in ["Speaker 1", "Speaker 2"]:
149
- continue
150
- if not isinstance(item[1], str):
151
  continue
152
 
153
- # Clean up the text content
154
- text = item[1].strip()
155
- text = text.replace('  ', ' ')
156
-
157
- cleaned_tuples.append((item[0], text))
158
-
159
- if not cleaned_tuples:
 
 
 
 
 
 
 
 
 
 
160
  raise Exception("No valid dialogue tuples found")
161
 
162
- return cleaned_tuples
163
 
164
  except Exception as e:
165
  print(f"Error parsing generated text: {str(e)}")
 
103
  raise
104
 
105
  def clean_generated_text(text: str):
106
+ """Clean and validate the generated text using string manipulation"""
107
  try:
108
  # Find and extract the list content
109
  start_idx = text.find('[')
 
117
  # Remove any <|im_end|> markers
118
  list_text = list_text.split('<|im_end|>')[0].strip()
119
 
120
+ # Split into individual tuples
121
+ lines = list_text.split('\n')
122
+ dialogue_tuples = []
 
 
123
 
124
+ for line in lines:
125
+ line = line.strip()
126
+ if not line or line in ['[', ']']:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  continue
128
 
129
+ # Extract speaker and text
130
+ if line.startswith('("Speaker'):
131
+ # Remove leading ( and trailing ),
132
+ line = line.rstrip(',').rstrip(')').lstrip('(')
133
+
134
+ # Split into speaker and text
135
+ try:
136
+ speaker, text = line.split('", ', 1)
137
+ speaker = speaker.strip('"')
138
+ text = text.strip().strip('"')
139
+
140
+ if speaker in ["Speaker 1", "Speaker 2"]:
141
+ dialogue_tuples.append((speaker, text))
142
+ except ValueError:
143
+ continue
144
+
145
+ if not dialogue_tuples:
146
  raise Exception("No valid dialogue tuples found")
147
 
148
+ return dialogue_tuples
149
 
150
  except Exception as e:
151
  print(f"Error parsing generated text: {str(e)}")