Spaces:
Runtime error
Runtime error
Commit
·
9db5d78
1
Parent(s):
e9a914a
add configurability for tone and length
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ import os
|
|
| 8 |
import time
|
| 9 |
from pathlib import Path
|
| 10 |
from tempfile import NamedTemporaryFile
|
| 11 |
-
from typing import List, Literal, Tuple
|
| 12 |
|
| 13 |
# Third-party imports
|
| 14 |
import gradio as gr
|
|
@@ -36,15 +36,37 @@ class Dialogue(BaseModel):
|
|
| 36 |
dialogue: List[DialogueItem]
|
| 37 |
|
| 38 |
|
| 39 |
-
def generate_podcast(file: str) -> Tuple[str, str]:
|
| 40 |
"""Generate the audio and transcript from the PDF."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
# Read the PDF file and extract text
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
# Call the LLM
|
| 47 |
-
llm_output = generate_script(
|
| 48 |
logger.info(f"Generated dialogue: {llm_output}")
|
| 49 |
|
| 50 |
# Process the dialogue
|
|
@@ -100,6 +122,16 @@ demo = gr.Interface(
|
|
| 100 |
label="PDF",
|
| 101 |
file_types=[".pdf", "file/*"],
|
| 102 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
],
|
| 104 |
outputs=[
|
| 105 |
gr.Audio(label="Audio", format="mp3"),
|
|
|
|
| 8 |
import time
|
| 9 |
from pathlib import Path
|
| 10 |
from tempfile import NamedTemporaryFile
|
| 11 |
+
from typing import List, Literal, Tuple, Optional
|
| 12 |
|
| 13 |
# Third-party imports
|
| 14 |
import gradio as gr
|
|
|
|
| 36 |
dialogue: List[DialogueItem]
|
| 37 |
|
| 38 |
|
| 39 |
+
def generate_podcast(file: str, tone: Optional[str] = None, length: Optional[str] = None) -> Tuple[str, str]:
|
| 40 |
"""Generate the audio and transcript from the PDF."""
|
| 41 |
+
# Check if the file is a PDF
|
| 42 |
+
if not file.lower().endswith('.pdf'):
|
| 43 |
+
raise gr.Error("Please upload a PDF file.")
|
| 44 |
+
|
| 45 |
# Read the PDF file and extract text
|
| 46 |
+
try:
|
| 47 |
+
with Path(file).open("rb") as f:
|
| 48 |
+
reader = PdfReader(f)
|
| 49 |
+
text = "\n\n".join([page.extract_text() for page in reader.pages])
|
| 50 |
+
except Exception as e:
|
| 51 |
+
raise gr.Error(f"Error reading the PDF file: {str(e)}")
|
| 52 |
+
|
| 53 |
+
# Check if the PDF has more than ~100,000 characters
|
| 54 |
+
if len(text) > 100000:
|
| 55 |
+
raise gr.Error("The PDF is too long. Please upload a PDF with fewer than ~100,000 characters.")
|
| 56 |
+
|
| 57 |
+
# Modify the system prompt based on the chosen tone and length
|
| 58 |
+
modified_system_prompt = SYSTEM_PROMPT
|
| 59 |
+
if tone:
|
| 60 |
+
modified_system_prompt += f"\n\nTONE: The tone of the podcast should be {tone}."
|
| 61 |
+
if length:
|
| 62 |
+
length_instructions = {
|
| 63 |
+
"Short (1-2 min)": "Keep the podcast brief, around 1-2 minutes long.",
|
| 64 |
+
"Medium (3-5 min)": "Aim for a moderate length, about 3-5 minutes.",
|
| 65 |
+
}
|
| 66 |
+
modified_system_prompt += f"\n\nLENGTH: {length_instructions[length]}"
|
| 67 |
|
| 68 |
# Call the LLM
|
| 69 |
+
llm_output = generate_script(modified_system_prompt, text, Dialogue)
|
| 70 |
logger.info(f"Generated dialogue: {llm_output}")
|
| 71 |
|
| 72 |
# Process the dialogue
|
|
|
|
| 122 |
label="PDF",
|
| 123 |
file_types=[".pdf", "file/*"],
|
| 124 |
),
|
| 125 |
+
gr.Radio(
|
| 126 |
+
choices=["Fun", "Formal"],
|
| 127 |
+
label="Tone of the podcast",
|
| 128 |
+
value="casual"
|
| 129 |
+
),
|
| 130 |
+
gr.Radio(
|
| 131 |
+
choices=["Short (1-2 min)", "Medium (3-5 min)"],
|
| 132 |
+
label="Length of the podcast",
|
| 133 |
+
value="Medium (3-5 min)"
|
| 134 |
+
),
|
| 135 |
],
|
| 136 |
outputs=[
|
| 137 |
gr.Audio(label="Audio", format="mp3"),
|
utils.py
CHANGED
|
@@ -23,19 +23,19 @@ client = OpenAI(
|
|
| 23 |
hf_client = Client("mrfakename/MeloTTS")
|
| 24 |
|
| 25 |
|
| 26 |
-
def generate_script(system_prompt: str,
|
| 27 |
"""Get the dialogue from the LLM."""
|
| 28 |
# Load as python object
|
| 29 |
try:
|
| 30 |
-
response = call_llm(system_prompt,
|
| 31 |
-
dialogue =
|
| 32 |
response.choices[0].message.content
|
| 33 |
)
|
| 34 |
except ValidationError as e:
|
| 35 |
error_message = f"Failed to parse dialogue JSON: {e}"
|
| 36 |
-
system_prompt_with_error = f"{system_prompt}\n\
|
| 37 |
-
response = call_llm(system_prompt_with_error,
|
| 38 |
-
dialogue =
|
| 39 |
response.choices[0].message.content
|
| 40 |
)
|
| 41 |
return dialogue
|
|
|
|
| 23 |
hf_client = Client("mrfakename/MeloTTS")
|
| 24 |
|
| 25 |
|
| 26 |
+
def generate_script(system_prompt: str, input_text: str, output_model):
    """Request a script from the LLM and parse the reply into ``output_model``.

    The LLM is queried through ``call_llm`` and the content of the first
    returned choice is validated with ``output_model.model_validate_json``.
    If that validation raises ``ValidationError``, the request is repeated
    exactly once with the error text appended to the system prompt.

    Args:
        system_prompt: Instructions sent to the LLM.
        input_text: Source text the script is generated from.
        output_model: Class exposing ``model_validate_json`` (presumably a
            pydantic model class -- confirm against the caller).

    Returns:
        The object produced by ``output_model.model_validate_json``.

    Raises:
        ValidationError: If the retried reply also fails validation; the
            second failure is not caught here.
    """

    def _ask(prompt: str):
        # One round trip: query the LLM, then validate the first choice.
        reply = call_llm(prompt, input_text, output_model)
        return output_model.model_validate_json(reply.choices[0].message.content)

    try:
        return _ask(system_prompt)
    except ValidationError as exc:
        # Feed the parse error back to the model and try a single time more.
        error_message = f"Failed to parse dialogue JSON: {exc}"
        system_prompt_with_error = f"{system_prompt}\n\nPlease return a VALID JSON object. This was the earlier error: {error_message}"
        return _ask(system_prompt_with_error)
|