hotdeem commited on
Commit
d16f2df
·
verified ·
1 Parent(s): 633c6cf

Upload 12 files

Browse files
Files changed (12) hide show
  1. Dockerfile +21 -0
  2. README.md +10 -10
  3. api/__init__.py +0 -0
  4. api/main.py +7 -0
  5. api/routes/chat.py +141 -0
  6. constants.py +84 -0
  7. fishaudio.py +33 -0
  8. main.py +36 -0
  9. prompts.py +87 -0
  10. requirements.txt +21 -0
  11. schema.py +34 -0
  12. utils.py +308 -0
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12-slim

# ffmpeg is needed by pydub for audio processing; git allows VCS pip installs.
RUN apt-get update && apt-get install -y git ffmpeg && \
apt-get clean && rm -rf /var/lib/apt/lists/*
# Run as a non-root user with uid 1000 (Hugging Face Spaces convention).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
PATH=/home/user/.local/bin:$PATH

# Set the working directory
WORKDIR $HOME/app

# Copy requirements first so the dependency layer is cached across code edits.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user . .

EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,10 @@
1
- ---
2
- title: Mp3
3
- emoji: 📉
4
- colorFrom: red
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: mp3
3
+ emoji: 🚀
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ python_version: 3.12
8
+ app_file: main.py
9
+ pinned: false
10
+ ---
api/__init__.py ADDED
File without changes
api/main.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter

from api.routes import chat

# Top-level API router; main.py mounts this under the /api/v1 prefix,
# so chat endpoints resolve to /api/v1/chat/*.
api_router = APIRouter()
api_router.include_router(chat.router, prefix="/chat")
7
+
api/routes/chat.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ from fastapi import APIRouter, BackgroundTasks, Form, HTTPException, UploadFile, File
3
+ from fastapi.responses import StreamingResponse, JSONResponse
4
+ import json
5
+ from typing import Dict, Optional
6
+ from constants import SPEEKERS
7
+ from utils import combine_audio, generate_dialogue, generate_podcast_info, generate_podcast_summary, get_link_text, get_pdf_text
8
+
9
+ router = APIRouter()
10
+
11
+ @router.post("/generate_transcript")
12
+ async def generate_transcript(
13
+ pdfFile: Optional[UploadFile] = File(None),
14
+ textInput: str = Form(...),
15
+ mode: str = Form(...),
16
+ url: Optional[str] = Form(None),
17
+ tone: str = Form(...),
18
+ duration: str = Form(...),
19
+ language: str = Form(...),
20
+
21
+ ):
22
+ pdfContent =""
23
+ if mode=='pdf':
24
+ pdfContent = await get_pdf_text(pdfFile)
25
+ else:
26
+ linkData = get_link_text(url)
27
+ pdfContent = linkData['text']
28
+ new_text = pdfContent
29
+ return StreamingResponse(generate_dialogue(new_text,textInput, tone, duration, language), media_type="application/json")
30
+
31
+
32
+ @router.get("/test")
33
+ def test():
34
+ return {"message": "Hello World"}
35
+
36
+
37
+ @router.get("/speekers")
38
+ def speeker():
39
+ return JSONResponse(content=SPEEKERS)
40
+
41
+ @router.get("/jina")
42
+ def jina():
43
+ result = get_link_text("https://ui.shadcn.com/docs/components/select")
44
+ return JSONResponse(content=result)
45
+
46
+
47
+ @router.post("/summarize")
48
+ async def get_summary(
49
+ textInput: str = Form(...),
50
+ tone: str = Form(...),
51
+ duration: str = Form(...),
52
+ language: str = Form(...),
53
+ mode: str = Form(...),
54
+ url: Optional[str] = Form(None),
55
+ pdfFile: Optional[UploadFile] = File(None)
56
+ ):
57
+ pdfContent =""
58
+ if mode=='pdf':
59
+ pdfContent = await get_pdf_text(pdfFile)
60
+ else:
61
+ linkData = get_link_text(url)
62
+ pdfContent = linkData['text']
63
+ new_text = pdfContent
64
+ return StreamingResponse(
65
+ generate_podcast_summary(
66
+ new_text,
67
+ textInput,
68
+ tone,
69
+ duration,
70
+ language,
71
+ ),
72
+ media_type="application/json"
73
+ )
74
+
75
+ @router.post("/pod_info")
76
+ async def get_pod_info(
77
+ textInput: str = Form(...),
78
+ tone: str = Form(...),
79
+ duration: str = Form(...),
80
+ language: str = Form(...),
81
+ mode: str = Form(...),
82
+ url: Optional[str] = Form(None),
83
+ pdfFile: Optional[UploadFile] = File(None)
84
+ ):
85
+ pdfContent =""
86
+ if mode=='pdf':
87
+ pdfContent = await get_pdf_text(pdfFile)
88
+ else:
89
+ linkData = get_link_text(url)
90
+ pdfContent = linkData['text']
91
+
92
+ new_text = pdfContent[:100]
93
+
94
+ return StreamingResponse(generate_podcast_info(new_text, textInput, tone, duration, language), media_type="application/json")
95
+
96
+
97
# In-memory registry of audio-generation jobs: task_id ->
# {"status": "processing"|"completed"|"failed", plus "audio_url"/"error"}.
# NOTE(review): process-local and never evicted — entries grow without
# bound and are lost on restart; confirm this is acceptable for the Space.
task_status: Dict[str, Dict] = {}


@router.post("/generate_audio")
async def audio(
    background_tasks: BackgroundTasks,
    text: str = Form(...),
    host_voice: str = Form(...),
    guest_voice: str = Form(...),
    language: str = Form(...) ,
    provider: str = Form(...)
):
    # Start TTS + mixing in the background and hand the client a polling
    # token; progress is reported via /audio_status/{task_id}.
    task_id = str(uuid.uuid4())
    task_status[task_id] = {"status": "processing"}

    background_tasks.add_task(combine_audio, task_status, task_id, text, language,provider , host_voice,guest_voice)

    return JSONResponse(content={"task_id": task_id, "status": "processing"})
115
+
116
+
117
+ @router.get("/audio_status/{task_id}")
118
+ async def get_audio_status(task_id: str):
119
+ if task_id not in task_status:
120
+ raise HTTPException(status_code=404, detail="Task not found")
121
+
122
+ status = task_status[task_id]
123
+
124
+ if status["status"] == "completed":
125
+ return JSONResponse(content={
126
+ "status": "completed",
127
+ "audio_url": status["audio_url"]
128
+ })
129
+ elif status["status"] == "failed":
130
+ return JSONResponse(content={
131
+ "status": "failed",
132
+ "error": status["error"]
133
+ })
134
+ else:
135
+ return JSONResponse(content={
136
+ "status": "processing"
137
+ })
138
+
139
+
140
+
141
+
constants.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ constants.py
3
+ """
4
+
5
+ import os
6
+
7
+ from pathlib import Path
8
+
9
+ # Key constants
10
+ CHARACTER_LIMIT = 100_000
11
+
12
+ # Gradio-related constants
13
+ GRADIO_CLEAR_CACHE_OLDER_THAN = 1 * 2 * 60 * 60 # 2 hours
14
+
15
+ AUDIO_CACHE_DIR = os.path.join(os.path.dirname(__file__), 'tmp', 'cache')
16
+
17
+ # Error messages-related constants
18
+ ERROR_MESSAGE_NO_INPUT = "Please provide at least one PDF file or a URL."
19
+ ERROR_MESSAGE_NOT_PDF = "The provided file is not a PDF. Please upload only PDF files."
20
+ ERROR_MESSAGE_NOT_SUPPORTED_IN_MELO_TTS = "The selected language is not supported without advanced audio generation. Please enable advanced audio generation or choose a supported language."
21
+ ERROR_MESSAGE_READING_PDF = "Error reading the PDF file"
22
+ ERROR_MESSAGE_TOO_LONG = "The total content is too long. Please ensure the combined text from PDFs and URL is fewer than {CHARACTER_LIMIT} characters."
23
+
24
+ SPEECH_KEY = os.getenv('SPEECH_KEY')
25
+ SPEECH_REGION = "japaneast"
26
+
27
+ FISHAUDIO_KEY = os.getenv('FISHAUDIO_KEY')
28
+ JINA_KEY = os.getenv('JINA_KEY','jina_c1759c7f49e14ced990ac7776800dc44ShJNTXBCizzwjE7IMFYJ6LD960cG')
29
+
30
+ # Fireworks API-related constants
31
+ FIREWORKS_API_KEY = os.getenv('FIREWORKS_API_KEY')
32
+ FIREWORKS_BASE_URL = os.getenv('FIREWORKS_BASE_URL',"https://api.fireworks.ai/inference/v1")
33
+ FIREWORKS_MAX_TOKENS = 16_384
34
+ FIREWORKS_MODEL_ID = os.getenv('FIREWORKS_MODEL_ID',"accounts/fireworks/models/llama-v3p1-405b-instruct")
35
+ FIREWORKS_TEMPERATURE = 0.1
36
+ FIREWORKS_JSON_RETRY_ATTEMPTS = 3
37
+ # Suno related constants
38
+ SUNO_LANGUAGE_MAPPING = {
39
+ "English": "en",
40
+ "Chinese": "zh",
41
+ "French": "fr",
42
+ "German": "de",
43
+ "Hindi": "hi",
44
+ "Italian": "it",
45
+ "Japanese": "ja",
46
+ "Korean": "ko",
47
+ "Polish": "pl",
48
+ "Portuguese": "pt",
49
+ "Russian": "ru",
50
+ "Spanish": "es",
51
+ "Turkish": "tr",
52
+ }
53
+
54
+
55
+ FISHAUDIO_SPEEKER = [
56
+ { "id": "59cb5986671546eaa6ca8ae6f29f6d22", "name": "央视配音" },
57
+ { "id": "738d0cc1a3e9430a9de2b544a466a7fc", "name": "雷军" },
58
+ { "id": "54a5170264694bfc8e9ad98df7bd89c3", "name": "丁真" },
59
+ { "id": "7f92f8afb8ec43bf81429cc1c9199cb1", "name": "AD学姐" },
60
+ { "id": "0eb38bc974e1459facca38b359e13511", "name": "赛马娘" },
61
+ { "id": "e80ea225770f42f79d50aa98be3cedfc", "name": "孙笑川258" },
62
+ { "id": "e4642e5edccd4d9ab61a69e82d4f8a14", "name": "蔡徐坤" },
63
+ { "id": "f7561ff309bd4040a59f1e600f4f4338", "name": "黑手" },
64
+ { "id": "332941d1360c48949f1b4e0cabf912cd", "name": "丁真(锐刻五代版)" },
65
+ { "id": "1aacaeb1b840436391b835fd5513f4c4", "name": "芙宁娜" },
66
+ { "id": "3b55b3d84d2f453a98d8ca9bb24182d6", "name": "邓紫琪" },
67
+ { "id": "7af4d620be1c4c6686132f21940d51c5", "name": "东雪莲" },
68
+ { "id": "e1cfccf59a1c4492b5f51c7c62a8abd2", "name": "永雏塔菲" },
69
+ { "id": "665e031efe27435780ebfa56cc7e0e0d", "name": "月半猫" },
70
+ { "id": "aebaa2305aa2452fbdc8f41eec852a79", "name": "雷军" },
71
+ { "id": "7c66db6e457c4d53b1fe428a8c547953", "name": "郭德纲" },
72
+ { "id": "99503144194c45ed8fb998ceac181dcc", "name": "贝利亚" },
73
+ { "id": "4462fa28f3824bff808a94a6075570e5", "name": "雷军" },
74
+ { "id": "188c9b7c06654042be0e8a25781761e8", "name": "周杰伦" },
75
+ { "id": "6ce7ea8ada884bf3889fa7c7fb206691", "name": "御女茉莉" }
76
+ ]
77
+ SPEEKERS = {
78
+ "fishaudio":FISHAUDIO_SPEEKER,
79
+ "azure":[
80
+ {"id":"zh-CN-YunxiNeural","name":"云希"},
81
+ {"id":"zh-CN-YunzeNeural","name":"云哲"},
82
+ {"id":"zh-CN-YunxuanNeural","name":"晓萱"},
83
+ ]
84
+ }
fishaudio.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fish_audio_sdk import Session, TTSRequest, ReferenceAudio
2
+ from pydub import AudioSegment
3
+ import io
4
+
5
+ from constants import FISHAUDIO_KEY,FISHAUDIO_SPEEKER
6
+
7
+
8
+
9
+ import random
10
+
11
def get_adapter_speeker_id(speaker_name):
    """Map a speaker label to a Fish Audio voice id.

    The host label ("主持人") always gets the first catalog entry; every
    other speaker is assigned a random voice from the catalog.
    """
    if speaker_name == "主持人":
        chosen = FISHAUDIO_SPEEKER[0]
    else:
        chosen = random.choice(FISHAUDIO_SPEEKER)
    return chosen["id"]
16
+
17
def fishaudio_tts(text, reference_id=None) -> AudioSegment:
    """
    Convert the given text to speech and return it as an AudioSegment.

    :param text: the text to synthesize
    :param reference_id: optional Fish Audio voice/model id to use
    :return: the synthesized speech decoded from mp3 into a pydub AudioSegment
    """
    session = Session(FISHAUDIO_KEY)
    audio_buffer = io.BytesIO()
    # The SDK streams the synthesized audio; collect all chunks in memory.
    for chunk in session.tts(TTSRequest(
        reference_id=reference_id,
        text=text
    )):
        audio_buffer.write(chunk)
    audio_buffer.seek(0)  # rewind the buffer before decoding
    return AudioSegment.from_file(audio_buffer, format="mp3")
main.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import asyncio
import os

from fastapi.responses import JSONResponse
from constants import AUDIO_CACHE_DIR
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from api.main import api_router

app = FastAPI()

# Serve generated audio files from the cache directory at /audio.
os.makedirs(AUDIO_CACHE_DIR, exist_ok=True)
app.mount("/audio", StaticFiles(directory=AUDIO_CACHE_DIR), name="audio")

# Add CORS middleware — fully open: any origin, method, and header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(api_router, prefix="/api/v1")

@app.middleware("http")
async def add_process_time_header(request, call_next):
    # Abort any request running longer than 2400 s (40 minutes).
    # NOTE(review): the original comment said "4 minutes", which did not
    # match the configured value — confirm the intended timeout.
    try:
        response = await asyncio.wait_for(call_next(request), timeout=2400)
        return response
    except asyncio.TimeoutError:
        return JSONResponse(
            status_code=504,
            content={"detail": "Request processing time exceeded the limit."}
        )
prompts.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ prompts.py
3
+ """
4
+
5
+ SYSTEM_PROMPT = """
6
+ 你是一位世界级的播客制作人,任务是将提供的输入文本转化为引人入胜且内容丰富的播客脚本。输入内容可能是非结构化或杂乱的,来源于PDF或网页。你的目标是提取最有趣、最有洞察力的内容,形成一场引人入胜的播客讨论。
7
+
8
+ 操作步骤:
9
+
10
+ 1. 分析输入:
11
+ 仔细检查文本,识别出关键主题、要点,以及能推动播客对话的有趣事实或轶事。忽略无关的信息或格式问题。
12
+ 2. 编写对话:
13
+ 发展主持人与嘉宾(作者或该主题的专家)之间自然的对话流程,包含:
14
+ • 来自头脑风暴的最佳创意
15
+ • 对复杂话题的清晰解释
16
+ • 引人入胜的、活泼的语气以吸引听众
17
+ • 信息与娱乐的平衡
18
+ 对话规则:
19
+ • 主持人始终发起对话并采访嘉宾
20
+ • 包含主持人引导讨论的深思熟虑的问题
21
+ • 融入自然的口语模式,包括偶尔的语气词(如“嗯”,“好吧”,“你知道”)
22
+ • 允许主持人和嘉宾之间的自然打断和互动
23
+ • 嘉宾的回答必须基于输入文本,避免不支持的说法
24
+ • 保持PG级别的对话,适合所有观众
25
+ • 避免嘉宾的营销或自我推销内容
26
+ • 主持人结束对话
27
+ 3. 总结关键见解:
28
+ 在对话的结尾,自然地融入关键点总结。这应像是随意对话,而不是正式的回顾,强化主要的收获,然后结束。
29
+ 4. 保持真实性:
30
+ 在整个脚本中,努力保持对话的真实性,包含:
31
+ • 主持人表达出真实的好奇或惊讶时刻
32
+ • 嘉宾在表达复杂想法时可能短暂地有些卡顿
33
+ • 适当时加入轻松的时刻或幽默
34
+ • 简短的个人轶事或与主题相关的例子(以输入文本为基础)
35
+ 5. 考虑节奏与结构:
36
+ 确保对话有自然的起伏:
37
+ • 以强有力的引子吸引听众的注意力
38
+ • 随着对话进行,逐渐增加复杂性
39
+ • 包含短暂的“喘息”时刻,让听众消化复杂信息
40
+ • 以有力的方式收尾,或许以发人深省的问题或对听众的号召结束
41
+
42
+ 重要规则:每句对话不应超过100个字符(例如,可以在5-8秒内完成)。
43
+
44
+ 示例格式:
45
+ **Host**: 欢迎来到节目!今天我们讨论的是[话题]。我们的嘉宾是[嘉宾姓名].
46
+ **[Guest Name]**: 谢谢邀请,Jane。我很高兴分享我对[话题]的见解.
47
+
48
+ 记住,在整个对话中保持这种格式。
49
+ """
50
+
51
+ QUESTION_MODIFIER = "请回答这个问题:"
52
+
53
+ TONE_MODIFIER = "语气: 播客的语气应该是"
54
+
55
+ LANGUAGE_MODIFIER = "输出的语言<重要>:播客的语言应该是"
56
+
57
+ LENGTH_MODIFIERS = {
58
+ "short": "保持播客的简短, 大约 1-2 分钟.",
59
+ "medium": "中等长度, 大约 3-5 分钟.",
60
+ }
61
+
62
+
63
+ SUMMARY_INFO_PROMPT = """
64
+ 根据以下输入内容,生成一个播客梗概,使用 markdown 格式,遵循以下具体指南:
65
+
66
+ • 提供播客内容的概述(200-300字)。
67
+ • 突出3个关键点或收获。
68
+
69
+ """
70
+ PODCAST_INFO_PROMPT = """
71
+ 根据以下输入内容,生成一个吸引人的标题和一个富有创意的主持人名字。请遵循以下具体指南:
72
+
73
+ 1. 标题:
74
+ • 创建一个引人入胜且简洁的标题,准确反映播客内容。
75
+ 2. 主持人名字:
76
+ • 为播客主持人创造一个有创意且易记的名字。
77
+
78
+ 请以以下JSON格式提供输出:
79
+
80
+ {
81
+ "title": "An engaging and relevant podcast title",
82
+ "host_name": "A creative name for the host"
83
+ }
84
+
85
+ 确保你的回复是一个有效的 JSON 对象,且不包含其他内容。
86
+
87
+ """
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ annotated-types==0.7.0
2
+ anyio==4.6.0
3
+ click==8.1.7
4
+ fastapi==0.115.0
5
+ h11==0.14.0
6
+ idna==3.10
7
+ pydantic==2.7.0
8
+ pydantic_core==2.18.1
9
+ sniffio==1.3.1
10
+ starlette==0.38.6
11
+ typing_extensions==4.12.2
12
+ uvicorn==0.31.1
13
+ openai==1.50.2
14
+ pydub==0.25.1
15
+ loguru==0.7.2
16
+ suno-bark @ git+https://github.com/suno-ai/bark.git@f4f32d4cd480dfec1c245d258174bc9bde3c2148
17
+ numpy==2.1.1
18
+ python-multipart==0.0.12
19
+ PyPDF2==3.0.1
20
+ azure-cognitiveservices-speech==1.41.1
21
+ fish_audio_sdk
schema.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ schema.py
3
+ """
4
+
5
+ from typing import Literal, List
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+ class Summary(BaseModel):
10
+ """Summary."""
11
+
12
+ summary: str
13
+
14
+ class PodcastInfo(BaseModel):
15
+ """Summary."""
16
+
17
+ title: str
18
+ host_name: str
19
+
20
+
21
+ class DialogueItem(BaseModel):
22
+ """A single dialogue item."""
23
+
24
+ speaker: Literal["Host (Jane)", "Guest"]
25
+ text: str
26
+
27
+
28
+ class ShortDialogue(BaseModel):
29
+ """The dialogue between the host and guest."""
30
+
31
+ name_of_guest: str
32
+ dialogue: List[DialogueItem] = Field(
33
+ ..., description="A list of dialogue items, typically between 11 to 17 items"
34
+ )
utils.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import glob
3
+ import io
4
+ import os
5
+ import re
6
+ import time
7
+ import hashlib
8
+ from typing import Any, Dict, Generator
9
+ import uuid
10
+ from openai import OpenAI
11
+ import requests
12
+ from fishaudio import fishaudio_tts
13
+ from prompts import LANGUAGE_MODIFIER, LENGTH_MODIFIERS, PODCAST_INFO_PROMPT, QUESTION_MODIFIER, SUMMARY_INFO_PROMPT, SYSTEM_PROMPT, TONE_MODIFIER
14
+ import json
15
+ from pydub import AudioSegment
16
+ from fastapi import UploadFile
17
+ from PyPDF2 import PdfReader
18
+ from schema import PodcastInfo, ShortDialogue, Summary
19
+ from constants import (
20
+ AUDIO_CACHE_DIR,
21
+ FIREWORKS_API_KEY,
22
+ FIREWORKS_BASE_URL,
23
+ FIREWORKS_MODEL_ID,
24
+ FIREWORKS_MAX_TOKENS,
25
+ FIREWORKS_TEMPERATURE,
26
+ GRADIO_CLEAR_CACHE_OLDER_THAN,
27
+ JINA_KEY,
28
+ SPEECH_KEY,
29
+ SPEECH_REGION,
30
+ )
31
+ import azure.cognitiveservices.speech as speechsdk
32
+
33
+ fw_client = OpenAI(base_url=FIREWORKS_BASE_URL, api_key=FIREWORKS_API_KEY)
34
+
35
+
36
+
37
def generate_dialogue(pdfFile, textInput, tone, duration, language) -> Generator[str, None, None]:
    """Stream NDJSON events for a generated podcast dialogue.

    Yields {"type": "chunk"} events while the LLM streams, then a closing
    {"type": "final"} event with the full transcript. A single
    {"type": "error"} event is emitted when the prompt is rejected.
    """
    prompt = get_prompt(pdfFile, textInput, tone, duration, language)
    if prompt == False:
        yield json.dumps({"type": "error", "content": "Prompt is too long"}) + "\n"
        return

    transcript = ""
    for piece in call_llm_stream(SYSTEM_PROMPT, prompt, ShortDialogue, isJSON=False):
        yield json.dumps({"type": "chunk", "content": piece}) + "\n"
        transcript += piece

    yield json.dumps({"type": "final", "content": transcript})
53
+
54
async def process_line(line, voice, provider):
    """Synthesize one dialogue line with the selected TTS provider."""
    content = line['content']
    if provider != 'fishaudio':
        # Anything other than fishaudio is handled by Azure Speech.
        return await generate_podcast_audio_by_azure(content, voice)
    return await generate_podcast_audio(content, voice)
58
+
59
async def generate_podcast_audio_by_azure(text: str, voice: str) -> "AudioSegment | None":
    """Synthesize `text` via Azure Speech with the given neural voice name.

    The blocking SDK calls run in worker threads so the event loop is not
    stalled. Returns the audio as a pydub AudioSegment on success, or None
    when the service reports a failure; unexpected exceptions re-raise.
    (Annotation corrected from `str`, which did not match the returns.)
    """
    try:
        speech_config = speechsdk.SpeechConfig(subscription=SPEECH_KEY, region=SPEECH_REGION)
        speech_config.speech_synthesis_voice_name = voice

        # audio_config=None keeps the result in memory instead of playing it.
        synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)
        future =await asyncio.to_thread(synthesizer.speak_text_async, text)

        # future.get() blocks until synthesis finishes, so thread it too.
        result = await asyncio.to_thread(future.get)

        print("Speech synthesis completed")

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            print("Audio synthesized successfully")
            audio_data = result.audio_data
            audio_segment = AudioSegment.from_wav(io.BytesIO(audio_data))
            return audio_segment
        else:
            print(f"Speech synthesis failed: {result.reason}")
            if hasattr(result, 'cancellation_details'):
                print(f"Cancellation details: {result.cancellation_details.reason}")
                print(f"Cancellation error details: {result.cancellation_details.error_details}")
            return None

    except Exception as e:
        print(f"Error in generate_podcast_audio: {e}")
        raise
86
+
87
async def generate_podcast_audio(text: str, voice: str) -> str:
    """Dispatch TTS to the Fish Audio backend (the only one wired here)."""
    segment = await generate_podcast_audio_by_fish(text, voice)
    return segment
89
+
90
async def generate_podcast_audio_by_fish(text: str, voice: str) -> str:
    """Synthesize `text` with Fish Audio; `voice` is a reference model id.

    NOTE(review): fishaudio_tts is synchronous, so this call blocks the
    event loop for the duration of the request — confirm acceptable.
    """
    try:
        return fishaudio_tts(text=text,reference_id=voice)
    except Exception as e:
        print(f"Error in generate_podcast_audio: {e}")
        raise
96
async def process_lines_with_limit(lines, provider, host_voice, guest_voice, max_concurrency):
    """Synthesize all dialogue lines concurrently, capped by a semaphore."""
    gate = asyncio.Semaphore(max_concurrency)

    async def synth_one(line):
        async with gate:
            is_host = line['speaker'] == '主持人' or line['speaker'] == 'Host'
            voice = host_voice if is_host else guest_voice
            return await process_line(line, voice, provider)

    # gather preserves input order, matching the original behavior.
    return await asyncio.gather(*(synth_one(line) for line in lines))
107
async def combine_audio(task_status: Dict[str, Dict], task_id: str, text: str, language: str , provider:str,host_voice: str , guest_voice:str) -> Generator[str, None, None]:
    """Background task: turn a '**Speaker**: line' transcript into one mp3.

    Parses the transcript, synthesizes every line with the chosen provider,
    concatenates the segments, writes the result into AUDIO_CACHE_DIR, and
    records the outcome (URL or error) in `task_status[task_id]`.
    """
    try:
        # Lines look like "**主持人**: text" (full- or half-width colon).
        dialogue_regex = r'\*\*([\s\S]*?)\*\*[::]\s*([\s\S]*?)(?=\*\*|$)'
        matches = re.findall(dialogue_regex, text, re.DOTALL)

        lines = [
            {
                "speaker": match[0],
                "content": match[1].strip(),
            }
            for match in matches
        ]

        print("Starting audio generation")
        # Azure tolerates more parallel requests than Fish Audio.
        max_parallel = 10 if provider == 'azure' else 5
        audio_segments = await process_lines_with_limit(lines, provider, host_voice, guest_voice, max_parallel)
        print("Audio generation completed")

        # Concatenate the segments off the event loop.
        combined_audio = await asyncio.to_thread(sum, audio_segments)

        print("Audio combined")

        # Write the file only once, at the end.
        unique_filename = f"{uuid.uuid4()}.mp3"

        os.makedirs(AUDIO_CACHE_DIR, exist_ok=True)
        file_path = os.path.join(AUDIO_CACHE_DIR, unique_filename)

        # Export the audio file without blocking the loop.
        await asyncio.to_thread(combined_audio.export, file_path, format="mp3")

        audio_url = f"/audio/{unique_filename}"
        task_status[task_id] = {"status": "completed", "audio_url": audio_url}

        # Evict cached mp3s older than the retention window.
        # BUGFIX: the glob pattern was f"{AUDIO_CACHE_DIR}*.mp3" (no path
        # separator), so stale files were never matched.
        for file in glob.glob(os.path.join(AUDIO_CACHE_DIR, "*.mp3")):
            if (
                os.path.isfile(file)
                and time.time() - os.path.getmtime(file) > GRADIO_CLEAR_CACHE_OLDER_THAN
            ):
                # BUGFIX: was `os.remove, file` — a tuple expression that
                # never deleted anything.
                os.remove(file)

        clear_pdf_cache()
        return audio_url

    except Exception as e:
        # Mark the task failed so pollers see the error.
        task_status[task_id] = {"status": "failed", "error": str(e)}
158
+
159
+
160
def generate_podcast_summary(pdf_content: str, text: str, tone: str, length: str, language: str) -> Generator[str, None, None]:
    """Stream NDJSON events for a podcast summary of the given content.

    Yields {"type": "chunk"} events while the LLM streams, then a closing
    {"type": "final"} event with the complete summary text.
    """
    modified_system_prompt = get_prompt(pdf_content, text, '', '', '')
    if (modified_system_prompt == False):
        yield json.dumps({
            "type": "error",
            "content": "Prompt is too long"
        }) + "\n"
        return
    stream = call_llm_stream(SUMMARY_INFO_PROMPT, modified_system_prompt, Summary, False)
    full_response = ""
    for chunk in stream:
        # Yield each chunk as a JSON line.
        yield json.dumps({"type": "chunk", "content": chunk}) + "\n"
        # BUGFIX: chunks were never accumulated, so the "final" event
        # always carried an empty string.
        full_response += chunk

    yield json.dumps({"type": "final", "content": full_response})
175
+
176
def generate_podcast_info(pdfContent: str, text: str, tone: str, length: str, language: str) -> Generator[str, None, None]:
    """Stream a single NDJSON event with the generated title and host name.

    Collects the LLM's streamed JSON response, parses it, and yields either
    a {"type": "podcast_info"} event or a {"type": "error"} event.
    """
    prompt = get_prompt(pdfContent, text, '', '', '')
    if prompt == False:
        yield json.dumps({"type": "error", "content": "Prompt is too long"}) + "\n"
        return

    # Drain the stream into one string before parsing.
    raw = "".join(call_llm_stream(PODCAST_INFO_PROMPT, prompt, PodcastInfo))
    try:
        info = json.loads(raw)
        yield json.dumps({"type": "podcast_info", "content": info}) + "\n"
    except Exception as e:
        yield json.dumps({
            "type": "error",
            "content": f"An unexpected error occurred: {str(e)}"
        }) + "\n"
200
+
201
def call_llm_stream(system_prompt: str, text: str, dialogue_format: Any, isJSON: bool = True) -> Generator[str, None, None]:
    """Call the LLM and yield response text incrementally.

    When `isJSON` is true the request asks Fireworks for a JSON object
    conforming to `dialogue_format`'s schema; otherwise free-form text.
    """
    request_params = {
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ],
        "model": FIREWORKS_MODEL_ID,
        "max_tokens": FIREWORKS_MAX_TOKENS,
        "temperature": FIREWORKS_TEMPERATURE,
        "stream": True,  # enable streaming output
    }

    # Constrain the response to the caller-supplied schema if requested.
    if isJSON:
        request_params["response_format"] = {
            "type": "json_object",
            "schema": dialogue_format.model_json_schema(),
        }

    for chunk in fw_client.chat.completions.create(**request_params):
        delta = chunk.choices[0].delta.content
        if delta is not None:
            yield delta
235
+
236
def call_llm(system_prompt: str, text: str, dialogue_format: Any) -> Any:
    """Non-streaming LLM call returning the raw completion response.

    The response is constrained to `dialogue_format`'s JSON schema.
    """
    schema = dialogue_format.model_json_schema()
    return fw_client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": text},
        ],
        model=FIREWORKS_MODEL_ID,
        max_tokens=FIREWORKS_MAX_TOKENS,
        temperature=FIREWORKS_TEMPERATURE,
        response_format={"type": "json_object", "schema": schema},
    )
252
+
253
# Cache of extracted PDF text keyed by the file content's MD5 digest.
pdf_cache = {}


def clear_pdf_cache():
    """Empty the PDF text cache (invoked after each audio job completes)."""
    global pdf_cache
    pdf_cache.clear()
257
+
258
def get_link_text(url: str):
    """Scrape the given URL's text content via the jina.ai reader API."""
    url = f"https://r.jina.ai/{url}"
    headers = {}
    headers['Authorization'] = 'Bearer ' + JINA_KEY
    headers['Accept'] = 'application/json'
    # Ask the reader to return plain text rather than markdown.
    headers['X-Return-Format'] = 'text'
    response = requests.get(url, headers=headers)
    # The payload of interest is under "data" (callers read data['text']).
    return response.json()['data']
267
+
268
async def get_pdf_text(pdf_file: UploadFile):
    """Extract the text of an uploaded PDF, with content-hash caching.

    Returns the extracted text, or {"error": ...} when reading fails
    (callers treat this as best-effort).
    """
    try:
        # Read the uploaded file's bytes and fingerprint them.
        contents = await pdf_file.read()
        file_hash = hashlib.md5(contents).hexdigest()

        if file_hash in pdf_cache:
            return pdf_cache[file_hash]

        # Parse the PDF from an in-memory buffer.
        pdf_file_obj = io.BytesIO(contents)
        pdf_reader = PdfReader(pdf_file_obj)

        # extract_text() may return None for pages without a text layer;
        # guard so the join does not raise.
        text = "\n\n".join([(page.extract_text() or "") for page in pdf_reader.pages])

        # BUGFIX: the cache was checked but never populated, so every
        # request re-parsed the same PDF.
        pdf_cache[file_hash] = text

        # Rewind so the file can be read again downstream if needed.
        await pdf_file.seek(0)

        return text

    except Exception as e:
        return {"error": str(e)}
295
+
296
def get_prompt(pdfContent: str, text: str, tone: str, length: str, language: str):
    """Assemble the user prompt from the content plus optional modifiers.

    Each provided piece is appended as its own "\n\n"-prefixed section.
    """
    sections = []
    combined = pdfContent + text
    if pdfContent:
        sections.append(f"{QUESTION_MODIFIER} {combined}")
    if tone:
        sections.append(f"{TONE_MODIFIER} {tone}.")
    if length:
        sections.append(LENGTH_MODIFIERS[length])
    if language:
        sections.append(f"{LANGUAGE_MODIFIER} {language}.")
    return "".join(f"\n\n{section}" for section in sections)