import requests
import json
import os
import xml.etree.ElementTree as ET
import openai
import edge_tts
from pydub import AudioSegment
import re
import time
import asyncio
import streamlit as st
from datetime import datetime
from dotenv import load_dotenv

load_dotenv()

# Output artifacts reused between runs
PODCAST_FILE = "merged_audio.mp3"
SHOW_NOTES_FILE = "show_notes.txt"
CONVERSATION_FILE = "conversation.json"
LAST_RUN_FILE = "last_run.txt"

# Fetch and parse today's paper feed
feed_url = "http://papers.takara.ai/api/feed"
response = requests.get(feed_url)
tree = ET.ElementTree(ET.fromstring(response.text))

daily_feed = ""
items = []
for item in tree.iter("item"):
    title = item.find("title").text
    link = item.find("link").text
    description = item.find("description").text
    items.append({"title": title, "link": link, "description": description})
    daily_feed += f"Title: {title.strip()}\nDescription: {description}\n\n"

# DeepInfra exposes an OpenAI-compatible endpoint
client = openai.Client(
    api_key=os.getenv("DEEPINFRA_API"),
    base_url="https://api.deepinfra.com/v1/openai",
)


def build_prompt(text, items):
    """Builds a prompt asking for a balanced podcast conversation covering all research papers in a strict JSON format."""
    paper_summaries = "\n".join(
        f"- {item['title']} ({item['link']}): {item['description']}" for item in items
    )

    template = """
{
  "conversation": [
    {"speaker": "Brian", "text": ""},
    {"speaker": "Jenny", "text": ""}
  ]
}
"""

    return (
        f"πŸŽ™οΈ Welcome to Daily Papers! Today, we're diving into the latest AI research in an engaging and "
        f"informative discussion. The goal is a **medium-length podcast** that is **engaging, natural, and insightful** while covering "
        f"the key points of each paper.\n\n"
        f"Here are today's research papers:\n{paper_summaries}\n\n"
        f"Convert this into a **conversational podcast-style discussion** between two experts, Brian and Jenny. "
        f"Ensure the conversation flows naturally, using a mix of **insightful analysis, casual phrasing, and occasional filler words** like 'uhm' and 'you know' "
        f"to keep it realistic. The tone should be engaging yet professional, making it interesting for the audience.\n\n"
        f"Each research paper should be **discussed meaningfully**, but avoid dragging the conversation out. "
        f"Focus on key insights and practical takeaways, and keep the pacing dynamic and interactive.\n\n"
        f"Please return the conversation in **this exact JSON format**:\n{template}"
    )


def extract_conversation(text, items, max_retries=3):
    """Requests a podcast conversation from the LLM and extracts the JSON payload, retrying on failure."""
    for attempt in range(1, max_retries + 1):
        try:
            print(f"Attempt {attempt} to generate conversation...")
            chat_completion = client.chat.completions.create(
                messages=[{"role": "user", "content": build_prompt(text, items)}],
                model="meta-llama/Llama-3.3-70B-Instruct-Turbo",
                temperature=0.7,
                max_tokens=4096,
            )
            # Match the outermost JSON object (one level of nesting) in the model output
            pattern = r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}"
            json_match = re.search(pattern, chat_completion.choices[0].message.content)
            if json_match:
                return json.loads(json_match.group())
            raise ValueError("No valid JSON found in response")
        except Exception as e:
            print(f"Error: {e}")
            if attempt < max_retries:
                time.sleep(2)
            else:
                raise RuntimeError(f"Failed after {max_retries} attempts.")


def generate_show_notes(items):
    """Creates structured show notes summarizing the research papers."""
    notes = "**Show Notes**\n\nIn today's episode:\n\n"
    for i, item in enumerate(items):
        notes += (
            f"{i + 1}. **{item['title']}**\n"
            f"   - {item['description']}\n"
            f"   - [Read More]({item['link']})\n\n"
        )
    return notes


async def generate_audio_parallel(conversation):
    """Generates one audio clip per conversation turn, synthesizing them concurrently."""
    tasks = []
    audio_files = []
    for i, turn in enumerate(conversation["conversation"]):
        output_file = f"audio_{i}.mp3"
        voice = "en-GB-RyanNeural" if turn["speaker"] == "Brian" else "en-US-AvaMultilingualNeural"
        tasks.append(edge_tts.Communicate(text=turn["text"], voice=voice).save(output_file))
        audio_files.append(output_file)
    await asyncio.gather(*tasks)
    return audio_files


def merge_audio_files(audio_files, output_file="merged_audio.mp3"):
    """Merges multiple audio files into one MP3 file."""
    combined = AudioSegment.empty()
    for file in audio_files:
        combined += AudioSegment.from_file(file)
    combined.export(output_file, format="mp3")
    print(f"Merged audio saved to {output_file}")


def save_to_file(content, filename):
    """Saves text content to a file."""
    with open(filename, "w", encoding="utf-8") as file:
        file.write(content)


def load_conversation():
    """Loads the saved conversation from disk, if it exists."""
    if os.path.exists(CONVERSATION_FILE):
        with open(CONVERSATION_FILE, "r", encoding="utf-8") as file:
            return json.load(file)
    return None


async def generate_podcast():
    """Generates the podcast content (once per day, or on force generate)."""
    if os.path.exists(LAST_RUN_FILE):
        with open(LAST_RUN_FILE, "r") as file:
            last_run_date = file.read().strip()
        if last_run_date == datetime.today().strftime("%Y-%m-%d"):
            print("Podcast already generated today. Skipping...")
            return

    conversation = extract_conversation(daily_feed, items)
    save_to_file(json.dumps(conversation, indent=2), CONVERSATION_FILE)
    save_to_file(generate_show_notes(items), SHOW_NOTES_FILE)

    print("Generating audio...")
    audio_files = await generate_audio_parallel(conversation)
    merge_audio_files(audio_files)

    # Remove the per-turn clips once the merged episode exists
    for file in audio_files:
        os.remove(file)

    with open(LAST_RUN_FILE, "w") as file:
        file.write(datetime.today().strftime("%Y-%m-%d"))

    print("Podcast and show notes generated successfully.")


asyncio.run(generate_podcast())

# --- Streamlit UI ---
st.set_page_config(page_title="Daily Papers Podcast", page_icon="πŸŽ™οΈ", layout="wide")
st.title("πŸŽ™οΈ Today's Daily Papers Podcast")
st.subheader("Your Daily AI Research Insights - Engaging & Informative")

col1, col2 = st.columns([0.2, 0.8])
with col1:
    st.image("Logo.png", width=120)  # Ensure Logo.png is in the working directory
with col2:
    st.markdown(
        """
        **Powered by:**
        πŸ”— [HF Daily Papers Feeds](https://github.com/404missinglink/HF-Daily-Papers-Feeds)
        πŸ”— [TLDR Takara AI](https://tldr.takara.ai/)
        πŸ”— [Takara AI Papers Feed](http://papers.takara.ai/api/feed)
        """
    )

# Styling divider
st.markdown("---")

conversation_data = load_conversation()

show_notes = "**No show notes available.**"
if os.path.exists(SHOW_NOTES_FILE):
    with open(SHOW_NOTES_FILE, "r", encoding="utf-8") as f:
        show_notes = f.read()

col1, col2 = st.columns([2, 1])
with col1:
    st.subheader("🎧 Listen to the Podcast")
    if os.path.exists(PODCAST_FILE):
        with open(PODCAST_FILE, "rb") as f:
            audio_bytes = f.read()
        st.audio(audio_bytes, format="audio/mp3")
    else:
        st.warning("No podcast available. Please generate an episode.")

    st.subheader("πŸ“œ Show Notes")
    st.markdown(show_notes)

with col2:
    st.subheader("πŸ—¨οΈ Podcast Conversation")
    if conversation_data:
        for msg in conversation_data["conversation"]:
            st.write(f"**{msg['speaker']}**: {msg['text']}")
    else:
        st.warning("No conversation data available.")

st.markdown("---")

if st.button("πŸ”„ Force Generate Podcast"):
    asyncio.run(generate_podcast())
    st.rerun()

st.markdown("πŸ“’ **Stay tuned for more AI research insights!**")
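
# ---------------------------------------------------------------------------
# Local run notes (a minimal sketch, not part of the original script): the
# file name "app.py" below is an assumption; the DEEPINFRA_API key is expected
# in a .env file, as read by load_dotenv() above.
#
#   pip install streamlit requests openai edge-tts pydub python-dotenv
#   streamlit run app.py
#
# pydub needs ffmpeg available on the PATH to decode and export MP3 audio.
# ---------------------------------------------------------------------------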