"""Streamlit app that transcribes an uploaded audio/video file and summarizes it.

Flow: the uploaded file is written to a temporary file, audio is extracted
with ffmpeg when the upload is a video, the audio is transcribed with a
Whisper model, and (optionally) the transcription is summarized through the
Groq chat-completions API.
"""

import os
import tempfile

import ffmpeg
import streamlit as st
import whisper
from groq import Groq

# File extensions (lower-case) that are treated as video and therefore need
# their audio track extracted before transcription.
VIDEO_EXTENSIONS = (".mp4", ".mov", ".avi", ".mkv")

# Custom CSS for styling
# NOTE(review): the payloads of the ``unsafe_allow_html`` strings below are
# empty or plain text in this file — confirm no HTML/CSS markup was lost.
st.markdown(""" """, unsafe_allow_html=True)

# App title and description with styling
st.markdown(
    "\n🎙️ Audio/Video Transcription & Summarization\n",
    unsafe_allow_html=True,
)
st.markdown(
    "\nUpload an audio or video file to get a transcription and a concise summary.\n",
    unsafe_allow_html=True,
)

# Sidebar for settings and instructions
with st.sidebar:
    st.header("Settings")
    st.write("Customize your preferences:")
    enable_summary = st.checkbox("Enable Summarization", value=True)
    st.info("Note: Summarization uses the Groq API.")

# Retrieve the API key from environment variables or Streamlit secrets and
# publish it through the environment, where summarize_text() reads it.
GROQ_API_KEY = os.getenv("GROQ_API_KEY") or st.secrets["GROQ_API_KEY"]
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Create a temporary directory for uploads and extracted audio
temp_dir = "temp"
os.makedirs(temp_dir, exist_ok=True)

# Enhanced file upload area
st.markdown("\n", unsafe_allow_html=True)
uploaded_file = st.file_uploader(
    label="Select an audio or video file",
    type=["mp4", "mov", "avi", "mkv", "wav", "mp3"],
    help="Supported formats: mp4, mov, avi, mkv, wav, mp3",
)


def _remove_if_exists(path):
    """Delete *path*, ignoring the case where it was never created."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Deliberate best-effort cleanup: e.g. ffmpeg failed before writing
        # its output file, so there is nothing to remove.
        pass


def extract_audio(video_path, audio_path="temp/temp_audio.wav"):
    """Extract the audio track of a video file with ffmpeg.

    Args:
        video_path: Path of the input video file.
        audio_path: Path the extracted audio is written to; an existing file
            at this path is overwritten.

    Returns:
        ``audio_path`` on success, or ``None`` when ffmpeg reported an error
        (the error is shown to the user via ``st.error``).
    """
    try:
        ffmpeg.input(video_path).output(audio_path).run(
            overwrite_output=True, capture_stdout=True, capture_stderr=True
        )
    except ffmpeg.Error as e:
        # stderr may be missing or contain non-UTF-8 bytes; never let the
        # error report itself raise.
        details = (e.stderr or b"").decode(errors="replace")
        st.error("Error processing file with FFmpeg: " + details)
        # Signal failure instead of returning a path that was never written.
        return None
    return audio_path


def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* with the Whisper "base" model.

    Returns:
        The ``"text"`` entry of the Whisper transcription result.
    """
    # NOTE: the model is loaded on every call; consider caching it if
    # repeated loads become a bottleneck.
    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    return result["text"]


def summarize_text(text):
    """Summarize *text* through the Groq chat-completions API.

    The API key is read from the ``GROQ_API_KEY`` environment variable.

    Returns:
        The message content of the first completion choice.
    """
    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": f"Summarize the following text: {text}"}],
        model="llama3-8b-8192",
    )
    summary = response.choices[0].message.content
    return summary


def process_media(media_file):
    """Transcribe an uploaded file and, if enabled, summarize the transcription.

    The upload is written to a uniquely named temporary file inside
    ``temp_dir``; video uploads additionally get their audio extracted to a
    second temporary file. All temporary files are removed when processing
    ends, whether it succeeded or failed.

    Args:
        media_file: The uploaded file object; its ``name`` and
            ``getbuffer()`` are used.
    """
    # Only the extension of the client-supplied name is used, so the name can
    # neither escape temp_dir nor collide with another upload of the same name.
    extension = os.path.splitext(media_file.name)[1].lower()
    is_video = extension in VIDEO_EXTENSIONS

    with tempfile.NamedTemporaryFile(
        dir=temp_dir, suffix=extension, delete=False
    ) as f:
        f.write(media_file.getbuffer())
        temp_file_path = f.name

    extracted_audio_path = None
    try:
        # Extract audio if the file is a video
        if is_video:
            st.info("Extracting audio from video...")
            extracted_audio_path = os.path.splitext(temp_file_path)[0] + "_audio.wav"
            audio_path = extract_audio(temp_file_path, extracted_audio_path)
            if audio_path is None:
                # extract_audio already reported the failure to the user.
                return
        else:
            audio_path = temp_file_path

        # Transcribe audio
        with st.spinner("Transcribing audio..."):
            transcription = transcribe_audio(audio_path)
        st.success("Transcription completed!")
        st.write("### Transcription:")
        st.write(transcription)

        # Summarize transcription if enabled
        if enable_summary:
            with st.spinner("Generating summary..."):
                summary = summarize_text(transcription)
            st.success("Summary generated!")
            st.write("### Summary:")
            st.write(summary)
    finally:
        # Cleanup runs on every exit path so failed runs do not leak files.
        _remove_if_exists(temp_file_path)
        if extracted_audio_path is not None:
            _remove_if_exists(extracted_audio_path)


if uploaded_file is not None:
    st.info("Processing your file, please wait...")
    process_media(uploaded_file)
else:
    st.warning("Please upload an audio or video file to begin.")

# Footer with branding
st.markdown(""" """, unsafe_allow_html=True)
st.markdown("\n", unsafe_allow_html=True)