Spaces:

abdullahzunorain
/

Audio-or-Video-Transcription-and-Summarization-Application

Running

App Files Files Community

abdullahzunorain committed on Nov 2, 2024

Commit

bf8ea22

·

verified ·

1 Parent(s): 8caf173

Create app.py

Files changed (1) hide show

app.py +80 -0

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import os
+import ffmpeg
+import whisper
+import streamlit as st
+from groq import Groq
+# Set the title and description of the app
+st.title("Audio/Video Transcription and Summarization")
+st.write("Upload your audio or video file, and this app will transcribe the audio and provide a summary of the transcription.")
+# Get the API key from user input (You may want to use Streamlit secrets management)
+GROQ_API_KEY = st.text_input("Enter your Groq API Key:")
+os.environ["GROQ_API_KEY"] = GROQ_API_KEY
+# Upload the audio or video file
+uploaded_file = st.file_uploader("Choose an audio or video file...", type=["mp4", "mov", "avi", "mkv", "wav", "mp3"])
+# Function to extract audio from video
+def extract_audio(video_path, audio_path="temp_audio.wav"):
+    """Extracts audio from video."""
+    try:
+        # Run ffmpeg command with stderr capture for better error handling
+        ffmpeg.input(video_path).output(audio_path).run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
+    except ffmpeg.Error as e:
+        st.error("FFmpeg error encountered: " + e.stderr.decode())
+    return audio_path
+# Function to transcribe audio to text using Whisper model
+def transcribe_audio(audio_path):
+    """Transcribes audio to text using Whisper model."""
+    model = whisper.load_model("base")  # Load the Whisper model
+    result = model.transcribe(audio_path)
+    return result["text"]
+# Function to summarize text using Groq API
+def summarize_text(text):
+    """Summarizes text using Groq API."""
+    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+    response = client.chat.completions.create(
+        messages=[{"role": "user", "content": f"Summarize the following text: {text}"}],
+        model="llama3-8b-8192"
+    )
+    summary = response.choices[0].message.content
+    return summary
+# Complete function to process audio or video
+def process_media(media_file):
+    """Processes audio or video: extracts audio, transcribes it, and summarizes the transcription."""
+    # Save the uploaded file to a temporary path
+    temp_file_path = f"temp/{media_file.name}"
+    with open(temp_file_path, "wb") as f:
+        f.write(media_file.getbuffer())
+    # Determine if the file is a video or audio based on the file extension
+    if media_file.name.endswith(('.mp4', '.mov', '.avi', '.mkv')):
+        # Step 1: Extract audio from video
+        audio_path = extract_audio(temp_file_path)
+    else:
+        audio_path = temp_file_path  # If it's already audio, use it as is
+    # Step 2: Transcribe audio to text
+    transcription = transcribe_audio(audio_path)
+    st.write("### Transcription:")
+    st.write(transcription)
+    # Step 3: Summarize transcription
+    summary = summarize_text(transcription)
+    st.write("### Summary:")
+    st.write(summary)
+    # Clean up temporary files if needed
+    os.remove(temp_file_path)
+    if media_file.name.endswith(('.mp4', '.mov', '.avi', '.mkv')):
+        os.remove(audio_path)
+# Run the app
+if uploaded_file is not None and GROQ_API_KEY:
+    process_media(uploaded_file)
+else:
+    st.warning("Please upload a file and enter your Groq API key.")