Spaces:
Running
Running
import streamlit as st | |
import whisper | |
import os | |
import tempfile | |
import subprocess | |
import datetime | |
# Helper: Format seconds into ASS time format (H:MM:SS.cs)
def format_ass_time(seconds):
    """Return *seconds* formatted as an ASS timestamp ``H:MM:SS.cs``.

    Args:
        seconds: Offset from the start of the media, in seconds (int or
            float). Negative values are clamped to zero.

    Returns:
        A string such as ``"1:01:01.50"``: hours are not zero-padded,
        minutes and seconds are two digits, and the fraction is in
        centiseconds (truncated, never rounded up).
    """
    # timedelta stores whole microseconds, so dividing by a 10 ms timedelta
    # gives an exact integer centisecond count. Float math such as
    # int((x - int(x)) * 100) can lose a centisecond (0.57 -> 56).
    td = datetime.timedelta(seconds=max(seconds, 0))
    total_cs = td // datetime.timedelta(milliseconds=10)
    total_seconds, cs = divmod(total_cs, 100)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours}:{minutes:02d}:{secs:02d}.{cs:02d}"
# Load Whisper model (cached for performance)
@st.cache_resource
def load_model():
    """Load the Whisper "small" model and return it.

    ``st.cache_resource`` keeps the loaded model alive across Streamlit
    reruns, so it is not reloaded on every call.

    Returns:
        The object returned by ``whisper.load_model("small")``.
    """
    return whisper.load_model("small")  # Using "small" for free usage
# Run Whisper over a video file and hand back the transcript segments.
def transcribe_video(video_path):
    """Transcribe the media at *video_path* and return its segments.

    Args:
        video_path: Path passed straight to the model's ``transcribe``.

    Returns:
        The ``"segments"`` entry of the transcription result.
    """
    transcription = load_model().transcribe(video_path)
    return transcription["segments"]
# Create an ASS file with karaoke-style effects.
# Each segment is split into words; each word gets an ASS karaoke tag (\k)
# which gradually reveals it. The unrevealed text is made fully transparent.
def create_ass(segments, ass_path):
    """Write *segments* to *ass_path* as an ASS subtitle file with \\k tags.

    Args:
        segments: Iterable of dicts with numeric ``"start"`` and ``"end"``
            values (seconds) and an optional ``"text"`` string. Segments
            whose text is empty or whitespace-only are skipped.
        ass_path: Destination file path; written as UTF-8 and overwritten
            if it already exists.

    Each segment becomes one ``Dialogue`` line. The segment duration is
    split evenly across its words, with leftover centiseconds given to the
    leading words so the \\k durations add up to the segment length. Every
    word gets at least 1 cs.
    """
    header = """[Script Info]
Title: AI Captioning
ScriptType: v4.00+
Collisions: Normal
PlayResX: 1280
PlayResY: 720
Timer: 100.0000
[V4+ Styles]
; Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,36,&H00FFFFFF,&HFF000000,&H00000000,&H64000000,0,0,0,0,100,100,0,0,1,2,0,2,10,10,30,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""
    events = []
    for segment in segments:
        words = segment.get("text", "").split()
        if not words:
            continue
        start_time = format_ass_time(segment["start"])
        end_time = format_ass_time(segment["end"])
        # Round rather than truncate: a float difference such as 1.2 can be
        # stored as 1.1999999999999997 and would lose a centisecond.
        total_cs = max(int(round((segment["end"] - segment["start"]) * 100)), 0)
        # Spread the remainder over the first words so the highlight sweep
        # ends when the line does, not up to len(words) - 1 cs early.
        base_cs, extra_cs = divmod(total_cs, len(words))
        ass_text = " ".join(
            r"{\k"
            + str(max(base_cs + (1 if index < extra_cs else 0), 1))
            + "}"
            + word
            for index, word in enumerate(words)
        )
        events.append(
            f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{ass_text}\n"
        )
    with open(ass_path, "w", encoding="utf-8") as f:
        f.write(header)
        f.writelines(events)
# Burn the ASS subtitles into the video using FFmpeg.
def burn_captions(video_path, ass_path, output_path):
    """Render the subtitles in *ass_path* onto *video_path* with ffmpeg.

    Args:
        video_path: Input video file.
        ass_path: ASS subtitle file, passed to ffmpeg's ``subtitles``
            filter. NOTE(review): the path is interpolated unescaped, so
            characters special to ffmpeg filter syntax (e.g. ``:`` or
            ``'``) would presumably need escaping. Confirm if paths can
            contain them.
        output_path: Destination video file; overwritten if it exists.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status (``check=True``).
    """
    command = [
        "ffmpeg",
        # Overwrite without asking: with no terminal attached, ffmpeg
        # cannot be answered and refuses to write an existing output.
        "-y",
        "-i", video_path,
        "-vf", f"subtitles={ass_path}",
        "-c:a", "copy",
        output_path,
    ]
    subprocess.run(command, check=True)
# Streamlit UI
# Flow: upload a video -> preview it -> on button press transcribe it,
# write an ASS karaoke subtitle file, burn it in with ffmpeg, and offer the
# result for download.
st.title("🎥 AI Video Captioning App with Karaoke-Style Captions")
uploaded_file = st.file_uploader("Upload a video file", type=["mp4", "mkv", "avi", "mov"])
if uploaded_file:
    # Save the uploaded video to a temporary file.
    # getvalue() returns the whole upload regardless of the current stream
    # position, unlike read().
    # NOTE: delete=False means these temp files are not removed automatically.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
        temp_video.write(uploaded_file.getvalue())
        video_path = temp_video.name

    st.video(video_path)

    if st.button("Generate Captions & Burn into Video"):
        # Derive sibling paths from the stem only; str.replace(".mp4", ...)
        # would also rewrite a ".mp4" appearing in a directory name.
        base_path, _ = os.path.splitext(video_path)
        ass_path = base_path + ".ass"
        output_video_path = base_path + "_captioned.mp4"

        with st.spinner("Generating captions..."):
            segments = transcribe_video(video_path)

        # Create ASS subtitle file
        create_ass(segments, ass_path)

        with st.spinner("Burning captions into video..."):
            burn_captions(video_path, ass_path, output_video_path)

        st.success("Processing complete! Download your video below.")
        with open(output_video_path, "rb") as file:
            st.download_button("📥 Download Captioned Video", file, file_name="captioned_video.mp4", mime="video/mp4")