Spaces:

Hasnain-Ali
/

CaptionFlow

Sleeping

App Files Files Community

CaptionFlow / app.py

Hasnain-Ali

Update app.py

6ec0137 verified 7 months ago

raw

history blame contribute delete

4.21 kB

	import streamlit as st
	import whisper
	import os
	import tempfile
	import subprocess
	import datetime

	# Helper: Format seconds into ASS time format (H:MM:SS.cs)
	def format_ass_time(seconds):
	    """Format a time offset in seconds as an ASS timestamp (H:MM:SS.cs).

	    Args:
	        seconds: Offset in seconds (int or float). Negative values are
	            clamped to zero because the format has no sign field.

	    Returns:
	        A string such as "1:01:01.50": unpadded hours, two-digit minutes
	        and seconds, and two-digit centiseconds.
	    """
	    # Convert to whole centiseconds once, rounding to nearest. Truncating the
	    # float fraction instead turns 1.15 into ".14" (1.15 - 1 is 0.1499...),
	    # and rounding here lets a carry such as 59.999 -> 1:00.00 propagate
	    # through the divmod chain below.
	    total_cs = max(int(round(float(seconds) * 100)), 0)
	    total_seconds, cs = divmod(total_cs, 100)
	    total_minutes, secs = divmod(total_seconds, 60)
	    hours, minutes = divmod(total_minutes, 60)
	    return f"{hours}:{minutes:02d}:{secs:02d}.{cs:02d}"

	# Load Whisper model (cached for performance)
	@st.cache_resource
	def load_model():
	    """Load the Whisper model; wrapped in ``st.cache_resource`` for caching.

	    Returns:
	        Whatever ``whisper.load_model`` returns for the "small" model.
	    """
	    # Using "small" for free usage
	    model_size = "small"
	    whisper_model = whisper.load_model(model_size)
	    return whisper_model

	# Transcribe video using Whisper and return segments
	def transcribe_video(video_path):
	    """Transcribe the file at ``video_path`` and return its segments.

	    Args:
	        video_path: Path handed straight to the model's ``transcribe``.

	    Returns:
	        The ``"segments"`` entry of the transcription result.
	    """
	    transcription = load_model().transcribe(video_path)
	    return transcription["segments"]

	# Create an ASS file with karaoke-style effects.
	# Each segment is split into words; each word gets an ASS karaoke tag (\k)
	# which gradually reveals it. The unrevealed text is made fully transparent.
	def create_ass(segments, ass_path):
	    r"""Write an ASS subtitle file with karaoke-style (\k) word timing.

	    Each segment becomes one Dialogue line. Its text is split on whitespace
	    and every word is prefixed with a ``{\k<cs>}`` tag; the segment's duration
	    is shared evenly between the words.

	    Args:
	        segments: Iterable of dicts with "start" and "end" (seconds) and an
	            optional "text" entry. Segments with empty or whitespace-only
	            text are skipped.
	        ass_path: Destination path; the file is written as UTF-8.
	    """
	    # Built from explicit lines so the file content does not depend on how
	    # this function body is indented.
	    # NOTE(review): the styles "Format" line starts with ';', which makes it a
	    # comment in ASS; kept as-is, confirm whether that is intentional.
	    header_lines = (
	        "[Script Info]",
	        "Title: AI Captioning",
	        "ScriptType: v4.00+",
	        "Collisions: Normal",
	        "PlayResX: 1280",
	        "PlayResY: 720",
	        "Timer: 100.0000",
	        "",
	        "[V4+ Styles]",
	        "; Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
	        "Style: Default,Arial,36,&H00FFFFFF,&HFF000000,&H00000000,&H64000000,0,0,0,0,100,100,0,0,1,2,0,2,10,10,30,1",
	        "",
	        "[Events]",
	        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
	    )
	    header = "\n".join(header_lines) + "\n"

	    dialogue_lines = []
	    for segment in segments:
	        # split() with no argument also discards leading/trailing whitespace,
	        # so an empty or blank text yields an empty list.
	        words = segment.get("text", "").split()
	        if not words:
	            continue

	        start_time = format_ass_time(segment["start"])
	        end_time = format_ass_time(segment["end"])

	        # Segment duration in centiseconds, the unit \k expects.
	        total_cs = int(round((segment["end"] - segment["start"]) * 100))

	        if total_cs >= len(words):
	            # Spread the remainder over the first words so the \k durations
	            # add up to the whole segment instead of ending early.
	            base_cs, extra_cs = divmod(total_cs, len(words))
	        else:
	            # Segment too short (or malformed): keep at least 1 cs per word.
	            base_cs, extra_cs = 1, 0

	        # NOTE(review): words are inserted verbatim; a literal '{' or '}' in
	        # the transcript would be read as an override block.
	        ass_text = " ".join(
	            r"{\k" + str(base_cs + (1 if index < extra_cs else 0)) + "}" + word
	            for index, word in enumerate(words)
	        )

	        dialogue_lines.append(
	            f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{ass_text}\n"
	        )

	    with open(ass_path, "w", encoding="utf-8") as f:
	        f.write(header + "".join(dialogue_lines))

	# Burn the ASS subtitles into the video using FFmpeg.
	def burn_captions(video_path, ass_path, output_path):
	    """Render the ASS subtitles onto the video frames with FFmpeg.

	    Args:
	        video_path: Input video passed to ``ffmpeg -i``.
	        ass_path: Subtitle file given to the ``subtitles`` video filter.
	        output_path: Destination file for the captioned video.

	    Raises:
	        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
	    """
	    # NOTE(review): ass_path is interpolated into the filter string unescaped;
	    # paths containing filter metacharacters (e.g. ':' or '\') may need escaping.
	    subtitle_filter = f"subtitles={ass_path}"
	    # The audio stream is copied as-is; only the video goes through the filter.
	    ffmpeg_args = ["ffmpeg", "-i", video_path, "-vf", subtitle_filter, "-c:a", "copy", output_path]
	    subprocess.run(ffmpeg_args, check=True)

	# Streamlit UI
	st.title("🎥 AI Video Captioning App with Karaoke-Style Captions")

	uploaded_file = st.file_uploader("Upload a video file", type=["mp4", "mkv", "avi", "mov"])

	if uploaded_file:
	    # Save the uploaded video to a temporary file so it can be passed by path.
	    # getvalue() returns the whole buffer regardless of the current read
	    # position, whereas read() returns nothing once the buffer was consumed.
	    # NOTE(review): this temp video is never deleted (delete=False) and a new
	    # copy is written on every script run; confirm when it is safe to remove.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
	        temp_video.write(uploaded_file.getvalue())
	        video_path = temp_video.name

	    st.video(video_path)

	    if st.button("Generate Captions & Burn into Video"):
	        # Derive sibling paths from the extension only; str.replace(".mp4", ...)
	        # would also rewrite any earlier ".mp4" occurrence in the path.
	        base_path, _ = os.path.splitext(video_path)
	        ass_path = base_path + ".ass"
	        output_video_path = base_path + "_captioned.mp4"

	        try:
	            with st.spinner("Generating captions..."):
	                segments = transcribe_video(video_path)

	            # Create ASS subtitle file
	            create_ass(segments, ass_path)

	            with st.spinner("Burning captions into video..."):
	                burn_captions(video_path, ass_path, output_video_path)

	            # Read the result into memory so the files on disk can be removed.
	            with open(output_video_path, "rb") as captioned_file:
	                captioned_bytes = captioned_file.read()
	        finally:
	            # Best-effort cleanup of the intermediate files; either may be
	            # missing if an earlier step failed, so a failed remove is ignored.
	            for leftover_path in (ass_path, output_video_path):
	                try:
	                    os.remove(leftover_path)
	                except OSError:
	                    pass

	        st.success("Processing complete! Download your video below.")
	        st.download_button("📥 Download Captioned Video", captioned_bytes, file_name="captioned_video.mp4", mime="video/mp4")