Spaces:

Hasnain-Ali
/

CaptionFlow

Running

App Files Files Community

Hasnain-Ali committed on Feb 20

Commit

31043f6

verified ·

1 Parent(s): 832b30c

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -29

app.py CHANGED Viewed

@@ -3,49 +3,90 @@ import whisper
 import os
 import tempfile
 import subprocess
-import srt
 import datetime
-# Load Whisper model (using cache for performance)
 @st.cache_resource
 def load_model():
-    return whisper.load_model("small")  # Use the small model for free usage
-# Transcribe video using Whisper
 def transcribe_video(video_path):
     model = load_model()
     result = model.transcribe(video_path)
     return result["segments"]
-# Create an SRT file from the transcription segments
-def create_srt(subtitles, srt_path):
-    subs = []
-    for i, segment in enumerate(subtitles):
-        # Convert float seconds to timedelta objects
-        start_time = datetime.timedelta(seconds=segment["start"])
-        end_time = datetime.timedelta(seconds=segment["end"])
-        subs.append(srt.Subtitle(
-            index=i + 1,
-            start=start_time,
-            end=end_time,
-            content=segment["text"]
-        ))
-    with open(srt_path, "w", encoding="utf-8") as f:
-        f.write(srt.compose(subs))
-# Burn subtitles into the video using FFmpeg
-def burn_captions(video_path, srt_path, output_path):
     command = [
         "ffmpeg",
         "-i", video_path,
-        "-vf", f"subtitles={srt_path}",
         "-c:a", "copy",
         output_path
     ]
     subprocess.run(command, check=True)
-# Streamlit app UI
-st.title("🎥 AI Video Captioning App")
 uploaded_file = st.file_uploader("Upload a video file", type=["mp4", "mkv", "avi", "mov"])
@@ -59,15 +100,16 @@ if uploaded_file:
     if st.button("Generate Captions & Burn into Video"):
         with st.spinner("Generating captions..."):
-            captions = transcribe_video(video_path)
-        srt_path = video_path.replace(".mp4", ".srt")
-        create_srt(captions, srt_path)
         output_video_path = video_path.replace(".mp4", "_captioned.mp4")
         with st.spinner("Burning captions into video..."):
-            burn_captions(video_path, srt_path, output_video_path)
         st.success("Processing complete! Download your video below.")
         with open(output_video_path, "rb") as file:

 import os
 import tempfile
 import subprocess
 import datetime
def format_ass_time(seconds):
    """Format a time offset in seconds as an ASS timestamp (H:MM:SS.cs).

    Args:
        seconds: Offset from the start of the video, in seconds (int or
            float). Negative values are clamped to zero.

    Returns:
        A string such as "0:01:02.50": hours unpadded, minutes and seconds
        zero-padded to two digits, followed by two-digit centiseconds.
    """
    # Round to whole centiseconds exactly once. Truncating the fractional
    # part instead (int(frac * 100)) turns values such as 0.57 into ".56"
    # because 0.57 * 100 evaluates to 56.99999999999999 in binary floating
    # point. Rounding first also lets 59.999 carry cleanly into "0:01:00.00".
    total_cs = max(int(round(seconds * 100)), 0)
    whole_seconds, cs = divmod(total_cs, 100)
    whole_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(whole_minutes, 60)
    return f"{hours}:{minutes:02d}:{secs:02d}.{cs:02d}"
@st.cache_resource
def load_model():
    """Return the Whisper model, loaded through Streamlit's resource cache.

    The function is wrapped in ``st.cache_resource`` so the model is cached
    for performance rather than loaded on every call.
    """
    # The "small" checkpoint is used for free usage.
    whisper_model = whisper.load_model("small")
    return whisper_model
def transcribe_video(video_path):
    """Transcribe the media file at *video_path* with Whisper.

    Args:
        video_path: Path of the file handed to the model's ``transcribe``.

    Returns:
        The ``"segments"`` entry of the transcription result.
    """
    transcription = load_model().transcribe(video_path)
    return transcription["segments"]
def create_ass(segments, ass_path):
    """Write an ASS subtitle file with karaoke-style word reveal.

    Each segment becomes one ``Dialogue`` event. The segment text is split
    on whitespace and every word is prefixed with a ``\\k`` karaoke tag, so
    the words are revealed one after another over the segment's duration.

    Args:
        segments: Iterable of dicts with ``"start"`` and ``"end"`` (seconds)
            and, optionally, ``"text"``. Segments whose text is missing or
            blank are skipped.
        ass_path: Destination path; the file is written as UTF-8 and
            overwritten if it already exists.
    """
    header = """[Script Info]
Title: AI Captioning
ScriptType: v4.00+
Collisions: Normal
PlayResX: 1280
PlayResY: 720
Timer: 100.0000
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,36,&H00FFFFFF,&H000000FF,&H00000000,&H64000000,0,0,0,0,100,100,0,0,1,2,0,2,10,10,30,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""
    events = []
    for segment in segments:
        words = segment.get("text", "").split()
        if not words:
            continue
        start_time = format_ass_time(segment["start"])
        end_time = format_ass_time(segment["end"])
        # Segment length in centiseconds, the unit used by \k. Round rather
        # than truncate: (2.3 - 1.0) * 100 is 129.99999999999997 as a float
        # and would otherwise lose a centisecond.
        span = segment["end"] - segment["start"]
        total_cs = max(int(round(span * 100)), 0)
        # Spread the duration across the words and hand the remainder to the
        # first few, so the \k values add up to exactly the segment length
        # and the reveal finishes when the cue ends (not before or after).
        base_cs, leftover = divmod(total_cs, len(words))
        ass_text = " ".join(
            r"{\k" + str(base_cs + (1 if position < leftover else 0)) + "}" + word
            for position, word in enumerate(words)
        )
        events.append(
            f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{ass_text}\n"
        )
    with open(ass_path, "w", encoding="utf-8") as f:
        f.write(header + "".join(events))
def _escape_filter_value(value):
    """Escape *value* for use as an option value inside an FFmpeg ``-vf`` string."""
    # Level 1: the filter's own option parser treats \ ' and : as special.
    for char in ("\\", "'", ":"):
        value = value.replace(char, "\\" + char)
    # Level 2: the filtergraph parser treats \ ' [ ] , and ; as special.
    for char in ("\\", "'", "[", "]", ",", ";"):
        value = value.replace(char, "\\" + char)
    return value


def burn_captions(video_path, ass_path, output_path):
    """Burn the ASS subtitles into the video using FFmpeg.

    Args:
        video_path: Path of the input video.
        ass_path: Path of the subtitle file passed to the ``subtitles``
            filter.
        output_path: Path of the captioned video to produce.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero
            status (``check=True``).
    """
    # The subtitle path is embedded in a filtergraph string, not passed as a
    # plain argument, so characters such as ':' or '\' (e.g. in Windows
    # paths) must be escaped or FFmpeg mis-parses the filter. Paths without
    # special characters pass through unchanged.
    subtitle_filter = f"subtitles={_escape_filter_value(ass_path)}"
    command = [
        "ffmpeg",
        "-i", video_path,
        "-vf", subtitle_filter,
        "-c:a", "copy",  # audio is copied as-is
        output_path,
    ]
    subprocess.run(command, check=True)
+# Streamlit UI
+st.title("🎥 AI Video Captioning App with Karaoke-Style Captions")
 uploaded_file = st.file_uploader("Upload a video file", type=["mp4", "mkv", "avi", "mov"])
     if st.button("Generate Captions & Burn into Video"):
         with st.spinner("Generating captions..."):
+            segments = transcribe_video(video_path)
+        # Create ASS subtitle file
+        ass_path = video_path.replace(".mp4", ".ass")
+        create_ass(segments, ass_path)
         output_video_path = video_path.replace(".mp4", "_captioned.mp4")
         with st.spinner("Burning captions into video..."):
+            burn_captions(video_path, ass_path, output_video_path)
         st.success("Processing complete! Download your video below.")
         with open(output_video_path, "rb") as file: