Hasnain-Ali committed on
Commit
31043f6
·
verified ·
1 Parent(s): 832b30c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -29
app.py CHANGED
@@ -3,49 +3,90 @@ import whisper
3
  import os
4
  import tempfile
5
  import subprocess
6
- import srt
7
  import datetime
8
 
9
@st.cache_resource
def load_model():
    """Load the Whisper "small" checkpoint.

    The function is wrapped in ``st.cache_resource``; the small model is
    the one chosen for free usage.

    Returns:
        The object returned by ``whisper.load_model("small")``.
    """
    model_name = "small"
    return whisper.load_model(model_name)
13
 
14
def transcribe_video(video_path):
    """Transcribe the media file at ``video_path`` with Whisper.

    Args:
        video_path: Path handed straight to ``model.transcribe``.

    Returns:
        The ``"segments"`` entry of the transcription result.
    """
    transcription = load_model().transcribe(video_path)
    return transcription["segments"]
19
 
20
def create_srt(subtitles, srt_path):
    """Write transcription segments to ``srt_path`` as an SRT document.

    Args:
        subtitles: Iterable of mappings with ``"start"`` and ``"end"``
            (seconds) and ``"text"`` keys.
        srt_path: Destination file; written as UTF-8.
    """

    def as_subtitle(position, segment):
        # The srt library expects timedelta objects, not float seconds.
        return srt.Subtitle(
            index=position,
            start=datetime.timedelta(seconds=segment["start"]),
            end=datetime.timedelta(seconds=segment["end"]),
            content=segment["text"],
        )

    # SRT cue numbers are 1-based.
    entries = [
        as_subtitle(position, segment)
        for position, segment in enumerate(subtitles, start=1)
    ]
    with open(srt_path, "w", encoding="utf-8") as out_file:
        out_file.write(srt.compose(entries))
35
-
36
def _escape_ffmpeg_filter_value(value):
    """Escape ``value`` for use as an option value inside a ``-vf`` string."""
    # Level 1: the filter's own option parser treats these as special.
    for char in ("\\", "'", ":"):
        value = value.replace(char, "\\" + char)
    # Level 2: the filtergraph parser treats these as special.
    for char in ("\\", "'", "[", "]", ",", ";"):
        value = value.replace(char, "\\" + char)
    return value


def burn_captions(video_path, srt_path, output_path):
    """Burn the subtitles in ``srt_path`` into the video using FFmpeg.

    The subtitle path is escaped before being embedded in the filtergraph,
    so paths containing ``:``, ``\\``, ``'``, ``,``, ``[``, ``]`` or ``;``
    (for example Windows drive paths) no longer corrupt the ``subtitles``
    filter arguments.

    Args:
        video_path: Input video file.
        srt_path: Subtitle file to render onto the video frames.
        output_path: Destination video file; the audio stream is copied.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero
            status (``check=True``).
    """
    subtitle_filter = f"subtitles={_escape_ffmpeg_filter_value(str(srt_path))}"
    command = [
        "ffmpeg",
        "-i", video_path,
        "-vf", subtitle_filter,
        "-c:a", "copy",
        output_path,
    ]
    subprocess.run(command, check=True)
46
 
47
- # Streamlit app UI
48
- st.title("🎥 AI Video Captioning App")
49
 
50
  uploaded_file = st.file_uploader("Upload a video file", type=["mp4", "mkv", "avi", "mov"])
51
 
@@ -59,15 +100,16 @@ if uploaded_file:
59
 
60
  if st.button("Generate Captions & Burn into Video"):
61
  with st.spinner("Generating captions..."):
62
- captions = transcribe_video(video_path)
63
 
64
- srt_path = video_path.replace(".mp4", ".srt")
65
- create_srt(captions, srt_path)
 
66
 
67
  output_video_path = video_path.replace(".mp4", "_captioned.mp4")
68
 
69
  with st.spinner("Burning captions into video..."):
70
- burn_captions(video_path, srt_path, output_video_path)
71
 
72
  st.success("Processing complete! Download your video below.")
73
  with open(output_video_path, "rb") as file:
 
3
  import os
4
  import tempfile
5
  import subprocess
 
6
  import datetime
7
 
8
def format_ass_time(seconds):
    """Format an offset in seconds as an ASS timestamp (``H:MM:SS.cs``).

    The value is rounded to whole centiseconds first and then split with
    integer arithmetic. Deriving the centiseconds from a float subtraction
    and truncating (the previous approach) turned 1.15 into ``.14`` and
    0.29 into ``.28``.

    Args:
        seconds: Offset in seconds (int or float). Negative values are
            clamped to zero, since a negative timestamp cannot be
            expressed in this format.

    Returns:
        The timestamp string, e.g. ``"1:01:01.50"`` for 3661.5.
    """
    total_cs = max(0, round(float(seconds) * 100))
    whole_seconds, cs = divmod(total_cs, 100)
    total_minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours}:{minutes:02d}:{secs:02d}.{cs:02d}"
18
+
19
@st.cache_resource
def load_model():
    """Return the Whisper "small" model.

    Decorated with ``st.cache_resource``; "small" is the size picked for
    free usage.
    """
    whisper_model = whisper.load_model("small")
    return whisper_model
23
 
24
def transcribe_video(video_path):
    """Run Whisper on ``video_path`` and return its segments.

    Args:
        video_path: Path passed unchanged to ``model.transcribe``.

    Returns:
        The value stored under ``"segments"`` in the transcription result.
    """
    whisper_model = load_model()
    return whisper_model.transcribe(video_path)["segments"]
29
 
30
def create_ass(segments, ass_path):
    """Write an ASS subtitle file with karaoke-style word highlighting.

    Each segment becomes one ``Dialogue`` event. Its text is split on
    whitespace and every word is prefixed with a ``\\k`` tag whose value
    is that word's share of the segment duration in centiseconds.

    The shares always add up to the segment duration: the remainder of
    the integer division is spread over the leading words instead of
    being dropped, and no minimum of 1 cs is forced, which could
    overshoot the segment when it holds more words than centiseconds.

    Args:
        segments: Iterable of mappings with ``"start"`` and ``"end"``
            (seconds) and an optional ``"text"`` key. Segments whose text
            is empty or whitespace-only are skipped.
        ass_path: Destination file; written as UTF-8.
    """
    header = """[Script Info]
Title: AI Captioning
ScriptType: v4.00+
Collisions: Normal
PlayResX: 1280
PlayResY: 720
Timer: 100.0000

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,36,&H00FFFFFF,&H000000FF,&H00000000,&H64000000,0,0,0,0,100,100,0,0,1,2,0,2,10,10,30,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""
    dialogue_lines = []
    for segment in segments:
        words = segment.get("text", "").split()
        if not words:
            continue

        start_time = format_ass_time(segment["start"])
        end_time = format_ass_time(segment["end"])

        # Work in whole centiseconds; a reversed segment collapses to 0.
        start_cs = round(float(segment["start"]) * 100)
        end_cs = round(float(segment["end"]) * 100)
        total_cs = max(end_cs - start_cs, 0)

        # The first `extra_cs` words get one extra centisecond each so the
        # \k values sum exactly to the segment duration.
        base_cs, extra_cs = divmod(total_cs, len(words))

        tagged_words = []
        for position, word in enumerate(words):
            duration = base_cs + (1 if position < extra_cs else 0)
            # Braces delimit ASS override blocks; swap literal ones so the
            # transcript text cannot be parsed as a tag.
            safe_word = word.replace("{", "(").replace("}", ")")
            tagged_words.append(r"{\k" + str(duration) + "}" + safe_word)
        ass_text = " ".join(tagged_words)

        dialogue_lines.append(
            f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{ass_text}\n"
        )

    with open(ass_path, "w", encoding="utf-8") as f:
        f.write(header + "".join(dialogue_lines))
76
+
77
def _escape_ffmpeg_filter_value(value):
    """Escape ``value`` for use as an option value inside a ``-vf`` string."""
    # Level 1: the filter's own option parser treats these as special.
    for char in ("\\", "'", ":"):
        value = value.replace(char, "\\" + char)
    # Level 2: the filtergraph parser treats these as special.
    for char in ("\\", "'", "[", "]", ",", ";"):
        value = value.replace(char, "\\" + char)
    return value


def burn_captions(video_path, ass_path, output_path):
    """Burn the ASS subtitles in ``ass_path`` into the video using FFmpeg.

    The subtitle path is escaped before being embedded in the filtergraph,
    so paths containing ``:``, ``\\``, ``'``, ``,``, ``[``, ``]`` or ``;``
    (for example Windows drive paths) no longer corrupt the ``subtitles``
    filter arguments.

    Args:
        video_path: Input video file.
        ass_path: ASS subtitle file to render onto the video frames.
        output_path: Destination video file; the audio stream is copied.

    Raises:
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero
            status (``check=True``).
    """
    subtitle_filter = f"subtitles={_escape_ffmpeg_filter_value(str(ass_path))}"
    command = [
        "ffmpeg",
        "-i", video_path,
        "-vf", subtitle_filter,
        "-c:a", "copy",
        output_path,
    ]
    subprocess.run(command, check=True)
87
 
88
+ # Streamlit UI
89
+ st.title("🎥 AI Video Captioning App with Karaoke-Style Captions")
90
 
91
  uploaded_file = st.file_uploader("Upload a video file", type=["mp4", "mkv", "avi", "mov"])
92
 
 
100
 
101
  if st.button("Generate Captions & Burn into Video"):
102
  with st.spinner("Generating captions..."):
103
+ segments = transcribe_video(video_path)
104
 
105
+ # Create ASS subtitle file
106
+ ass_path = video_path.replace(".mp4", ".ass")
107
+ create_ass(segments, ass_path)
108
 
109
  output_video_path = video_path.replace(".mp4", "_captioned.mp4")
110
 
111
  with st.spinner("Burning captions into video..."):
112
+ burn_captions(video_path, ass_path, output_video_path)
113
 
114
  st.success("Processing complete! Download your video below.")
115
  with open(output_video_path, "rb") as file: