|
from faster_whisper import WhisperModel |
|
import subprocess |
|
import os |
|
import datetime |
|
|
|
|
|
# Shared transcription model: created once when this module is imported and
# reused by generate_subtitles() for every video.
model = WhisperModel(
    "guillaumekln/faster-whisper-small",
    compute_type="int8",
)
|
|
|
def format_timestamp(seconds: float) -> str:
    """Format a time offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded once to whole milliseconds and then split with
    integer arithmetic. Extracting the fractional part with float
    subtraction and truncating it loses a millisecond on values such as
    ``1.001`` (which would render as ``,000``); rounding first also lets a
    value like ``59.9996`` carry cleanly into the next minute.

    Args:
        seconds: Offset from the start of the media, in seconds. Negative
            values are clamped to zero because SRT has no negative times.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    total_ms = max(0, round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
|
|
|
def extract_audio(video_path):
    """Extract the audio track of a video into a 16 kHz mono PCM WAV file.

    The WAV file is written next to the input, using the input's base name
    with a ``.wav`` extension. The extension is swapped with
    ``os.path.splitext`` rather than a substring replace, so inputs that are
    not ``.mp4`` (or that contain ".mp4" elsewhere in the path) still get a
    distinct, correct output path.

    Args:
        video_path: Path to the input media file.

    Returns:
        Path of the WAV file that was written.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    root, ext = os.path.splitext(video_path)
    if ext.lower() == ".wav":
        # The input is already a WAV file: pick a different name so ffmpeg is
        # never asked to write its output on top of its own input.
        audio_path = root + "_16k.wav"
    else:
        audio_path = root + ".wav"

    command = [
        "ffmpeg", "-y",           # overwrite an existing output file
        "-i", video_path,
        "-vn",                    # drop the video stream
        "-acodec", "pcm_s16le",   # 16-bit little-endian PCM
        "-ar", "16000",           # 16 kHz sample rate
        "-ac", "1",               # mono
        audio_path,
    ]

    # ffmpeg's console output is discarded; check=True turns a failure into
    # a CalledProcessError for the caller.
    subprocess.run(
        command,
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return audio_path
|
|
|
def generate_subtitles(video_path, *, language="zh", task="translate"):
    """Transcribe a video's audio and write the result as an SRT file.

    The audio is first extracted with ``extract_audio`` and then passed to the
    module-level ``model``. The SRT file is written next to the video, using
    the video's base name with a ``.srt`` extension. The intermediate WAV
    file produced by ``extract_audio`` is left on disk.

    The SRT path is derived with ``os.path.splitext``. A plain substring
    replace of ".mp4" would leave the path unchanged for any other extension,
    and opening that path for writing would truncate the source video.

    Args:
        video_path: Path to the input video.
        language: Language code forwarded to ``model.transcribe``.
        task: Task name forwarded to ``model.transcribe``.

    Returns:
        Absolute path of the SRT file that was written.
    """
    audio_path = extract_audio(video_path)

    segments, _ = model.transcribe(audio_path, language=language, task=task)

    root, _ext = os.path.splitext(video_path)
    srt_path = os.path.abspath(root + ".srt")

    with open(srt_path, "w", encoding="utf-8") as f:
        # SRT cues are numbered from 1 and separated by a blank line.
        for index, seg in enumerate(segments, start=1):
            f.write(f"{index}\n")
            f.write(f"{format_timestamp(seg.start)} --> {format_timestamp(seg.end)}\n")
            f.write(f"{seg.text.strip()}\n\n")

    print(f"✅ Subtitle saved at: {srt_path}")
    return srt_path
|
|
|
def burn_subtitles(video_path, srt_path):
    """Render an SRT file into a video as hard (burned-in) subtitles.

    The video is re-encoded with libx264 through ffmpeg's ``subtitles``
    filter while the audio stream is copied unchanged. The result is written
    next to the input as ``<name>_subtitled.mp4``.

    Args:
        video_path: Path to the input video.
        srt_path: Path to the SRT subtitle file to burn in.

    Returns:
        Absolute path of the subtitled video.

    Raises:
        FileNotFoundError: If ``srt_path`` does not exist.
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    if not os.path.exists(srt_path):
        raise FileNotFoundError(f"❌ Không tìm thấy phụ đề: {srt_path}")

    video_path = os.path.abspath(video_path)
    srt_path = os.path.abspath(srt_path)

    # Swap the extension instead of substring-replacing ".mp4", so the output
    # never collides with the input when the input is not an .mp4 file.
    root, _ext = os.path.splitext(video_path)
    output_path = root + "_subtitled.mp4"

    # Inside the filter argument '\', ':' and "'" are special to ffmpeg's
    # option parser (':' separates options, so a Windows drive letter would
    # otherwise end the filename). Escape the backslash first so the
    # backslashes added by the later replacements are not doubled. A single
    # quote has to close the quoted section, emit an escaped quote, and
    # reopen it.
    escaped_srt = (
        srt_path.replace("\\", "\\\\")
        .replace(":", "\\:")
        .replace("'", "\\'\\''")
    )
    filter_arg = f"subtitles='{escaped_srt}'"

    command = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vf", filter_arg,
        "-c:v", "libx264",
        "-c:a", "copy",
        "-preset", "ultrafast",
        # os.cpu_count() may return None; fall back to a single thread
        # rather than passing the literal string "None" to ffmpeg.
        "-threads", str(os.cpu_count() or 1),
        output_path,
    ]

    print(f"🚀 Running FFmpeg command:\n{' '.join(command)}")

    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        print("❌ FFmpeg error output:")
        print(result.stderr)
        raise RuntimeError("⚠️ Lỗi khi chạy FFmpeg để chèn phụ đề!")

    print(f"✅ Video with subtitles saved at: {output_path}")
    return output_path
|
|
|
def process_video(video_path):
    """Run the full pipeline for one video and return the final file path.

    Subtitles are generated for ``video_path`` first, then burned into the
    same video; the value returned by ``burn_subtitles`` is passed through.
    """
    return burn_subtitles(video_path, generate_subtitles(video_path))
|
|