bilibli / utils.py
oppaiio's picture
Update utils.py
934e0ee verified
from faster_whisper import WhisperModel
import subprocess
import os
import datetime
# Load the model once at import time so generate_subtitles() reuses the same
# instance on every call instead of constructing it again.
model = WhisperModel("guillaumekln/faster-whisper-small", compute_type="int8")
def format_timestamp(seconds: float) -> str:
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond once and then split with
    integer arithmetic, so binary floating-point error cannot drop a
    millisecond (for example ``1.001`` yields ``,001`` rather than ``,000``)
    and a carry such as ``59.9996`` rolls over into the next minute.

    Args:
        seconds: Offset from the start of the media, in seconds. Negative
            values are clamped to zero.

    Returns:
        The timestamp string, with hours zero-padded to at least two digits.
    """
    # Work in whole milliseconds from here on to stay out of float territory.
    total_millis = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_millis, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
def extract_audio(video_path):
    """Extract the audio track of a video into a WAV file using ffmpeg.

    The WAV file is written next to the video with the same base name. The
    name is derived from the real file extension, so inputs that are not
    lower-case ``.mp4`` (``.MP4``, ``.mkv``, no extension, ...) no longer end
    up with an output path equal to the input path.

    Args:
        video_path: Path to the input media file.

    Returns:
        Path of the WAV file that was written.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    root, _ext = os.path.splitext(video_path)
    audio_path = root + ".wav"
    # If the input already is "<root>.wav", never let ffmpeg (run with -y)
    # target the file it is reading from.
    same_file = os.path.normcase(os.path.abspath(audio_path)) == os.path.normcase(
        os.path.abspath(video_path)
    )
    if same_file:
        audio_path = root + "_audio.wav"

    command = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vn",                   # drop the video stream
        "-acodec", "pcm_s16le",  # 16-bit PCM, i.e. plain WAV
        "-ar", "16000",          # 16 kHz sample rate
        "-ac", "1",              # mono
        audio_path,
    ]
    subprocess.run(
        command,
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return audio_path
def generate_subtitles(video_path):
    """Transcribe a video's audio and write the result as an SRT file.

    The audio is first extracted with ``extract_audio`` and passed to the
    module-level ``model`` with ``language='zh'`` and ``task='translate'``.
    The SRT file is written next to the video with the same base name. The
    name is derived from the real file extension, so a non-``.mp4`` input is
    never overwritten by the subtitle text.

    Args:
        video_path: Path to the input video file.

    Returns:
        Absolute path of the SRT file that was written.
    """
    # Extract the audio track from the video.
    audio_path = extract_audio(video_path)

    # Use the audio to produce the subtitle segments.
    segments, _ = model.transcribe(audio_path, language='zh', task='translate')

    srt_root, _ext = os.path.splitext(video_path)
    srt_path = os.path.abspath(srt_root + ".srt")

    with open(srt_path, "w", encoding="utf-8") as f:
        # SRT cue numbers start at 1.
        for index, seg in enumerate(segments, start=1):
            f.write(f"{index}\n")
            f.write(f"{format_timestamp(seg.start)} --> {format_timestamp(seg.end)}\n")
            f.write(f"{seg.text.strip()}\n\n")

    print(f"✅ Subtitle saved at: {srt_path}")
    return srt_path
def _escape_ffmpeg_filter_value(value):
    """Escape ``value`` so ffmpeg reads it as one literal filter option value.

    Applies the two escaping levels described in the ffmpeg filter
    documentation: first for the option value, then for the filtergraph
    description. No shell-level escaping is applied because the command is
    run from an argument list, not through a shell.
    """
    # Level 1 (option value): backslash, quote and the ':' option separator.
    # Backslash goes first so the escapes added afterwards are not doubled.
    for char in ("\\", "'", ":"):
        value = value.replace(char, "\\" + char)
    # Level 2 (filtergraph description): backslash, quote and "[],;".
    for char in ("\\", "'", "[", "]", ",", ";"):
        value = value.replace(char, "\\" + char)
    return value


def burn_subtitles(video_path, srt_path):
    """Burn an SRT subtitle file into a video with ffmpeg.

    The video is re-encoded with libx264 (``ultrafast`` preset) while the
    audio stream is copied. The result is written next to the input as
    ``<name>_subtitled.mp4``.

    Args:
        video_path: Path to the input video file.
        srt_path: Path to the SRT subtitle file to render onto the video.

    Returns:
        Absolute path of the subtitled video.

    Raises:
        FileNotFoundError: If ``srt_path`` does not exist.
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    if not os.path.exists(srt_path):
        raise FileNotFoundError(f"❌ Không tìm thấy phụ đề: {srt_path}")

    video_path = os.path.abspath(video_path)
    srt_path = os.path.abspath(srt_path)

    # Derive the output name from the real extension so it can never equal
    # the input path, whatever the input's extension is.
    video_root, _ext = os.path.splitext(video_path)
    output_path = video_root + "_subtitled.mp4"

    # Name the option explicitly and escape the path so characters such as
    # ':' (Windows drive letters), quotes, commas or brackets are not parsed
    # as filtergraph syntax.
    filter_arg = f"subtitles=filename={_escape_ffmpeg_filter_value(srt_path)}"

    # os.cpu_count() may return None; 0 asks ffmpeg to pick the thread count.
    thread_count = os.cpu_count() or 0

    command = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vf", filter_arg,
        "-c:v", "libx264",
        "-preset", "ultrafast",
        "-threads", str(thread_count),
        "-c:a", "copy",
        output_path,
    ]

    print(f"🚀 Running FFmpeg command:\n{' '.join(command)}")
    result = subprocess.run(command, capture_output=True, text=True)

    if result.returncode != 0:
        print("❌ FFmpeg error output:")
        print(result.stderr)
        raise RuntimeError("⚠️ Lỗi khi chạy FFmpeg để chèn phụ đề!")

    print(f"✅ Video with subtitles saved at: {output_path}")
    return output_path
def process_video(video_path):
    """Generate subtitles for a video and burn them into it.

    Args:
        video_path: Path to the input video file.

    Returns:
        Whatever ``burn_subtitles`` returns for the subtitled video.
    """
    # The subtitle file is generated first; its path feeds the burn-in step.
    return burn_subtitles(video_path, generate_subtitles(video_path))