File size: 4,096 Bytes
89816f6
 
26a4f2a
c1b39b1
 
 
 
 
39517b3
c1b39b1
 
 
 
39517b3
c1b39b1
39517b3
 
c1b39b1
ab1d311
c1b39b1
 
39517b3
c1b39b1
 
 
 
 
 
39517b3
c1b39b1
 
 
 
39517b3
 
 
c1b39b1
 
 
 
 
 
39517b3
c1b39b1
39517b3
59737ff
39517b3
 
 
 
 
 
26a4f2a
 
59737ff
c1b39b1
39517b3
 
c1b39b1
6770943
6a24084
c1b39b1
39517b3
 
c1b39b1
39517b3
 
 
 
c1b39b1
39517b3
 
 
 
 
 
 
 
 
 
 
 
c1b39b1
 
 
 
39517b3
c1b39b1
 
 
 
 
39517b3
c1b39b1
 
 
 
 
 
 
 
 
 
 
39517b3
 
 
 
 
 
 
 
 
 
 
 
c1b39b1
 
39517b3
c1b39b1
26a4f2a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import warnings
warnings.filterwarnings("ignore")

import gradio as gr
from transformers import pipeline
import tempfile
import torch
import os
import shutil
from moviepy.editor import VideoFileClip
import srt
import datetime

# Select the inference device: CUDA GPU index 0 when one is available,
# otherwise -1 (the value transformers pipelines use for "run on CPU").
device = 0 if torch.cuda.is_available() else -1

# Load the Hugging Face pipelines once at import time so that every request
# reuses the same model instances. All three pipelines receive `device` so that
# none of them silently stays on the CPU when a GPU is present.
whisper = pipeline("automatic-speech-recognition", model="openai/whisper-large", device=device)
punctuate = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction", device=device)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)

# Extract audio from uploaded video file
def extract_audio(video_path):
    """Write the audio track of a video to a new temporary WAV file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the created ``.wav`` file. The file is not deleted here;
        the caller owns it and should remove it when done.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Create the output file atomically. tempfile.mktemp() only returns a name
    # and is deprecated because another process can claim that name first.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_path = tmp.name

    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                raise ValueError("The video has no audio track to extract.")
            video.audio.write_audiofile(audio_path, verbose=False, logger=None)
        finally:
            # Release the clip's reader resources even when extraction fails.
            video.close()
    except Exception:
        # Do not leave an empty or partial WAV behind on failure.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise
    return audio_path

# Generate basic subtitle file
def generate_srt(transcript_text, seconds_per_line=2):
    """Build a simple ``.srt`` subtitle file from a transcript.

    The transcript is split on ``". "`` and each piece becomes one subtitle.
    Timestamps are synthetic: piece number ``i`` (0-based) is shown from
    ``i * seconds_per_line`` to ``(i + 1) * seconds_per_line`` seconds, so
    they are not aligned with the actual audio.

    Args:
        transcript_text: Transcript to convert.
        seconds_per_line: Duration, in seconds, given to every subtitle.

    Returns:
        Path of the created ``.srt`` file. The file is not deleted here.
    """
    sentences = transcript_text.strip().split(". ")
    subs = [
        srt.Subtitle(
            index=position + 1,
            start=datetime.timedelta(seconds=position * seconds_per_line),
            end=datetime.timedelta(seconds=(position + 1) * seconds_per_line),
            content=sentence.strip(),
        )
        for position, sentence in enumerate(sentences)
    ]
    srt_data = srt.compose(subs)

    # NamedTemporaryFile creates the file atomically (unlike the deprecated
    # tempfile.mktemp), and the explicit UTF-8 encoding keeps non-ASCII
    # transcripts from failing or being mis-encoded under other locales.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".srt", delete=False, encoding="utf-8"
    ) as f:
        f.write(srt_data)
        srt_path = f.name
    return srt_path

# Main pipeline
def _write_temp_text(text, suffix=".txt"):
    """Write *text* to a new UTF-8 temporary file and return its path."""
    with tempfile.NamedTemporaryFile(
        "w", suffix=suffix, delete=False, encoding="utf-8"
    ) as f:
        f.write(text)
        return f.name


def _remove_quietly(path):
    """Best-effort delete of *path*; a missing path or OS error is ignored."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        pass


def transcribe_pipeline(video_file):
    """Transcribe, punctuate and summarize an uploaded video.

    Steps: copy the upload to a temporary file, extract its audio, transcribe
    it with the Whisper pipeline (forced to English), pass the text through
    the punctuation pipeline, summarize it, and write the downloadable files.

    Args:
        video_file: Path of the uploaded video.

    Returns:
        A 6-tuple ``(raw_text, punctuated, summary, punct_txt_path,
        summary_txt_path, srt_path)``. On any failure the error is printed
        and ``("Error", "Error", "Error", None, None, None)`` is returned.
    """
    # Intermediate files; initialised up front so `finally` can clean up
    # whichever of them were actually created.
    video_path = None
    audio_path = None
    try:
        # Copy uploaded file to a temp location (created atomically, unlike
        # the deprecated tempfile.mktemp).
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            video_path = tmp.name
        shutil.copy(video_file, video_path)

        # Extract audio from video
        audio_path = extract_audio(video_path)

        # Transcribe with Whisper (force English)
        result = whisper(audio_path, return_timestamps=True, generate_kwargs={"language": "en"})
        raw_text = " ".join(chunk["text"] for chunk in result["chunks"])

        # Add punctuation
        punctuated = punctuate(raw_text)[0]["generated_text"]

        # Summarize. truncation=True clips over-long transcripts to the
        # model's input limit instead of failing the whole request.
        summary = summarizer(
            punctuated,
            max_length=60,
            min_length=20,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]

        # Generate subtitle file
        srt_path = generate_srt(punctuated)

        # Save files for download. These must outlive this call, so they are
        # intentionally not removed here.
        punct_txt_path = _write_temp_text(punctuated)
        summary_txt_path = _write_temp_text(summary)

        return raw_text, punctuated, summary, punct_txt_path, summary_txt_path, srt_path

    except Exception as e:
        # Top-level boundary for the UI callback: report and return placeholders.
        print("❌ Pipeline Error:", e)
        return "Error", "Error", "Error", None, None, None

    finally:
        # The video copy and extracted audio are only needed during processing.
        _remove_quietly(audio_path)
        _remove_quietly(video_path)

# Gradio UI: a Blocks layout whose single button runs transcribe_pipeline.
# Components are rendered in the order they are created below.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    # Page title and short usage description.
    gr.Markdown("# 🎥 EchoScribe: Smart Video Transcriber")
    gr.Markdown("Upload a video to extract transcript, add punctuation, and generate a summary. You can also download the .srt subtitle file.")

    # Input: the video to process.
    with gr.Row():
        video_input = gr.Video(label="🎬 Upload your video")

    # Text outputs: raw and punctuated transcripts side by side.
    with gr.Row():
        raw_output = gr.Textbox(label="🧾 Raw Transcript", lines=6)
        punct_output = gr.Textbox(label="📄 Punctuated Transcript", lines=6)

    summary_output = gr.Textbox(label="📝 Summary", lines=4)

    # Downloadable files produced by the pipeline.
    with gr.Row():
        download_transcript = gr.File(label="⬇️ Download Transcript (.txt)")
        download_summary = gr.File(label="⬇️ Download Summary (.txt)")
        download_srt = gr.File(label="⬇️ Download Subtitles (.srt)")

    submit_btn = gr.Button("🚀 Transcribe & Summarize")

    # The order of `outputs` must match the 6-tuple returned by
    # transcribe_pipeline: raw text, punctuated text, summary, punctuated
    # transcript file, summary file, subtitle file.
    submit_btn.click(
        fn=transcribe_pipeline,
        inputs=video_input,
        outputs=[
            raw_output,
            punct_output,
            summary_output,
            download_transcript,
            download_summary,
            download_srt,
        ],
    )

    # Footer.
    gr.Markdown("---")
    gr.Markdown("Built with ❤️ by Snigdha’s AI Lab")

# Start the app as soon as the module is executed, passing share=True to launch().
iface.launch(share=True)