Sjanmanchi committed on
Commit
c1b39b1
·
verified ·
1 Parent(s): fea69d7

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +43 -0
  2. app.py +96 -0
  3. requirements.txt +7 -0
README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: EchoScribe
3
+ emoji: 🎥
4
+ colorFrom: indigo
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: "3.50.2"
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # 🎥 EchoScribe: Smart Video Transcriber
13
+
14
+ **EchoScribe** is a powerful AI tool that turns your videos into clean, punctuated transcripts, downloadable `.srt` subtitles, and insightful summaries — all in one click.
15
+
16
+ ## 🚀 Features
17
+
18
+ - 🎬 Upload any video (MP4)
19
+ - 🧾 Get raw and punctuated transcripts using `openai/whisper-large` and `oliverguhr/fullstop-punctuation-multilang-large`
20
+ - 📝 Generate clean summaries using `facebook/bart-large-cnn`
21
+ - ⬇️ Download transcript, summary, and subtitle (.srt) files
22
+ - 🌈 Clean, responsive interface powered by Gradio
23
+ - 🖤 Built with ❤️ by Snigdha’s AI Lab
24
+
25
+ ## 🔧 Technologies Used
26
+
27
+ - Hugging Face Transformers
28
+ - Gradio
29
+ - Whisper ASR
30
+ - MoviePy for audio extraction
31
+ - Python, Torch
32
+
33
+ ## 🛠️ Usage
34
+
35
+ 1. Upload a short video clip
36
+ 2. Wait for the transcript and summary to be generated
37
+ 3. View and download the results instantly
38
+
39
+ > Ideal for note-taking, content summarization, interviews, YouTube creators, and accessibility projects.
40
+
41
+ ---
42
+
43
+ Feel free to fork, contribute, and remix!
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import tempfile
4
+ import torch
5
+ import os
6
+ from moviepy.editor import VideoFileClip
7
+ import srt
8
+ import datetime
9
+
10
# Load Hugging Face models once at import time so every request reuses them.
# `device` follows the transformers pipeline convention: CUDA ordinal 0 when a
# GPU is visible, -1 for CPU.
device = 0 if torch.cuda.is_available() else -1

# chunk_length_s=30 lets the ASR pipeline process recordings longer than
# Whisper's 30-second input window by transcribing them chunk by chunk.
whisper = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large",
    chunk_length_s=30,
    device=device,
)

# NOTE(review): this checkpoint appears to be published as a
# token-classification model; confirm that loading it under the
# "text2text-generation" task really yields the `generated_text` field that
# transcribe_pipeline reads.
punctuate = pipeline(
    "text2text-generation",
    model="oliverguhr/fullstop-punctuation-multilang-large",
    device=device,
)

# Run the summarizer on the same device as the other models instead of
# silently falling back to CPU.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=device,
)
15
+
16
+ # Utility: Extract audio
17
def extract_audio(video_path):
    """Write the audio track of a video to a temporary WAV file.

    Args:
        video_path: Filesystem path of the video to read.

    Returns:
        Path of the newly created ``.wav`` file. The caller owns the file
        and is responsible for deleting it.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Reserve the output file atomically. tempfile.mktemp only returns a
    # name, which another process could claim before we write to it.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError("The uploaded video has no audio track.")
        # NOTE(review): `verbose=` / `logger=` look like the moviepy 1.x
        # keyword arguments; confirm against the installed moviepy version.
        video.audio.write_audiofile(audio_path, verbose=False, logger=None)
    except Exception:
        # Do not leave an empty placeholder file behind on failure.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise
    finally:
        # Release the reader processes/file handles held by the clip.
        video.close()
    return audio_path
22
+
23
+ # Utility: Create .srt subtitles
24
def generate_srt(transcript_text, seconds_per_line=2):
    """Build an ``.srt`` subtitle file from a punctuated transcript.

    The transcript is split on ``". "`` and every non-empty sentence becomes
    one subtitle cue. No real timing information is available here, so each
    cue simply gets a fixed-length slot in reading order.

    Args:
        transcript_text: Transcript to convert; may be empty.
        seconds_per_line: Length of each cue's time slot in seconds.

    Returns:
        Path of the newly created UTF-8 ``.srt`` file. The caller owns it.
    """
    fragments = transcript_text.strip().split(". ")
    last_position = len(fragments) - 1

    sentences = []
    for position, fragment in enumerate(fragments):
        sentence = fragment.strip()
        if not sentence:
            continue
        # split(". ") consumed the period of every fragment except the last
        # one, so put it back to keep the cues consistently punctuated.
        if position < last_position and not sentence.endswith("."):
            sentence += "."
        sentences.append(sentence)

    slot = datetime.timedelta(seconds=seconds_per_line)
    subs = [
        srt.Subtitle(
            index=number + 1,
            start=number * slot,
            end=(number + 1) * slot,
            content=sentence,
        )
        for number, sentence in enumerate(sentences)
    ]

    # NamedTemporaryFile creates the file atomically (unlike mktemp), and the
    # explicit encoding keeps non-ASCII transcripts writable on any platform.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".srt", delete=False, encoding="utf-8"
    ) as srt_file:
        srt_file.write(srt.compose(subs))
    return srt_file.name
36
+
37
+ # Full pipeline
38
def _write_temp_text(text, suffix=".txt"):
    """Write *text* to a new UTF-8 temporary file and return its path."""
    with tempfile.NamedTemporaryFile(
        "w", suffix=suffix, delete=False, encoding="utf-8"
    ) as handle:
        handle.write(text)
    return handle.name


def transcribe_pipeline(video_file):
    """Transcribe, punctuate and summarize an uploaded video.

    Args:
        video_file: Either a filesystem path to the video (what the
            ``gr.Video`` input passes to its handler) or a binary file-like
            object exposing ``read()``.

    Returns:
        A 6-tuple ``(raw_text, punctuated, summary, punct_txt_path,
        summary_txt_path, srt_path)``: the three texts followed by the paths
        of the downloadable transcript, summary and subtitle files.

    Raises:
        ValueError: If no video was provided.
    """
    if video_file is None:
        raise ValueError("Please upload a video first.")

    # File-like inputs are copied to disk because the audio extractor needs
    # a path; path inputs are used as-is and never deleted by us.
    copied_upload = hasattr(video_file, "read")
    if copied_upload:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            tmp.write(video_file.read())
        video_path = tmp.name
    else:
        video_path = os.fspath(video_file)

    audio_path = None
    try:
        audio_path = extract_audio(video_path)
        raw_text = whisper(audio_path)["text"]
    finally:
        # Remove the intermediates we created so repeated requests do not
        # fill the temp directory.
        leftovers = [audio_path]
        if copied_upload:
            leftovers.append(video_path)
        for leftover in leftovers:
            if leftover and os.path.exists(leftover):
                os.remove(leftover)

    # Skip the models for silent/empty recordings instead of feeding them
    # an empty string.
    if raw_text.strip():
        punctuated = punctuate(raw_text)[0]["generated_text"]
    else:
        punctuated = ""

    if punctuated.strip():
        # truncation=True keeps long transcripts within the summarizer's
        # maximum input length instead of failing on them.
        summary = summarizer(
            punctuated,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
    else:
        summary = ""

    srt_path = generate_srt(punctuated)

    # Downloadable copies of the punctuated transcript and the summary.
    punct_txt_path = _write_temp_text(punctuated)
    summary_txt_path = _write_temp_text(summary)

    return raw_text, punctuated, summary, punct_txt_path, summary_txt_path, srt_path
64
+
65
+ # Gradio UI
66
# Layout: upload row, transcript row, summary box, download row, then the
# button that triggers the whole pipeline.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# 🎥 EchoScribe: Smart Video Transcriber")
    gr.Markdown("Upload a video to extract transcript with punctuation and summary using Hugging Face models. Powered by Whisper, BART, and punctuation restoration.")

    with gr.Row():
        video_input = gr.Video(label="🎬 Upload your video")

    with gr.Row():
        raw_output = gr.Textbox(label="🧾 Raw Transcript (Whisper)", lines=6)
        punct_output = gr.Textbox(label="📄 Punctuated Transcript", lines=6)

    summary_output = gr.Textbox(label="📝 Summary", lines=4)

    with gr.Row():
        download_transcript = gr.File(label="⬇️ Download Transcript (.txt)")
        download_summary = gr.File(label="⬇️ Download Summary (.txt)")
        download_srt = gr.File(label="⬇️ Download Subtitles (.srt)")

    submit_btn = gr.Button("🚀 Transcribe & Summarize")

    def run_all(video):
        """Click handler: run the full pipeline on the uploaded video."""
        return transcribe_pipeline(video)

    # The output order must match the tuple returned by transcribe_pipeline.
    submit_btn.click(
        fn=run_all,
        inputs=video_input,
        outputs=[
            raw_output,
            punct_output,
            summary_output,
            download_transcript,
            download_summary,
            download_srt,
        ],
    )

    gr.Markdown("---")
    gr.Markdown("🔧 Built with ❤️ by **Snigdha’s AI Lab**")

# The handler runs three large models and can take minutes; the queue keeps
# long requests from being cut off by the plain HTTP request timeout.
iface.queue()
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ torch
4
+ moviepy
5
+ pydub
6
+ ffmpeg-python
7
+ srt