DeeeeeeM
committed on
Commit
·
0b9d716
1
Parent(s):
e8fae30
added urgent yt playlist extractor feature
Browse files- app.py +39 -2
- playlist.csv +0 -0
- yt-playlist-export.py +27 -0
app.py
CHANGED
|
@@ -7,6 +7,9 @@ import torch
|
|
| 7 |
import stable_whisper
|
| 8 |
from stable_whisper.text_output import result_to_any, sec2srt
|
| 9 |
import time
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
def process_media(
|
| 12 |
model_size, source_lang, upload, model_type,
|
|
@@ -169,6 +172,31 @@ def segments2blocks(segments, max_lines_per_segment, line_penalty, longest_line_
|
|
| 169 |
for i, s in enumerate(segments)
|
| 170 |
)
|
| 171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
WHISPER_LANGUAGES = [
|
| 173 |
("Afrikaans", "af"),
|
| 174 |
("Albanian", "sq"),
|
|
@@ -440,7 +468,16 @@ with gr.Blocks() as interface:
|
|
| 440 |
outputs=[audio_output, video_output, transcript_output, srt_output]
|
| 441 |
)
|
| 442 |
|
| 443 |
-
with gr.TabItem("
|
| 444 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 445 |
|
| 446 |
interface.launch(share=True)
|
|
|
|
| 7 |
import stable_whisper
|
| 8 |
from stable_whisper.text_output import result_to_any, sec2srt
|
| 9 |
import time
|
| 10 |
+
from yt_dlp import YoutubeDL
|
| 11 |
+
import csv
|
| 12 |
+
import os
|
| 13 |
|
| 14 |
def process_media(
|
| 15 |
model_size, source_lang, upload, model_type,
|
|
|
|
| 172 |
for i, s in enumerate(segments)
|
| 173 |
)
|
| 174 |
|
| 175 |
+
def extract_playlist_to_csv(playlist_url):
    """Export the entries of a YouTube playlist to a temporary CSV file.

    The playlist is resolved through ``YoutubeDL.extract_info`` with
    ``extract_flat`` enabled and ``download=False``, so nothing is
    downloaded. One row per usable entry is written below the header
    ``Title, Video ID, URL``.

    Args:
        playlist_url: URL of the playlist to read (a string).

    Returns:
        The path of the CSV file that was written, or ``None`` when the URL
        is missing/blank or when extraction or writing failed. Failures are
        logged instead of being silently discarded.
    """
    # Local imports: the function must not depend on module-level imports
    # that are not guaranteed to be present in this file.
    import logging
    import tempfile

    # Nothing to look up: avoid a pointless extractor call.
    if not isinstance(playlist_url, str) or not playlist_url.strip():
        return None

    ydl_opts = {
        'extract_flat': True,
        'quiet': True,
        'dump_single_json': True,
    }
    csv_path = None
    try:
        with YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(playlist_url.strip(), download=False)
        # Tolerate a missing result or a missing/None 'entries' value.
        entries = (result or {}).get('entries') or []

        # Save to a temp file for download. The descriptor is closed right
        # away and the path reopened so the csv module controls newlines.
        fd, csv_path = tempfile.mkstemp(suffix=".csv", text=True)
        os.close(fd)
        with open(csv_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['Title', 'Video ID', 'URL'])
            for video in entries:
                # Entries may be None or lack an id (presumably unavailable
                # or private videos); skip them rather than abort the export.
                if not video or not video.get('id'):
                    continue
                video_id = video['id']
                title = video.get('title') or 'N/A'
                url = f'https://www.youtube.com/watch?v={video_id}'
                writer.writerow([title, video_id, url])
        return csv_path
    except Exception:
        # Boundary handler for the UI callback: keep returning None, but
        # record the reason so failures can be diagnosed.
        logging.getLogger(__name__).exception(
            "Failed to export playlist %r to CSV", playlist_url
        )
        # Do not leave a partially written temp file behind.
        if csv_path is not None:
            try:
                os.remove(csv_path)
            except OSError:
                pass
        return None
|
| 199 |
+
|
| 200 |
WHISPER_LANGUAGES = [
|
| 201 |
("Afrikaans", "af"),
|
| 202 |
("Albanian", "sq"),
|
|
|
|
| 468 |
outputs=[audio_output, video_output, transcript_output, srt_output]
|
| 469 |
)
|
| 470 |
|
| 471 |
+
with gr.TabItem("Youtube playlist extractor"):
|
| 472 |
+
gr.Markdown("### Extract YT Title, URL, and ID from a YouTube playlist and download as CSV.")
|
| 473 |
+
playlist_url = gr.Textbox(label="YouTube Playlist URL", placeholder="Paste playlist URL here")
|
| 474 |
+
process_btn = gr.Button("Process")
|
| 475 |
+
csv_output = gr.File(label="Download CSV")
|
| 476 |
+
process_btn.click(
|
| 477 |
+
extract_playlist_to_csv,
|
| 478 |
+
inputs=playlist_url,
|
| 479 |
+
outputs=csv_output
|
| 480 |
+
)
|
| 481 |
+
|
| 482 |
|
| 483 |
interface.launch(share=True)
|
playlist.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
yt-playlist-export.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from yt_dlp import YoutubeDL
|
| 2 |
+
import csv
|
| 3 |
+
|
| 4 |
+
# Playlist exported when no URL is given on the command line.
DEFAULT_PLAYLIST_URL = 'https://www.youtube.com/playlist?list=PLGRhcC_vtOra_TUIec1NgfHJIggPONtqU'
# CSV file written when no output path is given on the command line.
DEFAULT_OUTPUT_PATH = 'playlist.csv'

# extract_flat: list the playlist entries without resolving each video.
YDL_OPTS = {
    'extract_flat': True,
    'quiet': True,
    'dump_single_json': True,
}


def playlist_rows(entries):
    """Return ``[title, video_id, url]`` rows for the given playlist entries.

    Args:
        entries: Iterable of entry dicts as found under the ``'entries'`` key
            of the extracted playlist info; ``None`` is treated as empty.

    Returns:
        A list of three-item lists. Entries that are falsy or have no ``id``
        are skipped; a missing or empty title becomes ``'N/A'``.
    """
    rows = []
    for video in entries or []:
        # Without an id no watch URL can be built, so the entry is dropped.
        if not video or not video.get('id'):
            continue
        video_id = video['id']
        title = video.get('title') or 'N/A'
        rows.append([title, video_id, f'https://www.youtube.com/watch?v={video_id}'])
    return rows


def main(playlist_url=DEFAULT_PLAYLIST_URL, output_path=DEFAULT_OUTPUT_PATH):
    """Extract the playlist at *playlist_url* and write it to *output_path*.

    Args:
        playlist_url: URL of the playlist to export.
        output_path: Destination CSV file; overwritten if it exists.
    """
    with YoutubeDL(YDL_OPTS) as ydl:
        result = ydl.extract_info(playlist_url, download=False)

    # Tolerate a missing result or a missing 'entries' key.
    rows = playlist_rows((result or {}).get('entries'))

    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Title', 'Video ID', 'URL'])  # Header
        writer.writerows(rows)

    print(f"✅ Video IDs and URLs saved to '{output_path}'")


if __name__ == "__main__":
    import sys

    # Optional overrides: yt-playlist-export.py [PLAYLIST_URL [OUTPUT_CSV]]
    main(*sys.argv[1:3])
|