Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import pipeline | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from youtube_transcript_api.formatters import TextFormatter | |
| import re | |
# Summarization checkpoints offered in the UI. The keys are the choices shown
# in the model dropdown; each value simply mirrors its key.
models = {
    checkpoint: checkpoint
    for checkpoint in (
        "Falconsai/text_summarization",
        "suriya7/bart-finetuned-text-summarization",
    )
}

# Checkpoint that is pre-selected in the dropdown.
default_model = "Falconsai/text_summarization"
# Pipelines already built in this process, keyed by model name. Constructing a
# pipeline is the expensive step, so each one is built once and then reused
# instead of being recreated on every request.
_PIPELINE_CACHE = {}


def create_summarization_pipeline(model_name):
    """Return a summarization pipeline for ``model_name``.

    The pipeline is created on first use and cached for the lifetime of the
    process; later calls with the same name return the same object.

    Args:
        model_name: Model identifier forwarded to ``pipeline`` as ``model``.

    Returns:
        The ``"summarization"`` pipeline for that model.
    """
    cached = _PIPELINE_CACHE.get(model_name)
    if cached is None:
        cached = pipeline("summarization", model=model_name)
        _PIPELINE_CACHE[model_name] = cached
    return cached
def extract_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Recognised forms include ``youtube.com/watch?v=ID``,
    ``youtube.com/embed/ID``, ``youtube.com/v/ID``, ``youtube.com/shorts/ID``,
    ``youtube.com/live/ID`` and ``youtu.be/ID``.

    Args:
        url: Text to search for a YouTube URL.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is not a string or
        no supported URL form is found in it.
    """
    # re.search raises TypeError on non-string input (e.g. None); treat that
    # the same as "no ID found" so callers get their normal failure path.
    if not isinstance(url, str):
        return None

    # The shorts/live alternative is listed last so every URL that matched
    # before still matches through the same alternative as before.
    pattern = (
        r"(?:youtube\.com\/"
        r"(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=|(?:shorts|live)\/)"
        r"|youtu\.be\/)"
        r"([a-zA-Z0-9_-]{11})"
    )
    found = re.search(pattern, url)
    return found.group(1) if found else None
def get_youtube_transcript(video_url):
    """Fetch the transcript of a YouTube video as plain text.

    Failures are reported in-band: instead of raising, the function returns a
    human-readable message describing what went wrong.

    Args:
        video_url: URL of the video; its ID is obtained via
            ``extract_video_id``.

    Returns:
        The transcript formatted by ``TextFormatter``, or an explanatory
        message when the ID cannot be extracted or retrieval fails.
    """
    vid = extract_video_id(video_url)
    if not vid:
        return "Video ID could not be extracted. Please check the URL format."

    try:
        # NOTE(review): get_transcript availability depends on the installed
        # youtube-transcript-api version - confirm against the pinned release.
        return TextFormatter().format_transcript(
            YouTubeTranscriptApi.get_transcript(vid)
        )
    except Exception as exc:
        # Any failure is turned into a message for the UI rather than raised.
        reason = str(exc)

    if "Subtitles are disabled for this video" in reason:
        return "Subtitles are disabled for this video. Transcript cannot be retrieved."
    return f"An error occurred while retrieving the transcript: {reason}"
def summarize_youtube_video(url, model_name):
    """Summarize the transcript of a YouTube video with the chosen model.

    Args:
        url: URL of the YouTube video.
        model_name: Name of the summarization model to use.

    Returns:
        The generated summary text, or the failure message produced by
        ``get_youtube_transcript`` when no transcript could be obtained.
    """
    transcript = get_youtube_transcript(url)

    # get_youtube_transcript reports failures in-band as one of these
    # messages. All of them must short-circuit here; otherwise the failure
    # message itself would be fed to the summarizer as if it were a transcript.
    # A prefix test (rather than a substring test) avoids misclassifying a
    # real transcript that merely contains one of these phrases.
    failure_prefixes = (
        "Video ID could not be extracted",
        "Subtitles are disabled for this video",
        "An error occurred while retrieving the transcript",
    )
    if transcript.startswith(failure_prefixes):
        return transcript

    # Nothing to summarize; skip the model call entirely.
    if not transcript.strip():
        return "The transcript is empty; there is nothing to summarize."

    # Character-level cap on the text handed to the model. Slicing is a no-op
    # when the transcript is already short enough.
    max_chars = 1024
    transcript = transcript[:max_chars]

    summarizer = create_summarization_pipeline(model_name)
    # truncation=True additionally clips at the tokenizer level, since a
    # character cap does not bound the token count.
    result = summarizer(
        transcript,
        min_length=10,
        max_length=1000,
        do_sample=False,
        truncation=True,
    )
    return result[0]["summary_text"]
# Gradio UI: a URL textbox and a model dropdown feed summarize_youtube_video,
# whose return value is shown in a single output textbox.
iface = gr.Interface(
    title="YouTube Video Summarizer",
    description="Enter the URL of a YouTube video and select a summarization model to get a summary of its transcript.",
    fn=summarize_youtube_video,
    inputs=[
        gr.Textbox(
            placeholder="e.g. https://www.youtube.com/watch?v=abcdef12345",
            label="Enter YouTube Video URL",
        ),
        gr.Dropdown(
            label="Select Summarization Model",
            value=default_model,
            choices=list(models),
        ),
    ],
    outputs=gr.Textbox(label="Video Summary"),
)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()