lgsantini1 committed on
Commit
a13cae7
·
verified ·
1 Parent(s): bf4e5ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -43
app.py CHANGED
@@ -1,49 +1,38 @@
 
1
  import gradio as gr
2
- from langchain_community.document_loaders import YoutubeLoader
3
- from youtube_transcript_api import NoTranscriptFound
4
- from pytube.exceptions import PytubeError
5
 
6
- def load_youtube_video_info(url):
7
- """
8
- Load YouTube video information including metadata and transcript.
9
-
10
- Args:
11
- url (str): URL of the YouTube video.
12
-
13
- Returns:
14
- str: Information about the video, including metadata and transcript or error messages.
15
- """
16
  try:
17
- # Initialize YouTube loader with preferred language as Portuguese
18
- loader = YoutubeLoader.from_youtube_url(url, add_video_info=True, language="pt")
19
-
20
- # Load the video information and transcript
21
- result = loader.load()
22
 
23
- if result and len(result) > 0:
24
- # Retrieve metadata information
25
- author = result[0].metadata.get('author', 'Unknown Author')
26
- length = result[0].metadata.get('length', 'Unknown Length')
27
- title = result[0].metadata.get('title', 'Unknown Title')
28
-
29
- # Check if transcript is available
30
- transcript = result[0].page_content if hasattr(result[0], 'page_content') else "No transcript available."
31
 
32
- # Format information
33
- info = (
34
- f"Video Title: {title}\n"
35
- f"Author: {author}\n"
36
- f"Length: {length}\n\n"
37
- f"Transcript:\n{transcript}"
38
- )
39
- else:
40
- info = "No result found."
 
 
41
 
42
- except NoTranscriptFound:
43
- info = "Transcript not found for this video."
44
- except PytubeError as e:
45
- info = f"Error loading video information: {str(e)}"
46
- except Exception as e:
47
- info = f"An unexpected error occurred: {str(e)}"
48
-
49
- return info
 
1
+ import re
2
  import gradio as gr
3
+ from youtube_transcript_api import YouTubeTranscriptApi
 
 
4
 
5
# Matches the common YouTube URL shapes and captures the video ID.
# The ID is restricted to exactly 11 characters from [A-Za-z0-9_-]; the
# trailing lookahead rejects longer tokens instead of silently truncating them.
_VIDEO_ID_RE = re.compile(
    r'(?:youtu\.be/'
    r'|youtube(?:-nocookie)?\.com/(?:embed/|v/|shorts/|live/|\S*?[?&]v=))'
    r'([A-Za-z0-9_-]{11})(?![A-Za-z0-9_-])'
)


def extract_video_id(youtube_url):
    """Return the 11-character video ID embedded in a YouTube URL.

    Recognised forms: ``youtu.be/<id>``, ``youtube.com/watch?v=<id>`` (with
    ``v`` in any query position), ``/embed/<id>``, ``/v/<id>``,
    ``/shorts/<id>`` and ``/live/<id>``.

    Args:
        youtube_url: The URL text typed by the user.

    Returns:
        The video ID as a string, or ``None`` when the input is not a string
        or contains no recognisable YouTube video ID.
    """
    # Guard against None / non-text input so the caller gets the regular
    # "invalid URL" path instead of a TypeError from the regex engine.
    if not isinstance(youtube_url, str):
        return None
    match = _VIDEO_ID_RE.search(youtube_url)
    return match.group(1) if match else None
9
+
10
def get_transcript(video_id, language="en"):
    """Fetch a video's transcript and render it as text, one snippet per line.

    Each output line has the form ``<start>: <text>``, where ``start`` and
    ``text`` are read from the transcript entries returned by the library.

    Args:
        video_id: The YouTube video ID to look up.
        language: Language code requested from the transcript API.

    Returns:
        The formatted transcript, or a message starting with
        ``"Erro ao obter transcrição: "`` if anything goes wrong. Errors are
        reported as text rather than raised because the result is shown
        directly in the UI.
    """
    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Static helper offered by older youtube-transcript-api releases.
            entries = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
        else:
            # Newer releases expose an instance-based fetch(); to_raw_data()
            # converts its result to the same list-of-dicts shape.
            fetched = YouTubeTranscriptApi().fetch(video_id, languages=[language])
            entries = fetched.to_raw_data()
        return "\n".join(f"{entry['start']}: {entry['text']}" for entry in entries)
    except Exception as e:
        return f"Erro ao obter transcrição: {str(e)}"
 
16
 
17
def gradio_interface(youtube_url, language):
    """Resolve the video ID from the URL and return its transcript text.

    Args:
        youtube_url: URL entered in the interface's text box.
        language: Language code selected in the dropdown.

    Returns:
        The output of ``get_transcript`` for the extracted ID, or an
        "invalid URL" message when no ID could be extracted.
    """
    video_id = extract_video_id(youtube_url)
    if video_id:
        return get_transcript(video_id, language)
    return "Erro: URL inválida. Por favor, insira um link válido do YouTube."
 
24
 
25
# Build the Gradio interface: a URL text box and a language dropdown feed
# gradio_interface, whose result is shown as plain text.
iface = gr.Interface(
    title="Obter Transcrição de Vídeos do YouTube",
    description="Insira a URL de um vídeo do YouTube e selecione o idioma da transcrição.",
    fn=gradio_interface,
    inputs=[
        gr.Textbox(
            label="URL do Vídeo (YouTube)",
            placeholder="Ex: https://www.youtube.com/watch?v=tl1jHm0qC_4",
        ),
        gr.Dropdown(
            label="Idioma",
            choices=["en", "pt", "es", "fr"],
            value="en",
        ),
    ],
    outputs="text",
)

# Start the Gradio interface.
iface.launch()