import tempfile

import gradio as gr
import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
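# Third-party dependencies: gradio, pandas, and google-api-python-client
# (e.g. `pip install gradio pandas google-api-python-client`).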
# Function to get the channel ID from the channel name
def get_channel_id(api_key, channel_name):
    youtube = build('youtube', 'v3', developerKey=api_key)
    try:
        response = youtube.search().list(
            part='snippet',
            q=channel_name,
            type='channel',
            maxResults=1
        ).execute()
        if response['items']:
            # Return a (channel_id, error) pair so callers can unpack the result consistently
            return response['items'][0]['id']['channelId'], None
        else:
            return None, "No channel found with that name."
    except HttpError as e:
        return None, f"Error fetching channel ID: {e}"
# Function to fetch videos from a channel
def get_channel_videos(api_key, channel_id, max_results):
    youtube = build('youtube', 'v3', developerKey=api_key)
    videos = []
    try:
        # search().list returns at most 50 results per request, matching the UI slider's upper bound
        response = youtube.search().list(
            part='snippet',
            channelId=channel_id,
            maxResults=int(max_results),
            type='video',
            order='date'
        ).execute()
        for item in response['items']:
            videos.append({
                'video_id': item['id']['videoId'],
                'title': item['snippet']['title']
            })
    except HttpError as e:
        return None, f"Error fetching videos: {e}"
    return videos, None
# Function to fetch comments from a video
def get_video_comments(api_key, video_id, filters):
    youtube = build('youtube', 'v3', developerKey=api_key)
    comments = []
    next_page_token = None
    while True:
        try:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token
            ).execute()
            for item in response['items']:
                comment = item['snippet']['topLevelComment']['snippet']
                published_at = comment['publishedAt']
                like_count = comment['likeCount']
                reply_count = item['snippet']['totalReplyCount']
                # Apply filters
                if (
                    like_count >= filters['likes'] and
                    reply_count >= filters['replies'] and
                    (not filters['date'] or published_at >= filters['date'])
                ):
                    comments.append({
                        'VideoID': video_id,
                        'Channel': comment.get('authorChannelUrl', ''),
                        'CommentedDateTime': published_at,
                        'NumOfCommentLikes': like_count,
                        'NumOfReplies': reply_count,
                        'Comment': comment['textDisplay'],
                        'CommentedUserID': comment['authorChannelId']['value']
                    })
                # Handle replies (if any)
                if 'replies' in item:
                    for reply in item['replies']['comments']:
                        reply_snippet = reply['snippet']
                        comments.append({
                            'VideoID': video_id,
                            'Channel': reply_snippet.get('authorChannelUrl', ''),
                            'CommentedDateTime': reply_snippet['publishedAt'],
                            'NumOfCommentLikes': reply_snippet['likeCount'],
                            'NumOfReplies': 0,
                            'Comment': reply_snippet['textDisplay'],
                            'CommentedUserID': reply_snippet['authorChannelId']['value']
                        })
            next_page_token = response.get('nextPageToken')
            if not next_page_token:
                break
        except HttpError as e:
            return None, f"Error fetching comments: {e}"
    return comments, None
# Main function
def scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
    if not api_key or not channel_name_or_id:
        return None, "API key and channel name/ID are required."
    # Input that looks like a channel ID (e.g. "UCxxxx...") is used directly; otherwise resolve it by name
    if channel_name_or_id.startswith("UC"):
        channel_id = channel_name_or_id
        error = None
    else:
        channel_id, error = get_channel_id(api_key, channel_name_or_id)
    if error:
        return None, error
    videos, error = get_channel_videos(api_key, channel_id, num_videos)
    if error:
        return None, error
    all_comments = []
    filters = {
        'likes': min_likes,
        'replies': min_replies,
        'date': filter_date if filter_date else None
    }
    for video in videos:
        video_comments, error = get_video_comments(api_key, video['video_id'], filters)
        if error:
            return None, error
        all_comments.extend(video_comments)
    if all_comments:
        df_comments = pd.DataFrame(all_comments)
        csv_data = df_comments.to_csv(index=False)
        return csv_data, None
    else:
        return None, "No comments found for the selected videos."
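# Example of calling the scraper outside the UI (hypothetical API key and channel ID):
#   csv_data, error = scrape_youtube_comments("YOUR_API_KEY", "UCxxxxxxxxxxxxxxxxxxxxxx", 5, 0, 0, "")
#   if error is None:
#       open("comments.csv", "w", encoding="utf-8").write(csv_data)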
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("### YouTube Comments Extractor")
    api_key = gr.Textbox(label="YouTube API Key", type="password")
    channel_name_or_id = gr.Textbox(label="Channel Name or ID")
    num_videos = gr.Slider(label="Number of Videos to Scrape", minimum=1, maximum=50, step=1, value=10)
    min_likes = gr.Slider(label="Minimum Likes on Comments", minimum=0, maximum=100, step=1, value=0)
    min_replies = gr.Slider(label="Minimum Replies on Comments", minimum=0, maximum=50, step=1, value=0)
    filter_date = gr.Textbox(label="Filter Comments After This Date (YYYY-MM-DD, Optional)")
    output_file = gr.File(label="Download Extracted Comments as CSV")
    error_msg = gr.Textbox(label="Error Message", interactive=False)

    def main(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
        csv_data, error = scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date)
        if error:
            return None, error
        # gr.File expects a file path, so write the CSV text to a temporary file and return its path
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False, encoding='utf-8') as f:
            f.write(csv_data)
        return f.name, None

    extract_button = gr.Button("Extract Comments")
    extract_button.click(main, [api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date], [output_file, error_msg])
demo.launch()
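# Run locally with `python app.py`; Gradio typically serves the UI at http://127.0.0.1:7860.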