import gradio as gr
import pandas as pd
import os
import tempfile
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


# Function to get the channel ID from the channel name
def get_channel_id(api_key, channel_name):
    youtube = build('youtube', 'v3', developerKey=api_key)
    try:
        response = youtube.search().list(
            part='snippet',
            q=channel_name,
            type='channel',
            maxResults=1
        ).execute()
        if response['items']:
            # Return a (channel_id, error) tuple so callers can always unpack two values
            return response['items'][0]['id']['channelId'], None
        else:
            return None, "No channel found with that name."
    except HttpError as e:
        return None, f"Error fetching channel ID: {e}"

# Function to fetch videos from a channel
def get_channel_videos(api_key, channel_id, max_results):
    youtube = build('youtube', 'v3', developerKey=api_key)
    videos = []
    try:
        response = youtube.search().list(
            part='snippet',
            channelId=channel_id,
            maxResults=int(max_results),  # sliders may pass floats; the API expects an int
            type='video',
            order='date'
        ).execute()
        for item in response['items']:
            videos.append({
                'video_id': item['id']['videoId'],
                'title': item['snippet']['title']
            })
    except HttpError as e:
        return None, f"Error fetching videos: {e}"
    return videos, None

# Function to fetch comments from a video
def get_video_comments(api_key, video_id, filters):
    youtube = build('youtube', 'v3', developerKey=api_key)
    comments = []
    next_page_token = None
    while True:
        try:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token
            ).execute()
            for item in response['items']:
                comment = item['snippet']['topLevelComment']['snippet']
                published_at = comment['publishedAt']
                like_count = comment['likeCount']
                reply_count = item['snippet']['totalReplyCount']
                # Apply filters
                if (
                    like_count >= filters['likes'] and
                    reply_count >= filters['replies'] and
                    (not filters['date'] or published_at >= filters['date'])
                ):
                    comments.append({
                        'VideoID': video_id,
                        'Channel': comment.get('authorChannelUrl', ''),
                        'CommentedDateTime': published_at,
                        'NumOfCommentLikes': like_count,
                        'NumOfReplies': reply_count,
                        'Comment': comment['textDisplay'],
                        # authorChannelId can be missing (e.g. deleted accounts)
                        'CommentedUserID': comment.get('authorChannelId', {}).get('value', '')
                    })
                    # Handle replies (if any) for threads that passed the filters
                    if 'replies' in item:
                        for reply in item['replies']['comments']:
                            reply_snippet = reply['snippet']
                            comments.append({
                                'VideoID': video_id,
                                'Channel': reply_snippet.get('authorChannelUrl', ''),
                                'CommentedDateTime': reply_snippet['publishedAt'],
                                'NumOfCommentLikes': reply_snippet['likeCount'],
                                'NumOfReplies': 0,
                                'Comment': reply_snippet['textDisplay'],
                                'CommentedUserID': reply_snippet.get('authorChannelId', {}).get('value', '')
                            })
            next_page_token = response.get('nextPageToken')
            if not next_page_token:
                break
        except HttpError as e:
            return None, f"Error fetching comments: {e}"
    return comments, None

# Main function
def scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
    if not api_key or not channel_name_or_id:
        return None, "API key and channel name/ID are required."
    # Channel IDs start with "UC"; anything else is treated as a channel name to resolve
    if channel_name_or_id.startswith("UC"):
        channel_id = channel_name_or_id
        error = None
    else:
        channel_id, error = get_channel_id(api_key, channel_name_or_id)
    if error:
        return None, error
    videos, error = get_channel_videos(api_key, channel_id, num_videos)
    if error:
        return None, error
    all_comments = []
    filters = {
        'likes': min_likes,
        'replies': min_replies,
        'date': filter_date if filter_date else None
    }
    for video in videos:
        video_comments, error = get_video_comments(api_key, video['video_id'], filters)
        if error:
            return None, error
        all_comments.extend(video_comments)
    if all_comments:
        df_comments = pd.DataFrame(all_comments)
        # Write to a temporary CSV file so the gr.File output can serve it as a download
        csv_path = os.path.join(tempfile.gettempdir(), "youtube_comments.csv")
        df_comments.to_csv(csv_path, index=False)
        return csv_path, None
    else:
        return None, "No comments found for the selected videos."

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("### YouTube Comments Extractor")
    api_key = gr.Textbox(label="YouTube API Key", type="password")
    channel_name_or_id = gr.Textbox(label="Channel Name or ID")
    num_videos = gr.Slider(label="Number of Videos to Scrape", minimum=1, maximum=50, step=1, value=10)
    min_likes = gr.Slider(label="Minimum Likes on Comments", minimum=0, maximum=100, step=1, value=0)
    min_replies = gr.Slider(label="Minimum Replies on Comments", minimum=0, maximum=50, step=1, value=0)
    filter_date = gr.Textbox(label="Filter Comments After This Date (YYYY-MM-DD, Optional)")
    output_file = gr.File(label="Download Extracted Comments as CSV")
    error_msg = gr.Textbox(label="Error Message", interactive=False)

    def main(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
        csv_path, error = scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date)
        if error:
            return None, error
        # Clear any previous error message on success
        return csv_path, ""

    extract_button = gr.Button("Extract Comments")
    extract_button.click(main, [api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date], [output_file, error_msg])

demo.launch()
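
# Usage sketch (not part of the Space's UI): the scraper can also be called
# directly from Python, bypassing Gradio. The API key and channel ID below are
# placeholders, not real values.
#
# csv_path, error = scrape_youtube_comments(
#     api_key="YOUR_API_KEY",
#     channel_name_or_id="UCxxxxxxxxxxxxxxxxxxxxxx",
#     num_videos=5,
#     min_likes=10,
#     min_replies=1,
#     filter_date="2024-01-01",
# )
# if error:
#     print(error)
# else:
#     print(f"Comments written to {csv_path}")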