Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from googleapiclient.discovery import build | |
| from googleapiclient.errors import HttpError | |
# Fetch a video's top-level comments, keeping only those that match the optional filters
def get_filtered_video_comments(api_key, video_id, username_filter, text_filter):
    """Fetch a video's top-level comments that match the optional filters.

    Pages through ``commentThreads().list`` (100 threads per request) until
    the response carries no ``nextPageToken``.

    Args:
        api_key: Key passed to ``build`` as ``developerKey``.
        video_id: ID of the video whose comment threads are listed.
        username_filter: Case-insensitive substring that the author's display
            name must contain. A falsy value disables this filter.
        text_filter: Case-insensitive substring that the comment text must
            contain. A falsy value disables this filter.

    Returns:
        ``(comments, None)`` on success, where ``comments`` is a list of dicts
        with the keys ``VideoID``, ``AuthorName``, ``CommentedDateTime``,
        ``CommentText``, ``NumOfCommentLikes`` and ``CommentedUserID``.
        ``(None, message)`` if a request raises ``HttpError``; comments
        collected from earlier pages are discarded in that case.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Lower-case the filter strings once instead of once per comment.
    username_needle = username_filter.lower() if username_filter else ''
    text_needle = text_filter.lower() if text_filter else ''

    comments = []
    next_page_token = None
    while True:
        # Only the request itself can raise HttpError, so keep the try narrow.
        try:
            # NOTE(review): only item['snippet'] is read below; the 'replies'
            # part looks unused and could probably be dropped - confirm.
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token
            ).execute()
        except HttpError as e:
            return None, f"Error fetching comments: {e}"

        for item in response.get('items', []):
            comment = item['snippet']['topLevelComment']['snippet']
            author_name = comment.get('authorDisplayName', '')
            comment_text = comment.get('textDisplay', '')

            # An empty needle is a substring of every string, so a disabled
            # filter always passes.
            if username_needle not in author_name.lower():
                continue
            if text_needle not in comment_text.lower():
                continue

            # Optional fields are read defensively: a missing key would
            # otherwise raise an uncaught KeyError and abort the whole fetch.
            author_channel = comment.get('authorChannelId') or {}
            comments.append({
                'VideoID': video_id,
                'AuthorName': author_name,
                'CommentedDateTime': comment.get('publishedAt', ''),
                'CommentText': comment_text,
                'NumOfCommentLikes': comment.get('likeCount', 0),
                'CommentedUserID': author_channel.get('value', ''),
            })

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return comments, None
# Validate the inputs, fetch the matching comments, and render them as CSV text
def scrape_filtered_comments(api_key, video_id, username_filter, text_filter):
    """Validate the inputs, fetch matching comments, and render them as CSV.

    Args:
        api_key: YouTube API key; required.
        video_id: ID of the video to read comments from; required.
        username_filter: Optional author-name filter, forwarded unchanged to
            ``get_filtered_video_comments``.
        text_filter: Optional comment-text filter, forwarded unchanged to
            ``get_filtered_video_comments``.

    Returns:
        ``(csv_text, None)`` when at least one comment matched, where
        ``csv_text`` is the comments rendered by ``DataFrame.to_csv`` without
        the index column. Otherwise ``(None, error_message)``.
    """
    # Trim pasted whitespace/newlines so that a blank-looking value is
    # rejected here instead of being sent to the API.
    api_key = (api_key or "").strip()
    video_id = (video_id or "").strip()
    if not api_key or not video_id:
        return None, "API key and video ID are required."

    comments, error = get_filtered_video_comments(api_key, video_id, username_filter, text_filter)
    if error:
        return None, error
    if not comments:
        return None, "No comments matching the criteria were found."

    return pd.DataFrame(comments).to_csv(index=False), None
| # Gradio Interface | |
# Build the UI: four text inputs, a downloadable CSV output, and an error box.
with gr.Blocks() as demo:
    gr.Markdown("### YouTube Filtered Comments Extractor")
    api_key = gr.Textbox(label="YouTube API Key", type="password")
    video_id = gr.Textbox(label="Video ID")
    username_filter = gr.Textbox(label="Filter by Username (Optional)")
    text_filter = gr.Textbox(label="Filter by Text in Comments (Optional)")
    output_file = gr.File(label="Download Filtered Comments as CSV")
    error_msg = gr.Textbox(label="Error Message", interactive=False)

    def main(api_key, video_id, username_filter, text_filter):
        """Click handler: return ``(csv_file_path, error_message)``.

        Exactly one of the two values is non-None. They feed the
        ``output_file`` and ``error_msg`` components respectively.
        """
        import tempfile

        csv_data, error = scrape_filtered_comments(api_key, video_id, username_filter, text_filter)
        if error:
            return None, error

        # gr.File expects a path to a file, not the file's contents, so the
        # CSV text is written to a temporary file that outlives this call.
        # newline="" keeps the line endings already present in csv_data.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".csv",
            prefix="filtered_comments_",
            delete=False,
            encoding="utf-8",
            newline="",
        ) as csv_file:
            csv_file.write(csv_data)
        return csv_file.name, None

    extract_button = gr.Button("Extract Filtered Comments")
    extract_button.click(main, [api_key, video_id, username_filter, text_filter], [output_file, error_msg])

demo.launch()