import gradio as gr
import pandas as pd
import os
import tempfile
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


# Function to get the channel ID from the channel name
def get_channel_id(api_key, channel_name):
    youtube = build('youtube', 'v3', developerKey=api_key)
    try:
        response = youtube.search().list(
            part='snippet',
            q=channel_name,
            type='channel',
            maxResults=1
        ).execute()
        if response['items']:
            # Return a (channel_id, error) tuple so callers can always unpack two values
            return response['items'][0]['id']['channelId'], None
        else:
            return None, "No channel found with that name."
    except HttpError as e:
        return None, f"Error fetching channel ID: {e}"

# Function to fetch videos from a channel
def get_channel_videos(api_key, channel_id, max_results):
    youtube = build('youtube', 'v3', developerKey=api_key)
    videos = []
    try:
        response = youtube.search().list(
            part='snippet',
            channelId=channel_id,
            maxResults=int(max_results),  # sliders may pass floats; the API expects an int
            type='video',
            order='date'
        ).execute()
        for item in response['items']:
            videos.append({
                'video_id': item['id']['videoId'],
                'title': item['snippet']['title']
            })
    except HttpError as e:
        return None, f"Error fetching videos: {e}"
    return videos, None

# Function to fetch comments from a video
def get_video_comments(api_key, video_id, filters):
    youtube = build('youtube', 'v3', developerKey=api_key)
    comments = []
    next_page_token = None
    while True:
        try:
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token
            ).execute()
            for item in response['items']:
                comment = item['snippet']['topLevelComment']['snippet']
                published_at = comment['publishedAt']
                like_count = comment['likeCount']
                reply_count = item['snippet']['totalReplyCount']
                # Apply filters
                if (
                    like_count >= filters['likes'] and
                    reply_count >= filters['replies'] and
                    (not filters['date'] or published_at >= filters['date'])
                ):
                    comments.append({
                        'VideoID': video_id,
                        'Channel': comment.get('authorChannelUrl', ''),
                        'CommentedDateTime': published_at,
                        'NumOfCommentLikes': like_count,
                        'NumOfReplies': reply_count,
                        'Comment': comment['textDisplay'],
                        # authorChannelId can be missing (e.g. deleted accounts)
                        'CommentedUserID': comment.get('authorChannelId', {}).get('value', '')
                    })
                    # Handle replies (if any) for threads that passed the filters
                    if 'replies' in item:
                        for reply in item['replies']['comments']:
                            reply_snippet = reply['snippet']
                            comments.append({
                                'VideoID': video_id,
                                'Channel': reply_snippet.get('authorChannelUrl', ''),
                                'CommentedDateTime': reply_snippet['publishedAt'],
                                'NumOfCommentLikes': reply_snippet['likeCount'],
                                'NumOfReplies': 0,
                                'Comment': reply_snippet['textDisplay'],
                                'CommentedUserID': reply_snippet.get('authorChannelId', {}).get('value', '')
                            })
            next_page_token = response.get('nextPageToken')
            if not next_page_token:
                break
        except HttpError as e:
            return None, f"Error fetching comments: {e}"
    return comments, None

# Main function
def scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
    if not api_key or not channel_name_or_id:
        return None, "API key and channel name/ID are required."
    # Channel IDs start with "UC"; anything else is treated as a channel name to resolve
    if channel_name_or_id.startswith("UC"):
        channel_id = channel_name_or_id
        error = None
    else:
        channel_id, error = get_channel_id(api_key, channel_name_or_id)
    if error:
        return None, error
    videos, error = get_channel_videos(api_key, channel_id, num_videos)
    if error:
        return None, error
    all_comments = []
    filters = {
        'likes': min_likes,
        'replies': min_replies,
        'date': filter_date if filter_date else None
    }
    for video in videos:
        video_comments, error = get_video_comments(api_key, video['video_id'], filters)
        if error:
            return None, error
        all_comments.extend(video_comments)
    if all_comments:
        df_comments = pd.DataFrame(all_comments)
        # Write to a temporary CSV file so the gr.File output can serve it as a download
        csv_path = os.path.join(tempfile.gettempdir(), "youtube_comments.csv")
        df_comments.to_csv(csv_path, index=False)
        return csv_path, None
    else:
        return None, "No comments found for the selected videos."

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("### YouTube Comments Extractor")
    api_key = gr.Textbox(label="YouTube API Key", type="password")
    channel_name_or_id = gr.Textbox(label="Channel Name or ID")
    num_videos = gr.Slider(label="Number of Videos to Scrape", minimum=1, maximum=50, step=1, value=10)
    min_likes = gr.Slider(label="Minimum Likes on Comments", minimum=0, maximum=100, step=1, value=0)
    min_replies = gr.Slider(label="Minimum Replies on Comments", minimum=0, maximum=50, step=1, value=0)
    filter_date = gr.Textbox(label="Filter Comments After This Date (YYYY-MM-DD, Optional)")
    output_file = gr.File(label="Download Extracted Comments as CSV")
    error_msg = gr.Textbox(label="Error Message", interactive=False)

    def main(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
        csv_path, error = scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date)
        if error:
            return None, error
        # Clear any previous error message on success
        return csv_path, ""

    extract_button = gr.Button("Extract Comments")
    extract_button.click(main, [api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date], [output_file, error_msg])

demo.launch()
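
# Usage sketch (not part of the Space's UI): the scraper can also be called
# directly from Python, bypassing Gradio. The API key and channel ID below are
# placeholders, not real values.
#
# csv_path, error = scrape_youtube_comments(
#     api_key="YOUR_API_KEY",
#     channel_name_or_id="UCxxxxxxxxxxxxxxxxxxxxxx",
#     num_videos=5,
#     min_likes=10,
#     min_replies=1,
#     filter_date="2024-01-01",
# )
# if error:
#     print(error)
# else:
#     print(f"Comments written to {csv_path}")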