Abu1998 committed
Commit 9214aad · verified · 1 Parent(s): 3f279da

Update app.py

Files changed (1): app.py +136 -94
app.py CHANGED
@@ -1,38 +1,51 @@
-import streamlit as st
+import gradio as gr
 import pandas as pd
 from googleapiclient.discovery import build
 from googleapiclient.errors import HttpError
-import time
 
-# Streamlit UI
-st.title("YouTube Video Comments Extractor")
-
-# User input for API key and search query
-api_key = st.text_input("Enter your YouTube API Key", type="password")
-search_query = st.text_input("Enter the Search Query (e.g., MrBeast)")
-num_videos = st.number_input("Number of Videos to Scrape", min_value=1, max_value=50, step=1)
-
-# Function to search for videos based on a query
-def search_videos(api_key, query, max_results):
+# Function to get the channel ID from the channel name
+def get_channel_id(api_key, channel_name):
     youtube = build('youtube', 'v3', developerKey=api_key)
-    response = youtube.search().list(
-        part='snippet',
-        q=query,
-        type='video',
-        maxResults=max_results
-    ).execute()
+    try:
+        response = youtube.search().list(
+            part='snippet',
+            q=channel_name,
+            type='channel',
+            maxResults=1
+        ).execute()
 
+        if response['items']:
+            return response['items'][0]['id']['channelId'], None
+        else:
+            return None, "No channel found with that name."
+    except HttpError as e:
+        return None, f"Error fetching channel ID: {e}"
+
+# Function to fetch videos from a channel
+def get_channel_videos(api_key, channel_id, max_results):
+    youtube = build('youtube', 'v3', developerKey=api_key)
     videos = []
-    for item in response['items']:
-        videos.append({
-            'video_id': item['id']['videoId'],
-            'title': item['snippet']['title']
-        })
-
-    return videos
-
-# Function to extract comments from a video
-def get_video_comments(api_key, video_id):
+    try:
+        response = youtube.search().list(
+            part='snippet',
+            channelId=channel_id,
+            maxResults=max_results,
+            type='video',
+            order='date'
+        ).execute()
+
+        for item in response['items']:
+            videos.append({
+                'video_id': item['id']['videoId'],
+                'title': item['snippet']['title']
+            })
+    except HttpError as e:
+        return None, f"Error fetching videos: {e}"
+
+    return videos, None
+
+# Function to fetch comments from a video
+def get_video_comments(api_key, video_id, filters):
     youtube = build('youtube', 'v3', developerKey=api_key)
     comments = []
     next_page_token = None
@@ -48,79 +61,108 @@ def get_video_comments(api_key, video_id):
 
             for item in response['items']:
                 comment = item['snippet']['topLevelComment']['snippet']
-                comments.append({
-                    'VideoID': video_id,
-                    'Channel': comment.get('authorChannelUrl', ''),
-                    'CommentedDateTime': comment['publishedAt'],
-                    'NumOfCommentlikes': comment['likeCount'],
-                    'Comment': comment['textDisplay'],
-                    'CommentedUserID': comment['authorChannelId']['value']
-                })
-
-                # Handle replies (if any)
-                if 'replies' in item:
-                    for reply in item['replies']['comments']:
-                        reply_snippet = reply['snippet']
-                        comments.append({
-                            'VideoID': video_id,
-                            'Channel': reply_snippet.get('authorChannelUrl', ''),
-                            'CommentedDateTime': reply_snippet['publishedAt'],
-                            'NumOfCommentlikes': reply_snippet['likeCount'],
-                            'Comment': reply_snippet['textDisplay'],
-                            'CommentedUserID': reply_snippet['authorChannelId']['value']
-                        })
+                published_at = comment['publishedAt']
+                like_count = comment['likeCount']
+                reply_count = item['snippet']['totalReplyCount']
+
+                # Apply filters
+                if (
+                    like_count >= filters['likes'] and
+                    reply_count >= filters['replies'] and
+                    (not filters['date'] or published_at >= filters['date'])
+                ):
+                    comments.append({
+                        'VideoID': video_id,
+                        'Channel': comment.get('authorChannelUrl', ''),
+                        'CommentedDateTime': published_at,
+                        'NumOfCommentLikes': like_count,
+                        'NumOfReplies': reply_count,
+                        'Comment': comment['textDisplay'],
+                        'CommentedUserID': comment['authorChannelId']['value']
+                    })
+
+                # Handle replies (if any)
+                if 'replies' in item:
+                    for reply in item['replies']['comments']:
+                        reply_snippet = reply['snippet']
+                        comments.append({
+                            'VideoID': video_id,
+                            'Channel': reply_snippet.get('authorChannelUrl', ''),
+                            'CommentedDateTime': reply_snippet['publishedAt'],
+                            'NumOfCommentLikes': reply_snippet['likeCount'],
+                            'NumOfReplies': 0,
+                            'Comment': reply_snippet['textDisplay'],
+                            'CommentedUserID': reply_snippet['authorChannelId']['value']
+                        })
 
             next_page_token = response.get('nextPageToken')
             if not next_page_token:
                 break
 
         except HttpError as e:
-            st.error(f"An error occurred while fetching comments: {e}")
-            break
+            return None, f"Error fetching comments: {e}"
 
-    return comments
+    return comments, None
 
-# Action button
-if st.button("Start Scraping"):
-    if api_key and search_query and num_videos:
-        st.text("Searching for videos...")
-        videos = search_videos(api_key, search_query, num_videos)
-
-        if not videos:
-            st.warning("No videos found for the given query.")
-        else:
-            st.text(f"Found {len(videos)} videos. Extracting comments...")
-            all_comments = []
-            progress_bar = st.progress(0)
-
-            for idx, video in enumerate(videos):
-                video_id = video['video_id']
-                st.text(f"Fetching comments for video: {video['title']} (ID: {video_id})")
-                video_comments = get_video_comments(api_key, video_id)
-                all_comments.extend(video_comments)
-
-                # Update progress bar
-                progress = (idx + 1) / len(videos)
-                progress_bar.progress(progress)
-
-            # Save all comments to a CSV file
-            if all_comments:
-                df_comments = pd.DataFrame(all_comments)
-                csv_file = f"{search_query.replace(' ', '_')}_comments.csv"
-                df_comments.to_csv(csv_file, index=False)
-                st.success(f"Comments extracted and saved to {csv_file}")
-
-                # Show dataframe details
-                st.write("First 5 rows of the extracted comments:")
-                st.dataframe(df_comments.head())
-
-                st.download_button(
-                    label="Download CSV",
-                    data=df_comments.to_csv(index=False),
-                    file_name=csv_file,
-                    mime='text/csv'
-                )
-            else:
-                st.warning("No comments found for the selected videos.")
+# Main function
+def scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
+    if not api_key or not channel_name_or_id:
+        return None, "API key and channel name/ID are required."
+
+    if channel_name_or_id.startswith("UC"):
+        channel_id = channel_name_or_id
+        error = None
+    else:
+        channel_id, error = get_channel_id(api_key, channel_name_or_id)
+
+    if error:
+        return None, error
+
+    videos, error = get_channel_videos(api_key, channel_id, num_videos)
+    if error:
+        return None, error
+
+    all_comments = []
+    filters = {
+        'likes': min_likes,
+        'replies': min_replies,
+        'date': filter_date if filter_date else None
+    }
+
+    for video in videos:
+        video_comments, error = get_video_comments(api_key, video['video_id'], filters)
+        if error:
+            return None, error
+        all_comments.extend(video_comments)
+
+    if all_comments:
+        df_comments = pd.DataFrame(all_comments)
+        csv_path = "extracted_comments.csv"
+        df_comments.to_csv(csv_path, index=False)
+        return csv_path, None
     else:
-        st.warning("Please enter your API key, search query, and number of videos.")
+        return None, "No comments found for the selected videos."
+
+# Gradio Interface
+with gr.Blocks() as demo:
+    gr.Markdown("### YouTube Comments Extractor")
+    api_key = gr.Textbox(label="YouTube API Key", type="password")
+    channel_name_or_id = gr.Textbox(label="Channel Name or ID")
+    num_videos = gr.Slider(label="Number of Videos to Scrape", minimum=1, maximum=50, step=1, value=10)
+    min_likes = gr.Slider(label="Minimum Likes on Comments", minimum=0, maximum=100, step=1, value=0)
+    min_replies = gr.Slider(label="Minimum Replies on Comments", minimum=0, maximum=50, step=1, value=0)
+    filter_date = gr.Textbox(label="Filter Comments After This Date (YYYY-MM-DD, Optional)")
+
+    output_file = gr.File(label="Download Extracted Comments as CSV")
+    error_msg = gr.Textbox(label="Error Message", interactive=False)
+
+    def main(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
+        csv_path, error = scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date)
+        if error:
+            return None, error
+        else:
+            return csv_path, None
+
+    extract_button = gr.Button("Extract Comments")
+    extract_button.click(main, [api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date], [output_file, error_msg])
+
+demo.launch()
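
Both hunks skip over the unchanged middle of get_video_comments (old lines 39-47 / new lines 52-60), so the actual comment-fetching request never appears in the diff. From the surrounding context lines (the loop break, next_page_token, and the except HttpError handler), that collapsed region is presumably the standard commentThreads pagination loop. The sketch below shows that pattern under those assumptions; the function name fetch_comment_pages and the page size are hypothetical, and this is not the committed code.

# Inferred shape of the collapsed pagination inside get_video_comments;
# a sketch under stated assumptions, not part of this commit.
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

def fetch_comment_pages(api_key, video_id):
    youtube = build('youtube', 'v3', developerKey=api_key)
    pages = []
    next_page_token = None
    while True:
        try:
            # commentThreads().list returns top-level comments, plus a
            # limited subset of replies when part includes 'replies'.
            response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                pageToken=next_page_token,  # None fetches the first page
                maxResults=100              # assumed page size (API maximum)
            ).execute()
            pages.append(response)
            next_page_token = response.get('nextPageToken')
            if not next_page_token:
                break                       # no more pages
        except HttpError as e:
            print(f"Error fetching comments: {e}")
            break
    return pages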
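
Because a gr.File output expects a path on disk rather than raw file contents, scrape_youtube_comments writes the CSV to extracted_comments.csv and returns that path. For a quick end-to-end check without the UI, something like the following would work; it is a sketch, not part of the commit, and assumes demo.launch() is guarded by `if __name__ == "__main__":` so the import does not start the server, a valid key in the YOUTUBE_API_KEY environment variable, and an example channel name.

# Headless smoke test for the pipeline above (a sketch, not part of the commit).
# Assumes: app.py guards demo.launch() behind `if __name__ == "__main__":`,
# YOUTUBE_API_KEY is set, and "MrBeast" stands in for any real channel name.
import os

from app import scrape_youtube_comments

csv_path, error = scrape_youtube_comments(
    api_key=os.environ["YOUTUBE_API_KEY"],
    channel_name_or_id="MrBeast",  # a name is resolved to an ID via get_channel_id
    num_videos=3,                  # small sample to stay within API quota
    min_likes=10,                  # keep only comments with at least 10 likes
    min_replies=0,
    filter_date="2024-01-01",      # ISO dates compare lexicographically with publishedAt
)

if error:
    print("Extraction failed:", error)
else:
    print("Comments written to", csv_path)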