Spaces:

Abu1998
/

Youtube_Likes_Extractor

Sleeping

App Files Files Community

Abu1998 committed on Dec 9, 2024

Commit

9214aad

verified ·

1 Parent(s): 3f279da

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -94

app.py CHANGED Viewed

@@ -1,38 +1,51 @@
-import streamlit as st
 import pandas as pd
 from googleapiclient.discovery import build
 from googleapiclient.errors import HttpError
-import time
-# Streamlit UI
-st.title("YouTube Video Comments Extractor")
-# User input for API key and search query
-api_key = st.text_input("Enter your YouTube API Key", type="password")
-search_query = st.text_input("Enter the Search Query (e.g., MrBeast)")
-num_videos = st.number_input("Number of Videos to Scrape", min_value=1, max_value=50, step=1)
-# Function to search for videos based on a query
-def search_videos(api_key, query, max_results):
     youtube = build('youtube', 'v3', developerKey=api_key)
-    response = youtube.search().list(
-        part='snippet',
-        q=query,
-        type='video',
-        maxResults=max_results
-    ).execute()
     videos = []
-    for item in response['items']:
-        videos.append({
-            'video_id': item['id']['videoId'],
-            'title': item['snippet']['title']
-        })
-    return videos
-# Function to extract comments from a video
-def get_video_comments(api_key, video_id):
     youtube = build('youtube', 'v3', developerKey=api_key)
     comments = []
     next_page_token = None
@@ -48,79 +61,108 @@ def get_video_comments(api_key, video_id):
             for item in response['items']:
                 comment = item['snippet']['topLevelComment']['snippet']
-                comments.append({
-                    'VideoID': video_id,
-                    'Channel': comment.get('authorChannelUrl', ''),
-                    'CommentedDateTime': comment['publishedAt'],
-                    'NumOfCommentlikes': comment['likeCount'],
-                    'Comment': comment['textDisplay'],
-                    'CommentedUserID': comment['authorChannelId']['value']
-                })
-                # Handle replies (if any)
-                if 'replies' in item:
-                    for reply in item['replies']['comments']:
-                        reply_snippet = reply['snippet']
-                        comments.append({
-                            'VideoID': video_id,
-                            'Channel': reply_snippet.get('authorChannelUrl', ''),
-                            'CommentedDateTime': reply_snippet['publishedAt'],
-                            'NumOfCommentlikes': reply_snippet['likeCount'],
-                            'Comment': reply_snippet['textDisplay'],
-                            'CommentedUserID': reply_snippet['authorChannelId']['value']
-                        })
             next_page_token = response.get('nextPageToken')
             if not next_page_token:
                 break
         except HttpError as e:
-            st.error(f"An error occurred while fetching comments: {e}")
-            break
-    return comments
-# Action button
-if st.button("Start Scraping"):
-    if api_key and search_query and num_videos:
-        st.text("Searching for videos...")
-        videos = search_videos(api_key, search_query, num_videos)
-        if not videos:
-            st.warning("No videos found for the given query.")
-        else:
-            st.text(f"Found {len(videos)} videos. Extracting comments...")
-            all_comments = []
-            progress_bar = st.progress(0)
-            for idx, video in enumerate(videos):
-                video_id = video['video_id']
-                st.text(f"Fetching comments for video: {video['title']} (ID: {video_id})")
-                video_comments = get_video_comments(api_key, video_id)
-                all_comments.extend(video_comments)
-                # Update progress bar
-                progress = (idx + 1) / len(videos)
-                progress_bar.progress(progress)
-            # Save all comments to a CSV file
-            if all_comments:
-                df_comments = pd.DataFrame(all_comments)
-                csv_file = f"{search_query.replace(' ', '_')}_comments.csv"
-                df_comments.to_csv(csv_file, index=False)
-                st.success(f"Comments extracted and saved to {csv_file}")
-                # Show dataframe details
-                st.write("First 5 rows of the extracted comments:")
-                st.dataframe(df_comments.head())
-                st.download_button(
-                    label="Download CSV",
-                    data=df_comments.to_csv(index=False),
-                    file_name=csv_file,
-                    mime='text/csv'
-                )
-            else:
-                st.warning("No comments found for the selected videos.")
     else:
-        st.warning("Please enter your API key, search query, and number of videos.")

+import gradio as gr
 import pandas as pd
 from googleapiclient.discovery import build
 from googleapiclient.errors import HttpError
+# Function to get the channel ID from the channel name
+def get_channel_id(api_key, channel_name):
     youtube = build('youtube', 'v3', developerKey=api_key)
+    try:
+        response = youtube.search().list(
+            part='snippet',
+            q=channel_name,
+            type='channel',
+            maxResults=1
+        ).execute()
+        if response['items']:
+            return response['items'][0]['id']['channelId']
+        else:
+            return None, "No channel found with that name."
+    except HttpError as e:
+        return None, f"Error fetching channel ID: {e}"
+# Function to fetch videos from a channel
+def get_channel_videos(api_key, channel_id, max_results):
+    youtube = build('youtube', 'v3', developerKey=api_key)
     videos = []
+    try:
+        response = youtube.search().list(
+            part='snippet',
+            channelId=channel_id,
+            maxResults=max_results,
+            type='video',
+            order='date'
+        ).execute()
+        for item in response['items']:
+            videos.append({
+                'video_id': item['id']['videoId'],
+                'title': item['snippet']['title']
+            })
+    except HttpError as e:
+        return None, f"Error fetching videos: {e}"
+    return videos, None
+# Function to fetch comments from a video
+def get_video_comments(api_key, video_id, filters):
     youtube = build('youtube', 'v3', developerKey=api_key)
     comments = []
     next_page_token = None
             for item in response['items']:
                 comment = item['snippet']['topLevelComment']['snippet']
+                published_at = comment['publishedAt']
+                like_count = comment['likeCount']
+                reply_count = item['snippet']['totalReplyCount']
+                # Apply filters
+                if (
+                    like_count >= filters['likes'] and
+                    reply_count >= filters['replies'] and
+                    (not filters['date'] or published_at >= filters['date'])
+                ):
+                    comments.append({
+                        'VideoID': video_id,
+                        'Channel': comment.get('authorChannelUrl', ''),
+                        'CommentedDateTime': published_at,
+                        'NumOfCommentLikes': like_count,
+                        'NumOfReplies': reply_count,
+                        'Comment': comment['textDisplay'],
+                        'CommentedUserID': comment['authorChannelId']['value']
+                    })
+                    # Handle replies (if any)
+                    if 'replies' in item:
+                        for reply in item['replies']['comments']:
+                            reply_snippet = reply['snippet']
+                            comments.append({
+                                'VideoID': video_id,
+                                'Channel': reply_snippet.get('authorChannelUrl', ''),
+                                'CommentedDateTime': reply_snippet['publishedAt'],
+                                'NumOfCommentLikes': reply_snippet['likeCount'],
+                                'NumOfReplies': 0,
+                                'Comment': reply_snippet['textDisplay'],
+                                'CommentedUserID': reply_snippet['authorChannelId']['value']
+                            })
             next_page_token = response.get('nextPageToken')
             if not next_page_token:
                 break
         except HttpError as e:
+            return None, f"Error fetching comments: {e}"
+    return comments, None
+# Main function
+def scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
+    if not api_key or not channel_name_or_id:
+        return None, "API key and channel name/ID are required."
+    if "UC" in channel_name_or_id:
+        channel_id = channel_name_or_id
+        error = None
+    else:
+        channel_id, error = get_channel_id(api_key, channel_name_or_id)
+    if error:
+        return None, error
+    videos, error = get_channel_videos(api_key, channel_id, num_videos)
+    if error:
+        return None, error
+    all_comments = []
+    filters = {
+        'likes': min_likes,
+        'replies': min_replies,
+        'date': filter_date if filter_date else None
+    }
+    for video in videos:
+        video_comments, error = get_video_comments(api_key, video['video_id'], filters)
+        if error:
+            return None, error
+        all_comments.extend(video_comments)
+    if all_comments:
+        df_comments = pd.DataFrame(all_comments)
+        csv_data = df_comments.to_csv(index=False)
+        return csv_data, None
     else:
+        return None, "No comments found for the selected videos."
+# Gradio Interface
+with gr.Blocks() as demo:
+    gr.Markdown("### YouTube Comments Extractor")
+    api_key = gr.Textbox(label="YouTube API Key", type="password")
+    channel_name_or_id = gr.Textbox(label="Channel Name or ID")
+    num_videos = gr.Slider(label="Number of Videos to Scrape", minimum=1, maximum=50, step=1, value=10)
+    min_likes = gr.Slider(label="Minimum Likes on Comments", minimum=0, maximum=100, step=1, value=0)
+    min_replies = gr.Slider(label="Minimum Replies on Comments", minimum=0, maximum=50, step=1, value=0)
+    filter_date = gr.Textbox(label="Filter Comments After This Date (YYYY-MM-DD, Optional)")
+    output_file = gr.File(label="Download Extracted Comments as CSV")
+    error_msg = gr.Textbox(label="Error Message", interactive=False)
+    def main(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date):
+        csv_data, error = scrape_youtube_comments(api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date)
+        if error:
+            return None, error
+        else:
+            return csv_data, None
+    extract_button = gr.Button("Extract Comments")
+    extract_button.click(main, [api_key, channel_name_or_id, num_videos, min_likes, min_replies, filter_date], [output_file, error_msg])
+demo.launch()