tushifire committed on
Commit 77b5adc · 1 Parent(s): 3e555a8

Increase volume of comments

Files changed (1)
  1. app.py +30 -15
app.py CHANGED
@@ -4,29 +4,44 @@ import googleapiclient.discovery
 import pandas as pd
 import re
 
+pd.set_option("display.max_colwidth", -1)
+def extract_all_comments(video_id,page_token='',comments_list =[]):
+
+    request = youtube.commentThreads().list(part = ['id','snippet'],
+                maxResults = 100,videoId = video_id ,pageToken= page_token)
+    response = request.execute()
+    for comment_details in response['items']:
+        text_dsiplay = comment_details.get('snippet').get('topLevelComment').get('snippet').get('textDisplay')
+        text_original = comment_details.get('snippet').get('topLevelComment').get('snippet').get('textOriginal')
+        likes = comment_details.get('snippet').get('topLevelComment').get('snippet').get('likeCount')
+        published_at = comment_details.get('snippet').get('topLevelComment').get('snippet').get('publishedAt')
+        updated_at = comment_details.get('snippet').get('topLevelComment').get('snippet').get('updatedAt')
+        reply_count = comment_details.get('snippet').get('totalReplyCount')
+        comments_list.append({'text_dsiplay':text_dsiplay,'text_original':text_original,
+                              'likes':likes,'published_at':published_at,'updated_at':updated_at,
+                              'reply_count':reply_count})
+
+    if 'nextPageToken' in response.keys():
+        if len(comments_list) < 500:
+
+            extract_all_comments(video_id = video_id,page_token= response['nextPageToken'],comments_list= comments_list)
+        else:
+            print("Limiting results for speed up")
+
+    return comments_list
+
 def extract_comments_from_video(video_id,youtube_api_key):
     try:
         youtube = googleapiclient.discovery.build(
             api_service_name, api_version, developerKey = youtube_api_key)
-
-        request = youtube.commentThreads().list(part = ['id','snippet'],maxResults = 100,videoId = video_id)
-        response = request.execute()
+
+        found_comments = extract_all_comments(video_id,page_token='')
+
+        comments_df = pd.DataFrame(found_comments)
     except:
         print("An exception occurred")
         return pd.DataFrame()
-    comments_df = pd.json_normalize(response['items'],sep='_')
 
-    try:
-        comments_df=comments_df[['snippet_topLevelComment_snippet_textDisplay',
-                                 'snippet_topLevelComment_snippet_textOriginal',
-                                 'snippet_topLevelComment_snippet_viewerRating',
-                                 'snippet_topLevelComment_snippet_likeCount',
-                                 'snippet_topLevelComment_snippet_publishedAt',
-                                 'snippet_topLevelComment_snippet_updatedAt',
-                                 'snippet_totalReplyCount']]
-    except:
-        print("An exception occurred Key error")
-        return pd.DataFrame()
+
     return comments_df
 
 
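The commit replaces the single commentThreads().list call, which is capped at 100 results per request, with a recursive extract_all_comments helper that follows nextPageToken until roughly 500 comments are collected. As committed, the helper relies on a module-level youtube client (and on api_service_name / api_version defined above this hunk) and uses a mutable default comments_list argument, which can leak state between calls. The following is a minimal self-contained sketch of the same pagination idea, not the app's exact code: it hardcodes the "youtube" / "v3" service name and version, uses an explicit loop instead of recursion, and the max_comments parameter name is introduced here for illustration.

# Sketch only: same pagination idea as the commit, with the client passed in
# explicitly and the accumulator created per call (no mutable default arg).
import googleapiclient.discovery
import pandas as pd


def extract_all_comments(youtube, video_id, max_comments=500):
    """Collect top-level comments for a video, following nextPageToken pages."""
    comments = []
    page_token = ""
    while True:
        kwargs = {"part": "id,snippet", "maxResults": 100, "videoId": video_id}
        if page_token:
            kwargs["pageToken"] = page_token
        response = youtube.commentThreads().list(**kwargs).execute()
        for item in response["items"]:
            top = item["snippet"]["topLevelComment"]["snippet"]
            comments.append({
                "text_display": top.get("textDisplay"),
                "text_original": top.get("textOriginal"),
                "likes": top.get("likeCount"),
                "published_at": top.get("publishedAt"),
                "updated_at": top.get("updatedAt"),
                "reply_count": item["snippet"].get("totalReplyCount"),
            })
        page_token = response.get("nextPageToken", "")
        if not page_token or len(comments) >= max_comments:
            # Stop when pages run out or the cap is reached (the commit's
            # "Limiting results for speed up" cutoff at 500 comments).
            break
    return comments


def extract_comments_from_video(video_id, youtube_api_key):
    try:
        youtube = googleapiclient.discovery.build(
            "youtube", "v3", developerKey=youtube_api_key)
        return pd.DataFrame(extract_all_comments(youtube, video_id))
    except Exception as exc:  # narrower than the bare except in the commit
        print(f"An exception occurred: {exc}")
        return pd.DataFrame()

An explicit pageToken loop avoids deep recursion on long comment threads and keeps the 500-comment cap easy to tune.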