Prathamesh Sarjerao Vaidya committed on
Commit
4d857f2
·
1 Parent(s): 65fbbac

made changes

Browse files
.github/workflows/check.yml CHANGED
@@ -97,6 +97,9 @@ jobs:
97
  path: "**/*.pdf"
98
  retention-days: 30
99
 
 
 
 
100
  - name: Upload to Google Drive
101
  env:
102
  GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
 
97
  path: "**/*.pdf"
98
  retention-days: 30
99
 
100
+ - name: Install Google API dependencies
101
+ run: pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
102
+
103
  - name: Upload to Google Drive
104
  env:
105
  GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
.github/workflows/main.yml CHANGED
@@ -75,6 +75,9 @@ jobs:
75
  path: "**/*.pdf"
76
  retention-days: 30
77
 
 
 
 
78
  - name: Upload to Google Drive
79
  env:
80
  # Primary authentication method (OAuth)
 
75
  path: "**/*.pdf"
76
  retention-days: 30
77
 
78
+ - name: Install Google API dependencies
79
+ run: pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
80
+
81
  - name: Upload to Google Drive
82
  env:
83
  # Primary authentication method (OAuth)
.github/workflows/scripts/upload_to_drive.py CHANGED
@@ -1,394 +1,141 @@
 
1
  import os
2
  import json
3
  import mimetypes
4
- from google.oauth2.credentials import Credentials
5
- from google.auth.transport.requests import Request
6
- from googleapiclient.discovery import build
7
- from googleapiclient.http import MediaFileUpload
8
-
9
- # Load OAuth credentials from environment
10
- oauth_token_json = os.environ['GOOGLE_OAUTH_TOKEN']
11
- token_info = json.loads(oauth_token_json)
12
-
13
- # Create credentials from the token info
14
- credentials = Credentials.from_authorized_user_info(token_info)
15
-
16
- # Refresh the token if needed
17
- if credentials.expired and credentials.refresh_token:
18
- credentials.refresh(Request())
19
-
20
- # Build the Drive service
21
- service = build('drive', 'v3', credentials=credentials)
22
-
23
- # Target folder ID - This is where files will be uploaded
24
- FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
25
-
26
- def get_mime_type(file_path):
27
- mime_type, _ = mimetypes.guess_type(file_path)
28
- return mime_type or 'application/octet-stream'
29
-
30
- def upload_file(file_path, parent_folder_id, drive_service):
31
- file_name = os.path.basename(file_path)
32
-
33
- # Check if file already exists in the specific folder
34
- query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
35
- results = drive_service.files().list(q=query).execute()
36
- items = results.get('files', [])
37
-
38
- media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
39
-
40
- if items:
41
- # Update existing file
42
- file_id = items[0]['id']
43
- updated_file = drive_service.files().update(
44
- fileId=file_id,
45
- media_body=media
46
- ).execute()
47
- print(f'Updated: {file_name} (ID: {updated_file.get("id")})')
48
- else:
49
- # Create new file
50
- file_metadata = {
51
- 'name': file_name,
52
- 'parents': [parent_folder_id]
53
- }
54
- file = drive_service.files().create(
55
- body=file_metadata,
56
- media_body=media,
57
- fields='id'
58
- ).execute()
59
- print(f'Uploaded: {file_name} (ID: {file.get("id")})')
60
-
61
- def create_folder_if_not_exists(folder_name, parent_folder_id, drive_service):
62
- """Create a folder if it doesn't exist and return its ID"""
63
- query = (
64
- f"name='{folder_name}' and '{parent_folder_id}' in parents and "
65
- f"mimeType='application/vnd.google-apps.folder' and trashed=false"
66
- )
67
- results = drive_service.files().list(q=query).execute()
68
- items = results.get('files', [])
69
-
70
- if items:
71
- return items[0]['id']
72
- else:
73
- folder_metadata = {
74
- 'name': folder_name,
75
- 'parents': [parent_folder_id],
76
- 'mimeType': 'application/vnd.google-apps.folder'
77
- }
78
- folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
79
- print(f'Created folder: {folder_name} (ID: {folder.get("id")})')
80
- return folder.get('id')
81
-
82
- def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None, exclude_files=None):
83
- if exclude_dirs is None:
84
- exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
85
- if exclude_files is None:
86
- exclude_files = ['*.md'] # Skip markdown files
87
-
88
- import fnmatch
89
-
90
- for root, dirs, files in os.walk(local_path):
91
- # Remove excluded directories
92
- dirs[:] = [d for d in dirs if d not in exclude_dirs]
93
-
94
- # Calculate relative path from the root
95
- rel_path = os.path.relpath(root, local_path)
96
- current_folder_id = parent_folder_id
97
-
98
- # Create nested folders if needed
99
- if rel_path != '.':
100
- path_parts = rel_path.split(os.sep)
101
- for part in path_parts:
102
- current_folder_id = create_folder_if_not_exists(part, current_folder_id, drive_service)
103
-
104
- # Upload files in current directory
105
- for file in files:
106
- should_skip = False
107
- for pattern in exclude_files:
108
- if fnmatch.fnmatch(file, pattern):
109
- should_skip = True
110
- break
111
-
112
- if should_skip:
113
- print(f'Skipping {file} (excluded file type)')
114
- continue
115
-
116
- file_path = os.path.join(root, file)
117
- try:
118
- upload_file(file_path, current_folder_id, drive_service)
119
- except Exception as e:
120
- print(f'Error uploading {file_path}: {e}')
121
-
122
- # Test folder permissions first
123
- try:
124
- test_query = f"'{FOLDER_ID}' in parents and trashed=false"
125
- test_results = service.files().list(q=test_query, pageSize=1).execute()
126
- print(f"Successfully accessed folder. Found {len(test_results.get('files', []))} items (showing 1 max)")
127
- except Exception as e:
128
- print(f"ERROR: Cannot access folder {FOLDER_ID}. Error: {e}")
129
- exit(1)
130
-
131
- # Upload all files to Google Drive (excluding MD files)
132
- print("Starting upload to Google Drive...")
133
- upload_directory('.', FOLDER_ID, service)
134
-
135
- print("Upload completed - MD files were skipped, PDFs were uploaded!")
136
- import os
137
- import json
138
- import mimetypes
139
  from google.oauth2.credentials import Credentials
140
  from google.oauth2 import service_account
141
  from google.auth.transport.requests import Request
142
  from googleapiclient.discovery import build
143
  from googleapiclient.http import MediaFileUpload
144
- import sys
145
 
146
- # Target folder ID - This is where files will be uploaded
147
  FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
148
 
149
- def create_oauth_service():
150
- """Try to create Google Drive service using OAuth credentials"""
151
- try:
152
- print("🔐 Attempting OAuth authentication...")
153
-
154
- # Load OAuth credentials from environment
155
- oauth_token_json = os.environ.get('GOOGLE_OAUTH_TOKEN')
156
- if not oauth_token_json:
157
- print("❌ GOOGLE_OAUTH_TOKEN not found in environment")
158
- return None
159
-
160
- token_info = json.loads(oauth_token_json)
161
-
162
- # Create credentials from the token info
163
- credentials = Credentials.from_authorized_user_info(token_info)
164
-
165
- # Refresh the token if needed
166
- if credentials.expired and credentials.refresh_token:
167
- print("🔄 Token expired, attempting refresh...")
168
- credentials.refresh(Request())
169
- print("✅ Token refreshed successfully")
170
-
171
- # Test the credentials by building service
172
- service = build('drive', 'v3', credentials=credentials)
173
-
174
- # Test access to the folder
175
- test_query = f"'{FOLDER_ID}' in parents and trashed=false"
176
- test_results = service.files().list(q=test_query, pageSize=1).execute()
177
-
178
- print("✅ OAuth authentication successful!")
179
- return service
180
-
181
- except Exception as e:
182
- print(f"❌ OAuth authentication failed: {str(e)}")
183
- return None
184
-
185
- def create_service_account_service():
186
- """Try to create Google Drive service using Service Account credentials"""
187
- try:
188
- print("🔐 Attempting Service Account authentication...")
189
-
190
- # Load service account credentials from environment
191
- service_account_json = os.environ.get('GOOGLE_SERVICE_ACCOUNT_KEY')
192
- if not service_account_json:
193
- print("❌ GOOGLE_SERVICE_ACCOUNT_KEY not found in environment")
194
- return None
195
-
196
- credentials_info = json.loads(service_account_json)
197
-
198
- # Create credentials from service account
199
- credentials = service_account.Credentials.from_service_account_info(
200
- credentials_info,
201
- scopes=['https://www.googleapis.com/auth/drive.file']
202
- )
203
-
204
- # Build the Drive service
205
- service = build('drive', 'v3', credentials=credentials)
206
-
207
- # Test access to the folder
208
- test_query = f"'{FOLDER_ID}' in parents and trashed=false"
209
- test_results = service.files().list(q=test_query, pageSize=1).execute()
210
-
211
- print("✅ Service Account authentication successful!")
212
- return service
213
-
214
- except Exception as e:
215
- print(f"❌ Service Account authentication failed: {str(e)}")
216
- return None
217
-
218
  def get_drive_service():
219
- """Get Google Drive service with fallback authentication"""
220
- print("🚀 Initializing Google Drive authentication with fallback...")
221
-
222
  # Try OAuth first
223
- service = create_oauth_service()
224
- if service:
225
- return service, "OAuth"
226
-
227
- print("🔄 OAuth failed, trying Service Account fallback...")
 
 
 
 
 
228
 
229
  # Fallback to Service Account
230
- service = create_service_account_service()
231
- if service:
232
- return service, "Service Account"
233
-
234
- # Both methods failed
235
- print("💥 Both authentication methods failed!")
236
- print("\nPlease ensure you have either:")
237
- print("1. GOOGLE_OAUTH_TOKEN secret set with valid OAuth credentials, OR")
238
- print("2. GOOGLE_SERVICE_ACCOUNT_KEY secret set with service account JSON")
239
- print("\nFor Service Account:")
240
- print("- Create a service account in Google Cloud Console")
241
- print("- Share your target folder with the service account email")
242
- print("- Add the service account JSON as GOOGLE_SERVICE_ACCOUNT_KEY secret")
243
 
 
244
  sys.exit(1)
245
 
246
- def get_mime_type(file_path):
247
- mime_type, _ = mimetypes.guess_type(file_path)
248
- return mime_type or 'application/octet-stream'
249
-
250
- def upload_file(file_path, parent_folder_id, drive_service):
251
  file_name = os.path.basename(file_path)
252
-
253
- # Check if file already exists in the specific folder
254
  query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
255
- results = drive_service.files().list(q=query).execute()
256
  items = results.get('files', [])
257
-
258
- media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
259
-
260
  if items:
261
- # Update existing file
262
- file_id = items[0]['id']
263
- updated_file = drive_service.files().update(
264
- fileId=file_id,
265
- media_body=media
266
- ).execute()
267
- print(f'📝 Updated: {file_name} (ID: {updated_file.get("id")})')
268
  else:
269
- # Create new file
270
- file_metadata = {
271
- 'name': file_name,
272
- 'parents': [parent_folder_id]
273
- }
274
- file = drive_service.files().create(
275
- body=file_metadata,
276
- media_body=media,
277
- fields='id'
278
- ).execute()
279
- print(f'📤 Uploaded: {file_name} (ID: {file.get("id")})')
280
-
281
- def create_folder_if_not_exists(folder_name, parent_folder_id, drive_service):
282
- """Create a folder if it doesn't exist and return its ID"""
283
- query = (
284
- f"name='{folder_name}' and '{parent_folder_id}' in parents and "
285
- f"mimeType='application/vnd.google-apps.folder' and trashed=false"
286
- )
287
- results = drive_service.files().list(q=query).execute()
288
  items = results.get('files', [])
289
-
290
  if items:
291
  return items[0]['id']
292
- else:
293
- folder_metadata = {
294
- 'name': folder_name,
295
- 'parents': [parent_folder_id],
296
- 'mimeType': 'application/vnd.google-apps.folder'
297
- }
298
- folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
299
- print(f'📁 Created folder: {folder_name} (ID: {folder.get("id")})')
300
- return folder.get('id')
301
-
302
- def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None, exclude_files=None):
303
- if exclude_dirs is None:
304
- exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
305
- if exclude_files is None:
306
- exclude_files = ['*.md'] # Skip markdown files
307
-
308
- import fnmatch
309
- uploaded_count = 0
310
- skipped_count = 0
311
- error_count = 0
312
-
313
  for root, dirs, files in os.walk(local_path):
314
- # Remove excluded directories
315
  dirs[:] = [d for d in dirs if d not in exclude_dirs]
316
-
317
- # Calculate relative path from the root
318
  rel_path = os.path.relpath(root, local_path)
319
- current_folder_id = parent_folder_id
320
-
321
- # Create nested folders if needed
322
  if rel_path != '.':
323
- path_parts = rel_path.split(os.sep)
324
- for part in path_parts:
325
- current_folder_id = create_folder_if_not_exists(part, current_folder_id, drive_service)
326
-
327
- # Upload files in current directory
328
  for file in files:
329
- should_skip = False
330
- for pattern in exclude_files:
331
- if fnmatch.fnmatch(file, pattern):
332
- should_skip = True
333
- break
334
-
335
- if should_skip:
336
- print(f'⏭️ Skipping {file} (excluded file type)')
337
- skipped_count += 1
338
  continue
339
-
340
- file_path = os.path.join(root, file)
341
  try:
342
- upload_file(file_path, current_folder_id, drive_service)
343
- uploaded_count += 1
344
  except Exception as e:
345
- print(f'Error uploading {file_path}: {e}')
346
- error_count += 1
347
-
348
- return uploaded_count, skipped_count, error_count
349
 
350
  def main():
351
- """Main execution function"""
352
- print("=" * 60)
353
- print("🚀 ROBUST GOOGLE DRIVE UPLOADER WITH FALLBACK")
354
- print("=" * 60)
355
 
356
- # Get Drive service with fallback authentication
357
- service, auth_method = get_drive_service()
358
 
359
- print(f"🎉 Successfully authenticated using: {auth_method}")
360
-
361
- # Test folder permissions
362
  try:
363
- test_query = f"'{FOLDER_ID}' in parents and trashed=false"
364
- test_results = service.files().list(q=test_query, pageSize=1).execute()
365
- print(f"✅ Successfully accessed folder. Found {len(test_results.get('files', []))} items (showing 1 max)")
366
  except Exception as e:
367
- print(f"💥 ERROR: Cannot access folder {FOLDER_ID}")
368
- print(f"Error: {e}")
369
- if auth_method == "Service Account":
370
- print("💡 Make sure to share the folder with the service account email!")
371
  sys.exit(1)
372
-
373
- # Upload all files to Google Drive
374
- print("\n📤 Starting upload to Google Drive...")
375
- print("-" * 40)
376
 
 
377
  uploaded, skipped, errors = upload_directory('.', FOLDER_ID, service)
378
 
379
- print("-" * 40)
380
- print("📊 UPLOAD SUMMARY:")
381
- print(f"✅ Files uploaded: {uploaded}")
382
- print(f"⏭️ Files skipped: {skipped}")
383
- print(f"❌ Errors: {errors}")
384
- print(f"🔐 Authentication method: {auth_method}")
385
- print("=" * 60)
386
 
387
  if errors > 0:
388
- print("⚠️ Some files failed to upload. Check the logs above for details.")
389
  sys.exit(1)
390
- else:
391
- print("🎉 Upload completed successfully!")
392
 
393
  if __name__ == "__main__":
394
  main()
 
1
+ #!/usr/bin/env python3
2
  import os
3
  import json
4
  import mimetypes
5
+ import fnmatch
6
+ import sys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from google.oauth2.credentials import Credentials
8
  from google.oauth2 import service_account
9
  from google.auth.transport.requests import Request
10
  from googleapiclient.discovery import build
11
  from googleapiclient.http import MediaFileUpload
 
12
 
 
13
  FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def get_drive_service():
    """Return an authenticated Google Drive v3 service, or exit the process.

    Authentication is attempted in order:

    1. OAuth user credentials read as JSON from the ``GOOGLE_OAUTH_TOKEN``
       environment variable (refreshed first when expired and a refresh
       token is available).
    2. Service-account credentials read as JSON from the
       ``GOOGLE_SERVICE_ACCOUNT_KEY`` environment variable.

    Each candidate is verified with one listing request against
    ``FOLDER_ID`` before it is returned, so credentials that parse but
    cannot reach the target folder fall through to the next method instead
    of being handed back to the caller.

    Returns:
        The Drive service object produced by ``build('drive', 'v3', ...)``.

    Exits:
        Calls ``sys.exit(1)`` when no method yields a working service.
    """

    def _verified_service(credentials):
        # Build the client and issue a single small request so that a
        # rejected or under-privileged credential fails here, inside the
        # caller's try block, rather than later during the upload.
        service = build('drive', 'v3', credentials=credentials)
        service.files().list(q=f"'{FOLDER_ID}' in parents", pageSize=1).execute()
        return service

    # Try OAuth first
    oauth_token = os.environ.get('GOOGLE_OAUTH_TOKEN')
    if oauth_token:
        try:
            token_info = json.loads(oauth_token)
            credentials = Credentials.from_authorized_user_info(token_info)
            if credentials.expired and credentials.refresh_token:
                credentials.refresh(Request())
            return _verified_service(credentials)
        except Exception as e:
            # Deliberately broad: any failure (bad JSON, refresh error,
            # API error) must be reported and trigger the fallback.
            print(f"OAuth failed: {e}")

    # Fallback to Service Account
    sa_key = os.environ.get('GOOGLE_SERVICE_ACCOUNT_KEY')
    if sa_key:
        try:
            credentials_info = json.loads(sa_key)
            credentials = service_account.Credentials.from_service_account_info(
                credentials_info, scopes=['https://www.googleapis.com/auth/drive.file']
            )
            return _verified_service(credentials)
        except Exception as e:
            # Broad for the same reason as above; this is the last method.
            print(f"Service Account failed: {e}")

    print("Both authentication methods failed!")
    sys.exit(1)
43
 
def upload_file(file_path, parent_folder_id, service):
    """Upload a local file into a Drive folder, updating a same-named file.

    The folder is searched for a non-trashed file with the same base name.
    If one is found, its content is replaced; otherwise a new file is
    created under ``parent_folder_id``.

    Args:
        file_path: Path of the local file to send.
        parent_folder_id: ID of the destination Drive folder.
        service: Drive v3 service object exposing ``files()``.
    """
    file_name = os.path.basename(file_path)

    # Drive query values are delimited by single quotes, so a quote or
    # backslash in the file name must be escaped or the query is malformed.
    escaped_name = file_name.replace('\\', '\\\\').replace("'", "\\'")

    # Check if file exists
    query = f"name='{escaped_name}' and '{parent_folder_id}' in parents and trashed=false"
    results = service.files().list(q=query).execute()
    items = results.get('files', [])

    # Fall back to a generic binary type when the extension is unknown.
    mime_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
    media = MediaFileUpload(file_path, mimetype=mime_type)

    if items:
        # Update existing (first match wins when several share the name)
        service.files().update(fileId=items[0]['id'], media_body=media).execute()
        print(f'Updated: {file_name}')
    else:
        # Create new
        file_metadata = {'name': file_name, 'parents': [parent_folder_id]}
        service.files().create(body=file_metadata, media_body=media).execute()
        print(f'Uploaded: {file_name}')
64
+
def create_folder(folder_name, parent_id, service):
    """Return the ID of ``folder_name`` inside ``parent_id``, creating it if absent.

    Args:
        folder_name: Name of the folder to find or create.
        parent_id: ID of the Drive folder that should contain it.
        service: Drive v3 service object exposing ``files()``.

    Returns:
        The ID of the first matching non-trashed folder, or the ID reported
        by the create call when no match exists.
    """
    # Drive query values are delimited by single quotes, so a quote or
    # backslash in the folder name must be escaped or the query is malformed.
    escaped_name = folder_name.replace('\\', '\\\\').replace("'", "\\'")
    query = (
        f"name='{escaped_name}' and '{parent_id}' in parents "
        "and mimeType='application/vnd.google-apps.folder' and trashed=false"
    )
    results = service.files().list(q=query).execute()
    items = results.get('files', [])

    if items:
        return items[0]['id']

    # The metadata carries the raw (unescaped) name; escaping applies only
    # to the query language above.
    folder_metadata = {
        'name': folder_name,
        'parents': [parent_id],
        'mimeType': 'application/vnd.google-apps.folder'
    }
    folder = service.files().create(body=folder_metadata).execute()
    return folder.get('id')
81
+
def upload_directory(local_path, parent_id, service):
    """Mirror a local directory tree into a Drive folder.

    Directories named ``.git``, ``.github``, ``node_modules`` or
    ``__pycache__`` are not descended into, and files matching ``*.md`` are
    skipped. Every other directory is found or created on Drive, and every
    other file is uploaded into its matching folder. A failure on one file
    is reported and counted; it does not stop the remaining uploads.

    Args:
        local_path: Root of the local tree to upload.
        parent_id: ID of the Drive folder that corresponds to ``local_path``.
        service: Drive v3 service object passed through to the helpers.

    Returns:
        A ``(uploaded, skipped, errors)`` tuple of file counts.
    """
    exclude_dirs = {'.git', '.github', 'node_modules', '__pycache__'}
    exclude_patterns = ['*.md']  # Skip markdown files

    uploaded = skipped = errors = 0

    # Drive folder IDs already resolved, keyed by path relative to
    # local_path. Without this, every ancestor of every directory would be
    # looked up again through the API on each step of the walk.
    folder_ids = {}

    for root, dirs, files in os.walk(local_path):
        # Filter directories in place so os.walk does not descend into them
        dirs[:] = [d for d in dirs if d not in exclude_dirs]

        # Resolve (or create) the Drive folder matching this directory
        rel_path = os.path.relpath(root, local_path)
        current_folder_id = parent_id

        if rel_path != '.':
            prefix = ''
            for part in rel_path.split(os.sep):
                prefix = os.path.join(prefix, part)
                if prefix not in folder_ids:
                    folder_ids[prefix] = create_folder(part, current_folder_id, service)
                current_folder_id = folder_ids[prefix]

        # Upload files
        for file in files:
            # Check exclusions
            if any(fnmatch.fnmatch(file, pattern) for pattern in exclude_patterns):
                print(f'Skipping: {file}')
                skipped += 1
                continue

            try:
                upload_file(os.path.join(root, file), current_folder_id, service)
                uploaded += 1
            except Exception as e:
                # Best effort: report the failure and carry on.
                print(f'Error uploading {file}: {e}')
                errors += 1

    return uploaded, skipped, errors
117
 
def main():
    """Authenticate, confirm the target folder is reachable, then upload.

    Exits with status 1 when the folder cannot be listed or when any file
    failed to upload.
    """
    print("Starting Google Drive upload...")

    # Get service and test access
    service = get_drive_service()
    access_probe = f"'{FOLDER_ID}' in parents"

    try:
        service.files().list(q=access_probe, pageSize=1).execute()
    except Exception as exc:
        print(f"Cannot access folder {FOLDER_ID}: {exc}")
        sys.exit(1)
    else:
        print("Folder access confirmed")

    # Upload files
    counts = upload_directory('.', FOLDER_ID, service)
    uploaded, skipped, errors = counts

    print(f"\nSummary: {uploaded} uploaded, {skipped} skipped, {errors} errors")

    # A non-zero error count makes the process exit with a failure status.
    if errors > 0:
        sys.exit(1)
    print("Upload completed successfully!")
 
139
 
140
  if __name__ == "__main__":
141
  main()