Prathamesh Sarjerao Vaidya
committed on
Commit
·
7d36e8b
1
Parent(s):
3e27995
fixed upload_to_drive & live visuals & results in full mode
Browse files- .github/workflows/main.yml +3 -0
- .github/workflows/scripts/upload_to_drive.py +259 -0
- templates/index.html +4 -1
- web_app.py +8 -5
.github/workflows/main.yml
CHANGED
|
@@ -38,7 +38,10 @@ jobs:
|
|
| 38 |
|
| 39 |
- name: Upload to Google Drive
|
| 40 |
env:
|
|
|
|
| 41 |
GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
|
|
|
|
|
|
|
| 42 |
run: python .github/workflows/scripts/upload_to_drive.py
|
| 43 |
|
| 44 |
- name: Push to Hugging Face hub
|
|
|
|
| 38 |
|
| 39 |
- name: Upload to Google Drive
|
| 40 |
env:
|
| 41 |
+
# Primary authentication method (OAuth)
|
| 42 |
GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
|
| 43 |
+
# Fallback authentication method (Service Account)
|
| 44 |
+
GOOGLE_SERVICE_ACCOUNT_KEY: ${{ secrets.GOOGLE_SERVICE_ACCOUNT_KEY }}
|
| 45 |
run: python .github/workflows/scripts/upload_to_drive.py
|
| 46 |
|
| 47 |
- name: Push to Hugging Face hub
|
.github/workflows/scripts/upload_to_drive.py
CHANGED
|
@@ -133,3 +133,262 @@ print("Starting upload to Google Drive...")
|
|
| 133 |
upload_directory('.', FOLDER_ID, service)
|
| 134 |
|
| 135 |
print("Upload completed - MD files were skipped, PDFs were uploaded!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
upload_directory('.', FOLDER_ID, service)
|
| 134 |
|
| 135 |
print("Upload completed - MD files were skipped, PDFs were uploaded!")
|
| 136 |
+
import os
|
| 137 |
+
import json
|
| 138 |
+
import mimetypes
|
| 139 |
+
from google.oauth2.credentials import Credentials
|
| 140 |
+
from google.oauth2 import service_account
|
| 141 |
+
from google.auth.transport.requests import Request
|
| 142 |
+
from googleapiclient.discovery import build
|
| 143 |
+
from googleapiclient.http import MediaFileUpload
|
| 144 |
+
import sys
|
| 145 |
+
|
| 146 |
+
# Target folder ID - This is where files will be uploaded
|
| 147 |
+
FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
|
| 148 |
+
|
| 149 |
+
def create_oauth_service():
    """Try to create a Google Drive service using OAuth user credentials.

    Reads authorized-user JSON from the ``GOOGLE_OAUTH_TOKEN`` environment
    variable, refreshes the access token when it is expired and a refresh
    token is present, builds a Drive v3 client, and issues a one-item listing
    of ``FOLDER_ID`` so that unusable credentials are detected here rather
    than midway through an upload.

    Returns:
        The Drive service object on success; ``None`` when the environment
        variable is missing or empty, or when any step raises (the reason is
        printed instead of propagated so the caller can fall back).
    """
    try:
        print("π Attempting OAuth authentication...")

        # Load OAuth credentials from environment
        oauth_token_json = os.environ.get('GOOGLE_OAUTH_TOKEN')
        if not oauth_token_json:
            print("β GOOGLE_OAUTH_TOKEN not found in environment")
            return None

        token_info = json.loads(oauth_token_json)

        # Create credentials from the token info
        credentials = Credentials.from_authorized_user_info(token_info)

        # Refresh the token if needed
        if credentials.expired and credentials.refresh_token:
            print("π Token expired, attempting refresh...")
            credentials.refresh(Request())
            print("✅ Token refreshed successfully")

        service = build('drive', 'v3', credentials=credentials)

        # Probe the target folder; only success/failure matters, so the
        # response itself is discarded.
        test_query = f"'{FOLDER_ID}' in parents and trashed=false"
        service.files().list(q=test_query, pageSize=1).execute()

        print("✅ OAuth authentication successful!")
        return service

    except Exception as e:
        # Deliberate best-effort: any failure means "OAuth unavailable".
        print(f"β OAuth authentication failed: {e}")
        return None
|
| 184 |
+
|
| 185 |
+
def create_service_account_service():
    """Try to create a Google Drive service using Service Account credentials.

    Reads the service-account key JSON from the ``GOOGLE_SERVICE_ACCOUNT_KEY``
    environment variable, builds credentials limited to the ``drive.file``
    scope, builds a Drive v3 client, and issues a one-item listing of
    ``FOLDER_ID`` to confirm the request is accepted.

    Returns:
        The Drive service object on success; ``None`` when the environment
        variable is missing or empty, or when any step raises (the reason is
        printed instead of propagated).
    """
    try:
        print("π Attempting Service Account authentication...")

        # Load service account credentials from environment
        service_account_json = os.environ.get('GOOGLE_SERVICE_ACCOUNT_KEY')
        if not service_account_json:
            print("β GOOGLE_SERVICE_ACCOUNT_KEY not found in environment")
            return None

        credentials_info = json.loads(service_account_json)

        # Create credentials from service account
        credentials = service_account.Credentials.from_service_account_info(
            credentials_info,
            scopes=['https://www.googleapis.com/auth/drive.file']
        )

        # Build the Drive service
        service = build('drive', 'v3', credentials=credentials)

        # Probe the target folder; only success/failure matters, so the
        # response itself is discarded.
        test_query = f"'{FOLDER_ID}' in parents and trashed=false"
        service.files().list(q=test_query, pageSize=1).execute()

        print("✅ Service Account authentication successful!")
        return service

    except Exception as e:
        # Deliberate best-effort: any failure means this method is unavailable.
        print(f"β Service Account authentication failed: {e}")
        return None
|
| 217 |
+
|
| 218 |
+
def get_drive_service():
    """Return ``(service, method_name)`` for the first auth method that works.

    OAuth is attempted first, then the Service Account. If neither yields a
    service, setup instructions are printed and the process exits with
    status 1.
    """
    print("π Initializing Google Drive authentication with fallback...")

    # Preferred path: OAuth user credentials.
    oauth_client = create_oauth_service()
    if oauth_client:
        return oauth_client, "OAuth"

    print("π OAuth failed, trying Service Account fallback...")

    # Second chance: Service Account credentials.
    sa_client = create_service_account_service()
    if sa_client:
        return sa_client, "Service Account"

    # Nothing worked: explain what is required, then abort.
    help_lines = (
        "π₯ Both authentication methods failed!",
        "\nPlease ensure you have either:",
        "1. GOOGLE_OAUTH_TOKEN secret set with valid OAuth credentials, OR",
        "2. GOOGLE_SERVICE_ACCOUNT_KEY secret set with service account JSON",
        "\nFor Service Account:",
        "- Create a service account in Google Cloud Console",
        "- Share your target folder with the service account email",
        "- Add the service account JSON as GOOGLE_SERVICE_ACCOUNT_KEY secret",
    )
    for line in help_lines:
        print(line)

    sys.exit(1)
|
| 245 |
+
|
| 246 |
+
def get_mime_type(file_path):
    """Return the MIME type guessed from *file_path*'s name.

    Falls back to ``'application/octet-stream'`` when ``mimetypes`` cannot
    guess a type.
    """
    guessed = mimetypes.guess_type(file_path)[0]
    if guessed:
        return guessed
    return 'application/octet-stream'
|
| 249 |
+
|
| 250 |
+
def upload_file(file_path, parent_folder_id, drive_service):
    """Upload one local file into a Drive folder, replacing a same-named file.

    The folder is searched for a non-trashed file with the same base name.
    If one is found, the content of the first match is updated; otherwise a
    new file is created under ``parent_folder_id``. A line describing the
    outcome is printed.

    Args:
        file_path: Path of the local file to upload.
        parent_folder_id: ID of the destination Drive folder.
        drive_service: Drive v3 service object.

    Raises:
        Whatever the Drive client or ``MediaFileUpload`` raises; nothing is
        caught here.
    """
    file_name = os.path.basename(file_path)

    # Drive query strings are single-quoted: a backslash or quote inside the
    # name must be escaped, otherwise names such as "it's.txt" produce a
    # malformed query. Backslashes first so the added escapes are not doubled.
    escaped_name = file_name.replace('\\', '\\\\').replace("'", "\\'")

    # Check if file already exists in the specific folder
    query = f"name='{escaped_name}' and '{parent_folder_id}' in parents and trashed=false"
    results = drive_service.files().list(q=query).execute()
    items = results.get('files', [])

    media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)

    if items:
        # Update existing file
        file_id = items[0]['id']
        updated_file = drive_service.files().update(
            fileId=file_id,
            media_body=media
        ).execute()
        print(f'π Updated: {file_name} (ID: {updated_file.get("id")})')
    else:
        # Create new file
        file_metadata = {
            'name': file_name,
            'parents': [parent_folder_id]
        }
        file = drive_service.files().create(
            body=file_metadata,
            media_body=media,
            fields='id'
        ).execute()
        print(f'π€ Uploaded: {file_name} (ID: {file.get("id")})')
|
| 280 |
+
|
| 281 |
+
def create_folder_if_not_exists(folder_name, parent_folder_id, drive_service):
    """Return the ID of ``folder_name`` under a Drive folder, creating it if absent.

    Args:
        folder_name: Name of the folder to look up or create.
        parent_folder_id: ID of the Drive folder to search and create in.
        drive_service: Drive v3 service object.

    Returns:
        The ID of the first matching non-trashed folder, or the ID of the
        newly created folder.
    """
    # Drive query strings are single-quoted: escape backslashes and quotes in
    # the name so folders such as "it's" do not produce a malformed query.
    escaped_name = folder_name.replace('\\', '\\\\').replace("'", "\\'")

    query = (
        f"name='{escaped_name}' and '{parent_folder_id}' in parents and "
        f"mimeType='application/vnd.google-apps.folder' and trashed=false"
    )
    results = drive_service.files().list(q=query).execute()
    items = results.get('files', [])

    if items:
        return items[0]['id']

    # The metadata carries the raw (unescaped) name; escaping is for queries only.
    folder_metadata = {
        'name': folder_name,
        'parents': [parent_folder_id],
        'mimeType': 'application/vnd.google-apps.folder'
    }
    folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
    print(f'π Created folder: {folder_name} (ID: {folder.get("id")})')
    return folder.get('id')
|
| 301 |
+
|
| 302 |
+
def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None, exclude_files=None):
    """Mirror a local directory tree into a Drive folder.

    Walks ``local_path`` top-down, creating a matching Drive folder for every
    directory visited and uploading each file that does not match an
    exclusion pattern. A failure on an individual file is printed and
    counted; it does not stop the walk.

    Args:
        local_path: Root of the local tree to upload.
        parent_folder_id: ID of the Drive folder that receives the tree.
        drive_service: Drive v3 service object.
        exclude_dirs: Directory names that are not descended into. Defaults
            to ``['.git', '.github', 'node_modules', '__pycache__']``.
        exclude_files: ``fnmatch`` patterns of file names to skip. Defaults
            to ``['*.md']``.

    Returns:
        A ``(uploaded_count, skipped_count, error_count)`` tuple.
    """
    import fnmatch

    if exclude_dirs is None:
        exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
    if exclude_files is None:
        exclude_files = ['*.md']  # Skip markdown files

    uploaded_count = 0
    skipped_count = 0
    error_count = 0

    # Drive folder ID for each relative path already resolved. os.walk is
    # top-down, so a directory's parent is always resolved before it; caching
    # avoids re-querying the whole folder chain for every directory.
    folder_ids = {'.': parent_folder_id}

    for root, dirs, files in os.walk(local_path):
        # Prune in place so os.walk does not descend into excluded directories
        dirs[:] = [d for d in dirs if d not in exclude_dirs]

        rel_path = os.path.relpath(root, local_path)
        if rel_path not in folder_ids:
            parent_rel = os.path.dirname(rel_path) or '.'
            folder_ids[rel_path] = create_folder_if_not_exists(
                os.path.basename(rel_path), folder_ids[parent_rel], drive_service
            )
        current_folder_id = folder_ids[rel_path]

        # Upload files in current directory
        for file in files:
            if any(fnmatch.fnmatch(file, pattern) for pattern in exclude_files):
                print(f'βοΈ Skipping {file} (excluded file type)')
                skipped_count += 1
                continue

            file_path = os.path.join(root, file)
            try:
                upload_file(file_path, current_folder_id, drive_service)
                uploaded_count += 1
            except Exception as e:
                # Best-effort: record the failure and keep going.
                print(f'β Error uploading {file_path}: {e}')
                error_count += 1

    return uploaded_count, skipped_count, error_count
|
| 349 |
+
|
| 350 |
+
def main():
    """Authenticate, verify folder access, upload the working directory.

    Obtains a Drive service via ``get_drive_service()``, lists one item of
    ``FOLDER_ID`` to confirm access, uploads ``'.'`` with
    ``upload_directory()``, and prints a summary. Exits with status 1 when
    the folder cannot be listed or when any file failed to upload.
    """
    print("=" * 60)
    print("π ROBUST GOOGLE DRIVE UPLOADER WITH FALLBACK")
    print("=" * 60)

    # Get Drive service with fallback authentication
    service, auth_method = get_drive_service()

    print(f"π Successfully authenticated using: {auth_method}")

    # Test folder permissions
    try:
        test_query = f"'{FOLDER_ID}' in parents and trashed=false"
        test_results = service.files().list(q=test_query, pageSize=1).execute()
        print(f"✅ Successfully accessed folder. Found {len(test_results.get('files', []))} items (showing 1 max)")
    except Exception as e:
        print(f"π₯ ERROR: Cannot access folder {FOLDER_ID}")
        print(f"Error: {e}")
        if auth_method == "Service Account":
            print("π‘ Make sure to share the folder with the service account email!")
        sys.exit(1)

    # Upload all files to Google Drive
    print("\nπ€ Starting upload to Google Drive...")
    print("-" * 40)

    uploaded, skipped, errors = upload_directory('.', FOLDER_ID, service)

    print("-" * 40)
    print("π UPLOAD SUMMARY:")
    print(f"✅ Files uploaded: {uploaded}")
    print(f"βοΈ Files skipped: {skipped}")
    print(f"β Errors: {errors}")
    print(f"π Authentication method: {auth_method}")
    print("=" * 60)

    if errors > 0:
        # A non-zero exit marks the run as failed when any upload failed.
        print("β οΈ Some files failed to upload. Check the logs above for details.")
        sys.exit(1)
    else:
        print("π Upload completed successfully!")


if __name__ == "__main__":
    main()
|
templates/index.html
CHANGED
|
@@ -2152,7 +2152,10 @@
|
|
| 2152 |
const endedListener = () => {
|
| 2153 |
console.log(`βΉοΈ ${mode} audio ended`);
|
| 2154 |
stopLiveVisualization();
|
| 2155 |
-
|
|
|
|
|
|
|
|
|
|
| 2156 |
};
|
| 2157 |
|
| 2158 |
// Add listeners
|
|
|
|
| 2152 |
const endedListener = () => {
|
| 2153 |
console.log(`βΉοΈ ${mode} audio ended`);
|
| 2154 |
stopLiveVisualization();
|
| 2155 |
+
// Only draw static waveform for demo mode, not for full processing mode
|
| 2156 |
+
// if (mode === 'demo') {
|
| 2157 |
+
// drawStaticWaveform();
|
| 2158 |
+
// }
|
| 2159 |
};
|
| 2160 |
|
| 2161 |
// Add listeners
|
web_app.py
CHANGED
|
@@ -812,6 +812,8 @@ async def get_results(task_id: str):
|
|
| 812 |
# Return actual processed results
|
| 813 |
if task_id in processing_results:
|
| 814 |
results = processing_results[task_id]
|
|
|
|
|
|
|
| 815 |
|
| 816 |
# Convert to the expected format for frontend
|
| 817 |
formatted_results = {
|
|
@@ -878,11 +880,12 @@ async def get_results(task_id: str):
|
|
| 878 |
}
|
| 879 |
}
|
| 880 |
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
|
|
|
| 886 |
|
| 887 |
else:
|
| 888 |
# Fallback if results not found
|
|
|
|
| 812 |
# Return actual processed results
|
| 813 |
if task_id in processing_results:
|
| 814 |
results = processing_results[task_id]
|
| 815 |
+
logger.info(f"π Found results for task {task_id}: {type(results)}")
|
| 816 |
+
logger.info(f"π Results keys: {list(results.keys()) if isinstance(results, dict) else 'Not a dict'}")
|
| 817 |
|
| 818 |
# Convert to the expected format for frontend
|
| 819 |
formatted_results = {
|
|
|
|
| 880 |
}
|
| 881 |
}
|
| 882 |
|
| 883 |
+
logger.info(f"π€ Returning formatted results for task {task_id}: {len(formatted_results.get('segments', []))} segments")
|
| 884 |
+
return JSONResponse({
|
| 885 |
+
"task_id": task_id,
|
| 886 |
+
"status": "complete",
|
| 887 |
+
"results": formatted_results
|
| 888 |
+
})
|
| 889 |
|
| 890 |
else:
|
| 891 |
# Fallback if results not found
|