Prathamesh Sarjerao Vaidya
committed on
Commit
·
7d36e8b
1
Parent(s):
3e27995
fixed upload_to_drive & live visuals & results in full mode
Browse files- .github/workflows/main.yml +3 -0
- .github/workflows/scripts/upload_to_drive.py +259 -0
- templates/index.html +4 -1
- web_app.py +8 -5
.github/workflows/main.yml
CHANGED
@@ -38,7 +38,10 @@ jobs:
|
|
38 |
|
39 |
- name: Upload to Google Drive
|
40 |
env:
|
|
|
41 |
GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
|
|
|
|
|
42 |
run: python .github/workflows/scripts/upload_to_drive.py
|
43 |
|
44 |
- name: Push to Hugging Face hub
|
|
|
38 |
|
39 |
- name: Upload to Google Drive
|
40 |
env:
|
41 |
+
# Primary authentication method (OAuth)
|
42 |
GOOGLE_OAUTH_TOKEN: ${{ secrets.GOOGLE_OAUTH_TOKEN }}
|
43 |
+
# Fallback authentication method (Service Account)
|
44 |
+
GOOGLE_SERVICE_ACCOUNT_KEY: ${{ secrets.GOOGLE_SERVICE_ACCOUNT_KEY }}
|
45 |
run: python .github/workflows/scripts/upload_to_drive.py
|
46 |
|
47 |
- name: Push to Hugging Face hub
|
.github/workflows/scripts/upload_to_drive.py
CHANGED
@@ -133,3 +133,262 @@ print("Starting upload to Google Drive...")
|
|
133 |
upload_directory('.', FOLDER_ID, service)
|
134 |
|
135 |
print("Upload completed - MD files were skipped, PDFs were uploaded!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
upload_directory('.', FOLDER_ID, service)
|
134 |
|
135 |
print("Upload completed - MD files were skipped, PDFs were uploaded!")
|
136 |
+
import os
|
137 |
+
import json
|
138 |
+
import mimetypes
|
139 |
+
from google.oauth2.credentials import Credentials
|
140 |
+
from google.oauth2 import service_account
|
141 |
+
from google.auth.transport.requests import Request
|
142 |
+
from googleapiclient.discovery import build
|
143 |
+
from googleapiclient.http import MediaFileUpload
|
144 |
+
import sys
|
145 |
+
|
146 |
+
# Target folder ID - This is where files will be uploaded
|
147 |
+
FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
|
148 |
+
|
149 |
+
def create_oauth_service():
|
150 |
+
"""Try to create Google Drive service using OAuth credentials"""
|
151 |
+
try:
|
152 |
+
print("π Attempting OAuth authentication...")
|
153 |
+
|
154 |
+
# Load OAuth credentials from environment
|
155 |
+
oauth_token_json = os.environ.get('GOOGLE_OAUTH_TOKEN')
|
156 |
+
if not oauth_token_json:
|
157 |
+
print("β GOOGLE_OAUTH_TOKEN not found in environment")
|
158 |
+
return None
|
159 |
+
|
160 |
+
token_info = json.loads(oauth_token_json)
|
161 |
+
|
162 |
+
# Create credentials from the token info
|
163 |
+
credentials = Credentials.from_authorized_user_info(token_info)
|
164 |
+
|
165 |
+
# Refresh the token if needed
|
166 |
+
if credentials.expired and credentials.refresh_token:
|
167 |
+
print("π Token expired, attempting refresh...")
|
168 |
+
credentials.refresh(Request())
|
169 |
+
print("✅ Token refreshed successfully")
|
170 |
+
|
171 |
+
# Test the credentials by building service
|
172 |
+
service = build('drive', 'v3', credentials=credentials)
|
173 |
+
|
174 |
+
# Test access to the folder
|
175 |
+
test_query = f"'{FOLDER_ID}' in parents and trashed=false"
|
176 |
+
test_results = service.files().list(q=test_query, pageSize=1).execute()
|
177 |
+
|
178 |
+
print("✅ OAuth authentication successful!")
|
179 |
+
return service
|
180 |
+
|
181 |
+
except Exception as e:
|
182 |
+
print(f"β OAuth authentication failed: {str(e)}")
|
183 |
+
return None
|
184 |
+
|
185 |
+
def create_service_account_service():
|
186 |
+
"""Try to create Google Drive service using Service Account credentials"""
|
187 |
+
try:
|
188 |
+
print("π Attempting Service Account authentication...")
|
189 |
+
|
190 |
+
# Load service account credentials from environment
|
191 |
+
service_account_json = os.environ.get('GOOGLE_SERVICE_ACCOUNT_KEY')
|
192 |
+
if not service_account_json:
|
193 |
+
print("β GOOGLE_SERVICE_ACCOUNT_KEY not found in environment")
|
194 |
+
return None
|
195 |
+
|
196 |
+
credentials_info = json.loads(service_account_json)
|
197 |
+
|
198 |
+
# Create credentials from service account
|
199 |
+
credentials = service_account.Credentials.from_service_account_info(
|
200 |
+
credentials_info,
|
201 |
+
scopes=['https://www.googleapis.com/auth/drive.file']
|
202 |
+
)
|
203 |
+
|
204 |
+
# Build the Drive service
|
205 |
+
service = build('drive', 'v3', credentials=credentials)
|
206 |
+
|
207 |
+
# Test access to the folder
|
208 |
+
test_query = f"'{FOLDER_ID}' in parents and trashed=false"
|
209 |
+
test_results = service.files().list(q=test_query, pageSize=1).execute()
|
210 |
+
|
211 |
+
print("✅ Service Account authentication successful!")
|
212 |
+
return service
|
213 |
+
|
214 |
+
except Exception as e:
|
215 |
+
print(f"β Service Account authentication failed: {str(e)}")
|
216 |
+
return None
|
217 |
+
|
218 |
+
def get_drive_service():
|
219 |
+
"""Get Google Drive service with fallback authentication"""
|
220 |
+
print("π Initializing Google Drive authentication with fallback...")
|
221 |
+
|
222 |
+
# Try OAuth first
|
223 |
+
service = create_oauth_service()
|
224 |
+
if service:
|
225 |
+
return service, "OAuth"
|
226 |
+
|
227 |
+
print("π OAuth failed, trying Service Account fallback...")
|
228 |
+
|
229 |
+
# Fallback to Service Account
|
230 |
+
service = create_service_account_service()
|
231 |
+
if service:
|
232 |
+
return service, "Service Account"
|
233 |
+
|
234 |
+
# Both methods failed
|
235 |
+
print("💥 Both authentication methods failed!")
|
236 |
+
print("\nPlease ensure you have either:")
|
237 |
+
print("1. GOOGLE_OAUTH_TOKEN secret set with valid OAuth credentials, OR")
|
238 |
+
print("2. GOOGLE_SERVICE_ACCOUNT_KEY secret set with service account JSON")
|
239 |
+
print("\nFor Service Account:")
|
240 |
+
print("- Create a service account in Google Cloud Console")
|
241 |
+
print("- Share your target folder with the service account email")
|
242 |
+
print("- Add the service account JSON as GOOGLE_SERVICE_ACCOUNT_KEY secret")
|
243 |
+
|
244 |
+
sys.exit(1)
|
245 |
+
|
246 |
+
def get_mime_type(file_path):
|
247 |
+
mime_type, _ = mimetypes.guess_type(file_path)
|
248 |
+
return mime_type or 'application/octet-stream'
|
249 |
+
|
250 |
+
def upload_file(file_path, parent_folder_id, drive_service):
|
251 |
+
file_name = os.path.basename(file_path)
|
252 |
+
|
253 |
+
# Check if file already exists in the specific folder
|
254 |
+
query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
|
255 |
+
results = drive_service.files().list(q=query).execute()
|
256 |
+
items = results.get('files', [])
|
257 |
+
|
258 |
+
media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
|
259 |
+
|
260 |
+
if items:
|
261 |
+
# Update existing file
|
262 |
+
file_id = items[0]['id']
|
263 |
+
updated_file = drive_service.files().update(
|
264 |
+
fileId=file_id,
|
265 |
+
media_body=media
|
266 |
+
).execute()
|
267 |
+
print(f'π Updated: {file_name} (ID: {updated_file.get("id")})')
|
268 |
+
else:
|
269 |
+
# Create new file
|
270 |
+
file_metadata = {
|
271 |
+
'name': file_name,
|
272 |
+
'parents': [parent_folder_id]
|
273 |
+
}
|
274 |
+
file = drive_service.files().create(
|
275 |
+
body=file_metadata,
|
276 |
+
media_body=media,
|
277 |
+
fields='id'
|
278 |
+
).execute()
|
279 |
+
print(f'📤 Uploaded: {file_name} (ID: {file.get("id")})')
|
280 |
+
|
281 |
+
def create_folder_if_not_exists(folder_name, parent_folder_id, drive_service):
|
282 |
+
"""Create a folder if it doesn't exist and return its ID"""
|
283 |
+
query = (
|
284 |
+
f"name='{folder_name}' and '{parent_folder_id}' in parents and "
|
285 |
+
f"mimeType='application/vnd.google-apps.folder' and trashed=false"
|
286 |
+
)
|
287 |
+
results = drive_service.files().list(q=query).execute()
|
288 |
+
items = results.get('files', [])
|
289 |
+
|
290 |
+
if items:
|
291 |
+
return items[0]['id']
|
292 |
+
else:
|
293 |
+
folder_metadata = {
|
294 |
+
'name': folder_name,
|
295 |
+
'parents': [parent_folder_id],
|
296 |
+
'mimeType': 'application/vnd.google-apps.folder'
|
297 |
+
}
|
298 |
+
folder = drive_service.files().create(body=folder_metadata, fields='id').execute()
|
299 |
+
print(f'π Created folder: {folder_name} (ID: {folder.get("id")})')
|
300 |
+
return folder.get('id')
|
301 |
+
|
302 |
+
def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None, exclude_files=None):
|
303 |
+
if exclude_dirs is None:
|
304 |
+
exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
|
305 |
+
if exclude_files is None:
|
306 |
+
exclude_files = ['*.md'] # Skip markdown files
|
307 |
+
|
308 |
+
import fnmatch
|
309 |
+
uploaded_count = 0
|
310 |
+
skipped_count = 0
|
311 |
+
error_count = 0
|
312 |
+
|
313 |
+
for root, dirs, files in os.walk(local_path):
|
314 |
+
# Remove excluded directories
|
315 |
+
dirs[:] = [d for d in dirs if d not in exclude_dirs]
|
316 |
+
|
317 |
+
# Calculate relative path from the root
|
318 |
+
rel_path = os.path.relpath(root, local_path)
|
319 |
+
current_folder_id = parent_folder_id
|
320 |
+
|
321 |
+
# Create nested folders if needed
|
322 |
+
if rel_path != '.':
|
323 |
+
path_parts = rel_path.split(os.sep)
|
324 |
+
for part in path_parts:
|
325 |
+
current_folder_id = create_folder_if_not_exists(part, current_folder_id, drive_service)
|
326 |
+
|
327 |
+
# Upload files in current directory
|
328 |
+
for file in files:
|
329 |
+
should_skip = False
|
330 |
+
for pattern in exclude_files:
|
331 |
+
if fnmatch.fnmatch(file, pattern):
|
332 |
+
should_skip = True
|
333 |
+
break
|
334 |
+
|
335 |
+
if should_skip:
|
336 |
+
print(f'⏭️ Skipping {file} (excluded file type)')
|
337 |
+
skipped_count += 1
|
338 |
+
continue
|
339 |
+
|
340 |
+
file_path = os.path.join(root, file)
|
341 |
+
try:
|
342 |
+
upload_file(file_path, current_folder_id, drive_service)
|
343 |
+
uploaded_count += 1
|
344 |
+
except Exception as e:
|
345 |
+
print(f'β Error uploading {file_path}: {e}')
|
346 |
+
error_count += 1
|
347 |
+
|
348 |
+
return uploaded_count, skipped_count, error_count
|
349 |
+
|
350 |
+
def main():
|
351 |
+
"""Main execution function"""
|
352 |
+
print("=" * 60)
|
353 |
+
print("π ROBUST GOOGLE DRIVE UPLOADER WITH FALLBACK")
|
354 |
+
print("=" * 60)
|
355 |
+
|
356 |
+
# Get Drive service with fallback authentication
|
357 |
+
service, auth_method = get_drive_service()
|
358 |
+
|
359 |
+
print(f"π Successfully authenticated using: {auth_method}")
|
360 |
+
|
361 |
+
# Test folder permissions
|
362 |
+
try:
|
363 |
+
test_query = f"'{FOLDER_ID}' in parents and trashed=false"
|
364 |
+
test_results = service.files().list(q=test_query, pageSize=1).execute()
|
365 |
+
print(f"✅ Successfully accessed folder. Found {len(test_results.get('files', []))} items (showing 1 max)")
|
366 |
+
except Exception as e:
|
367 |
+
print(f"💥 ERROR: Cannot access folder {FOLDER_ID}")
|
368 |
+
print(f"Error: {e}")
|
369 |
+
if auth_method == "Service Account":
|
370 |
+
print("💡 Make sure to share the folder with the service account email!")
|
371 |
+
sys.exit(1)
|
372 |
+
|
373 |
+
# Upload all files to Google Drive
|
374 |
+
print("\n📤 Starting upload to Google Drive...")
|
375 |
+
print("-" * 40)
|
376 |
+
|
377 |
+
uploaded, skipped, errors = upload_directory('.', FOLDER_ID, service)
|
378 |
+
|
379 |
+
print("-" * 40)
|
380 |
+
print("π UPLOAD SUMMARY:")
|
381 |
+
print(f"✅ Files uploaded: {uploaded}")
|
382 |
+
print(f"⏭️ Files skipped: {skipped}")
|
383 |
+
print(f"β Errors: {errors}")
|
384 |
+
print(f"π Authentication method: {auth_method}")
|
385 |
+
print("=" * 60)
|
386 |
+
|
387 |
+
if errors > 0:
|
388 |
+
print("⚠️ Some files failed to upload. Check the logs above for details.")
|
389 |
+
sys.exit(1)
|
390 |
+
else:
|
391 |
+
print("π Upload completed successfully!")
|
392 |
+
|
393 |
+
if __name__ == "__main__":
|
394 |
+
main()
|
templates/index.html
CHANGED
@@ -2152,7 +2152,10 @@
|
|
2152 |
const endedListener = () => {
|
2153 |
console.log(`⏹️ ${mode} audio ended`);
|
2154 |
stopLiveVisualization();
|
2155 |
-
|
|
|
|
|
|
|
2156 |
};
|
2157 |
|
2158 |
// Add listeners
|
|
|
2152 |
const endedListener = () => {
|
2153 |
console.log(`⏹️ ${mode} audio ended`);
|
2154 |
stopLiveVisualization();
|
2155 |
+
// Only draw static waveform for demo mode, not for full processing mode
|
2156 |
+
// if (mode === 'demo') {
|
2157 |
+
// drawStaticWaveform();
|
2158 |
+
// }
|
2159 |
};
|
2160 |
|
2161 |
// Add listeners
|
web_app.py
CHANGED
@@ -812,6 +812,8 @@ async def get_results(task_id: str):
|
|
812 |
# Return actual processed results
|
813 |
if task_id in processing_results:
|
814 |
results = processing_results[task_id]
|
|
|
|
|
815 |
|
816 |
# Convert to the expected format for frontend
|
817 |
formatted_results = {
|
@@ -878,11 +880,12 @@ async def get_results(task_id: str):
|
|
878 |
}
|
879 |
}
|
880 |
|
881 |
-
|
882 |
-
|
883 |
-
|
884 |
-
|
885 |
-
|
|
|
886 |
|
887 |
else:
|
888 |
# Fallback if results not found
|
|
|
812 |
# Return actual processed results
|
813 |
if task_id in processing_results:
|
814 |
results = processing_results[task_id]
|
815 |
+
logger.info(f"π Found results for task {task_id}: {type(results)}")
|
816 |
+
logger.info(f"π Results keys: {list(results.keys()) if isinstance(results, dict) else 'Not a dict'}")
|
817 |
|
818 |
# Convert to the expected format for frontend
|
819 |
formatted_results = {
|
|
|
880 |
}
|
881 |
}
|
882 |
|
883 |
+
logger.info(f"📤 Returning formatted results for task {task_id}: {len(formatted_results.get('segments', []))} segments")
|
884 |
+
return JSONResponse({
|
885 |
+
"task_id": task_id,
|
886 |
+
"status": "complete",
|
887 |
+
"results": formatted_results
|
888 |
+
})
|
889 |
|
890 |
else:
|
891 |
# Fallback if results not found
|