Joseph Pollack committed on
Commit
6434b46
·
unverified ·
1 Parent(s): 676b3f3

Fix count output mismatch issue, add authentication warning, add authentication fallbacks

Browse files
__pycache__/interface.cpython-313.pyc CHANGED
Binary files a/__pycache__/interface.cpython-313.pyc and b/__pycache__/interface.cpython-313.pyc differ
 
interface.py CHANGED
@@ -388,9 +388,17 @@ def load_multilingual_phrases(language="en", max_phrases=None, split="train"):
388
  try:
389
  print(f"Loading phrases from NVIDIA Granary dataset for language: {language}")
390
 
 
 
 
391
  # Load Granary dataset with ASR (speech recognition) split
392
  # Use streaming to handle large datasets efficiently
393
- ds = load_dataset("nvidia/Granary", granary_lang, split="asr", streaming=True)
 
 
 
 
 
394
 
395
  phrases = []
396
  count = 0
@@ -427,7 +435,12 @@ def load_multilingual_phrases(language="en", max_phrases=None, split="train"):
427
  raise Exception("No phrases found")
428
 
429
  except Exception as e:
430
- print(f"Granary dataset loading failed for {language}: {e}")
 
 
 
 
 
431
 
432
  # Fallback to basic phrases if Granary fails
433
  print("Using fallback phrases")
@@ -500,6 +513,23 @@ with gr.Blocks(title="Voxtral ASR Fine-tuning") as demo:
500
  Read the phrases below and record them. Then start fine-tuning.
501
  """)
502
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  # Hidden state to track dataset JSONL path
504
  jsonl_path_state = gr.State("")
505
 
@@ -785,7 +815,7 @@ with gr.Blocks(title="Voxtral ASR Fine-tuning") as demo:
785
  inputs=[language_selector],
786
  outputs=[phrase_texts_state, visible_rows_state] + phrase_markdowns + rec_components + [
787
  add_rows_btn, record_dataset_btn, dataset_status, advanced_accordion,
788
- save_rec_btn, start_btn, logs_box
789
  ]
790
  )
791
 
 
388
  try:
389
  print(f"Loading phrases from NVIDIA Granary dataset for language: {language}")
390
 
391
+ # Check for authentication token
392
+ token = os.getenv("HF_TOKEN") or os.getenv("HF_WRITE_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
393
+
394
  # Load Granary dataset with ASR (speech recognition) split
395
  # Use streaming to handle large datasets efficiently
396
+ if token:
397
+ print(f"Using authentication token for Granary dataset access")
398
+ ds = load_dataset("nvidia/Granary", granary_lang, split="asr", streaming=True, token=token)
399
+ else:
400
+ print(f"No HF_TOKEN found, attempting to load Granary dataset without authentication")
401
+ ds = load_dataset("nvidia/Granary", granary_lang, split="asr", streaming=True)
402
 
403
  phrases = []
404
  count = 0
 
435
  raise Exception("No phrases found")
436
 
437
  except Exception as e:
438
+ error_msg = str(e).lower()
439
+ if "401" in error_msg or "unauthorized" in error_msg:
440
+ print(f"Granary dataset authentication failed for {language}: {e}")
441
+ print("This dataset requires a Hugging Face token. Please set HF_TOKEN environment variable.")
442
+ else:
443
+ print(f"Granary dataset loading failed for {language}: {e}")
444
 
445
  # Fallback to basic phrases if Granary fails
446
  print("Using fallback phrases")
 
513
  Read the phrases below and record them. Then start fine-tuning.
514
  """)
515
 
516
+ # Check for HF_TOKEN and show warning if missing
517
+ hf_token = os.getenv("HF_TOKEN") or os.getenv("HF_WRITE_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
518
+ if not hf_token:
519
+ gr.HTML(
520
+ """
521
+ <div style="background-color: rgba(245, 158, 11, 0.1); border: 1px solid rgba(245, 158, 11, 0.3); border-radius: 8px; padding: 12px; margin-bottom: 16px;">
522
+ <p style="color: rgb(234, 88, 12); margin: 0; font-size: 14px; font-weight: 600;">
523
+ ⚠️ No HF_TOKEN detected
524
+ </p>
525
+ <p style="color: rgb(234, 88, 12); margin: 6px 0 0; font-size: 12px;">
526
+ Set HF_TOKEN environment variable to access NVIDIA Granary dataset with authentic multilingual phrases.
527
+ Currently using fallback phrases for demonstration.
528
+ </p>
529
+ </div>
530
+ """
531
+ )
532
+
533
  # Hidden state to track dataset JSONL path
534
  jsonl_path_state = gr.State("")
535
 
 
815
  inputs=[language_selector],
816
  outputs=[phrase_texts_state, visible_rows_state] + phrase_markdowns + rec_components + [
817
  add_rows_btn, record_dataset_btn, dataset_status, advanced_accordion,
818
+ save_rec_btn, push_recordings_btn, start_btn, logs_box
819
  ]
820
  )
821