JoaquinVanschoren committed on
Commit
9c3a4be
·
1 Parent(s): 8ed167c

timeout warning

Browse files
Files changed (1) hide show
  1. app.py +37 -50
app.py CHANGED
@@ -17,69 +17,41 @@ import json
17
  import time
18
  import traceback
19
  from validation import validate_json, validate_croissant, validate_records, generate_validation_report
20
- import threading
21
 
22
  def process_file(file):
23
  results = []
24
  json_data = None
25
- timer = None
26
- warning_text = None # to be set if timer fires
27
 
28
  filename = file.name.split("/")[-1]
29
 
30
- # JSON validation
31
  json_valid, json_message, json_data = validate_json(file.name)
32
  json_message = json_message.replace("\n✓\n", "\n")
33
  results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
 
34
  if not json_valid:
35
- return results, None, None
36
 
37
- # Schema validation
38
  croissant_valid, croissant_message = validate_croissant(json_data)
39
  croissant_message = croissant_message.replace("\n✓\n", "\n")
40
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
 
41
  if not croissant_valid:
42
- return results, None, None
43
-
44
- # Start timer before records validation
45
- fired = threading.Event()
46
-
47
- def trigger_warning():
48
- nonlocal warning_text
49
- warning_text = """
50
- ⚠️ <b>This is taking longer than usual</b>. It is possible that this checker is currently being used by a lot of people
51
- at the same time, which may trigger rate limiting by the platform hosting your data. The app will then try again and may get into a very long loop.<br><br>
52
- In that case, we recommend using any of the following options:
53
- <ul style="text-align:left; margin: 0 auto; display:inline-block;">
54
- <li>🔁 Duplicate this Space on Hugging Face</li>
55
- <li>💻 Run it locally (GitHub or Docker)</li>
56
- <li>🥐 Use <code>mlcroissant</code> from <a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a></li>
57
- </ul>
58
- """
59
- fired.set()
60
-
61
- timer = threading.Timer(0.1, trigger_warning)
62
- timer.start()
63
-
64
- try:
65
- records_valid, records_message, records_status = validate_records(json_data)
66
- finally:
67
- timer.cancel()
68
-
69
- if fired.is_set():
70
- warning_html_update = gr.update(value=warning_text, visible=True)
71
- else:
72
- warning_html_update = gr.update(visible=False)
73
 
 
 
74
  records_message = records_message.replace("\n✓\n", "\n")
75
  results.append(("Records Generation Test", records_valid, records_message, records_status))
76
 
 
77
  report = generate_validation_report(filename, json_data, results)
78
- return results, report, warning_html_update
 
79
 
80
  def create_ui():
81
  with gr.Blocks(theme=gr.themes.Soft()) as app:
82
- delayed_warning_html = gr.HTML("", visible=False)
83
  gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
84
  gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
85
  gr.Markdown("""
@@ -510,12 +482,14 @@ def create_ui():
510
  gr.update(visible=False), # validation_results
511
  gr.update(visible=False), # validation_progress
512
  gr.update(visible=False), # report_group
513
- None, None, # report_text, report_md
514
- gr.update(visible=False) # delayed_warning_html
515
  ]
516
 
517
- results, report, warning_html_update = process_file(file)
 
518
 
 
519
  try:
520
  with open(file.name, 'r') as f:
521
  json_data = json.load(f)
@@ -523,18 +497,19 @@ def create_ui():
523
  except:
524
  dataset_name = 'unnamed'
525
 
 
526
  report_filename = f"report_croissant-validation_{dataset_name}.md"
527
  if report:
528
  with open(report_filename, "w") as f:
529
  f.write(report)
530
 
 
531
  return [
532
- build_results_html(results),
533
- gr.update(visible=False),
534
- gr.update(visible=True) if report else gr.update(visible=False),
535
- report if report else None,
536
- report_filename if report else None,
537
- warning_html_update or gr.update(visible=False)
538
  ]
539
 
540
  # Connect UI events to functions with updated outputs
@@ -564,7 +539,7 @@ def create_ui():
564
  None, # report_text
565
  None # report_md
566
  ]
567
-
568
  validate_btn.click(
569
  fn=show_progress,
570
  inputs=None,
@@ -573,7 +548,7 @@ def create_ui():
573
  ).then(
574
  fn=on_validate,
575
  inputs=file_input,
576
- outputs=[validation_results, validation_progress, report_group, report_text, report_md, delayed_warning_html]
577
  )
578
 
579
  fetch_btn.click(
@@ -588,6 +563,18 @@ def create_ui():
588
  <p>Learn more about <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant</a>.</p>
589
  </div>
590
  """)
 
 
 
 
 
 
 
 
 
 
 
 
591
 
592
  return app
593
 
 
17
  import time
18
  import traceback
19
  from validation import validate_json, validate_croissant, validate_records, generate_validation_report
 
20
 
21
  def process_file(file):
22
  results = []
23
  json_data = None
 
 
24
 
25
  filename = file.name.split("/")[-1]
26
 
27
+ # Check 1: JSON validation
28
  json_valid, json_message, json_data = validate_json(file.name)
29
  json_message = json_message.replace("\n✓\n", "\n")
30
  results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
31
+
32
  if not json_valid:
33
+ return results, None
34
 
35
+ # Check 2: Croissant validation
36
  croissant_valid, croissant_message = validate_croissant(json_data)
37
  croissant_message = croissant_message.replace("\n✓\n", "\n")
38
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
39
+
40
  if not croissant_valid:
41
+ return results, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ # Check 3: Records validation (with timeout-safe and error-specific logic)
44
+ records_valid, records_message, records_status = validate_records(json_data)
45
  records_message = records_message.replace("\n✓\n", "\n")
46
  results.append(("Records Generation Test", records_valid, records_message, records_status))
47
 
48
+ # Generate final report
49
  report = generate_validation_report(filename, json_data, results)
50
+
51
+ return results, report
52
 
53
  def create_ui():
54
  with gr.Blocks(theme=gr.themes.Soft()) as app:
 
55
  gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
56
  gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
57
  gr.Markdown("""
 
482
  gr.update(visible=False), # validation_results
483
  gr.update(visible=False), # validation_progress
484
  gr.update(visible=False), # report_group
485
+ None, # report_text
486
+ None # report_md
487
  ]
488
 
489
+ # Process the file and get results
490
+ results, report = process_file(file)
491
 
492
+ # Extract dataset name from the JSON for the report filename
493
  try:
494
  with open(file.name, 'r') as f:
495
  json_data = json.load(f)
 
497
  except:
498
  dataset_name = 'unnamed'
499
 
500
+ # Save report to file with new naming convention
501
  report_filename = f"report_croissant-validation_{dataset_name}.md"
502
  if report:
503
  with open(report_filename, "w") as f:
504
  f.write(report)
505
 
506
+ # Return final state
507
  return [
508
+ build_results_html(results), # validation_results
509
+ gr.update(visible=False), # validation_progress
510
+ gr.update(visible=True) if report else gr.update(visible=False), # report_group
511
+ report if report else None, # report_text
512
+ report_filename if report else None # report_md
 
513
  ]
514
 
515
  # Connect UI events to functions with updated outputs
 
539
  None, # report_text
540
  None # report_md
541
  ]
542
+
543
  validate_btn.click(
544
  fn=show_progress,
545
  inputs=None,
 
548
  ).then(
549
  fn=on_validate,
550
  inputs=file_input,
551
+ outputs=[validation_results, validation_progress, report_group, report_text, report_md]
552
  )
553
 
554
  fetch_btn.click(
 
563
  <p>Learn more about <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant</a>.</p>
564
  </div>
565
  """)
566
+
567
+ gr.HTML("""
568
+ <div class="progress-status" style="text-align: left; color: #d35400;">
569
+ ⚠️ It is possible that this validator is currently being used by a lot of people at the same time, which may trigger rate limiting by the platform hosting your data.
570
+ The app will then try again and may get into a very long loop. If it takes too long to run, we recommend using any of the following options:
571
+ <ul style="text-align:left; margin: 0 auto; display:inline-block;">
572
+ <li>🔁 Click the button with the three dots (⋯) above and select "Duplicate this Space" to run this app in your own Hugging Face space.</li>
573
+ <li>💻 Click the button with the three dots (⋯) above and select "Run Locally" and then "Clone (git)" to get instructions to run the checker locally. You can also use docker option (you don't need the tokens).</li>
574
+ <li>🥐 Run the Croissant validation code yourself (<a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a>), e.g. with <a href="https://github.com/mlcommons/croissant/tree/7a632f34438e9c8e3812c6a0049898560259c6d4/python/mlcroissant/mlcroissant/scripts" target="_blank">these scripts</a> (validate and load).</li>
575
+ </ul>
576
+ </div>
577
+ """)
578
 
579
  return app
580