JoaquinVanschoren committed on
Commit
8ed167c
·
1 Parent(s): a8989f9

timeout warning

Browse files
Files changed (2) hide show
  1. app.py +51 -25
  2. validation.py +1 -0
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import requests
2
  import os
3
 
@@ -16,41 +17,69 @@ import json
16
  import time
17
  import traceback
18
  from validation import validate_json, validate_croissant, validate_records, generate_validation_report
 
19
 
20
  def process_file(file):
21
  results = []
22
  json_data = None
 
 
23
 
24
  filename = file.name.split("/")[-1]
25
 
26
- # Check 1: JSON validation
27
  json_valid, json_message, json_data = validate_json(file.name)
28
  json_message = json_message.replace("\n✓\n", "\n")
29
  results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
30
-
31
  if not json_valid:
32
- return results, None
33
 
34
- # Check 2: Croissant validation
35
  croissant_valid, croissant_message = validate_croissant(json_data)
36
  croissant_message = croissant_message.replace("\n✓\n", "\n")
37
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
38
-
39
  if not croissant_valid:
40
- return results, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- # Check 3: Records validation (with timeout-safe and error-specific logic)
43
- records_valid, records_message, records_status = validate_records(json_data)
44
  records_message = records_message.replace("\n✓\n", "\n")
45
  results.append(("Records Generation Test", records_valid, records_message, records_status))
46
 
47
- # Generate final report
48
  report = generate_validation_report(filename, json_data, results)
49
-
50
- return results, report
51
 
52
  def create_ui():
53
  with gr.Blocks(theme=gr.themes.Soft()) as app:
 
54
  gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
55
  gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
56
  gr.Markdown("""
@@ -481,14 +510,12 @@ def create_ui():
481
  gr.update(visible=False), # validation_results
482
  gr.update(visible=False), # validation_progress
483
  gr.update(visible=False), # report_group
484
- None, # report_text
485
- None # report_md
486
  ]
487
 
488
- # Process the file and get results
489
- results, report = process_file(file)
490
 
491
- # Extract dataset name from the JSON for the report filename
492
  try:
493
  with open(file.name, 'r') as f:
494
  json_data = json.load(f)
@@ -496,19 +523,18 @@ def create_ui():
496
  except:
497
  dataset_name = 'unnamed'
498
 
499
- # Save report to file with new naming convention
500
  report_filename = f"report_croissant-validation_{dataset_name}.md"
501
  if report:
502
  with open(report_filename, "w") as f:
503
  f.write(report)
504
 
505
- # Return final state
506
  return [
507
- build_results_html(results), # validation_results
508
- gr.update(visible=False), # validation_progress
509
- gr.update(visible=True) if report else gr.update(visible=False), # report_group
510
- report if report else None, # report_text
511
- report_filename if report else None # report_md
 
512
  ]
513
 
514
  # Connect UI events to functions with updated outputs
@@ -538,7 +564,7 @@ def create_ui():
538
  None, # report_text
539
  None # report_md
540
  ]
541
-
542
  validate_btn.click(
543
  fn=show_progress,
544
  inputs=None,
@@ -547,7 +573,7 @@ def create_ui():
547
  ).then(
548
  fn=on_validate,
549
  inputs=file_input,
550
- outputs=[validation_results, validation_progress, report_group, report_text, report_md]
551
  )
552
 
553
  fetch_btn.click(
 
1
+ import mlcroissant._src.operation_graph.operations.download as dl_mod
2
  import requests
3
  import os
4
 
 
17
  import time
18
  import traceback
19
  from validation import validate_json, validate_croissant, validate_records, generate_validation_report
20
+ import threading
21
 
22
  def process_file(file):
23
  results = []
24
  json_data = None
25
+ timer = None
26
+ warning_text = None # to be set if timer fires
27
 
28
  filename = file.name.split("/")[-1]
29
 
30
+ # JSON validation
31
  json_valid, json_message, json_data = validate_json(file.name)
32
  json_message = json_message.replace("\n✓\n", "\n")
33
  results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
 
34
  if not json_valid:
35
+ return results, None, None
36
 
37
+ # Schema validation
38
  croissant_valid, croissant_message = validate_croissant(json_data)
39
  croissant_message = croissant_message.replace("\n✓\n", "\n")
40
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
 
41
  if not croissant_valid:
42
+ return results, None, None
43
+
44
+ # Start timer before records validation
45
+ fired = threading.Event()
46
+
47
+ def trigger_warning():
48
+ nonlocal warning_text
49
+ warning_text = """
50
+ ⚠️ <b>This is taking longer than usual</b>. It is possible that this checker is currently being used by a lot of people
51
+ at the same time, which may trigger rate limiting by the platform hosting your data. The app will then try again and may get into a very long loop.<br><br>
52
+ In that case, we recommend using any of the following options:
53
+ <ul style="text-align:left; margin: 0 auto; display:inline-block;">
54
+ <li>🔁 Duplicate this Space on Hugging Face</li>
55
+ <li>💻 Run it locally (GitHub or Docker)</li>
56
+ <li>🥐 Use <code>mlcroissant</code> from <a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a></li>
57
+ </ul>
58
+ """
59
+ fired.set()
60
+
61
+ timer = threading.Timer(0.1, trigger_warning)
62
+ timer.start()
63
+
64
+ try:
65
+ records_valid, records_message, records_status = validate_records(json_data)
66
+ finally:
67
+ timer.cancel()
68
+
69
+ if fired.is_set():
70
+ warning_html_update = gr.update(value=warning_text, visible=True)
71
+ else:
72
+ warning_html_update = gr.update(visible=False)
73
 
 
 
74
  records_message = records_message.replace("\n✓\n", "\n")
75
  results.append(("Records Generation Test", records_valid, records_message, records_status))
76
 
 
77
  report = generate_validation_report(filename, json_data, results)
78
+ return results, report, warning_html_update
 
79
 
80
  def create_ui():
81
  with gr.Blocks(theme=gr.themes.Soft()) as app:
82
+ delayed_warning_html = gr.HTML("", visible=False)
83
  gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
84
  gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
85
  gr.Markdown("""
 
510
  gr.update(visible=False), # validation_results
511
  gr.update(visible=False), # validation_progress
512
  gr.update(visible=False), # report_group
513
+ None, None, # report_text, report_md
514
+ gr.update(visible=False) # delayed_warning_html
515
  ]
516
 
517
+ results, report, warning_html_update = process_file(file)
 
518
 
 
519
  try:
520
  with open(file.name, 'r') as f:
521
  json_data = json.load(f)
 
523
  except:
524
  dataset_name = 'unnamed'
525
 
 
526
  report_filename = f"report_croissant-validation_{dataset_name}.md"
527
  if report:
528
  with open(report_filename, "w") as f:
529
  f.write(report)
530
 
 
531
  return [
532
+ build_results_html(results),
533
+ gr.update(visible=False),
534
+ gr.update(visible=True) if report else gr.update(visible=False),
535
+ report if report else None,
536
+ report_filename if report else None,
537
+ warning_html_update or gr.update(visible=False)
538
  ]
539
 
540
  # Connect UI events to functions with updated outputs
 
564
  None, # report_text
565
  None # report_md
566
  ]
567
+
568
  validate_btn.click(
569
  fn=show_progress,
570
  inputs=None,
 
573
  ).then(
574
  fn=on_validate,
575
  inputs=file_input,
576
+ outputs=[validation_results, validation_progress, report_group, report_text, report_md, delayed_warning_html]
577
  )
578
 
579
  fetch_btn.click(
validation.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import requests
2
  import os
3
 
 
1
+ import mlcroissant._src.operation_graph.operations.download as dl_mod
2
  import requests
3
  import os
4