Commit
·
9c3a4be
1
Parent(s):
8ed167c
timeout warning
Browse files
app.py
CHANGED
@@ -17,69 +17,41 @@ import json
|
|
17 |
import time
|
18 |
import traceback
|
19 |
from validation import validate_json, validate_croissant, validate_records, generate_validation_report
|
20 |
-
import threading
|
21 |
|
22 |
def process_file(file):
|
23 |
results = []
|
24 |
json_data = None
|
25 |
-
timer = None
|
26 |
-
warning_text = None # to be set if timer fires
|
27 |
|
28 |
filename = file.name.split("/")[-1]
|
29 |
|
30 |
-
# JSON validation
|
31 |
json_valid, json_message, json_data = validate_json(file.name)
|
32 |
json_message = json_message.replace("\nβ\n", "\n")
|
33 |
results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
|
|
|
34 |
if not json_valid:
|
35 |
-
return results, None
|
36 |
|
37 |
-
#
|
38 |
croissant_valid, croissant_message = validate_croissant(json_data)
|
39 |
croissant_message = croissant_message.replace("\nβ\n", "\n")
|
40 |
results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
|
|
|
41 |
if not croissant_valid:
|
42 |
-
return results, None
|
43 |
-
|
44 |
-
# Start timer before records validation
|
45 |
-
fired = threading.Event()
|
46 |
-
|
47 |
-
def trigger_warning():
|
48 |
-
nonlocal warning_text
|
49 |
-
warning_text = """
|
50 |
-
⚠️ <b>This is taking longer than usual</b>. It is possible that this checker is currently being used by a lot of people
|
51 |
-
at the same time, which may trigger rate limiting by the platform hosting your data. The app will then try again and may get into a very long loop.<br><br>
|
52 |
-
In that case, we recommend using any of the following options:
|
53 |
-
<ul style="text-align:left; margin: 0 auto; display:inline-block;">
|
54 |
-
<li>π Duplicate this Space on Hugging Face</li>
|
55 |
-
<li>💻 Run it locally (GitHub or Docker)</li>
|
56 |
-
<li>π₯ Use <code>mlcroissant</code> from <a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a></li>
|
57 |
-
</ul>
|
58 |
-
"""
|
59 |
-
fired.set()
|
60 |
-
|
61 |
-
timer = threading.Timer(0.1, trigger_warning)
|
62 |
-
timer.start()
|
63 |
-
|
64 |
-
try:
|
65 |
-
records_valid, records_message, records_status = validate_records(json_data)
|
66 |
-
finally:
|
67 |
-
timer.cancel()
|
68 |
-
|
69 |
-
if fired.is_set():
|
70 |
-
warning_html_update = gr.update(value=warning_text, visible=True)
|
71 |
-
else:
|
72 |
-
warning_html_update = gr.update(visible=False)
|
73 |
|
|
|
|
|
74 |
records_message = records_message.replace("\nβ\n", "\n")
|
75 |
results.append(("Records Generation Test", records_valid, records_message, records_status))
|
76 |
|
|
|
77 |
report = generate_validation_report(filename, json_data, results)
|
78 |
-
|
|
|
79 |
|
80 |
def create_ui():
|
81 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
82 |
-
delayed_warning_html = gr.HTML("", visible=False)
|
83 |
gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
|
84 |
gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
|
85 |
gr.Markdown("""
|
@@ -510,12 +482,14 @@ def create_ui():
|
|
510 |
gr.update(visible=False), # validation_results
|
511 |
gr.update(visible=False), # validation_progress
|
512 |
gr.update(visible=False), # report_group
|
513 |
-
None,
|
514 |
-
|
515 |
]
|
516 |
|
517 |
-
|
|
|
518 |
|
|
|
519 |
try:
|
520 |
with open(file.name, 'r') as f:
|
521 |
json_data = json.load(f)
|
@@ -523,18 +497,19 @@ def create_ui():
|
|
523 |
except:
|
524 |
dataset_name = 'unnamed'
|
525 |
|
|
|
526 |
report_filename = f"report_croissant-validation_{dataset_name}.md"
|
527 |
if report:
|
528 |
with open(report_filename, "w") as f:
|
529 |
f.write(report)
|
530 |
|
|
|
531 |
return [
|
532 |
-
build_results_html(results),
|
533 |
-
gr.update(visible=False),
|
534 |
-
gr.update(visible=True) if report else gr.update(visible=False),
|
535 |
-
report if report else None,
|
536 |
-
report_filename if report else None
|
537 |
-
warning_html_update or gr.update(visible=False)
|
538 |
]
|
539 |
|
540 |
# Connect UI events to functions with updated outputs
|
@@ -564,7 +539,7 @@ def create_ui():
|
|
564 |
None, # report_text
|
565 |
None # report_md
|
566 |
]
|
567 |
-
|
568 |
validate_btn.click(
|
569 |
fn=show_progress,
|
570 |
inputs=None,
|
@@ -573,7 +548,7 @@ def create_ui():
|
|
573 |
).then(
|
574 |
fn=on_validate,
|
575 |
inputs=file_input,
|
576 |
-
outputs=[validation_results, validation_progress, report_group, report_text, report_md
|
577 |
)
|
578 |
|
579 |
fetch_btn.click(
|
@@ -588,6 +563,18 @@ def create_ui():
|
|
588 |
<p>Learn more about <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant</a>.</p>
|
589 |
</div>
|
590 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
591 |
|
592 |
return app
|
593 |
|
|
|
17 |
import time
|
18 |
import traceback
|
19 |
from validation import validate_json, validate_croissant, validate_records, generate_validation_report
|
|
|
20 |
|
21 |
def process_file(file):
|
22 |
results = []
|
23 |
json_data = None
|
|
|
|
|
24 |
|
25 |
filename = file.name.split("/")[-1]
|
26 |
|
27 |
+
# Check 1: JSON validation
|
28 |
json_valid, json_message, json_data = validate_json(file.name)
|
29 |
json_message = json_message.replace("\nβ\n", "\n")
|
30 |
results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
|
31 |
+
|
32 |
if not json_valid:
|
33 |
+
return results, None
|
34 |
|
35 |
+
# Check 2: Croissant validation
|
36 |
croissant_valid, croissant_message = validate_croissant(json_data)
|
37 |
croissant_message = croissant_message.replace("\nβ\n", "\n")
|
38 |
results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
|
39 |
+
|
40 |
if not croissant_valid:
|
41 |
+
return results, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
+
# Check 3: Records validation (with timeout-safe and error-specific logic)
|
44 |
+
records_valid, records_message, records_status = validate_records(json_data)
|
45 |
records_message = records_message.replace("\nβ\n", "\n")
|
46 |
results.append(("Records Generation Test", records_valid, records_message, records_status))
|
47 |
|
48 |
+
# Generate final report
|
49 |
report = generate_validation_report(filename, json_data, results)
|
50 |
+
|
51 |
+
return results, report
|
52 |
|
53 |
def create_ui():
|
54 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
|
55 |
gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
|
56 |
gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
|
57 |
gr.Markdown("""
|
|
|
482 |
gr.update(visible=False), # validation_results
|
483 |
gr.update(visible=False), # validation_progress
|
484 |
gr.update(visible=False), # report_group
|
485 |
+
None, # report_text
|
486 |
+
None # report_md
|
487 |
]
|
488 |
|
489 |
+
# Process the file and get results
|
490 |
+
results, report = process_file(file)
|
491 |
|
492 |
+
# Extract dataset name from the JSON for the report filename
|
493 |
try:
|
494 |
with open(file.name, 'r') as f:
|
495 |
json_data = json.load(f)
|
|
|
497 |
except:
|
498 |
dataset_name = 'unnamed'
|
499 |
|
500 |
+
# Save report to file with new naming convention
|
501 |
report_filename = f"report_croissant-validation_{dataset_name}.md"
|
502 |
if report:
|
503 |
with open(report_filename, "w") as f:
|
504 |
f.write(report)
|
505 |
|
506 |
+
# Return final state
|
507 |
return [
|
508 |
+
build_results_html(results), # validation_results
|
509 |
+
gr.update(visible=False), # validation_progress
|
510 |
+
gr.update(visible=True) if report else gr.update(visible=False), # report_group
|
511 |
+
report if report else None, # report_text
|
512 |
+
report_filename if report else None # report_md
|
|
|
513 |
]
|
514 |
|
515 |
# Connect UI events to functions with updated outputs
|
|
|
539 |
None, # report_text
|
540 |
None # report_md
|
541 |
]
|
542 |
+
|
543 |
validate_btn.click(
|
544 |
fn=show_progress,
|
545 |
inputs=None,
|
|
|
548 |
).then(
|
549 |
fn=on_validate,
|
550 |
inputs=file_input,
|
551 |
+
outputs=[validation_results, validation_progress, report_group, report_text, report_md]
|
552 |
)
|
553 |
|
554 |
fetch_btn.click(
|
|
|
563 |
<p>Learn more about <a href="https://github.com/mlcommons/croissant" target="_blank">Croissant</a>.</p>
|
564 |
</div>
|
565 |
""")
|
566 |
+
|
567 |
+
gr.HTML("""
|
568 |
+
<div class="progress-status" style="text-align: left; color: #d35400;">
|
569 |
+
⚠️ It is possible that this validator is currently being used by a lot of people at the same time, which may trigger rate limiting by the platform hosting your data.
|
570 |
+
The app will then try again and may get into a very long loop. If it takes too long to run, we recommend using any of the following options:
|
571 |
+
<ul style="text-align:left; margin: 0 auto; display:inline-block;">
|
572 |
+
<li>π Click the button with the three dots (⋯) above and select "Duplicate this Space" to run this app in your own Hugging Face space.</li>
|
573 |
+
<li>💻 Click the button with the three dots (⋯) above and select "Run Locally" and then "Clone (git)" to get instructions to run the checker locally. You can also use docker option (you don't need the tokens).</li>
|
574 |
+
<li>π₯ Run the Croissant validation code yourself (<a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a>), e.g. with <a href="https://github.com/mlcommons/croissant/tree/7a632f34438e9c8e3812c6a0049898560259c6d4/python/mlcroissant/mlcroissant/scripts" target="_blank">these scripts</a> (validate and load).</li>
|
575 |
+
</ul>
|
576 |
+
</div>
|
577 |
+
""")
|
578 |
|
579 |
return app
|
580 |
|