Commit
·
8ed167c
1
Parent(s):
a8989f9
timeout warning
Browse files- app.py +51 -25
- validation.py +1 -0
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import requests
|
2 |
import os
|
3 |
|
@@ -16,41 +17,69 @@ import json
|
|
16 |
import time
|
17 |
import traceback
|
18 |
from validation import validate_json, validate_croissant, validate_records, generate_validation_report
|
|
|
19 |
|
20 |
def process_file(file):
    """Validate an uploaded Croissant file and build its validation report.

    Three checks run in order: JSON format, Croissant schema, and records
    generation. A failure of either of the first two stops processing.

    Args:
        file: Uploaded-file object; only its ``name`` attribute (a path
            string) is read.

    Returns:
        A ``(results, report)`` tuple. ``results`` is a list of
        ``(check_name, passed, message, status)`` tuples, one per check that
        ran. ``report`` is ``None`` when JSON or schema validation failed,
        otherwise whatever ``generate_validation_report`` returned.
    """
    results = []
    filename = file.name.split("/")[-1]

    # Check 1: the file must parse as JSON before anything else can run.
    json_valid, json_message, json_data = validate_json(file.name)
    results.append((
        "JSON Format Validation",
        json_valid,
        json_message.replace("\n✓\n", "\n"),
        "pass" if json_valid else "error",
    ))
    if not json_valid:
        return results, None

    # Check 2: the parsed document must satisfy the Croissant schema.
    croissant_valid, croissant_message = validate_croissant(json_data)
    results.append((
        "Croissant Schema Validation",
        croissant_valid,
        croissant_message.replace("\n✓\n", "\n"),
        "pass" if croissant_valid else "error",
    ))
    if not croissant_valid:
        return results, None

    # Check 3: records generation; its status string comes straight from
    # validate_records rather than being derived from the boolean.
    records_valid, records_message, records_status = validate_records(json_data)
    results.append((
        "Records Generation Test",
        records_valid,
        records_message.replace("\n✓\n", "\n"),
        records_status,
    ))

    # All three checks ran, so a full report can be generated.
    return results, generate_validation_report(filename, json_data, results)
|
51 |
|
52 |
def create_ui():
|
53 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
|
54 |
gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
|
55 |
gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
|
56 |
gr.Markdown("""
|
@@ -481,14 +510,12 @@ def create_ui():
|
|
481 |
gr.update(visible=False), # validation_results
|
482 |
gr.update(visible=False), # validation_progress
|
483 |
gr.update(visible=False), # report_group
|
484 |
-
None,
|
485 |
-
|
486 |
]
|
487 |
|
488 |
-
|
489 |
-
results, report = process_file(file)
|
490 |
|
491 |
-
# Extract dataset name from the JSON for the report filename
|
492 |
try:
|
493 |
with open(file.name, 'r') as f:
|
494 |
json_data = json.load(f)
|
@@ -496,19 +523,18 @@ def create_ui():
|
|
496 |
except:
|
497 |
dataset_name = 'unnamed'
|
498 |
|
499 |
-
# Save report to file with new naming convention
|
500 |
report_filename = f"report_croissant-validation_{dataset_name}.md"
|
501 |
if report:
|
502 |
with open(report_filename, "w") as f:
|
503 |
f.write(report)
|
504 |
|
505 |
-
# Return final state
|
506 |
return [
|
507 |
-
build_results_html(results),
|
508 |
-
gr.update(visible=False),
|
509 |
-
gr.update(visible=True) if report else gr.update(visible=False),
|
510 |
-
report if report else None,
|
511 |
-
report_filename if report else None
|
|
|
512 |
]
|
513 |
|
514 |
# Connect UI events to functions with updated outputs
|
@@ -538,7 +564,7 @@ def create_ui():
|
|
538 |
None, # report_text
|
539 |
None # report_md
|
540 |
]
|
541 |
-
|
542 |
validate_btn.click(
|
543 |
fn=show_progress,
|
544 |
inputs=None,
|
@@ -547,7 +573,7 @@ def create_ui():
|
|
547 |
).then(
|
548 |
fn=on_validate,
|
549 |
inputs=file_input,
|
550 |
-
outputs=[validation_results, validation_progress, report_group, report_text, report_md]
|
551 |
)
|
552 |
|
553 |
fetch_btn.click(
|
|
|
1 |
+
import mlcroissant._src.operation_graph.operations.download as dl_mod
|
2 |
import requests
|
3 |
import os
|
4 |
|
|
|
17 |
import time
|
18 |
import traceback
|
19 |
from validation import validate_json, validate_croissant, validate_records, generate_validation_report
|
20 |
+
import threading
|
21 |
|
22 |
# HTML shown in the UI when the records check outlives the warning delay.
_SLOW_VALIDATION_WARNING = """
⚠️ <b>This is taking longer than usual</b>. It is possible that this checker is currently being used by a lot of people
at the same time, which may trigger rate limiting by the platform hosting your data. The app will then try again and may get into a very long loop.<br><br>
In that case, we recommend using any of the following options:
<ul style="text-align:left; margin: 0 auto; display:inline-block;">
<li>🔁 Duplicate this Space on Hugging Face</li>
<li>💻 Run it locally (GitHub or Docker)</li>
<li>🥐 Use <code>mlcroissant</code> from <a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a></li>
</ul>
"""


def process_file(file, warning_delay=0.1):
    """Validate an uploaded Croissant file and build its validation report.

    Three checks run in order: JSON format, Croissant schema, and records
    generation. A failure of either of the first two stops processing. While
    the records check runs, a timer is armed; if it fires before the check
    returns, a "taking longer than usual" notice is included in the result.

    Args:
        file: Uploaded-file object; only its ``name`` attribute (a path
            string) is read.
        warning_delay: Seconds the records check may run before the slow
            validation notice is triggered.
            NOTE(review): the default of 0.1 s is kept from the original
            hard-coded value; it looks like a debugging threshold given the
            "longer than usual" wording -- confirm the intended production
            value.

    Returns:
        A ``(results, report, warning_html_update)`` tuple. ``results`` is a
        list of ``(check_name, passed, message, status)`` tuples, one per
        check that ran. When JSON or schema validation fails, ``report`` and
        ``warning_html_update`` are both ``None``. Otherwise ``report`` is the
        value returned by ``generate_validation_report`` and
        ``warning_html_update`` is a ``gr.update`` that shows the notice if
        the timer fired, or hides the component if it did not.
    """
    results = []
    filename = os.path.basename(file.name)

    # JSON validation
    json_valid, json_message, json_data = validate_json(file.name)
    json_message = json_message.replace("\n✓\n", "\n")
    results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
    if not json_valid:
        return results, None, None

    # Schema validation
    croissant_valid, croissant_message = validate_croissant(json_data)
    croissant_message = croissant_message.replace("\n✓\n", "\n")
    results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
    if not croissant_valid:
        return results, None, None

    # Arm the timer just before the records check. The notice text is a
    # constant, so the timer only needs to flip an Event.
    fired = threading.Event()
    timer = threading.Timer(warning_delay, fired.set)
    timer.start()
    try:
        records_valid, records_message, records_status = validate_records(json_data)
    finally:
        # Always disarm the timer, even if validate_records raises.
        timer.cancel()

    # NOTE(review): the notice is only decided after validate_records has
    # returned, so it reaches the caller once validation is already over --
    # confirm whether it was meant to appear while validation is running.
    if fired.is_set():
        warning_html_update = gr.update(value=_SLOW_VALIDATION_WARNING, visible=True)
    else:
        warning_html_update = gr.update(visible=False)

    records_message = records_message.replace("\n✓\n", "\n")
    results.append(("Records Generation Test", records_valid, records_message, records_status))

    report = generate_validation_report(filename, json_data, results)
    return results, report, warning_html_update
|
|
|
79 |
|
80 |
def create_ui():
|
81 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
82 |
+
delayed_warning_html = gr.HTML("", visible=False)
|
83 |
gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
|
84 |
gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
|
85 |
gr.Markdown("""
|
|
|
510 |
gr.update(visible=False), # validation_results
|
511 |
gr.update(visible=False), # validation_progress
|
512 |
gr.update(visible=False), # report_group
|
513 |
+
None, None, # report_text, report_md
|
514 |
+
gr.update(visible=False) # delayed_warning_html
|
515 |
]
|
516 |
|
517 |
+
results, report, warning_html_update = process_file(file)
|
|
|
518 |
|
|
|
519 |
try:
|
520 |
with open(file.name, 'r') as f:
|
521 |
json_data = json.load(f)
|
|
|
523 |
except:
|
524 |
dataset_name = 'unnamed'
|
525 |
|
|
|
526 |
report_filename = f"report_croissant-validation_{dataset_name}.md"
|
527 |
if report:
|
528 |
with open(report_filename, "w") as f:
|
529 |
f.write(report)
|
530 |
|
|
|
531 |
return [
|
532 |
+
build_results_html(results),
|
533 |
+
gr.update(visible=False),
|
534 |
+
gr.update(visible=True) if report else gr.update(visible=False),
|
535 |
+
report if report else None,
|
536 |
+
report_filename if report else None,
|
537 |
+
warning_html_update or gr.update(visible=False)
|
538 |
]
|
539 |
|
540 |
# Connect UI events to functions with updated outputs
|
|
|
564 |
None, # report_text
|
565 |
None # report_md
|
566 |
]
|
567 |
+
|
568 |
validate_btn.click(
|
569 |
fn=show_progress,
|
570 |
inputs=None,
|
|
|
573 |
).then(
|
574 |
fn=on_validate,
|
575 |
inputs=file_input,
|
576 |
+
outputs=[validation_results, validation_progress, report_group, report_text, report_md, delayed_warning_html]
|
577 |
)
|
578 |
|
579 |
fetch_btn.click(
|
validation.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import requests
|
2 |
import os
|
3 |
|
|
|
1 |
+
import mlcroissant._src.operation_graph.operations.download as dl_mod
|
2 |
import requests
|
3 |
import os
|
4 |
|