Spaces:
Runtime error
Runtime error
| from fasthtml.common import * | |
| from fasthtml.components import * | |
| import json | |
| import string | |
| import random | |
| import jsonlines | |
def gen_random_id() -> str:
    """Return a random identifier made of eight lowercase ASCII letters."""
    letters = random.choices(string.ascii_lowercase, k=8)
    return "".join(letters)
def view_data(
    before,
    after,
    doc_id,
    data_source: str = None,
    data_sources=None,
    target: str = "colcontent",
    max_doc_id: int = 9,
):
    """Render a side-by-side "raw" vs "extracted" panel with its controls.

    Args:
        before: JSON-serialisable record shown in the "Raw format" column.
        after: JSON-serialisable record shown in the "Extracted format" column.
        doc_id: Index of the displayed sample; used as the slider value and
            echoed next to the slider.
        data_source: Entry of ``data_sources`` that is marked as selected.
        data_sources: Optional iterable of source names. When given, a
            drop-down listing them is rendered above the slider.
        target: ``id`` of the returned container. It is also embedded in the
            control names and in the ``/curated/{target}`` request URL.
        max_doc_id: Upper bound of the sample slider. Defaults to 9, the
            value that used to be hard-coded.

    Returns:
        A ``Div`` with ``id=target`` holding the control form and the two
        JSON columns.
    """
    pre_style = "white-space: pre-wrap; word-break: break-all;"
    endpoint = f"/curated/{target}"
    selector = f"#{target}"

    # The drop-down row is only rendered when a list of sources is supplied.
    source_row = None
    if data_sources is not None:
        drop_down = Select(
            *[
                Option(ds, value=ds, selected=(ds == data_source))
                for ds in data_sources
            ],
            name=f"data_source_{target}",
            hx_get=endpoint,
            hx_target=selector,
            hx_trigger="change",
            hx_swap="innerHTML",
        )
        source_row = Div(
            Label("Data source: ", drop_down),
        )

    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max=str(max_doc_id),
        value=str(doc_id),
        hx_get=endpoint,
        hx_target=selector,
        hx_trigger="change",
        hx_swap="innerHTML",
        # Send the currently selected data source along with the slider value.
        hx_include=f'[name="data_source_{target}"]',
    )
    form = Form(
        source_row,
        Div(
            Label("Data sample: ", slider, f"{doc_id}", cls="plotly_slider"),
        ),
        cls="plotly_input_container",
    )

    # ensure_ascii=False keeps non-ASCII text readable instead of showing
    # \uXXXX escapes, matching the other viewers in this module.
    col1 = Div(
        H3("Raw format"),
        Pre(
            json.dumps(before, indent=4, ensure_ascii=False),
            style=pre_style,
        ),
        style="width: 48%; float: left; overflow-x: auto;",
    )
    col2 = Div(
        H3("Extracted format"),
        Pre(
            json.dumps(after, indent=4, ensure_ascii=False),
            style=pre_style,
        ),
        style="width: 48%; float: right; overflow-x: auto;",
    )
    data_display = Div(
        col1,
        col2,
        style="overflow: auto; clear: both; height: 600px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(form, data_display, style="margin-top: 10px;", id=target)
def DVS(
    left,
    header,
):
    """Render one JSON-serialisable record under a heading.

    ``left`` is pretty-printed with a four-space indent (non-ASCII characters
    are kept as-is) inside a scrollable 200px-high box, and ``header`` is
    shown as an ``H3`` above it.
    """
    pretty = json.dumps(left, indent=4, ensure_ascii=False)
    record_block = Pre(
        pretty,
        style="white-space: pre-wrap; word-break: break-all;",
    )
    column = Div(record_block, style="float: left; overflow-x: auto;")
    viewport = Div(
        column,
        style="overflow: auto; clear: both; height: 200px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(H3(header), viewport, style="margin-top: 10px;")
def DV(
    left_file,
    doc_id,
    header,
    target: str = None,
):
    """Render a single-file record viewer with a sample slider.

    Args:
        left_file: Path of the data file. Paths ending in ``"jsonl"`` are read
            with ``jsonlines``; anything else is parsed as one UTF-8 JSON
            document. The loaded data is indexed by integer position.
        doc_id: Index of the record to display. Values outside the slider
            range are clamped into it.
        header: Heading shown above the slider; it is also sent back to the
            server with each slider request.
        target: ``id`` of the returned container. A random id is generated
            when omitted.

    Returns:
        A ``Div`` with ``id=target`` containing the heading, the slider and
        the pretty-printed record.

    Raises:
        IndexError: If the file contains no records.
    """
    if target is None:
        target = gen_random_id()

    # Context managers make sure the file handle is closed after loading.
    if left_file.endswith("jsonl"):
        with jsonlines.open(left_file) as reader:
            left = list(reader)
    else:
        with open(left_file, encoding="utf-8") as fh:
            left = json.load(fh)

    max_doc_id = len(left) - 1
    # Keep the index inside the slider range so a stale or default value
    # does not raise when the file is shorter than expected.
    doc_id = max(0, min(doc_id, max_doc_id))

    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max=str(max_doc_id),
        value=str(doc_id),
        hx_get=f"/update/{target}",
        hx_target=f"#{target}",
        hx_trigger="change",
        hx_swap="innerHTML",
        # Extra request values so the server can rebuild this same viewer.
        hx_vals=json.dumps({"left_file": f"{left_file}", "header": f"{header}"}),
    )
    form = Div(
        H3(header),
        Label(
            "Data sample: ", slider, f"{doc_id} of {max_doc_id}", cls="plotly_slider"
        ),
        cls="plotly_input_container",
        style="padding: 20px;",
    )
    col1 = Div(
        Pre(
            json.dumps(left[doc_id], indent=4, ensure_ascii=False),
            style="white-space: pre-wrap; word-break: break-all;",
        ),
        style="float: left; overflow-x: auto;",
    )
    data_display = Div(
        col1,
        style="overflow: auto; clear: both; height: 600px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(form, data_display, style="margin-top: 10px;", id=target)
def DV2(
    left_file,
    right_file,
    doc_id,
    target: str = None,
):
    """Render a two-file, side-by-side record viewer with a sample slider.

    Args:
        left_file: Path of the UTF-8 JSON file shown under "Raw format".
        right_file: Path of the UTF-8 JSON file shown under "Extracted format".
        doc_id: Index of the record pair to display. Values outside the
            slider range are clamped into it.
        target: ``id`` of the returned container. A random id is generated
            when omitted.

    Returns:
        A ``Div`` with ``id=target`` containing the slider and both
        pretty-printed records.

    Raises:
        IndexError: If either file contains no records.
    """
    if target is None:
        target = gen_random_id()

    # Context managers make sure both file handles are closed after loading.
    with open(left_file, encoding="utf-8") as fh:
        left = json.load(fh)
    with open(right_file, encoding="utf-8") as fh:
        right = json.load(fh)

    # Only indices present in BOTH files can be displayed, so the slider
    # range follows the shorter file.
    max_doc_id = min(len(left), len(right)) - 1
    doc_id = max(0, min(doc_id, max_doc_id))

    slider = Input(
        type="range",
        name=f"doc_id_{target}",
        min="0",
        max=str(max_doc_id),
        value=str(doc_id),
        hx_get=f"/update/{target}",
        hx_target=f"#{target}",
        hx_trigger="change",
        hx_swap="innerHTML",
        # Extra request values so the server can rebuild this same viewer.
        hx_vals=json.dumps(
            {"left_file": f"{left_file}", "right_file": f"{right_file}"}
        ),
    )
    form = Div(
        Label(
            "Data sample: ", slider, f"{doc_id} of {max_doc_id}", cls="plotly_slider"
        ),
        cls="plotly_input_container",
        style="padding: 20px;",
    )
    col1 = Div(
        H3("Raw format", style="margin-top: 0px;"),
        Pre(
            json.dumps(left[doc_id], indent=4, ensure_ascii=False),
            style="white-space: pre-wrap; word-break: break-all;",
        ),
        style="width: 48%; float: left; overflow-x: auto;",
    )
    col2 = Div(
        H3("Extracted format", style="margin-top: 0px;"),
        Pre(
            json.dumps(right[doc_id], indent=4, ensure_ascii=False),
            style="white-space: pre-wrap; word-break: break-all;",
        ),
        style="width: 48%; float: right; overflow-x: auto;",
    )
    data_display = Div(
        col1,
        col2,
        style="overflow: auto; clear: both; height: 600px; border: 1px solid #ccc; padding: 20px;",
    )
    return Div(form, data_display, style="margin-top: 10px;", id=target)
def update(target: str, request):
    """Rebuild a viewer from the query parameters of a slider request.

    Args:
        target: Element id of the viewer being refreshed; it selects the
            ``doc_id_{target}`` query parameter.
        request: Object exposing ``query_params`` with a ``get`` method.

    Returns:
        A 1-tuple holding the ``DV2`` viewer when both ``left_file`` and
        ``right_file`` are supplied, otherwise the ``DV`` viewer itself.

    Raises:
        ValueError: If ``left_file`` is missing or empty, or if the
            ``doc_id_{target}`` value is not an integer.
    """
    params = request.query_params
    doc_id = int(params.get(f"doc_id_{target}", 3))
    left_file = params.get("left_file")
    right_file = params.get("right_file")

    # Fail early with a clear message instead of an opaque error from the
    # viewer when no file was supplied.
    if not left_file:
        raise ValueError("missing required 'left_file' query parameter")

    # SECURITY NOTE(review): left_file / right_file come straight from the
    # query string and are opened as paths by the viewers. Confirm they are
    # restricted to an allowed data directory before exposing this handler.
    if right_file:
        # NOTE(review): this branch returns a 1-tuple while the other returns
        # the component itself; the shape is kept for backward compatibility.
        return (
            DV2(
                left_file,
                right_file,
                doc_id,
                target,
            ),
        )
    return DV(
        left_file,
        doc_id,
        params.get("header"),
        target,
    )