{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "585da432", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of parquet files 30\n", "Reading geclm-datasets/samples/c4/20230404_102105_00007_t8w9z_da4e86ed-bac9-440c-ae5e-29e551e62ec0\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_code/20230404_102116_00007_ajvns_2a7caa57-9adc-48f6-900e-f87f572f8c3b\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_github_issues/20230404_102127_00022_yv77i_4b3257ed-3e44-4961-bd02-017d135e96f0\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_jupyter_markdowned_clean_dedup/20230404_102137_00026_vwcg7_8778ba21-a464-4949-8d71-aa1414a45d3c\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/books3/20230404_102143_00027_t4kwf_b39fa726-6484-4103-a9a3-fd8774796e75\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/gutenberg_raw/20230404_102215_00007_x3ntt_ddbaef74-459c-40a0-8b8f-d2f17af55991\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/reddit_threaded/20230404_102241_00049_xj4uk_61f1e105-1765-4c37-a659-5895ca3398e2\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/enwiki_data/20230404_102246_00007_ye63c_c3bd1037-1438-4ab3-97cd-24fd8ede501a\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/s2orc_dedup/20230404_102252_00080_6ce5q_c45e4ff8-83fe-4b65-b5ae-f52e2b27e96c\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/stackexchange2/20230404_102308_00031_qvnh6_fc1b4f61-9b84-481f-95bc-d7e0b8542030\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/commoncrawl/20230404_124237_00026_sin5w_96df9c84-d8b3-454c-a3ce-ad9550da36bc\n", "Running on local URL: http://127.0.0.1:7860\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "