File size: 178,193 Bytes
b4ec773
1
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"machine_shape":"hm","gpuType":"A100","authorship_tag":"ABX9TyOhvtU20bH9dCL4a+j/lxDN"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"563d2d5cfb4f49f2b58358dbaad3f3f3":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_eeb28e6ffce34baa872b00720553300d","IPY_MODEL_c93a558c64a449ba896b2e9ead2c4abd","IPY_MODEL_e8234bb489c147058b9254db2586f431"],"layout":"IPY_MODEL_71ff49917bd74a7eb5adfa4a3dba3eb7"}},"eeb28e6ffce34baa872b00720553300d":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f900e38236ed4ae3a3b96b9715ef9516","placeholder":"​","style":"IPY_MODEL_86092064ea1e4960a8c54afd7158ddcd","value":"tokenizer_config.json: 100%"}},"c93a558c64a449ba896b2e9ead2c4abd":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_509a20eca0374dd395a8c13638c911a2","max":1156999,"min":0,"orientation":"horizontal","style":"IPY_MODEL_68156342278a40de86542d87e01bf8d0","value":1156999}},"e8234bb489c147058b9254db2586f431":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_765fd4fd7b0541759de6b3bbd73565ed","placeholder":"","style":"IPY_MODEL_52f22103ca7642e5b1423bf46ff35c5b","value":"1.16M/1.16M [00:00&lt;00:00, 4.94MB/s]"}},"71ff49917bd74a7eb5adfa4a3dba3eb7":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f900e38236ed4ae3a3b96b9715ef9516":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"86092064ea1e4960a8c54afd7158ddcd":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"509a20eca0374dd395a8c13638c911a2":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"68156342278a40de86542d87e01bf8d0":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"765fd4fd7b0541759de6b3bbd73565ed":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"52f22103ca7642e5b1423bf46ff35c5b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"d5c687e1c3bb414fb9f98a670125546f":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_718cc1a4e6f74798b03b5c0f85a41f75","IPY_MODEL_dbe78729a3d14a1991e942d610d39854","IPY_MODEL_8987e510abb747749908c4f9d23feb1e"],"layout":"IPY_MODEL_c0891ca2464b4bb7b78032033d0e58be"}},"718cc1a4e6f74798b03b5c0f85a41f75":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_8c08355a6e8d427fa1a6f07628606ff1","placeholder":"","style":"IPY_MODEL_81716e19ed424c138e48b32b8480634f","value":"tokenizer.model: 100%"}},"dbe78729a3d14a1991e942d610d39854":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_dc09859473914d7e8125365f057b48d8","max":4689074,"min":0,"orientation":"horizontal","style":"IPY_MODEL_e03a1d6211c542b0ab7df61d56782e6f","value":4689074}},"8987e510abb747749908c4f9d23feb1e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_70c5d0b38158480c9f30026b32da8f64","placeholder":"​","style":"IPY_MODEL_4d6e15d3ffda41ac9dba105dc3718e2e","value":" 4.69M/4.69M [00:00&lt;00:00, 76.0MB/s]"}},"c0891ca2464b4bb7b78032033d0e58be":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8c08355a6e8d427fa1a6f07628606ff1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"81716e19ed424c138e48b32b8480634f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"dc09859473914d7e8125365f057b48d8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e03a1d6211c542b0ab7df61d56782e6f":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"70c5d0b38158480c9f30026b32da8f64":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4d6e15d3ffda41ac9dba105dc3718e2e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"11aec296d8004b5385d57c48b37e49e2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_2ded71d4e5404de39f5cb0b54b4bacfc","IPY_MODEL_3eb9d0198d9f46c398333cd0ee56dc9c","IPY_MODEL_fcf5e713adb0422e964df8f7b18645cf"],"layout":"IPY_MODEL_f07ce4a8a0764bad81153297095d81ca"}},"2ded71d4e5404de39f5cb0b54b4bacfc":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_98b9fae6be374692823365c8f656377e","placeholder":"​","style":"IPY_MODEL_c4c7941930f64504b1aa127bfeb8183a","value":"tokenizer.json: 100%"}},"3eb9d0198d9f46c398333cd0ee56dc9c":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_722cda0a54f449f99c32f33abecfd5ca","max":33384568,"min":0,"orientation":"horizontal","style":"IPY_MODEL_2a30f0c696584d87ae7ff780d579af73","value":33384568}},"fcf5e713adb0422e964df8f7b18645cf":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_5e47cc11522c45998ecdab75ec314c60","placeholder":"","style":"IPY_MODEL_18e69b91c0a34e8d8458f8e41e27b537","value":"33.4M/33.4M [00:00&lt;00:00, 297MB/s]"}},"f07ce4a8a0764bad81153297095d81ca":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"98b9fae6be374692823365c8f656377e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c4c7941930f64504b1aa127bfeb8183a":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"722cda0a54f449f99c32f33abecfd5ca":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2a30f0c696584d87ae7ff780d579af73":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"5e47cc11522c45998ecdab75ec314c60":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"18e69b91c0a34e8d8458f8e41e27b537":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"bb537d2348754728928a6de84e6a1637":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_e919b6d661544c879690de70135364a6","IPY_MODEL_2eabed983fa240d0b74b819c32e35a5c","IPY_MODEL_2010f0e2be9d49cd8a07cd8cd7491bcb"],"layout":"IPY_MODEL_6babf02db1b34465beacbff3f60cb0ea"}},"e919b6d661544c879690de70135364a6":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_4bef93394bef4243a2d94c099fbd3740","placeholder":"","style":"IPY_MODEL_ba95eb0c97dd432eb171a9250cf02bfc","value":"added_tokens.json: 100%"}},"2eabed983fa240d0b74b819c32e35a5c":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_2730d424c73242b58c3f58c8be2c1490","max":35,"min":0,"orientation":"horizontal","style":"IPY_MODEL_e180eac1d12548ff901d96e87cd84d2b","value":35}},"2010f0e2be9d49cd8a07cd8cd7491bcb":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_107c7340420243ae902d64a42201a347","placeholder":"​","style":"IPY_MODEL_c0c766b557314520b3039844078bd17e","value":" 35.0/35.0 [00:00&lt;00:00, 4.50kB/s]"}},"6babf02db1b34465beacbff3f60cb0ea":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"4bef93394bef4243a2d94c099fbd3740":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"ba95eb0c97dd432eb171a9250cf02bfc":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2730d424c73242b58c3f58c8be2c1490":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"e180eac1d12548ff901d96e87cd84d2b":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"107c7340420243ae902d64a42201a347":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c0c766b557314520b3039844078bd17e":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"4edc50ee571b41018538a118749ab37c":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_01ea5245a00b46d194e29124f45596b0","IPY_MODEL_3e7a6aaab6314efdb80af76de5de2676","IPY_MODEL_74af0b7936ff4b74b902cf9489650ec3"],"layout":"IPY_MODEL_05edf850cf164537bf9df336501a0251"}},"01ea5245a00b46d194e29124f45596b0":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_2a43ba42c8414d29b51823a7068985dd","placeholder":"​","style":"IPY_MODEL_d4b28fbe8a654e9e9686824d45ecde79","value":"special_tokens_map.json: 100%"}},"3e7a6aaab6314efdb80af76de5de2676":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_afacc51fd58d43c69b219df5dbe8b84a","max":662,"min":0,"orientation":"horizontal","style":"IPY_MODEL_792b4f5f685543dc82c62dcf1a55ecfe","value":662}},"74af0b7936ff4b74b902cf9489650ec3":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a589cb1f826845f4808cf4fc4ed35dd3","placeholder":"","style":"IPY_MODEL_dabeee50c3624c5fba012ba38a0fed2d","value":"662/662 [00:00&lt;00:00, 87.9kB/s]"}},"05edf850cf164537bf9df336501a0251":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2a43ba42c8414d29b51823a7068985dd":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d4b28fbe8a654e9e9686824d45ecde79":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"afacc51fd58d43c69b219df5dbe8b84a":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"792b4f5f685543dc82c62dcf1a55ecfe":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"a589cb1f826845f4808cf4fc4ed35dd3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"dabeee50c3624c5fba012ba38a0fed2d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"cd245abb7c4c4ac5a02c033a726956f3":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_f4699de85b4842bcbff3809dc82f01ce","IPY_MODEL_64b75b21831246cabfff2138295c1ae0","IPY_MODEL_f4d80d46fbd04cf584a48bce4ba7a75e"],"layout":"IPY_MODEL_ddc3f89efa4e4eb39bdea5e86b1a7154"}},"f4699de85b4842bcbff3809dc82f01ce":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1d4690c6ddd84ffcb348fd385212f636","placeholder":"","style":"IPY_MODEL_279170ca7ea34f1ca9d8fa93db82dede","value":"README.md: 100%"}},"64b75b21831246cabfff2138295c1ae0":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_466d69d5282843758ddb72aeac2ecc35","max":1061,"min":0,"orientation":"horizontal","style":"IPY_MODEL_63ea53cd047249578e9a839c5ee3af88","value":1061}},"f4d80d46fbd04cf584a48bce4ba7a75e":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_a55695f08e114b9e8bd5ea74b630b114","placeholder":"​","style":"IPY_MODEL_73e7a2b6afa94487990dfeb691e5a479","value":" 1.06k/1.06k [00:00&lt;00:00, 124kB/s]"}},"ddc3f89efa4e4eb39bdea5e86b1a7154":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1d4690c6ddd84ffcb348fd385212f636":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"279170ca7ea34f1ca9d8fa93db82dede":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"466d69d5282843758ddb72aeac2ecc35":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"63ea53cd047249578e9a839c5ee3af88":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"a55695f08e114b9e8bd5ea74b630b114":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"73e7a2b6afa94487990dfeb691e5a479":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"205101af4fe5422b9b06381994944bba":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_b55f1bc35a1d4233a1572fb67c7776b1","IPY_MODEL_3a2b0619a50a4018b1ca83eb315df2ac","IPY_MODEL_b8939bccb7f84b809774f30df86e5ebc"],"layout":"IPY_MODEL_5b85c4e79f0044e1804f098e73b1b923"}},"b55f1bc35a1d4233a1572fb67c7776b1":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1b7ce6af5c154812951d065ad692eb83","placeholder":"​","style":"IPY_MODEL_49892386c1cc4dc9ad807fbbad4c091d","value":"(…)-00000-of-00004-2d5a1467fff1081b.parquet: 100%"}},"3a2b0619a50a4018b1ca83eb315df2ac":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"danger","description":"","description_tooltip":null,"layout":"IPY_MODEL_43c853a237dd40db85b5da5718f1a156","max":248731111,"min":0,"orientation":"horizontal","style":"IPY_MODEL_87e6b870f8e342a8b883dcda28a7e972","value":248731088}},"b8939bccb7f84b809774f30df86e5ebc":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_3d74ab9f41f641d1bf3f93b4919e7a9d","placeholder":"","style":"IPY_MODEL_f587ec490939492abe219f5f02d6fec5","value":"249M/249M [00:00&lt;00:00, 412MB/s]"}},"5b85c4e79f0044e1804f098e73b1b923":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1b7ce6af5c154812951d065ad692eb83":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"49892386c1cc4dc9ad807fbbad4c091d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"43c853a237dd40db85b5da5718f1a156":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"87e6b870f8e342a8b883dcda28a7e972":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"3d74ab9f41f641d1bf3f93b4919e7a9d":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"f587ec490939492abe219f5f02d6fec5":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5e777fce2d8c44c690b4ae03a418728b":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_1a111e98d9ea40a1936bd45f51e5f604","IPY_MODEL_cf6829a7567c4ae1a1f11a16414b3ebc","IPY_MODEL_3b164c763c664600bb1f1d00a9a989f7"],"layout":"IPY_MODEL_3bb8cb2c1d9e46efa7e5f9d3935e9a30"}},"1a111e98d9ea40a1936bd45f51e5f604":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_9f2bab320ab24b71a38e053c32ecf190","placeholder":"","style":"IPY_MODEL_303a01fe6824463ca6d3b237c9bdd5b2","value":"(…)-00001-of-00004-5852b56a2bd28fd9.parquet: 100%"}},"cf6829a7567c4ae1a1f11a16414b3ebc":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"danger","description":"","description_tooltip":null,"layout":"IPY_MODEL_2910354ce8ea4e6db038fe19f392fad8","max":248171980,"min":0,"orientation":"horizontal","style":"IPY_MODEL_15def3ffba3d456db2259285a7be3635","value":248171957}},"3b164c763c664600bb1f1d00a9a989f7":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_451dc0636375463b93bbe34e2bd3f91b","placeholder":"​","style":"IPY_MODEL_2b2d39b39bf84017a122f6e030e77fb2","value":" 248M/248M [00:00&lt;00:00, 668MB/s]"}},"3bb8cb2c1d9e46efa7e5f9d3935e9a30":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9f2bab320ab24b71a38e053c32ecf190":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"303a01fe6824463ca6d3b237c9bdd5b2":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"2910354ce8ea4e6db038fe19f392fad8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"15def3ffba3d456db2259285a7be3635":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"451dc0636375463b93bbe34e2bd3f91b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2b2d39b39bf84017a122f6e030e77fb2":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5634e0c4d6c349ebba40ab2343ec4411":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_d8434ec9303643a68c19f9c5a6dd2ebf","IPY_MODEL_c8f270144730457fba2e4d46fccad38a","IPY_MODEL_ddc4f437d9a24cec897f62cbb7b22ad1"],"layout":"IPY_MODEL_87a5fac2e1d6417e9ca047b2801f9287"}},"d8434ec9303643a68c19f9c5a6dd2ebf":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_1455f70c4d1f4695a98fb0acf062e14e","placeholder":"​","style":"IPY_MODEL_8f45cefe2f7045ec9493096cebba8036","value":"(…)-00002-of-00004-a26307300439e943.parquet: 100%"}},"c8f270144730457fba2e4d46fccad38a":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"danger","description":"","description_tooltip":null,"layout":"IPY_MODEL_419ca851355a42c5b3703e925ac1cc45","max":245894874,"min":0,"orientation":"horizontal","style":"IPY_MODEL_d4124dcbeb2f4f49aa82d604e3029c36","value":245894851}},"ddc4f437d9a24cec897f62cbb7b22ad1":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_001e78622cf84a33890e7e9ed4363210","placeholder":"","style":"IPY_MODEL_d238dfebad4346709a1dcca8341af69b","value":"246M/246M [00:00&lt;00:00, 439MB/s]"}},"87a5fac2e1d6417e9ca047b2801f9287":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"1455f70c4d1f4695a98fb0acf062e14e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8f45cefe2f7045ec9493096cebba8036":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"419ca851355a42c5b3703e925ac1cc45":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d4124dcbeb2f4f49aa82d604e3029c36":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"001e78622cf84a33890e7e9ed4363210":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"d238dfebad4346709a1dcca8341af69b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"da990536aab647d5924e682a3c51ded1":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_8ca4bc4c7dcd4698adeae5e5cc5ab8d5","IPY_MODEL_14a51230c0eb44b3bf512515c26f55d3","IPY_MODEL_803fffe7556246459796032dd2acde89"],"layout":"IPY_MODEL_223177953d56420d94fcb6750e361acb"}},"8ca4bc4c7dcd4698adeae5e5cc5ab8d5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_7707a1611a9b4fbcbdc08bac92bcac65","placeholder":"","style":"IPY_MODEL_a6a061a4333f46b59c19a29061f0a19c","value":"(…)-00003-of-00004-d243063613e5a057.parquet: 100%"}},"14a51230c0eb44b3bf512515c26f55d3":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"danger","description":"","description_tooltip":null,"layout":"IPY_MODEL_3beb6a659bae4e82a5b349e97cd63ee1","max":247988350,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c6a183405e614a308917b26f80852784","value":247988327}},"803fffe7556246459796032dd2acde89":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_e51755a55d994e0e8a58ed52757a6a05","placeholder":"​","style":"IPY_MODEL_8a3e33728a1348c9a33297624514675c","value":" 248M/248M [00:00&lt;00:00, 651MB/s]"}},"223177953d56420d94fcb6750e361acb":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"7707a1611a9b4fbcbdc08bac92bcac65":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a6a061a4333f46b59c19a29061f0a19c":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"3beb6a659bae4e82a5b349e97cd63ee1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c6a183405e614a308917b26f80852784":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"e51755a55d994e0e8a58ed52757a6a05":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8a3e33728a1348c9a33297624514675c":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"52ce8b0e35a942118dbbb9ade0890b07":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_0675f9c8719a49e5b5c728cdac6061ef","IPY_MODEL_a5b94699a2914ad1983565864a503a25","IPY_MODEL_a1e39c1df5834fd29206317d69149251"],"layout":"IPY_MODEL_28f44f9e35b943849b58d2238447f82e"}},"0675f9c8719a49e5b5c728cdac6061ef":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_554d8bef71d842948d0322f2d64fc1c3","placeholder":"​","style":"IPY_MODEL_555402dee2ec43078acedee153840b98","value":"(…)-00000-of-00001-869c898b519ad725.parquet: 100%"}},"a5b94699a2914ad1983565864a503a25":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_c65c45c1840849bf81afe75ef5e57b13","max":9989127,"min":0,"orientation":"horizontal","style":"IPY_MODEL_012f1b7b7d6c40beb7e22ff7a2b87e77","value":9989127}},"a1e39c1df5834fd29206317d69149251":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_f906d6f3b1714ff29512ddb24436e8cc","placeholder":"","style":"IPY_MODEL_8de007ad84ea4e91bc553ba48461e41b","value":"9.99M/9.99M [00:00&lt;00:00, 100MB/s]"}},"28f44f9e35b943849b58d2238447f82e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"554d8bef71d842948d0322f2d64fc1c3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"555402dee2ec43078acedee153840b98":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"c65c45c1840849bf81afe75ef5e57b13":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"012f1b7b7d6c40beb7e22ff7a2b87e77":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"f906d6f3b1714ff29512ddb24436e8cc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"8de007ad84ea4e91bc553ba48461e41b":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"b69ef8fa743340a08702ee00c685ea5b":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_6160b51c90664150841e4a312d886650","IPY_MODEL_64e91205cc4b4595938c51486d528b7b","IPY_MODEL_11929056139c4209aff3e2cd680bb05b"],"layout":"IPY_MODEL_097b9376c2514c3f9a3202435539310c"}},"6160b51c90664150841e4a312d886650":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_fa05d0ac19c24074b5b2f32382f9afde","placeholder":"","style":"IPY_MODEL_a87be2d63e74433385a7d45f9369ec63","value":"Generating train split: 100%"}},"64e91205cc4b4595938c51486d528b7b":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_5a526d36cd3749a09813f5f0c5623836","max":2119719,"min":0,"orientation":"horizontal","style":"IPY_MODEL_9ecacb9973f345a0bfdbe2d614fc4608","value":2119719}},"11929056139c4209aff3e2cd680bb05b":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_c7d0431afc3548ac91aaa383e53b446e","placeholder":"​","style":"IPY_MODEL_6c35e25d01314dc680d3d7e6e3a03ae0","value":" 2119719/2119719 [00:06&lt;00:00, 336802.54 examples/s]"}},"097b9376c2514c3f9a3202435539310c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"fa05d0ac19c24074b5b2f32382f9afde":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"a87be2d63e74433385a7d45f9369ec63":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"5a526d36cd3749a09813f5f0c5623836":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"9ecacb9973f345a0bfdbe2d614fc4608":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"c7d0431afc3548ac91aaa383e53b446e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6c35e25d01314dc680d3d7e6e3a03ae0":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"3400dd9919f741e6a9089cfe1d575cd8":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_99a728668d5a41f7a23181cda3f17046","IPY_MODEL_f7ccf1070d7f451f8e7701f092265e81","IPY_MODEL_a970b982079e470c8f8435a6fedb4927"],"layout":"IPY_MODEL_4387cfd045f5495bac074054326fc3e0"}},"99a728668d5a41f7a23181cda3f17046":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_0151e6954b544526a38f68863a1261e1","placeholder":"​","style":"IPY_MODEL_624aec19c10549ce9f212a6f826e6833","value":"Generating validation split: 100%"}},"f7ccf1070d7f451f8e7701f092265e81":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_1abacc70390542febd37e71cb1614d8c","max":21990,"min":0,"orientation":"horizontal","style":"IPY_MODEL_6a2e3db7d02049539c2deb1929ac1a60","value":21990}},"a970b982079e470c8f8435a6fedb4927":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_efe03831c24c41da95614179eb8d3c68","placeholder":"","style":"IPY_MODEL_58f6e6a239864fd0aac277d1cd794a65","value":"21990/21990 [00:00&lt;00:00, 290625.90 examples/s]"}},"4387cfd045f5495bac074054326fc3e0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"0151e6954b544526a38f68863a1261e1":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"624aec19c10549ce9f212a6f826e6833":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"1abacc70390542febd37e71cb1614d8c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"6a2e3db7d02049539c2deb1929ac1a60":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"efe03831c24c41da95614179eb8d3c68":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"58f6e6a239864fd0aac277d1cd794a65":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"bPQr049JPfLi","executionInfo":{"status":"ok","timestamp":1745724839605,"user_tz":-420,"elapsed":21649,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"82a304a2-19f0-4c65-e1fe-55d993e5636a"},"outputs":[{"output_type":"stream","name":"stdout","text":["Requirement already satisfied: torchtune in /usr/local/lib/python3.11/dist-packages (0.6.1)\n","Requirement already satisfied: torchdata==0.11.0 in /usr/local/lib/python3.11/dist-packages (from torchtune) (0.11.0)\n","Requirement already satisfied: datasets in /usr/local/lib/python3.11/dist-packages (from torchtune) (3.5.0)\n","Requirement already satisfied: huggingface_hub[hf_transfer] in /usr/local/lib/python3.11/dist-packages (from torchtune) (0.30.2)\n","Requirement already satisfied: safetensors in /usr/local/lib/python3.11/dist-packages (from torchtune) (0.5.3)\n","Requirement already satisfied: kagglehub in /usr/local/lib/python3.11/dist-packages (from torchtune) (0.3.11)\n","Requirement already satisfied: sentencepiece in /usr/local/lib/python3.11/dist-packages (from torchtune) (0.2.0)\n","Requirement already satisfied: tiktoken in /usr/local/lib/python3.11/dist-packages (from torchtune) (0.9.0)\n","Requirement already satisfied: blobfile>=2 in /usr/local/lib/python3.11/dist-packages (from torchtune) (3.0.0)\n","Requirement already satisfied: tokenizers in /usr/local/lib/python3.11/dist-packages (from torchtune) (0.21.1)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from torchtune) (2.0.2)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from torchtune) (4.67.1)\n","Requirement already satisfied: omegaconf in /usr/local/lib/python3.11/dist-packages (from torchtune) (2.3.0)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from torchtune) (5.9.5)\n","Requirement already satisfied: Pillow>=9.4.0 in /usr/local/lib/python3.11/dist-packages (from torchtune) (11.1.0)\n","Requirement already satisfied: urllib3>=1.25 in /usr/local/lib/python3.11/dist-packages (from torchdata==0.11.0->torchtune) (2.3.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from torchdata==0.11.0->torchtune) (2.32.3)\n","Requirement already satisfied: torch>=2 in /usr/local/lib/python3.11/dist-packages (from torchdata==0.11.0->torchtune) (2.6.0+cu124)\n","Requirement already satisfied: pycryptodomex>=3.8 in /usr/local/lib/python3.11/dist-packages (from blobfile>=2->torchtune) (3.22.0)\n","Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.11/dist-packages (from blobfile>=2->torchtune) (5.3.2)\n","Requirement already satisfied: filelock>=3.0 in /usr/local/lib/python3.11/dist-packages (from blobfile>=2->torchtune) (3.18.0)\n","Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune) (18.1.0)\n","Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune) (0.3.8)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune) (2.2.2)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune) (3.5.0)\n","Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune) (0.70.16)\n","Requirement already satisfied: fsspec<=2024.12.0,>=2023.1.0 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets->torchtune) (2024.12.0)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune) (3.11.15)\n","Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune) (24.2)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets->torchtune) (6.0.2)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub[hf_transfer]->torchtune) (4.13.2)\n","Requirement already satisfied: hf-transfer>=0.1.4 in /usr/local/lib/python3.11/dist-packages (from huggingface_hub[hf_transfer]->torchtune) (0.1.9)\n","Requirement already satisfied: antlr4-python3-runtime==4.9.* in /usr/local/lib/python3.11/dist-packages (from omegaconf->torchtune) (4.9.3)\n","Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.11/dist-packages (from tiktoken->torchtune) (2024.11.6)\n","Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->torchtune) (2.6.1)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->torchtune) (1.3.2)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->torchtune) (25.3.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->torchtune) (1.6.0)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->torchtune) (6.4.3)\n","Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->torchtune) (0.3.1)\n","Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->torchtune) (1.20.0)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->torchdata==0.11.0->torchtune) (3.4.1)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->torchdata==0.11.0->torchtune) (3.10)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->torchdata==0.11.0->torchtune) (2025.1.31)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (3.4.2)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (3.1.6)\n","Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (12.4.127)\n","Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (12.4.127)\n","Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (12.4.127)\n","Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (9.1.0.70)\n","Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (12.4.5.8)\n","Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (11.2.1.3)\n","Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (10.3.5.147)\n","Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (11.6.1.9)\n","Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (12.3.1.170)\n","Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (0.6.2)\n","Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (2.21.5)\n","Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (12.4.127)\n","Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (12.4.127)\n","Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (3.2.0)\n","Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=2->torchdata==0.11.0->torchtune) (1.13.1)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=2->torchdata==0.11.0->torchtune) (1.3.0)\n","Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->torchtune) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->torchtune) (2025.2)\n","Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->torchtune) (2025.2)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets->torchtune) (1.17.0)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=2->torchdata==0.11.0->torchtune) (3.0.2)\n","Requirement already satisfied: torchao in /usr/local/lib/python3.11/dist-packages (0.10.0)\n","Requirement already satisfied: wandb in /usr/local/lib/python3.11/dist-packages (0.19.9)\n","Requirement already satisfied: click!=8.0.0,>=7.1 in /usr/local/lib/python3.11/dist-packages (from wandb) (8.1.8)\n","Requirement already satisfied: docker-pycreds>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (0.4.0)\n","Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (3.1.44)\n","Requirement already satisfied: platformdirs in /usr/local/lib/python3.11/dist-packages (from wandb) (4.3.7)\n","Requirement already satisfied: protobuf!=4.21.0,!=5.28.0,<6,>=3.19.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (5.29.4)\n","Requirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (5.9.5)\n","Requirement already satisfied: pydantic<3 in /usr/local/lib/python3.11/dist-packages (from wandb) (2.11.3)\n","Requirement already satisfied: pyyaml in /usr/local/lib/python3.11/dist-packages (from wandb) (6.0.2)\n","Requirement already satisfied: requests<3,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (2.32.3)\n","Requirement already satisfied: sentry-sdk>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from wandb) (2.26.1)\n","Requirement already satisfied: setproctitle in /usr/local/lib/python3.11/dist-packages (from wandb) (1.3.5)\n","Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from wandb) (75.2.0)\n","Requirement already satisfied: typing-extensions<5,>=4.4 in /usr/local/lib/python3.11/dist-packages (from wandb) (4.13.2)\n","Requirement already satisfied: six>=1.4.0 in /usr/local/lib/python3.11/dist-packages (from docker-pycreds>=0.4.0->wandb) (1.17.0)\n","Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/dist-packages (from gitpython!=3.1.29,>=1.0.0->wandb) (4.0.12)\n","Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3->wandb) (0.7.0)\n","Requirement already satisfied: pydantic-core==2.33.1 in /usr/local/lib/python3.11/dist-packages (from pydantic<3->wandb) (2.33.1)\n","Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3->wandb) (0.4.0)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (3.4.1)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (3.10)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (2.3.0)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2.0.0->wandb) (2025.1.31)\n","Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb) (5.0.2)\n"]}],"source":["!pip install torchtune\n","!pip install torchao\n","!pip install wandb\n","\n","\n","import torch\n","import torch.nn as nn\n","import torch.nn.functional as F\n","import math\n","import tqdm\n","from dataclasses import dataclass\n","from torchtune.modules import RMSNorm\n","from tokenizers import Tokenizer\n","from pathlib import Path\n","import torch.multiprocessing as mp\n","from torch.utils.data.distributed import DistributedSampler\n","from torch.nn.parallel import DistributedDataParallel as DDP\n","from torch.distributed import init_process_group, destroy_process_group\n","import wandb\n","from torch.utils.data import DataLoader\n","from datasets import load_dataset, concatenate_datasets"]},{"cell_type":"code","source":["!nvidia-smi"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"86xFUcCOTuf-","executionInfo":{"status":"ok","timestamp":1745724840175,"user_tz":-420,"elapsed":169,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"8c9e36ea-0b03-40e6-e76d-c0b8402b4a9c"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["Sun Apr 27 03:34:00 2025       \n","+-----------------------------------------------------------------------------------------+\n","| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |\n","|-----------------------------------------+------------------------+----------------------+\n","| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |\n","| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |\n","|                                         |                        |               MIG M. |\n","|=========================================+========================+======================|\n","|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |\n","| N/A   31C    P0             45W /  400W |       5MiB /  40960MiB |      0%      Default |\n","|                                         |                        |             Disabled |\n","+-----------------------------------------+------------------------+----------------------+\n","                                                                                         \n","+-----------------------------------------------------------------------------------------+\n","| Processes:                                                                              |\n","|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |\n","|        ID   ID                                                               Usage      |\n","|=========================================================================================|\n","|  No running processes found                                                             |\n","+-----------------------------------------------------------------------------------------+\n"]}]},{"cell_type":"code","source":["import wandb\n","from google.colab import userdata\n","\n","wandb.login(key=userdata.get('WANDB'))"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"0kZs2vvFQOjC","executionInfo":{"status":"ok","timestamp":1745724846667,"user_tz":-420,"elapsed":2001,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"3345cb17-3342-4004-8b44-a3d3f053c897"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stderr","text":["\u001b[34m\u001b[1mwandb\u001b[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.\n","\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n","\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n","\u001b[34m\u001b[1mwandb\u001b[0m: No netrc file found, creating one.\n","\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n","\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mlaampt\u001b[0m to \u001b[32mhttps://api.wandb.ai\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"]},{"output_type":"execute_result","data":{"text/plain":["True"]},"metadata":{},"execution_count":3}]},{"cell_type":"code","source":["import os\n","\n","def setup(rank=None, world_size=None):\n","    # os.environ['MASTER_ADDR'] = 'localhost'\n","    # os.environ['MASTER_PORT'] = '12355'\n","    init_process_group(\"nccl\")\n","\n","def cleanup():\n","    destroy_process_group()"],"metadata":{"id":"JACXqQwtQrta","executionInfo":{"status":"ok","timestamp":1745724848078,"user_tz":-420,"elapsed":6,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","source":["from pathlib import Path\n","data_path = Path('data')\n","data_path.mkdir(exist_ok=True)\n","!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt\n","!cp input.txt data/input.txt"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"vLrdBLeQRS8T","executionInfo":{"status":"ok","timestamp":1745724850023,"user_tz":-420,"elapsed":494,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"f7147770-4032-4e0b-f052-3251e4752ffd"},"execution_count":5,"outputs":[{"output_type":"stream","name":"stdout","text":["--2025-04-27 03:34:09--  https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt\n","Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n","Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n","HTTP request sent, awaiting response... 200 OK\n","Length: 1115394 (1.1M) [text/plain]\n","Saving to: ‘input.txt’\n","\n","input.txt           100%[===================>]   1.06M  --.-KB/s    in 0.006s  \n","\n","2025-04-27 03:34:09 (167 MB/s) - ‘input.txt’ saved [1115394/1115394]\n","\n"]}]},{"cell_type":"code","source":["# Load model directly\n","from transformers import AutoTokenizer, AutoModelForCausalLM\n","\n","tokenizer = AutoTokenizer.from_pretrained(\"google/gemma-3-27b-it\", token=userdata.get('HF_TOKEN'))\n","tokenizer.add_special_tokens({'pad_token': '[PAD]'})"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":195,"referenced_widgets":["563d2d5cfb4f49f2b58358dbaad3f3f3","eeb28e6ffce34baa872b00720553300d","c93a558c64a449ba896b2e9ead2c4abd","e8234bb489c147058b9254db2586f431","71ff49917bd74a7eb5adfa4a3dba3eb7","f900e38236ed4ae3a3b96b9715ef9516","86092064ea1e4960a8c54afd7158ddcd","509a20eca0374dd395a8c13638c911a2","68156342278a40de86542d87e01bf8d0","765fd4fd7b0541759de6b3bbd73565ed","52f22103ca7642e5b1423bf46ff35c5b","d5c687e1c3bb414fb9f98a670125546f","718cc1a4e6f74798b03b5c0f85a41f75","dbe78729a3d14a1991e942d610d39854","8987e510abb747749908c4f9d23feb1e","c0891ca2464b4bb7b78032033d0e58be","8c08355a6e8d427fa1a6f07628606ff1","81716e19ed424c138e48b32b8480634f","dc09859473914d7e8125365f057b48d8","e03a1d6211c542b0ab7df61d56782e6f","70c5d0b38158480c9f30026b32da8f64","4d6e15d3ffda41ac9dba105dc3718e2e","11aec296d8004b5385d57c48b37e49e2","2ded71d4e5404de39f5cb0b54b4bacfc","3eb9d0198d9f46c398333cd0ee56dc9c","fcf5e713adb0422e964df8f7b18645cf","f07ce4a8a0764bad81153297095d81ca","98b9fae6be374692823365c8f656377e","c4c7941930f64504b1aa127bfeb8183a","722cda0a54f449f99c32f33abecfd5ca","2a30f0c696584d87ae7ff780d579af73","5e47cc11522c45998ecdab75ec314c60","18e69b91c0a34e8d8458f8e41e27b537","bb537d2348754728928a6de84e6a1637","e919b6d661544c879690de70135364a6","2eabed983fa240d0b74b819c32e35a5c","2010f0e2be9d49cd8a07cd8cd7491bcb","6babf02db1b34465beacbff3f60cb0ea","4bef93394bef4243a2d94c099fbd3740","ba95eb0c97dd432eb171a9250cf02bfc","2730d424c73242b58c3f58c8be2c1490","e180eac1d12548ff901d96e87cd84d2b","107c7340420243ae902d64a42201a347","c0c766b557314520b3039844078bd17e","4edc50ee571b41018538a118749ab37c","01ea5245a00b46d194e29124f45596b0","3e7a6aaab6314efdb80af76de5de2676","74af0b7936ff4b74b902cf9489650ec3","05edf850cf164537bf9df336501a0251","2a43ba42c8414d29b51823a7068985dd","d4b28fbe8a654e9e9686824d45ecde79","afacc51fd58d43c69b219df5dbe8b84a","792b4f5f685543dc82c62dcf1a55ecfe","a589cb1f826845f4808cf4fc4ed35dd3","dabeee50c3624c5fba012ba38a0fed2d"]},"id":"g-XcB5APRWMw","executionInfo":{"status":"ok","timestamp":1745724859031,"user_tz":-420,"elapsed":7599,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"75b79cbd-b1af-4103-e008-7693c30a8878"},"execution_count":6,"outputs":[{"output_type":"display_data","data":{"text/plain":["tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"563d2d5cfb4f49f2b58358dbaad3f3f3"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"d5c687e1c3bb414fb9f98a670125546f"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"11aec296d8004b5385d57c48b37e49e2"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"bb537d2348754728928a6de84e6a1637"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"4edc50ee571b41018538a118749ab37c"}},"metadata":{}},{"output_type":"execute_result","data":{"text/plain":["1"]},"metadata":{},"execution_count":6}]},{"cell_type":"code","source":["tokenizer.vocab_size"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"fmpGwLh6Rau6","executionInfo":{"status":"ok","timestamp":1745724859041,"user_tz":-420,"elapsed":7,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"b29d8124-2a63-4c4b-c332-5d3bd5dd0de0"},"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["262144"]},"metadata":{},"execution_count":7}]},{"cell_type":"code","source":["@dataclass\n","class ModelArgs:\n","    #Hyperparameters\n","\n","    block_size = 256\n","    batch_size = 64\n","    embeddings_dims = 512\n","    attn_dropout = 0.1\n","    no_of_heads = 8 #IMP needs to be thoroughly calculated\n","    dropout = 0.1\n","    epochs = 100\n","    max_lr = 2.5e-4\n","    no_of_decoder_layers = 6 #IMP needs to be thoroughly calculated\n","    weight_decay_optim = 0.1\n","    beta_1 = 0.9\n","    beta_2 = 0.95\n","    device = 'cuda:0'\n","    no_kv_heads = 2\n","    scaling_factor = 0.5\n","    vocab_size = len(tokenizer.get_vocab()) + 768\n","    local_block_size = 128\n","    base_freq=10000"],"metadata":{"id":"jg3L1bAjSM6f","executionInfo":{"status":"ok","timestamp":1745724860012,"user_tz":-420,"elapsed":13,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":8,"outputs":[]},{"cell_type":"code","source":["#Datasets\n","\n","# Using tinyshakespeare\n","\n","with open('data/input.txt', 'r', encoding='utf-8') as f:\n","    text = f.read()"],"metadata":{"id":"uxX4dpQUSTMO","executionInfo":{"status":"ok","timestamp":1745724860260,"user_tz":-420,"elapsed":5,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":9,"outputs":[]},{"cell_type":"code","source":["def save_checkpoint(model):\n","    ckp = model.module.state_dict()\n","    torch.save(ckp, \"checkpoint.pt\")\n","    print(\"Checkpoint saved\")"],"metadata":{"id":"JaCKCAhMSaZB","executionInfo":{"status":"ok","timestamp":1745724860715,"user_tz":-420,"elapsed":0,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":10,"outputs":[]},{"cell_type":"code","source":["#Subword level tokenization\n","\n","#Loading custom trained BPE\n","# Load the tokenizer\n","# tokenizer = Tokenizer.from_file(\"data/bpe_tokenizer_tinyshakespeare_1k.json\")\n","# vocab_size = tokenizer.get_vocab_size()\n","# Encode and decode functions\n","# encode = lambda s: tokenizer.encode(s).ids\n","# decode = lambda l: tokenizer.decode(l)\n","\n","\n","\n","\n","\n","###############################################################################\n","#Character level tokenization\n","\n","# # here are all the unique characters that occur in this text\n","chars = sorted(list(set(text)))\n","vocab_size = len(chars)\n","\n","\n","# create a mapping from characters to integers\n","stoi = { ch: i for i,ch in enumerate(chars) }\n","itos = { i:ch for i,ch in enumerate(chars) }\n","encode = lambda s: [stoi[c] for c in s] # encoder: take a string, output a list of integers\n","decode = lambda l: ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string\n","\n","\n","# Train and test splits\n","data = torch.tensor(encode(text), dtype=torch.long)\n","n = int(0.9*len(data)) # first 90% will be train, rest val\n","train_data = data[:n]\n","val_data = data[n:]\n","\n","# data loading\n","def get_batch(split):\n","    # generate a small batch of data of inputs x and targets y\n","    data = train_data if split == 'train' else val_data\n","    ix = torch.randint(len(data) - ModelArgs.block_size, (ModelArgs.batch_size,))\n","    x = torch.stack([data[i:i+ModelArgs.block_size] for i in ix])\n","    y = torch.stack([data[i+1:i+ModelArgs.block_size+1] for i in ix])\n","    x, y = x.to(ModelArgs.device), y.to(ModelArgs.device)\n","    return x, y"],"metadata":{"id":"HhNB_QQKSct4","executionInfo":{"status":"ok","timestamp":1745724861056,"user_tz":-420,"elapsed":128,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":11,"outputs":[]},{"cell_type":"code","source":["tinystories = True\n","fw = False\n","fw_train = None\n","fw_test = None\n","if(tinystories):\n","\n","    fw_train = load_dataset(\"roneneldan/TinyStories\", split=\"train\")\n","    fw_test = load_dataset(\"roneneldan/TinyStories\", split=\"validation\")\n","    print(fw_train)\n","    print(fw_test)\n","if(fw):\n","    fw_train = load_dataset(\"HuggingFaceFW/fineweb\", name=\"sample-10BT\", split=\"train\", streaming=False)\n","    fw_train = fw_train.train_test_split(test_size=0.01)\n","    print(fw_train)\n","    print(fw_train)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":416,"referenced_widgets":["cd245abb7c4c4ac5a02c033a726956f3","f4699de85b4842bcbff3809dc82f01ce","64b75b21831246cabfff2138295c1ae0","f4d80d46fbd04cf584a48bce4ba7a75e","ddc3f89efa4e4eb39bdea5e86b1a7154","1d4690c6ddd84ffcb348fd385212f636","279170ca7ea34f1ca9d8fa93db82dede","466d69d5282843758ddb72aeac2ecc35","63ea53cd047249578e9a839c5ee3af88","a55695f08e114b9e8bd5ea74b630b114","73e7a2b6afa94487990dfeb691e5a479","205101af4fe5422b9b06381994944bba","b55f1bc35a1d4233a1572fb67c7776b1","3a2b0619a50a4018b1ca83eb315df2ac","b8939bccb7f84b809774f30df86e5ebc","5b85c4e79f0044e1804f098e73b1b923","1b7ce6af5c154812951d065ad692eb83","49892386c1cc4dc9ad807fbbad4c091d","43c853a237dd40db85b5da5718f1a156","87e6b870f8e342a8b883dcda28a7e972","3d74ab9f41f641d1bf3f93b4919e7a9d","f587ec490939492abe219f5f02d6fec5","5e777fce2d8c44c690b4ae03a418728b","1a111e98d9ea40a1936bd45f51e5f604","cf6829a7567c4ae1a1f11a16414b3ebc","3b164c763c664600bb1f1d00a9a989f7","3bb8cb2c1d9e46efa7e5f9d3935e9a30","9f2bab320ab24b71a38e053c32ecf190","303a01fe6824463ca6d3b237c9bdd5b2","2910354ce8ea4e6db038fe19f392fad8","15def3ffba3d456db2259285a7be3635","451dc0636375463b93bbe34e2bd3f91b","2b2d39b39bf84017a122f6e030e77fb2","5634e0c4d6c349ebba40ab2343ec4411","d8434ec9303643a68c19f9c5a6dd2ebf","c8f270144730457fba2e4d46fccad38a","ddc4f437d9a24cec897f62cbb7b22ad1","87a5fac2e1d6417e9ca047b2801f9287","1455f70c4d1f4695a98fb0acf062e14e","8f45cefe2f7045ec9493096cebba8036","419ca851355a42c5b3703e925ac1cc45","d4124dcbeb2f4f49aa82d604e3029c36","001e78622cf84a33890e7e9ed4363210","d238dfebad4346709a1dcca8341af69b","da990536aab647d5924e682a3c51ded1","8ca4bc4c7dcd4698adeae5e5cc5ab8d5","14a51230c0eb44b3bf512515c26f55d3","803fffe7556246459796032dd2acde89","223177953d56420d94fcb6750e361acb","7707a1611a9b4fbcbdc08bac92bcac65","a6a061a4333f46b59c19a29061f0a19c","3beb6a659bae4e82a5b349e97cd63ee1","c6a183405e614a308917b26f80852784","e51755a55d994e0e8a58ed52757a6a05","8a3e33728a1348c9a33297624514675c","52ce8b0e35a942118dbbb9ade0890b07","0675f9c8719a49e5b5c728cdac6061ef","a5b94699a2914ad1983565864a503a25","a1e39c1df5834fd29206317d69149251","28f44f9e35b943849b58d2238447f82e","554d8bef71d842948d0322f2d64fc1c3","555402dee2ec43078acedee153840b98","c65c45c1840849bf81afe75ef5e57b13","012f1b7b7d6c40beb7e22ff7a2b87e77","f906d6f3b1714ff29512ddb24436e8cc","8de007ad84ea4e91bc553ba48461e41b","b69ef8fa743340a08702ee00c685ea5b","6160b51c90664150841e4a312d886650","64e91205cc4b4595938c51486d528b7b","11929056139c4209aff3e2cd680bb05b","097b9376c2514c3f9a3202435539310c","fa05d0ac19c24074b5b2f32382f9afde","a87be2d63e74433385a7d45f9369ec63","5a526d36cd3749a09813f5f0c5623836","9ecacb9973f345a0bfdbe2d614fc4608","c7d0431afc3548ac91aaa383e53b446e","6c35e25d01314dc680d3d7e6e3a03ae0","3400dd9919f741e6a9089cfe1d575cd8","99a728668d5a41f7a23181cda3f17046","f7ccf1070d7f451f8e7701f092265e81","a970b982079e470c8f8435a6fedb4927","4387cfd045f5495bac074054326fc3e0","0151e6954b544526a38f68863a1261e1","624aec19c10549ce9f212a6f826e6833","1abacc70390542febd37e71cb1614d8c","6a2e3db7d02049539c2deb1929ac1a60","efe03831c24c41da95614179eb8d3c68","58f6e6a239864fd0aac277d1cd794a65"]},"id":"ZPgF3lApSfk1","executionInfo":{"status":"ok","timestamp":1745724879129,"user_tz":-420,"elapsed":18025,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"799588f7-35d5-47a7-d862-6714468c6632"},"execution_count":12,"outputs":[{"output_type":"display_data","data":{"text/plain":["README.md:   0%|          | 0.00/1.06k [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"cd245abb7c4c4ac5a02c033a726956f3"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["(…)-00000-of-00004-2d5a1467fff1081b.parquet:   0%|          | 0.00/249M [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"205101af4fe5422b9b06381994944bba"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["(…)-00001-of-00004-5852b56a2bd28fd9.parquet:   0%|          | 0.00/248M [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5e777fce2d8c44c690b4ae03a418728b"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["(…)-00002-of-00004-a26307300439e943.parquet:   0%|          | 0.00/246M [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5634e0c4d6c349ebba40ab2343ec4411"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["(…)-00003-of-00004-d243063613e5a057.parquet:   0%|          | 0.00/248M [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"da990536aab647d5924e682a3c51ded1"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["(…)-00000-of-00001-869c898b519ad725.parquet:   0%|          | 0.00/9.99M [00:00<?, ?B/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"52ce8b0e35a942118dbbb9ade0890b07"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Generating train split:   0%|          | 0/2119719 [00:00<?, ? examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"b69ef8fa743340a08702ee00c685ea5b"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["Generating validation split:   0%|          | 0/21990 [00:00<?, ? examples/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"3400dd9919f741e6a9089cfe1d575cd8"}},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Dataset({\n","    features: ['text'],\n","    num_rows: 2119719\n","})\n","Dataset({\n","    features: ['text'],\n","    num_rows: 21990\n","})\n"]}]},{"cell_type":"code","source":["def prepare_dataset(split, device, batch_size):\n","    print(\"Device is: \", device)\n","\n","    def collate_fn(batch):\n","        # Extract text data\n","        texts = [item [\"text\"] for item in batch]\n","\n","        input_encodings = tokenizer(texts, max_length = ModelArgs.block_size, padding='max_length', truncation=True, return_tensors=\"pt\")\n","\n","        input_encodings[\"labels\"] = input_encodings[\"input_ids\"].clone()  # Use `input_ids` as labels\n","\n","        input_encodings[\"labels\"][:, :-1] = input_encodings[\"input_ids\"][:, 1:]  # Shift right\n","        input_encodings[\"labels\"][:, -1] = tokenizer.eos_token_id  # Let the last token be end\n","\n","        return input_encodings\n","\n","\n","    dataloader = None\n","    if(tinystories):\n","        if(split == 'train'):\n","            data_loader = DataLoader(\n","            fw_train,\n","            # generator=generator,\n","            batch_size=batch_size,\n","\n","            # sampler=DistributedSampler(fw_train, shuffle=True),\n","            collate_fn=collate_fn,\n","            drop_last=True,\n","            shuffle=False\n","        )\n","        elif(split == 'val'):\n","            data_loader = DataLoader(\n","            fw_test,\n","\n","\n","            batch_size=batch_size,\n","            # sampler=DistributedSampler(fw_test, shuffle=True),\n","            collate_fn=collate_fn,\n","            drop_last=True,\n","            shuffle=False\n","        )\n","    elif(fw):\n","        if(split == 'train'):\n","            data_loader = DataLoader(\n","            fw_train['train'],\n","            batch_size=batch_size,\n","\n","\n","            sampler=DistributedSampler(fw_train['train'], shuffle=True),\n","            collate_fn=collate_fn,\n","            drop_last=True,\n","            shuffle=False\n","    )\n","        elif(split == 'val'):\n","            data_loader = DataLoader(\n","            fw_train['test'],\n","            batch_size=batch_size,\n","                # generator=generator,\n","            sampler=DistributedSampler(fw_train[\"test\"]),\n","            collate_fn=collate_fn,\n","\n","            drop_last=True,\n","            shuffle=False\n","        )\n","    return data_loader"],"metadata":{"id":"GBlTWwgBSha9","executionInfo":{"status":"ok","timestamp":1745724879133,"user_tz":-420,"elapsed":2,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":13,"outputs":[]},{"cell_type":"code","source":["# from andrej karapathy github\n","def topk_sampling(model, prompt, device, max_length=50, top_k=50, temperature=1.0):\n","    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)\n","    generated_tokens = []\n","    ModelArgs.inference=True\n","    for _ in range(max_length):\n","        with torch.no_grad(), torch.autocast(device_type=ModelArgs.device, dtype=torch.bfloat16):\n","            outputs = model(input_ids)\n","            logits = outputs[:, -1, :]\n","\n","            probs = F.softmax(logits, dim=-1)\n","\n","            # Top-k filtering\n","            top_k_probs, top_k_indices = torch.topk(probs, top_k, dim=-1)\n","\n","\n","            # Apply temperature scaling\n","            probs = probs / temperature\n","\n","            # Sample from top-k\n","            next_token = torch.multinomial(top_k_probs, num_samples=1)\n","\n","\n","            # generated_tokens.append(next_token.item())\n","\n","            xcol = torch.gather(top_k_indices, -1, next_token)\n","            # generated_tokens.append(xcol)\n","            input_ids = torch.cat([input_ids, xcol], dim=1) #1 because is it the dimension of the sequence\n","\n","    return tokenizer.decode(input_ids[0], skip_special_tokens=True)"],"metadata":{"id":"x1Z8ak6WSkR4","executionInfo":{"status":"ok","timestamp":1745724879135,"user_tz":-420,"elapsed":1,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":14,"outputs":[]},{"cell_type":"code","source":["class Normalization(nn.Module):\n","    def __init__(\n","        self,\n","        embeddings_dims: int = ModelArgs.embeddings_dims\n","    ):\n","        super().__init__()\n","        self.rmsnorm_layer = RMSNorm(dim=embeddings_dims)\n","\n","\n","    def forward(self, x):\n","\n","        x = self.rmsnorm_layer(x)\n","        return x"],"metadata":{"id":"bruqowU4SnX7","executionInfo":{"status":"ok","timestamp":1745724879137,"user_tz":-420,"elapsed":1,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":15,"outputs":[]},{"cell_type":"code","source":["# import numpy as np\n","class RotaryEmbeddings(nn.Module):\n","    def __init__(\n","        self,\n","         device,\n","        embeddings_dims: int = ModelArgs.embeddings_dims,\n","        block_size: int = ModelArgs.block_size,\n","        batch_size: int = ModelArgs.batch_size,\n","        scaling_factor: float = 0.5,\n","    ):\n","        super().__init__()\n","\n","        self.embeddings_dims = embeddings_dims\n","        self.block_size = block_size\n","        self.batch_size = batch_size\n","        self.scaling_factor = scaling_factor\n","        self.theta = 0\n","        self.device=device\n","\n","    def apply_rope(self, seq, base_freq):\n","        batch_size, seq_len, embeds_dims = seq.shape\n","        token_indices = torch.arange(0 , seq_len, dtype=torch.float32,  device = self.device).unsqueeze(1)\n","        positions = torch.arange(0 , self.embeddings_dims, 2, dtype=torch.float32,  device = self.device).unsqueeze(0)\n","        theta = base_freq ** (-2 * (positions * self.scaling_factor) / self.embeddings_dims) #Position Interpolation\n","        angles = token_indices * theta\n","        angles = angles.expand(seq_len, -1) # because this thing needs to be applied to every sequence in the batch but with embeds dims halved\n","        x_reshaped = seq.view(batch_size, seq_len, self.embeddings_dims // 2, 2)\n","\n","        cos_angles = torch.cos(angles)\n","        sin_angles = torch.sin(angles)\n","\n","\n","        out = torch.stack([x_reshaped[..., 0]*cos_angles - (x_reshaped[...,1] * sin_angles), x_reshaped[...,1] * cos_angles + x_reshaped[..., 0] * sin_angles], dim=1)\n","        out = out.view(batch_size, seq_len, embeds_dims)\n","        return out\n","\n","    def forward(self, x, base_freq):\n","\n","        res = self.apply_rope(x,base_freq=base_freq)\n","        return res"],"metadata":{"id":"fyfZ2dJ1SpJK","executionInfo":{"status":"ok","timestamp":1745724879138,"user_tz":-420,"elapsed":1,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":16,"outputs":[]},{"cell_type":"code","source":["class MQA(nn.Module):\n","    def __init__(\n","        self,\n","        device,\n","        no_of_q_heads: int,\n","        embeddings_dims: int = ModelArgs.embeddings_dims,\n","        block_size: int = ModelArgs.block_size,\n","\n","\n","    ):\n","        super().__init__()\n","\n","\n","        # self.no_of_q_heads = no_of_heads // no_of_kv_heads\n","        # self.no_of_q_heads = no_of_q_heads\n","        self.no_of_kv_heads = 2 # I want to have a kv for each pair of query heads\n","        self.head_size = embeddings_dims // no_of_q_heads\n","        # self.kv_head_size = (embeddings_dims // self.no_of_kv_heads) * 2\n","        self.rotary= RotaryEmbeddings(embeddings_dims=self.head_size,  device = device)\n","        # self.rotary_k = RotaryEmbeddings(embeddings_dims=self.kv_head_size,  device = device)\n","        # self.query = nn.Linear(in_features=embeddings_dims, out_features=self.head_size,  bias=False)\n","        self.key = nn.Linear(in_features=embeddings_dims, out_features=self.head_size,  dtype=torch.float32, bias=False,  device = device)\n","        self.value = nn.Linear(in_features=embeddings_dims, out_features=self.head_size,  dtype=torch.float32, bias=False,  device = device)\n","        self.dropout = nn.Dropout(p = ModelArgs.attn_dropout)\n","        self.linear_layer = nn.Linear(in_features=self.head_size * self.no_of_kv_heads, out_features=embeddings_dims,  dtype=torch.float32, bias=False,  device = device)\n","        self.device = device\n","        self.multi_query = nn.ModuleList([nn.Linear(in_features=embeddings_dims, out_features=self.head_size,  bias=False,  device = self.device) for _ in range(self.no_of_kv_heads)])\n","\n","    def scaled_dot_product(self, q, k, v, block_size, base_freq):\n","\n","            # masked = torch.tril(torch.ones((block_size, block_size),  requires_grad=False,  device = self.device))\n","            normalized_q = q * (torch.norm(q, p=2)** -1)\n","            q = self.rotary(normalized_q, base_freq)\n","            masked_table = torch.tril(torch.ones((block_size, block_size),  requires_grad=False,  device = self.device))\n","            # rotary_query = matrix @ q.permute(1,2,0) # (B,T, C,C) @ (B,T,C) -> (B,C,T) = (B,T,C,T)\n","            # rotary_key = matrix @ k.permute(1,2,0)  #  (B,T, C,C  ) @ (B,T,C) -> (B,C,T) = (B,T,C,T)\n","            # print(\"Query: \", q.shape)\n","            # print(\"Keys: \", k.shape)\n","            # print(q.permute(2,0,1).shape)\n","            # print(k.permute(2,0,1).transpose(-2, -1).shape)\n","            # weights = q.permute(2,0,1) @ k.permute(2,0,1).transpose(-2, -1)#(B,T,C,T) @ (B,T,C,T) = (T,C,C,T)\n","            # weights = q @ k.permute(2,1,0)\n","            # print(weights.shape)\n","            # print(masked.shape)\n","            weights = q @ torch.transpose(k, dim0=-2, dim1=-1) * (k.shape[-1] ** -0.5)\n","            masked_values = weights.masked_fill(masked_table[: block_size, : block_size] == 0, float('-inf'))\n","            weights_normalized = nn.functional.softmax(masked_values, dim=-1) #Normalize along the embeddings dimension for all the tokens\n","            weights_normalized = self.dropout(weights_normalized)\n","            out = weights_normalized @ v\n","            return out\n","\n","    def forward(self,x, base_freq=10000):\n","        # print(\"MQA: \", x.shape)\n","        batch, block_size, embeddings_dims = x.shape\n","\n","        # query = self.query(x)\n","        # matrix = self.rotary_matrix(block_size)\n","\n","\n","        key = self.key(x)\n","        key_normalized = key * (torch.norm(key, p=2)** -1)\n","        values = self.value(x)\n","        # print(\"Keys: \", key.shape)\n","        # print(\"Values: \", values.shape)\n","        # rotary_value = self.rotary(values)\n","        rotary_key = self.rotary(key_normalized, base_freq)\n","        multi_query_concat = torch.cat([self.scaled_dot_product(query(x), rotary_key, values, block_size, base_freq) for query in self.multi_query], dim=-1)\n","        # print(\"Multi query: \", multi_query_concat.shape)\n","\n","        linear_layer= self.linear_layer(multi_query_concat)\n","        # out = self.dropout(linear_layer)\n","        return linear_layer"],"metadata":{"id":"hwwhCNGaSsR6","executionInfo":{"status":"ok","timestamp":1745724879147,"user_tz":-420,"elapsed":8,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":17,"outputs":[]},{"cell_type":"code","source":["class GQA(nn.Module):\n","    def __init__(\n","        self,\n","         device,\n","        embeddings_dims: int = ModelArgs.embeddings_dims,\n","        block_size: int = ModelArgs.block_size,\n","        # no_of_q_heads: int = ModelArgs.no_of_heads,\n","        mqa_heads: int = ModelArgs.no_kv_heads\n","    ):\n","        super().__init__()\n","\n","        # self.no_of_kv_heads = no_of_kv_heads\n","        self.no_of_q_heads = ModelArgs.no_of_heads // mqa_heads\n","        # self.head_dim = embeddings_dims // self.no_kv_heads\n","        self.dropout = nn.Dropout(p = ModelArgs.attn_dropout)\n","        self.linear_layer = nn.Linear(in_features=embeddings_dims * self.no_of_q_heads, out_features=embeddings_dims , dtype=torch.float32,  bias=False,  device = device)\n","        self.device = device\n","        self.mqa = nn.ModuleList([MQA(no_of_q_heads=self.no_of_q_heads, embeddings_dims=embeddings_dims, device = self.device, block_size=block_size) for _ in range(self.no_of_q_heads)])\n","        # self.mqa = MQA(no_of_q_heads=self.no_of_q_heads, device=self.device, embeddings_dims=embeddings_dims, block_size=block_size)\n","    def forward(self,x, base_freq):\n","\n","        batch, block_size, embeddings_dims = x.shape\n","\n","        # res = self.mqa(x)\n","        grouped_query_concat = torch.cat([group(x, base_freq) for group in self.mqa], dim=-1)\n","\n","        linear_layer= self.linear_layer(grouped_query_concat) #Basically MQA is made into GQA with no_of_q_heads and this class right here is just to consolidate everything into one\n","        out = self.dropout(linear_layer)\n","        return out"],"metadata":{"id":"nNGyxK33SulA","executionInfo":{"status":"ok","timestamp":1745724879155,"user_tz":-420,"elapsed":8,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":18,"outputs":[]},{"cell_type":"code","source":["class Swish(nn.Module):\n","    def __init__(\n","        self,\n","        device,\n","        block_size: int = ModelArgs.block_size,\n","        embeddings_dims: int = ModelArgs.embeddings_dims\n","    ):\n","        super().__init__()\n","\n","        self.sig = torch.nn.Sigmoid()\n","\n","\n","    def forward(self, x):\n","        swish = x * self.sig(x)\n","\n","        return swish"],"metadata":{"id":"yQK13pbqSwnM","executionInfo":{"status":"ok","timestamp":1745724879160,"user_tz":-420,"elapsed":1,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":19,"outputs":[]},{"cell_type":"code","source":["class SWiGLU(nn.Module):\n","    def __init__(\n","        self,\n","        device,\n","        block_size: int = ModelArgs.block_size,\n","        embeddings_dims: int = ModelArgs.embeddings_dims\n","    ):\n","        super().__init__()\n","        self.hidden_dims = int(2 * ( 4 * embeddings_dims) / 3)\n","        self.swish = Swish(block_size=block_size, embeddings_dims=embeddings_dims, device=device)\n","        self.linear_layer1 = nn.Linear(in_features=embeddings_dims, out_features=self.hidden_dims,  bias=False, dtype=torch.float32,  device = device)\n","        self.linear_layer2 = nn.Linear(in_features=embeddings_dims, out_features=self.hidden_dims,  bias=False, dtype=torch.float32,  device = device)\n","        self.linear_layer3 = nn.Linear(in_features=self.hidden_dims, out_features=embeddings_dims,  bias=False, dtype=torch.float32,  device = device)\n","\n","\n","\n","\n","    def forward(self, x):\n","        swish_res = self.swish(self.linear_layer1(x))\n","        x_V = self.linear_layer2(x)\n","        res = torch.mul(swish_res, x_V)\n","        out = self.linear_layer3(res)\n","        return out"],"metadata":{"id":"Nr-7R-fwSynV","executionInfo":{"status":"ok","timestamp":1745724879161,"user_tz":-420,"elapsed":0,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":20,"outputs":[]},{"cell_type":"code","source":["class FFN(nn.Module):\n","    def __init__(self,\n","                  device,\n","                  embeddings_dims: int = ModelArgs.embeddings_dims,\n","                  block_size: int = ModelArgs.block_size,\n","                  vocab_size: int = ModelArgs.vocab_size,\n","                   dropout = ModelArgs.dropout\n","\n","                 ):\n","        super().__init__()\n","\n","        self.linear_layer = nn.Linear(in_features=embeddings_dims, out_features=embeddings_dims,  dtype=torch.float32,  device = device)\n","        self.swiglue = SWiGLU(block_size=block_size, embeddings_dims=embeddings_dims,  device = device)\n","        # self.dropout = nn.Dropout(p = dropout)\n","    def forward(self, x):\n","\n","        x = self.swiglue(x)\n","        x = self.linear_layer(x)\n","        # x = self.dropout(x)\n","        return x"],"metadata":{"id":"VUcDxtYdS0zs","executionInfo":{"status":"ok","timestamp":1745724879162,"user_tz":-420,"elapsed":0,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":21,"outputs":[]},{"cell_type":"code","source":["class DecoderLayer(nn.Module):\n","    def __init__(self,\n","                device,\n","                embeddings_dims: int = ModelArgs.embeddings_dims,\n","                dropout = ModelArgs.dropout,\n","                block_size: int = ModelArgs.block_size,\n","                vocab_size: int = ModelArgs.vocab_size,\n","\n","                 ) :\n","        super().__init__()\n","\n","        # self.base_freq = ModelArgs.base_freq\n","        self.feedforward_network = FFN(embeddings_dims=embeddings_dims, block_size=block_size, vocab_size=vocab_size,  device = device)\n","        self.gqa = GQA(embeddings_dims=embeddings_dims, block_size=block_size, mqa_heads=2,  device = device)\n","        # self.norm = Normalization(embeddings_dims=embeddings_dims)\n","        self.norm1 = Normalization(embeddings_dims=embeddings_dims)\n","        self.norm2 = Normalization(embeddings_dims=embeddings_dims)\n","        self.dropout = nn.Dropout(p = dropout)\n","    def forward(self, x, base_freq):\n","\n","        x = x + self.gqa(self.norm1(x), base_freq)\n","        x = x + self.feedforward_network(self.norm2(x))\n","        return x"],"metadata":{"id":"FKz6gL7tS2iw","executionInfo":{"status":"ok","timestamp":1745724879163,"user_tz":-420,"elapsed":1,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":22,"outputs":[]},{"cell_type":"code","source":["class Gemma(nn.Module):\n","    def __init__(self,\n","                    device,\n","                  embeddings_dims: int = ModelArgs.embeddings_dims,\n","                  no_of_decoder_layers: int = ModelArgs.no_of_decoder_layers,\n","                  block_size: int = ModelArgs.block_size,\n","                  vocab_size: int = ModelArgs.vocab_size,\n","                  dropout = ModelArgs.dropout\n","\n","                 ) :\n","        super().__init__()\n","        self.base_freq = ModelArgs.base_freq\n","        self.embeddings = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embeddings_dims,  dtype=torch.float32,  device = device)\n","        self.decoder = nn.ModuleList(DecoderLayer(embeddings_dims=embeddings_dims, block_size=block_size, vocab_size=vocab_size, dropout=dropout,  device = device) for _ in range(no_of_decoder_layers))\n","        self.linear_layer = nn.Linear(in_features=embeddings_dims, out_features=vocab_size,  dtype=torch.float32,  device = device)\n","        self.dropout = nn.Dropout(p = dropout)\n","        self.norm = Normalization(embeddings_dims)\n","\n","\n","        #weight tying\n","        # self.embeddings.weight = self.linear_layer.weight\n","\n","        self.apply(self._init_weights)\n","\n","    def _init_weights(self, module):\n","            if isinstance(module, nn.Linear):\n","                nn.init.normal_(module.weight, mean=0.0, std=0.02)\n","\n","                if module.bias is not None:\n","                    nn.init.zeros_(module.bias)\n","            elif isinstance(module, nn.Embedding):\n","                nn.init.normal_(module.weight, mean=0.0, std=0.02)\n","\n","\n","\n","    def forward(self, x):\n","        global_base_freq = 100000\n","        local_base_freq = 10000\n","        index = 0\n","        no_of_layers = 0\n","        x = self.embeddings(x)\n","        x = self.dropout(x)\n","        temp = x.clone()\n","        # x = self.decoder(x)\n","        for layer in self.decoder:\n","            if no_of_layers % 5 == 0:\n","                x = layer(x, global_base_freq)\n","                # print(\"x shape: \", x.shape)\n","            else:\n","\n","                local_block = temp[:, : index + ModelArgs.local_block_size, :]\n","                x = layer(local_block, local_base_freq)\n","                index += ModelArgs.local_block_size\n","                # print(\"x shape local: \", x.shape)\n","            no_of_layers += 1\n","        # print(x.shape)\n","        x = self.norm(x)\n","        x = self.linear_layer(x)\n","\n","        return x"],"metadata":{"id":"oE1eU5TLS4kx","executionInfo":{"status":"ok","timestamp":1745724879164,"user_tz":-420,"elapsed":1,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":23,"outputs":[]},{"cell_type":"code","source":["# Instantiating the model\n","# device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n","# device = \"cpu\"\n","# ModelArgs.device = device\n","model = Gemma(embeddings_dims=ModelArgs.embeddings_dims, block_size=ModelArgs.block_size, vocab_size=ModelArgs.vocab_size, dropout=ModelArgs.dropout, device=ModelArgs.device)\n","model = model.to(ModelArgs.device)\n","\n","# model = DDP(model, device_ids=[gpu_ids])"],"metadata":{"id":"wLJmGTUYS60z","executionInfo":{"status":"ok","timestamp":1745724879401,"user_tz":-420,"elapsed":237,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}}},"execution_count":24,"outputs":[]},{"cell_type":"code","source":["!pip install torchinfo"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"j4zLLYm4TC01","executionInfo":{"status":"ok","timestamp":1745724882280,"user_tz":-420,"elapsed":2870,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"de02e25c-d493-4b35-9d1e-a80809931e1d"},"execution_count":25,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting torchinfo\n","  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)\n","Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)\n","Installing collected packages: torchinfo\n","Successfully installed torchinfo-1.8.0\n"]}]},{"cell_type":"code","source":["#Printing a summary of the architecture\n","from torchinfo import summary\n","idx, targets = get_batch('test')\n","idx = idx.to(ModelArgs.device)\n","summary(model=model,\n","        input_data=idx,\n","        # input_size=(ModelArgs.batch_size, ModelArgs.block_size, ModelArgs.embeddings_dims),\n","        col_names=[\"input_size\", \"output_size\", \"num_params\", \"trainable\"],\n","        col_width=20,\n","        row_settings=[\"var_names\"])"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"caS4njY1S-JI","executionInfo":{"status":"ok","timestamp":1745724883090,"user_tz":-420,"elapsed":807,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"40a86a55-905a-4ff1-d6cb-3be85fb90056"},"execution_count":26,"outputs":[{"output_type":"execute_result","data":{"text/plain":["============================================================================================================================================\n","Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable\n","============================================================================================================================================\n","Gemma (Gemma)                                                [64, 256]            [64, 256, 262914]    --                   True\n","├─Embedding (embeddings)                                     [64, 256]            [64, 256, 512]       134,611,968          True\n","├─Dropout (dropout)                                          [64, 256, 512]       [64, 256, 512]       --                   --\n","├─ModuleList (decoder)                                       --                   --                   --                   True\n","│    └─DecoderLayer (0)                                      [64, 256, 512]       [64, 256, 512]       --                   True\n","│    │    └─Normalization (norm1)                            [64, 256, 512]       [64, 256, 512]       512                  True\n","│    │    └─GQA (gqa)                                        [64, 256, 512]       [64, 256, 512]       2,621,440            True\n","│    │    └─Normalization (norm2)                            [64, 256, 512]       [64, 256, 512]       512                  True\n","│    │    └─FFN (feedforward_network)                        [64, 256, 512]       [64, 256, 512]       2,359,296            True\n","│    └─DecoderLayer (1)                                      [64, 128, 512]       [64, 128, 512]       --                   True\n","│    │    └─Normalization (norm1)                            [64, 128, 512]       [64, 128, 512]       512                  True\n","│    │    └─GQA (gqa)                                        [64, 128, 512]       [64, 128, 512]       2,621,440            True\n","│    │    └─Normalization (norm2)                            [64, 128, 512]       [64, 128, 512]       512                  True\n","│    │    └─FFN (feedforward_network)                        [64, 128, 512]       [64, 128, 512]       2,359,296            True\n","│    └─DecoderLayer (2)                                      [64, 256, 512]       [64, 256, 512]       --                   True\n","│    │    └─Normalization (norm1)                            [64, 256, 512]       [64, 256, 512]       512                  True\n","│    │    └─GQA (gqa)                                        [64, 256, 512]       [64, 256, 512]       2,621,440            True\n","│    │    └─Normalization (norm2)                            [64, 256, 512]       [64, 256, 512]       512                  True\n","│    │    └─FFN (feedforward_network)                        [64, 256, 512]       [64, 256, 512]       2,359,296            True\n","│    └─DecoderLayer (3)                                      [64, 256, 512]       [64, 256, 512]       --                   True\n","│    │    └─Normalization (norm1)                            [64, 256, 512]       [64, 256, 512]       512                  True\n","│    │    └─GQA (gqa)                                        [64, 256, 512]       [64, 256, 512]       2,621,440            True\n","│    │    └─Normalization (norm2)                            [64, 256, 512]       [64, 256, 512]       512                  True\n","│    │    └─FFN (feedforward_network)                        [64, 256, 512]       [64, 256, 512]       2,359,296            True\n","│    └─DecoderLayer (4)                                      [64, 256, 512]       [64, 256, 512]       --                   True\n","│    │    └─Normalization (norm1)                            [64, 256, 512]       [64, 256, 512]       512                  True\n","│    │    └─GQA (gqa)                                        [64, 256, 512]       [64, 256, 512]       2,621,440            True\n","│    │    └─Normalization (norm2)                            [64, 256, 512]       [64, 256, 512]       512                  True\n","│    │    └─FFN (feedforward_network)                        [64, 256, 512]       [64, 256, 512]       2,359,296            True\n","│    └─DecoderLayer (5)                                      [64, 256, 512]       [64, 256, 512]       --                   True\n","│    │    └─Normalization (norm1)                            [64, 256, 512]       [64, 256, 512]       512                  True\n","│    │    └─GQA (gqa)                                        [64, 256, 512]       [64, 256, 512]       2,621,440            True\n","│    │    └─Normalization (norm2)                            [64, 256, 512]       [64, 256, 512]       512                  True\n","│    │    └─FFN (feedforward_network)                        [64, 256, 512]       [64, 256, 512]       2,359,296            True\n","├─Normalization (norm)                                       [64, 256, 512]       [64, 256, 512]       --                   True\n","│    └─RMSNorm (rmsnorm_layer)                               [64, 256, 512]       [64, 256, 512]       512                  True\n","├─Linear (linear_layer)                                      [64, 256, 512]       [64, 256, 262914]    134,874,882          True\n","============================================================================================================================================\n","Total params: 299,377,922\n","Trainable params: 299,377,922\n","Non-trainable params: 0\n","Total mult-adds (Units.GIGABYTES): 19.16\n","============================================================================================================================================\n","Input size (MB): 0.13\n","Forward/backward pass size (MB): 41361.21\n","Params size (MB): 1197.51\n","Estimated Total Size (MB): 42558.85\n","============================================================================================================================================"]},"metadata":{},"execution_count":26}]},{"cell_type":"code","source":["# import tqdm\n","def train():\n","    # Set device to CUDA if available\n","    device = ModelArgs.device\n","    print(f\"Start running training on {device}.\")\n","\n","    # Initialize wandb for experiment tracking\n","    wandb.init(\n","        project = 'Gemma-Training',\n","        # config = ModelArgs, # you can uncomment this to log model config\n","    )\n","\n","    # Create model and move to GPU\n","    model = Gemma(embeddings_dims=ModelArgs.embeddings_dims, block_size=ModelArgs.block_size,\n","                  vocab_size=ModelArgs.vocab_size, dropout=ModelArgs.dropout, device=device)\n","    model = model.to(device)\n","\n","    print(\"Model loaded\")\n","    # Setup optimizer\n","    optimizer = torch.optim.AdamW(params=model.parameters(), lr=ModelArgs.max_lr)\n","\n","    # Training parameters\n","    save_checkpoint_iter = 500\n","    total_iters = 25000\n","    eval_iters = 500\n","\n","\n","    # Training progress bar\n","    train_epoch_iterator = tqdm.tqdm(range(total_iters), desc=\"Training\")\n","    val_dataloader = prepare_dataset('val', device, ModelArgs.batch_size)\n","    val_iterator = iter(val_dataloader)\n","    # Get batches for training\n","    @torch.inference_mode()\n","    def estimate_loss():\n","        out = {}\n","        model.eval()\n","        count = 0\n","        for split in ['val']:\n","            print(f\"Starting with {split} evaluation...\")\n","            losses = torch.zeros(eval_iters)\n","            for k in range(eval_iters):\n","\n","                nonlocal val_iterator\n","\n","                # for k, batch in enumerate(dataloader):\n","                try:\n","                    batch = next(val_iterator)\n","                except StopIteration:\n","                    val_iterator = iter(val_dataloader)\n","                    batch = next(val_iterator)\n","\n","                input_ids = batch[\"input_ids\"].to(device)\n","                targets = batch[\"labels\"].to(device)\n","\n","                logits = model(input_ids)\n","                batch_size, block_size, embeddings_dims = logits.shape\n","                logits = logits.view(batch_size*block_size, embeddings_dims)\n","                targets = targets.view(batch_size * block_size)\n","                loss = nn.functional.cross_entropy(logits, targets)\n","                losses[k] = loss.item()\n","                # count += 1\n","            out[split] = losses.mean()\n","\n","        model.train()\n","        return out\n","    token_count = 0\n","    # Start training loop\n","    model.train()\n","    print(\"Lessgoo...\")\n","    dataloader = prepare_dataset('train', device, ModelArgs.batch_size)\n","    train_dataloader = iter(dataloader)\n","    accumulated_loss = 0.0\n","    for step in train_epoch_iterator:\n","        # Periodically evaluate loss on train and val sets\n","        if (step % eval_iters == 0 and step != 0) or step == total_iters - 1:\n","            losses = estimate_loss()\n","            avg_val_loss = torch.Tensor([losses['val']]).to(device)\n","            print(f\"step {step}: train loss {accumulated_loss:.4f}, val loss {losses['val']:.4f}\")\n","            val_perplexity = torch.exp(torch.tensor(avg_val_loss)).item()\n","            # Log metrics to wandb\n","            wandb.log({\n","                \"val_perplexity\": val_perplexity,\n","                # \"val_step_loss\": losses['train'],\n","                \"val_step_loss\": losses['val'],\n","                \"step\": step\n","            })\n","\n","        # Save checkpoint periodically\n","        if step % save_checkpoint_iter == 0 and step != 0:\n","            print(f\"Saving the model checkpoint for step: {step}\")\n","            torch.save(model.state_dict(), \"checkpoint.pt\")\n","            print(\"Checkpoint saved\")\n","\n","        # Get batch for training step\n","        try:\n","            batch = next(train_dataloader)\n","        except StopIteration:\n","            train_dataloader = iter(dataloader)\n","            batch = next(train_dataloader)\n","\n","        # for batch in dataloader:\n","        input_ids = batch[\"input_ids\"].to(device)\n","        targets = batch[\"labels\"].to(device)\n","\n","        # Forward pass\n","        logits = model(input_ids)\n","        batch_size, block_size, embeddings_dims = logits.shape\n","        logits = logits.view(batch_size*block_size, embeddings_dims)\n","        targets = targets.view(batch_size * block_size)\n","        loss = nn.functional.cross_entropy(logits, targets)\n","\n","        token_count += (len(input_ids) * ModelArgs.batch_size)\n","\n","        # Backward pass\n","        optimizer.zero_grad(set_to_none=True)\n","        loss.backward()\n","        optimizer.step()\n","        accumulated_loss = loss.item()\n","        perplexity = torch.exp(torch.tensor(accumulated_loss)).item()  # Calculate perplexity\n","        # if(device == 0):\n","        wandb.log({\n","                    # \"Learning Rate\": scheduler.get_last_lr()[0],\n","                    \"Train_Loss\": accumulated_loss,\n","                    # \"Train loss\": loss.item(),\n","                    \"Train Perplexity\": perplexity,\n","                    \"Total Tokens Processed\": token_count,\n","                    \"Step\": step,\n","                    # \"Gradient Norm\": total_norm_before.item(),\n","                    # \"Epoch\": epoch\n","\n","        })\n","\n","        if(step % eval_iters == 0):\n","                prompt = \"Once upon a time \"\n","                generated_text = topk_sampling(model, prompt, max_length=ModelArgs.block_size, top_k=50, temperature=1.0, device=device)\n","\n","\n","                print(f\" Step: {step} | Generated Text: {generated_text}\")\n","\n","    # Finish wandb run\n","    wandb.finish()\n","\n","# Print CUDA device count but won't be using DDP\n","world_size = torch.cuda.device_count()\n","print(f\"CUDA devices available: {world_size}\")\n","train()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":577},"id":"arvDyilXTAk9","executionInfo":{"status":"error","timestamp":1745724893499,"user_tz":-420,"elapsed":1747,"user":{"displayName":"Laam Pham","userId":"04566654796696849937"}},"outputId":"2f308db6-8a2d-4512-caaa-9fd5a69fe167"},"execution_count":27,"outputs":[{"output_type":"stream","name":"stdout","text":["CUDA devices available: 1\n","Start running training on cuda:0.\n"]},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["Tracking run with wandb version 0.19.9"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["Run data is saved locally in <code>/content/wandb/run-20250427_033451-2l5ijm60</code>"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":["Syncing run <strong><a href='https://wandb.ai/laampt/Gemma-Training/runs/2l5ijm60' target=\"_blank\">apricot-frost-2</a></strong> to <a href='https://wandb.ai/laampt/Gemma-Training' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/developer-guide' target=\"_blank\">docs</a>)<br>"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":[" View project at <a href='https://wandb.ai/laampt/Gemma-Training' target=\"_blank\">https://wandb.ai/laampt/Gemma-Training</a>"]},"metadata":{}},{"output_type":"display_data","data":{"text/plain":["<IPython.core.display.HTML object>"],"text/html":[" View run at <a href='https://wandb.ai/laampt/Gemma-Training/runs/2l5ijm60' target=\"_blank\">https://wandb.ai/laampt/Gemma-Training/runs/2l5ijm60</a>"]},"metadata":{}},{"output_type":"stream","name":"stdout","text":["Model loaded\n"]},{"output_type":"stream","name":"stderr","text":["\rTraining:   0%|          | 0/25000 [00:00<?, ?it/s]"]},{"output_type":"stream","name":"stdout","text":["Device is:  cuda:0\n","Lessgoo...\n","Device is:  cuda:0\n"]},{"output_type":"stream","name":"stderr","text":["\rTraining:   0%|          | 0/25000 [00:00<?, ?it/s]\n"]},{"output_type":"error","ename":"OutOfMemoryError","evalue":"CUDA out of memory. Tried to allocate 16.05 GiB. GPU 0 has a total capacity of 39.56 GiB of which 5.13 GiB is free. Process 26277 has 34.42 GiB memory in use. Of the allocated memory 21.05 GiB is allocated by PyTorch, and 12.88 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mOutOfMemoryError\u001b[0m                          Traceback (most recent call last)","\u001b[0;32m<ipython-input-27-0bb2f39bb694>\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m    144\u001b[0m \u001b[0mworld_size\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    145\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"CUDA devices available: {world_size}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 146\u001b[0;31m \u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m","\u001b[0;32m<ipython-input-27-0bb2f39bb694>\u001b[0m in \u001b[0;36mtrain\u001b[0;34m()\u001b[0m\n\u001b[1;32m    108\u001b[0m         \u001b[0mlogits\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlogits\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mblock_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0membeddings_dims\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    109\u001b[0m         \u001b[0mtargets\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtargets\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_size\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mblock_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 110\u001b[0;31m         \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunctional\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcross_entropy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlogits\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtargets\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    111\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    112\u001b[0m         \u001b[0mtoken_count\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_ids\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mModelArgs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/nn/functional.py\u001b[0m in \u001b[0;36mcross_entropy\u001b[0;34m(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)\u001b[0m\n\u001b[1;32m   3492\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0msize_average\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mreduce\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3493\u001b[0m         \u001b[0mreduction\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_Reduction\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlegacy_get_string\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msize_average\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3494\u001b[0;31m     return torch._C._nn.cross_entropy_loss(\n\u001b[0m\u001b[1;32m   3495\u001b[0m         \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3496\u001b[0m         \u001b[0mtarget\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 16.05 GiB. GPU 0 has a total capacity of 39.56 GiB of which 5.13 GiB is free. Process 26277 has 34.42 GiB memory in use. Of the allocated memory 21.05 GiB is allocated by PyTorch, and 12.88 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"]}]},{"cell_type":"code","source":[],"metadata":{"id":"KHAc8sCWTMXy"},"execution_count":null,"outputs":[]}]}