{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "1dc105e0a29149ba8b646f0e46bba8db": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_afd89e11dd7546d4949321da8a5e27ff", "IPY_MODEL_4cf46922d8cb4faf82275b6a723fa78c", "IPY_MODEL_f50fc1856ff94bb28ab1b47050503e9c" ], "layout": "IPY_MODEL_0844c41065fb4d82a886cf020e7c6b58" } }, "afd89e11dd7546d4949321da8a5e27ff": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9e1336e21abe47c28a08407d7fc04cae", "placeholder": "​", "style": "IPY_MODEL_9ea0bf46bac04c82b97a795a6bba829f", "value": "config.json: 100%" } }, "4cf46922d8cb4faf82275b6a723fa78c": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", 
"description": "", "description_tooltip": null, "layout": "IPY_MODEL_72138e774ff44ad6a9c5c1cfa0b54113", "max": 7338, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_151749c129f84de8a35ea6a04aed7f26", "value": 7338 } }, "f50fc1856ff94bb28ab1b47050503e9c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_db25ab7af40c4475acc5b815e837404e", "placeholder": "​", "style": "IPY_MODEL_000e4c425a834befb256ac2811ddd944", "value": " 7.34k/7.34k [00:00<00:00, 394kB/s]" } }, "0844c41065fb4d82a886cf020e7c6b58": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": 
null } }, "9e1336e21abe47c28a08407d7fc04cae": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9ea0bf46bac04c82b97a795a6bba829f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "72138e774ff44ad6a9c5c1cfa0b54113": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, 
"align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "151749c129f84de8a35ea6a04aed7f26": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "db25ab7af40c4475acc5b815e837404e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, 
"justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "000e4c425a834befb256ac2811ddd944": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "084508f9d92f4f6d9240013fdc8997ca": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c3e31f57c1b54affba9263a51a538609", "IPY_MODEL_c3df42f2e57b47c08e182297a5e92bfd", "IPY_MODEL_85669881366d44a7b355887ebbef3c23" ], "layout": "IPY_MODEL_f37cf46fdb154251ab601551be42d4a8" } }, "c3e31f57c1b54affba9263a51a538609": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a4958a323880496f84c0cda36ed0fafd", "placeholder": "​", "style": "IPY_MODEL_2795d1dfe834418c9335a65284129a4f", 
"value": "model.safetensors: 100%" } }, "c3df42f2e57b47c08e182297a5e92bfd": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e345836890264e929b6a465d7bf20b78", "max": 3751321772, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_0d82a674efaa4a7a94de1c3308fe2d56", "value": 3751321772 } }, "85669881366d44a7b355887ebbef3c23": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_907e757aec2d48fcb61bd277fcd4fb71", "placeholder": "​", "style": "IPY_MODEL_57e0019906214ffb952ab6ffdefda922", "value": " 3.75G/3.75G [00:29<00:00, 180MB/s]" } }, "f37cf46fdb154251ab601551be42d4a8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, 
"grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a4958a323880496f84c0cda36ed0fafd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2795d1dfe834418c9335a65284129a4f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e345836890264e929b6a465d7bf20b78": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0d82a674efaa4a7a94de1c3308fe2d56": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "907e757aec2d48fcb61bd277fcd4fb71": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": 
"@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "57e0019906214ffb952ab6ffdefda922": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ff0e9084f6ea47d9aa48a7cd7277f151": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_f3ea3ae08746479fb0cf4d180b175302", "IPY_MODEL_f804716c88cc47238790c01a30ba73df", "IPY_MODEL_c6ea52dffc5b4b4e8471830d0f6fd69a" ], "layout": "IPY_MODEL_f61266500648479388516acda6f54d39" } }, "f3ea3ae08746479fb0cf4d180b175302": { "model_module": 
"@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_bdf3fc82e04844f0aa4465d70c667d25", "placeholder": "​", "style": "IPY_MODEL_e1c9a766de204d379a9c02a5bd6d5e10", "value": "generation_config.json: 100%" } }, "f804716c88cc47238790c01a30ba73df": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_87bdf4a01aaa4983bd906a9c2b8be9db", "max": 223, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_827fcc11fc8f4a1287ea417068c338b5", "value": 223 } }, "c6ea52dffc5b4b4e8471830d0f6fd69a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0b83eb781952498db57f06cbdd356d85", "placeholder": "​", "style": "IPY_MODEL_08979837c80940c0a168e6e2f1ec8ccc", "value": " 223/223 [00:00<00:00, 17.0kB/s]" } }, "f61266500648479388516acda6f54d39": { "model_module": "@jupyter-widgets/base", "model_name": 
"LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bdf3fc82e04844f0aa4465d70c667d25": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": 
null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e1c9a766de204d379a9c02a5bd6d5e10": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "87bdf4a01aaa4983bd906a9c2b8be9db": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "827fcc11fc8f4a1287ea417068c338b5": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": 
"1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "0b83eb781952498db57f06cbdd356d85": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "08979837c80940c0a168e6e2f1ec8ccc": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7d7f88b27323461da0a20f2c98632cf1": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": 
"@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c851f10d02eb4516a9f45faaf1b72ad0", "IPY_MODEL_7fc666136dce4732941f37e5ad4bb667", "IPY_MODEL_dc48d1e2d5404b0db407abd817855ee7" ], "layout": "IPY_MODEL_389f8686300249e1b7d2057e4b803e76" } }, "c851f10d02eb4516a9f45faaf1b72ad0": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7396e06208ad4bd1aeb471497856d184", "placeholder": "​", "style": "IPY_MODEL_942d523bbc14450eb4f26b096cb31dbe", "value": "tokenizer_config.json: 100%" } }, "7fc666136dce4732941f37e5ad4bb667": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0e78c9d3d8f943b596a1769073153877", "max": 990, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_f0c9de96a49e49339acd5295a27949e3", "value": 990 } }, "dc48d1e2d5404b0db407abd817855ee7": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_48fbad129de24aa7bbd24956ed093e7d", "placeholder": "​", "style": "IPY_MODEL_120bf0b214a441d9beddf5cc2d29672c", "value": " 990/990 [00:00<00:00, 77.0kB/s]" } }, "389f8686300249e1b7d2057e4b803e76": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7396e06208ad4bd1aeb471497856d184": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, 
"border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "942d523bbc14450eb4f26b096cb31dbe": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0e78c9d3d8f943b596a1769073153877": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, 
"max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f0c9de96a49e49339acd5295a27949e3": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "48fbad129de24aa7bbd24956ed093e7d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "120bf0b214a441d9beddf5cc2d29672c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", 
"model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ddc0650fe60d47ebb0534bb08f6d35dc": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_55cd8af6e6ba4e9f9fa5d8d7526f13ff", "IPY_MODEL_789a26caac5c4ddaae5a172a12d824fe", "IPY_MODEL_79aa723e35e745ecb9fbd57a0946e24c" ], "layout": "IPY_MODEL_89958b4c86024acd9b0141078a05d264" } }, "55cd8af6e6ba4e9f9fa5d8d7526f13ff": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_737a64890f99408fa5daa51934df792e", "placeholder": "​", "style": "IPY_MODEL_3dc95c4ce88640c8a72440a17d75950b", "value": "tokenizer.model: 100%" } }, "789a26caac5c4ddaae5a172a12d824fe": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": 
"ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b7e00e0de7ad46ae8228b95f8de3eeab", "max": 1795391, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_950ce1075335428d9a419c7ad662a72a", "value": 1795391 } }, "79aa723e35e745ecb9fbd57a0946e24c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8e0eda96a9144ac995117007aafb7973", "placeholder": "​", "style": "IPY_MODEL_a53bffe501c344bfb888c269e6105311", "value": " 1.80M/1.80M [00:00<00:00, 72.8MB/s]" } }, "89958b4c86024acd9b0141078a05d264": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": 
null, "top": null, "visibility": null, "width": null } }, "737a64890f99408fa5daa51934df792e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3dc95c4ce88640c8a72440a17d75950b": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b7e00e0de7ad46ae8228b95f8de3eeab": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", 
"_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "950ce1075335428d9a419c7ad662a72a": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "8e0eda96a9144ac995117007aafb7973": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": 
null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a53bffe501c344bfb888c269e6105311": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e0d722e8eefb47bfa733d04acb7dfe26": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_0ec3130f76354e1d88e9c6cf1684dfef", "IPY_MODEL_be62cee63ea24543926adc3ef1931964", "IPY_MODEL_00ffc95bbd7a41daace497357350b9f6" ], "layout": "IPY_MODEL_dcf8deffa17c4949a0c520d4a2e88ba7" } }, "0ec3130f76354e1d88e9c6cf1684dfef": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1f0ca07f78994acfaf73a21ac6024ad3", "placeholder": "​", "style": 
"IPY_MODEL_70e05ab4620040f39595d7f5f01c942c", "value": "tokenizer.json: 100%" } }, "be62cee63ea24543926adc3ef1931964": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5eb3464a5da64ee0afaf937ad9fede13", "max": 10272460, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_ab0600de0bc744c2961951c81f8f3464", "value": 10272460 } }, "00ffc95bbd7a41daace497357350b9f6": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_811541ee9b314868aea0492c07e24686", "placeholder": "​", "style": "IPY_MODEL_f5f75b0f29bd40a3bc7a2f577847563f", "value": " 10.3M/10.3M [00:01<00:00, 9.57MB/s]" } }, "dcf8deffa17c4949a0c520d4a2e88ba7": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, 
"grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1f0ca07f78994acfaf73a21ac6024ad3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "70e05ab4620040f39595d7f5f01c942c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5eb3464a5da64ee0afaf937ad9fede13": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ab0600de0bc744c2961951c81f8f3464": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "811541ee9b314868aea0492c07e24686": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", 
"_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f5f75b0f29bd40a3bc7a2f577847563f": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9ce18a4480e54ade87cab05118ff7b6c": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_8a2a6a206b494ad3894905ee1ef36b3c", "IPY_MODEL_288d3a7309fe42ae9142c1b41af6211e", "IPY_MODEL_3224681a0e154e65905f67064caa9711" ], "layout": "IPY_MODEL_994961ff43444351ba3dd79f394ec752" } }, "8a2a6a206b494ad3894905ee1ef36b3c": { 
"model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_19cbd35e15674da490dbf595931e74c8", "placeholder": "​", "style": "IPY_MODEL_78598a2a81c94a7eab378539f6219d82", "value": "special_tokens_map.json: 100%" } }, "288d3a7309fe42ae9142c1b41af6211e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0ecfbeae600e4495be8480be32af73db", "max": 552, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_f891346ff14b43179c5cc0fedbe3bcc6", "value": 552 } }, "3224681a0e154e65905f67064caa9711": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6af2379bb3284ecf927aa0ff0c953826", "placeholder": "​", "style": "IPY_MODEL_a47f614043b84baab2763b1bde12d7bf", "value": " 552/552 [00:00<00:00, 59.0kB/s]" } }, "994961ff43444351ba3dd79f394ec752": { "model_module": "@jupyter-widgets/base", 
"model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "19cbd35e15674da490dbf595931e74c8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, 
"object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "78598a2a81c94a7eab378539f6219d82": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0ecfbeae600e4495be8480be32af73db": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f891346ff14b43179c5cc0fedbe3bcc6": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", 
"_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "6af2379bb3284ecf927aa0ff0c953826": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a47f614043b84baab2763b1bde12d7bf": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "source": [ "# @title 0. 
Installations\n", "# Install necessary packages (run this cell once if needed)\n", "\n", "!pip install gradio torch torchvision torchaudio openai-whisper soundfile parler-tts transformers google-generativeai numpy librosa flash-attn" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "l7kKPw9qgBll", "outputId": "cc6b0841-ef47-46b6-eaa1-7d0a9dd4a793" }, "execution_count": 1, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting gradio\n", " Downloading gradio-5.30.0-py3-none-any.whl.metadata (16 kB)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.6.0+cu124)\n", "Requirement already satisfied: torchvision in /usr/local/lib/python3.11/dist-packages (0.21.0+cu124)\n", "Requirement already satisfied: torchaudio in /usr/local/lib/python3.11/dist-packages (2.6.0+cu124)\n", "Collecting openai-whisper\n", " Downloading openai-whisper-20240930.tar.gz (800 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m800.5/800.5 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: soundfile in /usr/local/lib/python3.11/dist-packages (0.13.1)\n", "Collecting parler-tts\n", " Downloading parler_tts-0.2.3.tar.gz (80 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.2/80.2 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.51.3)\n", "Requirement already satisfied: google-generativeai in /usr/local/lib/python3.11/dist-packages (0.8.5)\n", "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (2.0.2)\n", "Requirement already satisfied: librosa in /usr/local/lib/python3.11/dist-packages (0.11.0)\n", "Collecting flash-attn\n", " Downloading flash_attn-2.7.4.post1.tar.gz (6.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m83.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting aiofiles<25.0,>=22.0 (from gradio)\n", " Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)\n", "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (4.9.0)\n", "Collecting fastapi<1.0,>=0.115.2 (from gradio)\n", " Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)\n", "Collecting ffmpy (from gradio)\n", " Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)\n", "Collecting gradio-client==1.10.1 (from gradio)\n", " Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)\n", "Collecting groovy~=0.1 (from gradio)\n", " Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)\n", "Requirement already satisfied: httpx>=0.24.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.28.1)\n", "Requirement already satisfied: huggingface-hub>=0.28.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.31.2)\n", "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.1.6)\n", "Requirement already satisfied: markupsafe<4.0,>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.0.2)\n", "Requirement already satisfied: orjson~=3.0 in 
/usr/local/lib/python3.11/dist-packages (from gradio) (3.10.18)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from gradio) (24.2)\n", "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.2.2)\n", "Requirement already satisfied: pillow<12.0,>=8.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (11.2.1)\n", "Requirement already satisfied: pydantic<2.12,>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.11.4)\n", "Collecting pydub (from gradio)\n", " Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", "Collecting python-multipart>=0.0.18 (from gradio)\n", " Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)\n", "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (6.0.2)\n", "Collecting ruff>=0.9.3 (from gradio)\n", " Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", "Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)\n", " Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)\n", "Collecting semantic-version~=2.0 (from gradio)\n", " Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n", "Collecting starlette<1.0,>=0.40.0 (from gradio)\n", " Downloading starlette-0.46.2-py3-none-any.whl.metadata (6.2 kB)\n", "Collecting tomlkit<0.14.0,>=0.12.0 (from gradio)\n", " Downloading tomlkit-0.13.2-py3-none-any.whl.metadata (2.7 kB)\n", "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.15.3)\n", "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (4.13.2)\n", "Collecting uvicorn>=0.14.0 (from gradio)\n", " Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from 
gradio-client==1.10.1->gradio) (2025.3.2)\n", "Requirement already satisfied: websockets<16.0,>=10.0 in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.10.1->gradio) (15.0.1)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch) (3.18.0)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n", "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)\n", " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)\n", " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)\n", " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n", " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)\n", " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)\n", " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-curand-cu12==10.3.5.147 (from torch)\n", " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)\n", " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)\n", " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch) (0.6.2)\n", "Requirement 
already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)\n", " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.2.0)\n", "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", "Requirement already satisfied: numba in /usr/local/lib/python3.11/dist-packages (from openai-whisper) (0.60.0)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from openai-whisper) (4.67.1)\n", "Requirement already satisfied: more-itertools in /usr/local/lib/python3.11/dist-packages (from openai-whisper) (10.7.0)\n", "Requirement already satisfied: tiktoken in /usr/local/lib/python3.11/dist-packages (from openai-whisper) (0.9.0)\n", "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.11/dist-packages (from soundfile) (1.17.1)\n", "Collecting transformers\n", " Downloading transformers-4.46.1-py3-none-any.whl.metadata (44 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.1/44.1 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: sentencepiece in /usr/local/lib/python3.11/dist-packages (from parler-tts) (0.2.0)\n", "Collecting descript-audio-codec-unofficial (from parler-tts)\n", " Downloading descript_audio_codec_unofficial-1.0.0.tar.gz (24 kB)\n", " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Collecting descript-audiotools-unofficial (from parler-tts)\n", " Downloading descript_audiotools_unofficial-0.7.4.tar.gz (100 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m100.7/100.7 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: protobuf>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from parler-tts) (5.29.4)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n", "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.3)\n", "Collecting tokenizers<0.21,>=0.20 (from transformers)\n", " Downloading tokenizers-0.20.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", "Requirement already satisfied: google-ai-generativelanguage==0.6.15 in /usr/local/lib/python3.11/dist-packages (from google-generativeai) (0.6.15)\n", "Requirement already satisfied: google-api-core in /usr/local/lib/python3.11/dist-packages (from google-generativeai) (2.24.2)\n", "Requirement already satisfied: google-api-python-client in /usr/local/lib/python3.11/dist-packages (from google-generativeai) (2.169.0)\n", "Requirement already satisfied: google-auth>=2.15.0 in /usr/local/lib/python3.11/dist-packages (from google-generativeai) (2.38.0)\n", "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /usr/local/lib/python3.11/dist-packages (from google-ai-generativelanguage==0.6.15->google-generativeai) (1.26.1)\n", "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.11/dist-packages (from librosa) (3.0.1)\n", "Requirement already satisfied: scipy>=1.6.0 in 
/usr/local/lib/python3.11/dist-packages (from librosa) (1.15.3)\n", "Requirement already satisfied: scikit-learn>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from librosa) (1.6.1)\n", "Requirement already satisfied: joblib>=1.0 in /usr/local/lib/python3.11/dist-packages (from librosa) (1.5.0)\n", "Requirement already satisfied: decorator>=4.3.0 in /usr/local/lib/python3.11/dist-packages (from librosa) (4.4.2)\n", "Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.11/dist-packages (from librosa) (1.8.2)\n", "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.11/dist-packages (from librosa) (0.5.0.post1)\n", "Requirement already satisfied: lazy_loader>=0.1 in /usr/local/lib/python3.11/dist-packages (from librosa) (0.4)\n", "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.11/dist-packages (from librosa) (1.1.0)\n", "Requirement already satisfied: einops in /usr/local/lib/python3.11/dist-packages (from flash-attn) (0.8.1)\n", "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (3.10)\n", "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (1.3.1)\n", "Requirement already satisfied: pycparser in /usr/local/lib/python3.11/dist-packages (from cffi>=1.0->soundfile) (2.22)\n", "Requirement already satisfied: googleapis-common-protos<2.0.0,>=1.56.2 in /usr/local/lib/python3.11/dist-packages (from google-api-core->google-generativeai) (1.70.0)\n", "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from google-auth>=2.15.0->google-generativeai) (5.5.2)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.11/dist-packages (from google-auth>=2.15.0->google-generativeai) (0.4.2)\n", "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.11/dist-packages (from 
google-auth>=2.15.0->google-generativeai) (4.9.1)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (2025.4.26)\n", "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (1.0.9)\n", "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx>=0.24.1->gradio) (0.16.0)\n", "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.11/dist-packages (from numba->openai-whisper) (0.43.0)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2.9.0.post0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.2)\n", "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.11/dist-packages (from pooch>=1.1->librosa) (4.3.8)\n", "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (0.7.0)\n", "Requirement already satisfied: pydantic-core==2.33.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (2.33.2)\n", "Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (0.4.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.2)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.4.0)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=1.1.0->librosa) 
(3.6.0)\n", "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (8.2.0)\n", "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (1.5.4)\n", "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (13.9.4)\n", "Collecting argbind>=0.3.7 (from descript-audio-codec-unofficial->parler-tts)\n", " Downloading argbind-0.3.9.tar.gz (17 kB)\n", " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Collecting pyloudnorm (from descript-audiotools-unofficial->parler-tts)\n", " Downloading pyloudnorm-0.1.1-py3-none-any.whl.metadata (5.6 kB)\n", "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.11/dist-packages (from descript-audiotools-unofficial->parler-tts) (6.5.2)\n", "Collecting julius (from descript-audiotools-unofficial->parler-tts)\n", " Downloading julius-0.2.7.tar.gz (59 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.6/59.6 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: ipython in /usr/local/lib/python3.11/dist-packages (from descript-audiotools-unofficial->parler-tts) (7.34.0)\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from descript-audiotools-unofficial->parler-tts) (3.10.0)\n", "Collecting pystoi (from descript-audiotools-unofficial->parler-tts)\n", " Downloading pystoi-0.4.1-py2.py3-none-any.whl.metadata (4.0 kB)\n", "Collecting torch_stoi (from descript-audiotools-unofficial->parler-tts)\n", " Downloading torch_stoi-0.2.3-py3-none-any.whl.metadata (3.6 kB)\n", "Collecting flatten-dict (from descript-audiotools-unofficial->parler-tts)\n", " Downloading flatten_dict-0.4.2-py2.py3-none-any.whl.metadata (9.2 kB)\n", "Collecting markdown2 (from descript-audiotools-unofficial->parler-tts)\n", " Downloading markdown2-2.5.3-py3-none-any.whl.metadata (2.1 kB)\n", "Collecting randomname (from descript-audiotools-unofficial->parler-tts)\n", " Downloading randomname-0.2.1.tar.gz (64 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.2/64.2 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", "Collecting protobuf>=4.0.0 (from parler-tts)\n", " Downloading protobuf-4.25.7-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)\n", "Requirement already satisfied: tensorboard in /usr/local/lib/python3.11/dist-packages (from descript-audiotools-unofficial->parler-tts) (2.18.0)\n", "Requirement already satisfied: httplib2<1.0.0,>=0.19.0 in /usr/local/lib/python3.11/dist-packages (from google-api-python-client->google-generativeai) (0.22.0)\n", "Requirement already satisfied: google-auth-httplib2<1.0.0,>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from google-api-python-client->google-generativeai) (0.2.0)\n", "Requirement already satisfied: uritemplate<5,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from google-api-python-client->google-generativeai) (4.1.1)\n", "Requirement already satisfied: docstring-parser in /usr/local/lib/python3.11/dist-packages (from argbind>=0.3.7->descript-audio-codec-unofficial->parler-tts) (0.16)\n", "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /usr/local/lib/python3.11/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-ai-generativelanguage==0.6.15->google-generativeai) (1.71.0)\n", "Requirement already satisfied: grpcio-status<2.0.dev0,>=1.33.2 in /usr/local/lib/python3.11/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-ai-generativelanguage==0.6.15->google-generativeai) (1.71.0)\n", "Requirement already satisfied: pyparsing!=3.0.0,!=3.0.1,!=3.0.2,!=3.0.3,<4,>=2.4.2 in /usr/local/lib/python3.11/dist-packages (from httplib2<1.0.0,>=0.19.0->google-api-python-client->google-generativeai) (3.2.3)\n", "Requirement already satisfied: pyasn1<0.7.0,>=0.6.1 in /usr/local/lib/python3.11/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=2.15.0->google-generativeai) 
(0.6.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio) (1.17.0)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.19.1)\n", "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (75.2.0)\n", "Collecting jedi>=0.16 (from ipython->descript-audiotools-unofficial->parler-tts)\n", " Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)\n", "Requirement already satisfied: pickleshare in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (0.7.5)\n", "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (5.7.1)\n", "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (3.0.51)\n", "Requirement already satisfied: backcall in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (0.2.0)\n", "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (0.1.7)\n", "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (4.9.0)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->descript-audiotools-unofficial->parler-tts) (1.3.2)\n", "Requirement already satisfied: cycler>=0.10 in 
/usr/local/lib/python3.11/dist-packages (from matplotlib->descript-audiotools-unofficial->parler-tts) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->descript-audiotools-unofficial->parler-tts) (4.58.0)\n", "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->descript-audiotools-unofficial->parler-tts) (1.4.8)\n", "Requirement already satisfied: future>=0.16.0 in /usr/local/lib/python3.11/dist-packages (from pyloudnorm->descript-audiotools-unofficial->parler-tts) (1.0.0)\n", "Collecting fire (from randomname->descript-audiotools-unofficial->parler-tts)\n", " Downloading fire-0.7.0.tar.gz (87 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.11/dist-packages (from tensorboard->descript-audiotools-unofficial->parler-tts) (1.4.0)\n", "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.11/dist-packages (from tensorboard->descript-audiotools-unofficial->parler-tts) (3.8)\n", "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard->descript-audiotools-unofficial->parler-tts) (0.7.2)\n", "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from tensorboard->descript-audiotools-unofficial->parler-tts) (3.1.3)\n", "INFO: pip is looking at multiple versions of grpcio-status to determine which version is compatible with other requirements. 
This could take a while.\n", "Collecting grpcio-status<2.0.dev0,>=1.33.2 (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-ai-generativelanguage==0.6.15->google-generativeai)\n", " Downloading grpcio_status-1.70.0-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.69.0-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.68.1-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.68.0-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.67.1-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.67.0-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.66.2-py3-none-any.whl.metadata (1.1 kB)\n", "INFO: pip is still looking at multiple versions of grpcio-status to determine which version is compatible with other requirements. This could take a while.\n", " Downloading grpcio_status-1.66.1-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.66.0-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.65.5-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.65.4-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.65.2-py3-none-any.whl.metadata (1.1 kB)\n", "INFO: This is taking longer than usual. You might need to provide the dependency resolver with stricter constraints to reduce runtime. See https://pip.pypa.io/warnings/backtracking for guidance. 
If you want to abort this run, press Ctrl + C.\n", " Downloading grpcio_status-1.65.1-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.64.3-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.64.1-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.64.0-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.63.2-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.63.0-py3-none-any.whl.metadata (1.1 kB)\n", " Downloading grpcio_status-1.62.3-py3-none-any.whl.metadata (1.3 kB)\n", "Requirement already satisfied: parso<0.9.0,>=0.8.4 in /usr/local/lib/python3.11/dist-packages (from jedi>=0.16->ipython->descript-audiotools-unofficial->parler-tts) (0.8.4)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n", "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.11/dist-packages (from pexpect>4.3->ipython->descript-audiotools-unofficial->parler-tts) (0.7.0)\n", "Requirement already satisfied: wcwidth in /usr/local/lib/python3.11/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython->descript-audiotools-unofficial->parler-tts) (0.2.13)\n", "Requirement already satisfied: termcolor in /usr/local/lib/python3.11/dist-packages (from fire->randomname->descript-audiotools-unofficial->parler-tts) (3.1.0)\n", "Downloading gradio-5.30.0-py3-none-any.whl (54.2 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.2/54.2 MB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading gradio_client-1.10.1-py3-none-any.whl (323 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m323.1/323.1 kB\u001b[0m \u001b[31m28.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading 
nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m68.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m92.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m58.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m35.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading transformers-4.46.1-py3-none-any.whl (10.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.0/10.0 MB\u001b[0m \u001b[31m132.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading aiofiles-24.1.0-py3-none-any.whl (15 kB)\n", "Downloading fastapi-0.115.12-py3-none-any.whl (95 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.2/95.2 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading groovy-0.1.2-py3-none-any.whl (14 kB)\n", "Downloading python_multipart-0.0.20-py3-none-any.whl (24 kB)\n", "Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.6/11.6 MB\u001b[0m \u001b[31m100.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading safehttpx-0.1.6-py3-none-any.whl (8.7 kB)\n", "Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", "Downloading starlette-0.46.2-py3-none-any.whl (72 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.0/72.0 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 
"\u001b[?25hDownloading tokenizers-0.20.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m83.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading tomlkit-0.13.2-py3-none-any.whl (37 kB)\n", "Downloading uvicorn-0.34.2-py3-none-any.whl (62 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading protobuf-4.25.7-cp37-abi3-manylinux2014_x86_64.whl (294 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.6/294.6 kB\u001b[0m \u001b[31m29.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading ffmpy-0.5.0-py3-none-any.whl (6.0 kB)\n", "Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", "Downloading flatten_dict-0.4.2-py2.py3-none-any.whl (9.7 kB)\n", "Downloading markdown2-2.5.3-py3-none-any.whl (48 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.5/48.5 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pyloudnorm-0.1.1-py3-none-any.whl (9.6 kB)\n", "Downloading pystoi-0.4.1-py2.py3-none-any.whl (8.2 kB)\n", "Downloading torch_stoi-0.2.3-py3-none-any.whl (8.1 kB)\n", "Downloading grpcio_status-1.62.3-py3-none-any.whl (14 kB)\n", "Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m83.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hBuilding wheels for collected packages: openai-whisper, parler-tts, flash-attn, descript-audio-codec-unofficial, descript-audiotools-unofficial, argbind, julius, randomname, fire\n", " Building wheel for openai-whisper (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for openai-whisper: filename=openai_whisper-20240930-py3-none-any.whl size=803404 sha256=cd5f700c94815efdc68bd77072a7f440edf76ce136ec56fd066eb5e0bc6fa119\n", " Stored in directory: /root/.cache/pip/wheels/2f/f2/ce/6eb23db4091d026238ce76703bd66da60b969d70bcc81d5d3a\n", " Building wheel for parler-tts (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for parler-tts: filename=parler_tts-0.2.3-py3-none-any.whl size=81609 sha256=be391f71bef44fa8593fa11a7f0c57454726ed568307e20f4b3030313cfcb0cd\n", " Stored in directory: /root/.cache/pip/wheels/bb/94/4a/89d2bbf31af3caa0b79ef47a0823224b7e2ddae2febc10920b\n", " Building wheel for flash-attn (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for flash-attn: filename=flash_attn-2.7.4.post1-cp311-cp311-linux_x86_64.whl size=187831595 sha256=58853b28a5a926cae14402bfd8d4d93a45ebf8f9e79533f37ab09d0d77a99c05\n", " Stored in directory: /root/.cache/pip/wheels/3d/88/d8/284b89f56af7d5bf366b10d6b8e251ac8a7c7bf3f04203fb4f\n", " Building wheel for descript-audio-codec-unofficial (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for descript-audio-codec-unofficial: filename=descript_audio_codec_unofficial-1.0.0-py3-none-any.whl size=27053 sha256=b1fb6e45123b5c5c473b08ecc87d859b8aa3bf8435ea81448ee418545db3c3c1\n", " Stored in directory: /root/.cache/pip/wheels/c4/18/5f/17a643fe763770d2451bb1ee893c188fe5680288c28c238fcf\n", " Building wheel for descript-audiotools-unofficial (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for descript-audiotools-unofficial: filename=descript_audiotools_unofficial-0.7.4-py2.py3-none-any.whl size=108048 sha256=ca8df6790be6b8ff67089d00e1153baef98cb15c575330f7ad8986ea5ba6ec16\n", " Stored in directory: /root/.cache/pip/wheels/e6/ed/8f/fec8ded5f11f4b4a0bfd716b7516b7a09a9aacaf5d19acb3da\n", " Building wheel for argbind (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for argbind: filename=argbind-0.3.9-py2.py3-none-any.whl size=11730 sha256=3daaedabc8350dfc3081472a99e21ee172756a00bd24d5c3bbe3ea028febb74b\n", " Stored in directory: /root/.cache/pip/wheels/36/3a/34/e858fa3cf5f8c33a040734efcc17e95cb5cfd99c256a7fcecf\n", " Building wheel for julius (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for julius: filename=julius-0.2.7-py3-none-any.whl size=21870 sha256=3d270315f111d7e918f02c638970614e44f36494c4bd87c8c9b2c13aa2333bca\n", " Stored in directory: /root/.cache/pip/wheels/16/15/d4/edd724cefe78050a6ba3344b8b0c6672db829a799dbb9f81ff\n", " Building wheel for randomname (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for randomname: filename=randomname-0.2.1-py3-none-any.whl size=89194 sha256=c1068a2a33e81ff31497cb97664c0c41eaeb63d922a3c04f4538bb4bb21e792c\n", " Stored in directory: /root/.cache/pip/wheels/99/b3/ae/c137ed34d7c385b74ae440b4f008183264ebe466ea0341db09\n", " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=97d18f069a923dacda67ea757052748600a3fcc711d281d3767e92fc60b56dfd\n", " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", "Successfully built openai-whisper parler-tts flash-attn descript-audio-codec-unofficial descript-audiotools-unofficial argbind julius randomname fire\n", "Installing collected packages: pydub, uvicorn, tomlkit, semantic-version, ruff, python-multipart, protobuf, nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, markdown2, jedi, groovy, flatten-dict, fire, ffmpy, argbind, aiofiles, starlette, randomname, pystoi, pyloudnorm, nvidia-cusparse-cu12, nvidia-cudnn-cu12, tokenizers, safehttpx, nvidia-cusolver-cu12, grpcio-status, gradio-client, fastapi, transformers, gradio, openai-whisper, julius, flash-attn, torch_stoi, descript-audiotools-unofficial, descript-audio-codec-unofficial, parler-tts\n", " Attempting uninstall: protobuf\n", " Found existing installation: protobuf 5.29.4\n", " Uninstalling protobuf-5.29.4:\n", " Successfully uninstalled protobuf-5.29.4\n", " Attempting uninstall: nvidia-nvjitlink-cu12\n", " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", " Attempting uninstall: nvidia-curand-cu12\n", " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", " Attempting uninstall: nvidia-cufft-cu12\n", " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", " Attempting uninstall: nvidia-cuda-runtime-cu12\n", " Found 
existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", " Attempting uninstall: nvidia-cuda-cupti-cu12\n", " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", " Attempting uninstall: nvidia-cublas-cu12\n", " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", " Attempting uninstall: nvidia-cusparse-cu12\n", " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", " Attempting uninstall: nvidia-cudnn-cu12\n", " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", " Attempting uninstall: tokenizers\n", " Found existing installation: tokenizers 0.21.1\n", " Uninstalling tokenizers-0.21.1:\n", " Successfully uninstalled tokenizers-0.21.1\n", " Attempting uninstall: nvidia-cusolver-cu12\n", " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", " Attempting uninstall: grpcio-status\n", " Found existing installation: grpcio-status 1.71.0\n", " Uninstalling grpcio-status-1.71.0:\n", " Successfully uninstalled grpcio-status-1.71.0\n", " Attempting uninstall: transformers\n", " Found existing installation: transformers 4.51.3\n", " 
Uninstalling transformers-4.51.3:\n", " Successfully uninstalled transformers-4.51.3\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "ydf 0.11.0 requires protobuf<6.0.0,>=5.29.1, but you have protobuf 4.25.7 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed aiofiles-24.1.0 argbind-0.3.9 descript-audio-codec-unofficial-1.0.0 descript-audiotools-unofficial-0.7.4 fastapi-0.115.12 ffmpy-0.5.0 fire-0.7.0 flash-attn-2.7.4.post1 flatten-dict-0.4.2 gradio-5.30.0 gradio-client-1.10.1 groovy-0.1.2 grpcio-status-1.62.3 jedi-0.19.2 julius-0.2.7 markdown2-2.5.3 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 openai-whisper-20240930 parler-tts-0.2.3 protobuf-4.25.7 pydub-0.25.1 pyloudnorm-0.1.1 pystoi-0.4.1 python-multipart-0.0.20 randomname-0.2.1 ruff-0.11.10 safehttpx-0.1.6 semantic-version-2.10.0 starlette-0.46.2 tokenizers-0.20.3 tomlkit-0.13.2 torch_stoi-0.2.3 transformers-4.46.1 uvicorn-0.34.2\n" ] } ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "Ld89BbIoap8b" }, "outputs": [], "source": [ "# @title 1. 
# @title 1. Setup and Imports

import asyncio
import base64
import io
import logging
import os
from threading import Thread, Event
import time
import queue  # For streamer's queue.Empty exception

import numpy as np  # FIX: `np` was used by later cells but never imported
import soundfile as sf
import torch
import whisper
from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer
from transformers import AutoTokenizer, GenerationConfig as HFGeLE
import google.generativeai as genai
# from google.colab import userdata
# from flash_attn_triton import FlashAttention

import gradio as gr

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
WHISPER_MODEL_SIZE = "tiny"
TTS_MODEL_NAME = "ai4bharat/indic-parler-tts"
attention_implementation = "sdpa"  # Also try with flash_attention_2
GEMINI_MODEL_NAME_NOTEBOOK = "gemini-1.5-flash-latest"

# SECURITY FIX: a real API key was hardcoded here (and therefore leaked with the
# notebook). Never commit credentials; read them from the environment or Colab
# secrets instead, e.g. `os.environ` or `userdata.get('GOOGLE_API_KEY')`.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")

# FIX: this constant is used by the Gradio pipeline cell but was never defined.
# 16 kHz matches Whisper's expected input rate.
TARGET_SAMPLE_RATE = 16000

torch_dtype_tts = torch.bfloat16 if DEVICE == "cuda" and torch.cuda.is_bf16_supported() else (torch.float16 if DEVICE == "cuda" else torch.float32)
torch_dtype_whisper = torch.float16 if DEVICE == "cuda" else torch.float32

# Extra generation kwargs passed to ParlerTTS `generate()` when streaming.
TTS_STREAMING_PARAMS_NOTEBOOK = {
    "do_sample": True,
    "temperature": 1.0,
    "min_new_tokens": 5,
}

# --- Logging ---
logging.basicConfig(level=logging.INFO)
logger_nb = logging.getLogger("notebook_ai_pipeline")  # Use a specific logger for the notebook
logger_nb.setLevel(logging.INFO)


# --- Global Model Variables for Notebook ---
# Populated lazily by load_all_resources_notebook(); None means "not loaded yet".
whisper_model_nb = None
gemini_model_instance_nb = None
tts_model_nb = None
tts_tokenizer_nb = None
# @title 2. Model Loading Functions
def load_all_resources_notebook():
    """Load Whisper (STT) and IndicParler-TTS, and configure the Gemini client.

    Populates the module-level ``*_nb`` globals. Idempotent: each resource is
    only loaded if its global is still None, so re-running the cell is safe.
    """
    global whisper_model_nb, tts_model_nb, tts_tokenizer_nb, gemini_model_instance_nb
    logger_nb.info(f"Notebook: Loading models. Whisper on {DEVICE} with {torch_dtype_whisper}, TTS on {DEVICE} with {torch_dtype_tts}")

    if whisper_model_nb is None:
        logger_nb.info(f"Notebook: Loading Whisper model: {WHISPER_MODEL_SIZE}")
        whisper_model_nb = whisper.load_model(WHISPER_MODEL_SIZE, device=DEVICE)
        logger_nb.info("Notebook: Whisper model loaded successfully.")

    if tts_model_nb is None:
        logger_nb.info(f"Notebook: Loading IndicParler-TTS model: {TTS_MODEL_NAME}")
        tts_model_nb = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL_NAME).to(DEVICE, dtype=torch_dtype_tts)
        tts_tokenizer_nb = AutoTokenizer.from_pretrained(TTS_MODEL_NAME)
        # Propagate the tokenizer's pad token into the streaming kwargs so
        # generate() does not warn / mis-pad.
        if tts_tokenizer_nb and tts_tokenizer_nb.pad_token_id is not None:
            TTS_STREAMING_PARAMS_NOTEBOOK["pad_token_id"] = tts_tokenizer_nb.pad_token_id
        logger_nb.info(f"Notebook: IndicParler-TTS model loaded. Streaming params: {TTS_STREAMING_PARAMS_NOTEBOOK}")

    if gemini_model_instance_nb is None:
        # BUG FIX: the previous check compared the key against a hardcoded
        # (leaked) literal with `!=`, which warned exactly when a *valid* user
        # key was supplied. An empty/missing key is the only "not configured"
        # condition.
        if not GOOGLE_API_KEY:
            logger_nb.warning("Notebook: GOOGLE_API_KEY not found or not replaced. LLM functionality will be limited.")
        else:
            try:
                genai.configure(api_key=GOOGLE_API_KEY)
                gemini_model_instance_nb = genai.GenerativeModel(GEMINI_MODEL_NAME_NOTEBOOK)
                logger_nb.info(f"Notebook: Gemini API configured with model: {GEMINI_MODEL_NAME_NOTEBOOK}")
            except Exception as e:
                logger_nb.error(f"Notebook: Failed to configure Gemini API: {e}", exc_info=True)
                gemini_model_instance_nb = None
    logger_nb.info("Notebook: All resources loaded (or attempted).")
# @title 3. Helper Functions for AI Pipeline
async def transcribe_audio_notebook(audio_input_tuple):
    """Transcribe a Gradio microphone tuple ``(sample_rate, np.ndarray)`` with Whisper.

    Returns the transcription string, or a human-readable status/error string
    (downstream stages check for the "Error:" / "No audio" / "Empty audio"
    prefixes, so those exact strings must be preserved).
    """
    if not whisper_model_nb:
        logger_nb.error("Notebook STT: Whisper model not loaded.")
        return "Error: Whisper model not loaded."

    if audio_input_tuple is None:
        logger_nb.warning("Notebook STT: No audio provided.")
        return "No audio provided."

    sample_rate, audio_numpy = audio_input_tuple

    if audio_numpy is None or audio_numpy.size == 0:
        logger_nb.warning("Notebook STT: Audio numpy array is empty.")
        return "Empty audio received."

    # Downmix stereo to mono. BUG FIX: the previous revision called
    # librosa.to_mono() without ever importing librosa (NameError at runtime);
    # plain numpy channel averaging is equivalent for 2-channel input.
    if audio_numpy.ndim == 2:
        if audio_numpy.shape[0] == 2:      # (channels, samples) layout
            audio_numpy = audio_numpy.mean(axis=0)
        elif audio_numpy.shape[1] == 2:    # (samples, channels) layout
            audio_numpy = audio_numpy.mean(axis=1)

    if audio_numpy.dtype != np.float32:
        if np.issubdtype(audio_numpy.dtype, np.integer):
            # Normalize integer PCM to [-1.0, 1.0], which Whisper expects.
            audio_numpy = audio_numpy.astype(np.float32) / np.iinfo(audio_numpy.dtype).max
        else:
            audio_numpy = audio_numpy.astype(np.float32)

    # BUG FIX: whisper's transcribe() has no `sample_rate` keyword (passing it
    # raised TypeError) and it does NOT resample numpy input — it assumes
    # 16 kHz. Resample here with linear interpolation when the mic rate differs.
    whisper_sr = 16000
    if sample_rate != whisper_sr and audio_numpy.size > 1:
        duration = audio_numpy.shape[0] / float(sample_rate)
        n_target = int(round(duration * whisper_sr))
        old_t = np.linspace(0.0, duration, num=audio_numpy.shape[0], endpoint=False)
        new_t = np.linspace(0.0, duration, num=n_target, endpoint=False)
        audio_numpy = np.interp(new_t, old_t, audio_numpy).astype(np.float32)

    try:
        logger_nb.info(f"Notebook STT: Transcribing audio of shape {audio_numpy.shape} with original sample rate {sample_rate}")
        # Whisper's transcribe method can take a numpy array directly.
        result = whisper_model_nb.transcribe(audio_numpy, fp16=(DEVICE == "cuda" and torch_dtype_whisper == torch.float16))
        transcribed_text = result["text"].strip()
        logger_nb.info(f"Notebook STT: Transcription: {transcribed_text}")
        return transcribed_text if transcribed_text else "Transcription resulted in empty text."
    except Exception as e:
        logger_nb.error(f"Notebook STT: Error during transcription: {e}", exc_info=True)
        return f"Error during transcription: {str(e)}"
async def generate_gemini_response_notebook(text: str):
    """Send `text` to the configured Gemini model and return its reply text.

    Always returns a string: the model's reply on success, or a user-facing
    fallback message on any error / skipped input (never raises to the caller).
    """
    if not gemini_model_instance_nb:
        logger_nb.error("Notebook LLM: Gemini model instance not available.")
        return "Sorry, the language model is currently unavailable (Gemini not configured)."
    # Skip inputs that are actually error messages from the STT stage.
    if not isinstance(text, str) or not text.strip() or text.startswith("Error:") or "No audio provided" in text or "Empty audio" in text:
        logger_nb.warning(f"Notebook LLM: Invalid input for Gemini: '{text}'. Skipping.")
        return "LLM (Gemini) skipped due to prior error or no input."
    try:
        full_prompt = f"User: {text}\nAssistant:"
        logger_nb.info(f"Notebook LLM: Sending prompt to Gemini: \"{full_prompt[:100]}...\"")

        # Run the blocking SDK call in an executor so the event loop stays free.
        # FIX: asyncio.get_event_loop() is deprecated inside coroutines
        # (Python 3.10+); use get_running_loop() instead.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, gemini_model_instance_nb.generate_content, full_prompt)

        response_text = "I'm sorry, I couldn't generate a response for that (Gemini)."
        # Try the progressively lower-level accessors the SDK response exposes.
        if hasattr(response, 'text') and response.text:
            response_text = response.text.strip()
        elif hasattr(response, 'parts') and response.parts:
            response_text = "".join(part.text for part in response.parts).strip()
        elif response.candidates and response.candidates[0].content.parts:
            response_text = response.candidates[0].content.parts[0].text.strip()
        else:
            # Empty/blocked response: surface whatever feedback the API gave us.
            safety_feedback = ""
            if hasattr(response, 'prompt_feedback') and response.prompt_feedback:
                safety_feedback = f" Safety Feedback: {response.prompt_feedback}"
            elif response.candidates and hasattr(response.candidates[0], 'finish_reason') and response.candidates[0].finish_reason != "STOP":
                safety_feedback = f" Finish Reason: {response.candidates[0].finish_reason}"
            logger_nb.warning(f"Notebook LLM: Gemini response might be empty or blocked.{safety_feedback}")
        logger_nb.info(f"Notebook LLM: Gemini Response: {response_text}")
        return response_text
    except Exception as e:
        logger_nb.error(f"Notebook LLM: Error during Gemini generation: {e}", exc_info=True)
        return f"Sorry, I encountered an error trying to respond with Gemini: {str(e)}"
async def synthesize_speech_streaming_notebook(text: str, description: str = "A clear, female voice speaking in English.", play_steps_in_s: float = 0.4):
    """Async generator: stream TTS audio for `text` as it is generated.

    Protocol of yielded ``(sample_rate, chunk)`` tuples:
      * first yield:       ``(sampling_rate, None)`` — announces the rate;
      * per audio chunk:   ``(None, np.float32 array)``;
      * terminal sentinel: ``(None, None)`` (also yielded on error / bad input).

    Generation runs in a daemon thread (model.generate with a ParlerTTSStreamer);
    this coroutine drains the streamer's queue via run_in_executor so the event
    loop is never blocked.
    """
    if not tts_model_nb or not tts_tokenizer_nb:
        logger_nb.error("Notebook TTS: Model or tokenizer not loaded.")
        yield None, None  # Yield None for sample_rate, None for chunk
        return

    # Refuse inputs that are actually upstream error/status strings.
    if not isinstance(text, str) or not text.strip() or text.startswith("Error:") or "LLM skipped" in text or "unavailable" in text:
        logger_nb.warning(f"Notebook TTS: Invalid input text for TTS: '{text}'. Yielding no audio.")
        yield None, None
        return

    streamer = None
    thread = None
    # This event is to signal the main loop that the generation thread is done/exited
    thread_done_event = Event()

    try:
        logger_nb.info(f"Notebook TTS Streamer: Starting for text: \"{text[:50]}...\"")

        # Output sample rate comes from the audio codec's config when available.
        if hasattr(tts_model_nb.config, 'audio_encoder') and hasattr(tts_model_nb.config.audio_encoder, 'sampling_rate'):
            sampling_rate = tts_model_nb.config.audio_encoder.sampling_rate
        else:
            logger_nb.warning("Notebook TTS Streamer: Could not find sampling_rate, defaulting to 24000")
            sampling_rate = 24000

        # NOTE(review): getattr() with a default cannot raise AttributeError,
        # so this except branch is effectively dead — confirm intent.
        try:
            frame_rate = getattr(tts_model_nb.config.audio_encoder, 'frame_rate', 100)
        except AttributeError:
            logger_nb.warning("Notebook TTS Streamer: frame_rate not found, using default of 100 Hz.")
            frame_rate = 100

        # play_steps = number of decoder frames per emitted chunk (>= 1).
        play_steps = int(frame_rate * play_steps_in_s)
        if play_steps == 0 : play_steps = 1

        logger_nb.info(f"Notebook TTS Streamer: params: sampling_rate={sampling_rate}, frame_rate={frame_rate}, play_steps={play_steps}")

        streamer = ParlerTTSStreamer(tts_model_nb, device=DEVICE, play_steps=play_steps)
        description_inputs = tts_tokenizer_nb(description, return_tensors="pt")
        prompt_inputs = tts_tokenizer_nb(text, return_tensors="pt")

        current_streaming_params = TTS_STREAMING_PARAMS_NOTEBOOK.copy()
        if tts_tokenizer_nb.pad_token_id is not None:  # Ensure pad_token_id is set if model expects it
            current_streaming_params["pad_token_id"] = tts_tokenizer_nb.pad_token_id

        # Parler convention: `input_ids` carries the voice description,
        # `prompt_input_ids` carries the text to speak.
        thread_generation_kwargs = {
            "input_ids": description_inputs.input_ids.to(DEVICE),
            "prompt_input_ids": prompt_inputs.input_ids.to(DEVICE),
            "attention_mask": description_inputs.attention_mask.to(DEVICE) if hasattr(description_inputs, 'attention_mask') else None,
            "streamer": streamer,
            **current_streaming_params
        }
        if thread_generation_kwargs["attention_mask"] is None:
            del thread_generation_kwargs["attention_mask"]

        def _generate_in_thread_notebook():
            # Producer: runs blocking model.generate(); the streamer pushes
            # chunks to its queue. Always ends the streamer and sets the event,
            # even on failure, so the consumer loop below can terminate.
            try:
                logger_nb.info(f"Notebook TTS generation thread: Started for text \"{text[:30]}...\"")
                with torch.no_grad():
                    tts_model_nb.generate(**thread_generation_kwargs)
                logger_nb.info(f"Notebook TTS generation thread: Finished model.generate() for text \"{text[:30]}...\"")
            except Exception as e_thread:
                logger_nb.error(f"Notebook TTS generation thread: Error: {e_thread}", exc_info=True)
            finally:
                if streamer: streamer.end()
                logger_nb.info(f"Notebook TTS generation thread: Called streamer.end() for text \"{text[:30]}...\"")
                thread_done_event.set()

        thread = Thread(target=_generate_in_thread_notebook)
        thread.daemon = True
        thread.start()

        # NOTE(review): asyncio.get_event_loop() is deprecated inside
        # coroutines on Python 3.10+; get_running_loop() is the modern form.
        loop = asyncio.get_event_loop()  # Get current loop for notebook
        streamer_iter_count = 0

        # Yield sample rate once at the beginning
        yield sampling_rate, None  # Signal sample rate, no audio chunk yet

        # Consumer loop: drain chunks until the producer signals completion.
        while not thread_done_event.is_set():
            audio_chunk_tensor = None
            try:
                logger_nb.debug(f"Notebook TTS Streamer: Attempting to get chunk {streamer_iter_count}...")
                # Use run_in_executor for the blocking queue get
                audio_chunk_tensor = await loop.run_in_executor(None, lambda: streamer.audio_queue.get(timeout=0.1))  # Shorter timeout

                if audio_chunk_tensor is None:
                    logger_nb.info("Notebook TTS Streamer: Yielded None, assuming end from producer.")
                    # thread_done_event might not be set yet if this None is the final sentinel
                    # NOTE(review): `is_active` is not an obviously documented
                    # ParlerTTSStreamer attribute — confirm it exists, else this
                    # raises AttributeError here.
                    if not streamer.is_active:  # Check if streamer itself thinks it's done
                        break
                    continue  # If streamer is active but yielded None, it's unusual, maybe wait

                if not isinstance(audio_chunk_tensor, torch.Tensor) or audio_chunk_tensor.numel() == 0:
                    logger_nb.debug("Notebook TTS Streamer: Yielded empty or non-tensor chunk.")
                    await asyncio.sleep(0.01)  # Brief sleep
                    continue

                # Convert to float32 numpy on CPU for Gradio consumption.
                audio_chunk_np = audio_chunk_tensor.cpu().to(torch.float32).numpy().squeeze()
                if audio_chunk_np.size == 0:
                    continue

                # For Gradio, we yield the numpy array directly with the sample rate
                yield None, audio_chunk_np  # No sample rate on subsequent chunks, only audio
                streamer_iter_count += 1

            except queue.Empty:  # Timeout from streamer.audio_queue.get
                logger_nb.debug("Notebook TTS Streamer: Queue empty, checking thread status.")
                if not thread.is_alive() and not thread_done_event.is_set():  # Thread died before signaling
                    logger_nb.warning("Notebook TTS Streamer: Generation thread died unexpectedly. Ending stream.")
                    thread_done_event.set()
                continue  # Loop again to check thread_done_event
            except StopIteration:  # Should not happen if streamer.end() is used correctly by thread
                logger_nb.info("Notebook TTS Streamer: Finished (StopIteration).")
                thread_done_event.set()
                break
            except Exception as e_stream_iter:
                logger_nb.error(f"Notebook TTS Streamer: Error iterating streamer: {e_stream_iter}", exc_info=True)
                thread_done_event.set()
                break

        logger_nb.info(f"Notebook TTS Streamer: Finished iteration. Yielded {streamer_iter_count} chunks.")

    except Exception as e:
        logger_nb.error(f"Notebook TTS Streamer: Error in main function: {e}", exc_info=True)
        yield None, None
    finally:
        # Best-effort cleanup: end the streamer and give the producer thread a
        # bounded chance to exit before emitting the terminal sentinel.
        logger_nb.info(f"Notebook TTS Streamer: Exiting for text \"{text[:50]}...\".")
        if streamer: streamer.end()
        if thread and thread.is_alive():
            logger_nb.info("Notebook TTS Streamer: Waiting for thread in finally...")
            thread.join(timeout=2.0)
            if thread.is_alive():
                logger_nb.warning("Notebook TTS Streamer: Thread still alive after join timeout.")
        yield None, None  # Signal end of stream
# @title 4. Gradio Interface Definition and Pipeline

# Load models once when this cell is run (or re-run)
if whisper_model_nb is None or tts_model_nb is None or gemini_model_instance_nb is None:  # Basic check to prevent re-loading if cell is run multiple times
    load_all_resources_notebook()

# BUG FIX: this constant was referenced below but never defined anywhere in the
# notebook (NameError on the first yield). 16 kHz matches Whisper's input rate.
TARGET_SAMPLE_RATE = 16000

async def full_ai_pipeline_notebook(audio_input_microphone):
    """
    Gradio function that processes audio input through STT, LLM (Gemini),
    and yields streaming TTS audio chunks for Gradio's streaming audio output.

    Yields ``(stt_text, llm_text, audio_tuple)`` triples matching the three
    output components wired up in the Blocks UI below.
    """
    logger_nb.info("Gradio Pipeline: Started.")

    # Placeholder "silence" frame used while the text stages are still running.
    silence = (TARGET_SAMPLE_RATE, np.array([0.0], dtype=np.float32))

    # 1. STT
    if audio_input_microphone is None:
        yield "Please provide audio input.", "Recording not provided.", silence
        return

    transcribed_text = await transcribe_audio_notebook(audio_input_microphone)
    logger_nb.info(f"Gradio Pipeline: Transcription: {transcribed_text}")
    yield transcribed_text, "Processing with LLM...", silence  # Update UI

    # 2. LLM (Gemini) — skip when STT returned one of its error strings.
    if transcribed_text.startswith("Error:") or "No audio provided" in transcribed_text or "Empty audio" in transcribed_text:
        llm_response_text = "Cannot proceed with LLM due to STT error."
    else:
        llm_response_text = await generate_gemini_response_notebook(transcribed_text)
    logger_nb.info(f"Gradio Pipeline: LLM Response: {llm_response_text}")
    yield transcribed_text, llm_response_text, silence  # Update UI

    # 3. TTS Streaming — skip when the LLM stage reported a problem.
    if llm_response_text.startswith("Error:") or "LLM skipped" in llm_response_text or "unavailable" in llm_response_text:
        logger_nb.warning("Gradio Pipeline: Skipping TTS due to LLM error.")
        final_llm_text_with_tts_status = f"{llm_response_text} (TTS Skipped)"
        yield transcribed_text, final_llm_text_with_tts_status, silence
        return

    tts_description = "A clear, female voice speaking in English."

    # Generator protocol: (sample_rate, None) first, (None, chunk) afterwards,
    # (None, None) as the terminal sentinel.
    # NOTE(review): Gradio's streaming gr.Audio commonly expects a full
    # (sample_rate, chunk) tuple on every yield — confirm this split-tuple
    # convention works with the installed gradio version.
    first_chunk = True
    async for sr, audio_chunk_np in synthesize_speech_streaming_notebook(llm_response_text, tts_description):
        if sr is not None and first_chunk:  # First yield with sample rate
            logger_nb.info(f"Gradio Pipeline: TTS Stream - Yielding sample rate {sr}")
            yield transcribed_text, llm_response_text, (sr, None)
            first_chunk = False
        elif audio_chunk_np is not None:  # Subsequent audio chunks
            logger_nb.debug(f"Gradio Pipeline: TTS Stream - Yielding audio chunk of shape {audio_chunk_np.shape}")
            yield transcribed_text, llm_response_text, (None, audio_chunk_np)
        elif sr is None and audio_chunk_np is None and not first_chunk:  # End of stream signal from generator
            logger_nb.info("Gradio Pipeline: TTS Stream - Signalling end of stream.")
            yield transcribed_text, llm_response_text, (None, None)
            break

    logger_nb.info("Gradio Pipeline: Finished.")
# Define Gradio Interface
# Ensure this cell is run after defining all functions and loading models.
# Component creation order inside the context managers defines the page layout.
with gr.Blocks(title="Notebook Conversational AI") as demo_notebook:
    gr.Markdown("# Conversational AI in Jupyter Notebook")
    gr.Markdown("Uses AI4Bharat IndicParler-TTS (Streaming), Gemini LLM, and Whisper STT.")

    with gr.Row():
        # streaming=False: record a full clip, then process on button press.
        mic_input = gr.Audio(sources=["microphone"], type="numpy", label="Speak Here", streaming=False)
        # `streaming=True` for gr.Audio input is for continuous input,
        # not directly related to output streaming here. We process after recording stops.

    submit_button = gr.Button("Process Speech")

    with gr.Accordion("Conversation Log", open=True):
        stt_output = gr.Textbox(label="You Said (Transcription)", lines=2, interactive=False)
        llm_output = gr.Textbox(label="Assistant's Response (Text)", lines=4, interactive=False)
        # For streaming audio output in Gradio, the component itself handles accumulation
        tts_audio_output = gr.Audio(label="Assistant's Speech (Streaming)", streaming=True, autoplay=False)

    # Wire the async-generator pipeline to the three outputs; each yield from
    # full_ai_pipeline_notebook updates (stt_output, llm_output, tts_audio_output).
    # NOTE(review): no demo_notebook.launch() is visible in this chunk —
    # presumably a later cell launches the app; confirm.
    submit_button.click(
        fn=full_ai_pipeline_notebook,
        inputs=[mic_input],
        outputs=[stt_output, llm_output, tts_audio_output]
    )
"151749c129f84de8a35ea6a04aed7f26", "db25ab7af40c4475acc5b815e837404e", "000e4c425a834befb256ac2811ddd944", "084508f9d92f4f6d9240013fdc8997ca", "c3e31f57c1b54affba9263a51a538609", "c3df42f2e57b47c08e182297a5e92bfd", "85669881366d44a7b355887ebbef3c23", "f37cf46fdb154251ab601551be42d4a8", "a4958a323880496f84c0cda36ed0fafd", "2795d1dfe834418c9335a65284129a4f", "e345836890264e929b6a465d7bf20b78", "0d82a674efaa4a7a94de1c3308fe2d56", "907e757aec2d48fcb61bd277fcd4fb71", "57e0019906214ffb952ab6ffdefda922", "ff0e9084f6ea47d9aa48a7cd7277f151", "f3ea3ae08746479fb0cf4d180b175302", "f804716c88cc47238790c01a30ba73df", "c6ea52dffc5b4b4e8471830d0f6fd69a", "f61266500648479388516acda6f54d39", "bdf3fc82e04844f0aa4465d70c667d25", "e1c9a766de204d379a9c02a5bd6d5e10", "87bdf4a01aaa4983bd906a9c2b8be9db", "827fcc11fc8f4a1287ea417068c338b5", "0b83eb781952498db57f06cbdd356d85", "08979837c80940c0a168e6e2f1ec8ccc", "7d7f88b27323461da0a20f2c98632cf1", "c851f10d02eb4516a9f45faaf1b72ad0", "7fc666136dce4732941f37e5ad4bb667", "dc48d1e2d5404b0db407abd817855ee7", "389f8686300249e1b7d2057e4b803e76", "7396e06208ad4bd1aeb471497856d184", "942d523bbc14450eb4f26b096cb31dbe", "0e78c9d3d8f943b596a1769073153877", "f0c9de96a49e49339acd5295a27949e3", "48fbad129de24aa7bbd24956ed093e7d", "120bf0b214a441d9beddf5cc2d29672c", "ddc0650fe60d47ebb0534bb08f6d35dc", "55cd8af6e6ba4e9f9fa5d8d7526f13ff", "789a26caac5c4ddaae5a172a12d824fe", "79aa723e35e745ecb9fbd57a0946e24c", "89958b4c86024acd9b0141078a05d264", "737a64890f99408fa5daa51934df792e", "3dc95c4ce88640c8a72440a17d75950b", "b7e00e0de7ad46ae8228b95f8de3eeab", "950ce1075335428d9a419c7ad662a72a", "8e0eda96a9144ac995117007aafb7973", "a53bffe501c344bfb888c269e6105311", "e0d722e8eefb47bfa733d04acb7dfe26", "0ec3130f76354e1d88e9c6cf1684dfef", "be62cee63ea24543926adc3ef1931964", "00ffc95bbd7a41daace497357350b9f6", "dcf8deffa17c4949a0c520d4a2e88ba7", "1f0ca07f78994acfaf73a21ac6024ad3", "70e05ab4620040f39595d7f5f01c942c", "5eb3464a5da64ee0afaf937ad9fede13", 
"ab0600de0bc744c2961951c81f8f3464", "811541ee9b314868aea0492c07e24686", "f5f75b0f29bd40a3bc7a2f577847563f", "9ce18a4480e54ade87cab05118ff7b6c", "8a2a6a206b494ad3894905ee1ef36b3c", "288d3a7309fe42ae9142c1b41af6211e", "3224681a0e154e65905f67064caa9711", "994961ff43444351ba3dd79f394ec752", "19cbd35e15674da490dbf595931e74c8", "78598a2a81c94a7eab378539f6219d82", "0ecfbeae600e4495be8480be32af73db", "f891346ff14b43179c5cc0fedbe3bcc6", "6af2379bb3284ecf927aa0ff0c953826", "a47f614043b84baab2763b1bde12d7bf" ] }, "id": "fkIzYB790HF7", "outputId": "81578f07-edc2-4185-a7fb-dccea98d1227" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "INFO:notebook_ai_pipeline:Notebook: Loading models. Whisper on cuda with torch.float16, TTS on cuda with torch.bfloat16\n", "INFO:notebook_ai_pipeline:Notebook: Loading Whisper model: tiny\n", "100%|█████████████████████████████████████| 72.1M/72.1M [00:01<00:00, 50.2MiB/s]\n", "INFO:notebook_ai_pipeline:Notebook: Whisper model loaded successfully.\n", "INFO:notebook_ai_pipeline:Notebook: Loading IndicParler-TTS model: ai4bharat/indic-parler-tts\n", "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", "You will be able to reuse this secret in all of your notebooks.\n", "Please note that authentication is recommended but still optional to access public models or datasets.\n", " warnings.warn(\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "config.json: 0%| | 0.00/7.34k [00:00 is overwritten by shared text_encoder config: T5Config {\n", " \"_name_or_path\": \"google/flan-t5-large\",\n", " \"architectures\": [\n", " \"T5ForConditionalGeneration\"\n", " ],\n", " \"classifier_dropout\": 0.0,\n", " 
\"d_ff\": 2816,\n", " \"d_kv\": 64,\n", " \"d_model\": 1024,\n", " \"decoder_start_token_id\": 0,\n", " \"dense_act_fn\": \"gelu_new\",\n", " \"dropout_rate\": 0.1,\n", " \"eos_token_id\": 1,\n", " \"feed_forward_proj\": \"gated-gelu\",\n", " \"initializer_factor\": 1.0,\n", " \"is_encoder_decoder\": true,\n", " \"is_gated_act\": true,\n", " \"layer_norm_epsilon\": 1e-06,\n", " \"model_type\": \"t5\",\n", " \"n_positions\": 512,\n", " \"num_decoder_layers\": 24,\n", " \"num_heads\": 16,\n", " \"num_layers\": 24,\n", " \"output_past\": true,\n", " \"pad_token_id\": 0,\n", " \"relative_attention_max_distance\": 128,\n", " \"relative_attention_num_buckets\": 32,\n", " \"tie_word_embeddings\": false,\n", " \"transformers_version\": \"4.46.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 32128\n", "}\n", "\n", "WARNING:parler_tts.modeling_parler_tts:Config of the audio_encoder: is overwritten by shared audio_encoder config: DacConfig {\n", " \"_name_or_path\": \"ylacombe/dac_44khz\",\n", " \"architectures\": [\n", " \"DacModel\"\n", " ],\n", " \"codebook_dim\": 8,\n", " \"codebook_loss_weight\": 1.0,\n", " \"codebook_size\": 1024,\n", " \"commitment_loss_weight\": 0.25,\n", " \"decoder_hidden_size\": 1536,\n", " \"downsampling_ratios\": [\n", " 2,\n", " 4,\n", " 8,\n", " 8\n", " ],\n", " \"encoder_hidden_size\": 64,\n", " \"hidden_size\": 1024,\n", " \"hop_length\": 512,\n", " \"model_type\": \"dac\",\n", " \"n_codebooks\": 9,\n", " \"quantizer_dropout\": 0.0,\n", " \"sampling_rate\": 44100,\n", " \"torch_dtype\": \"float32\",\n", " \"transformers_version\": \"4.46.1\",\n", " \"upsampling_ratios\": [\n", " 8,\n", " 8,\n", " 4,\n", " 2\n", " ]\n", "}\n", "\n", "WARNING:parler_tts.modeling_parler_tts:Config of the decoder: is overwritten by shared decoder config: ParlerTTSDecoderConfig {\n", " \"_name_or_path\": \"/fsx/yoach/tmp/artefacts/parler-tts-mini-v2-empty/decoder\",\n", " \"activation_dropout\": 0.0,\n", " \"activation_function\": \"gelu\",\n", " 
\"add_cross_attention\": true,\n", " \"architectures\": [\n", " \"ParlerTTSForCausalLM\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 1025,\n", " \"codebook_weights\": null,\n", " \"cross_attention_implementation_strategy\": null,\n", " \"delay_strategy\": \"delay\",\n", " \"dropout\": 0.1,\n", " \"eos_token_id\": 1024,\n", " \"ffn_dim\": 4096,\n", " \"hidden_size\": 1024,\n", " \"initializer_factor\": 0.02,\n", " \"is_decoder\": true,\n", " \"layerdrop\": 0.0,\n", " \"max_position_embeddings\": 4096,\n", " \"model_type\": \"parler_tts_decoder\",\n", " \"num_attention_heads\": 16,\n", " \"num_codebooks\": 9,\n", " \"num_cross_attention_key_value_heads\": 16,\n", " \"num_hidden_layers\": 24,\n", " \"num_key_value_heads\": 16,\n", " \"pad_token_id\": 1024,\n", " \"rope_embeddings\": false,\n", " \"rope_theta\": 10000.0,\n", " \"scale_embedding\": false,\n", " \"tie_word_embeddings\": false,\n", " \"torch_dtype\": \"float32\",\n", " \"transformers_version\": \"4.46.1\",\n", " \"use_cache\": true,\n", " \"use_fused_lm_heads\": true,\n", " \"vocab_size\": 1088\n", "}\n", "\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "generation_config.json: 0%| | 0.00/223 [00:00" ], "text/html": [ "
" ] }, "metadata": {} } ] } ] }