diff --git "a/test_notebook.ipynb" "b/test_notebook.ipynb" new file mode 100644--- /dev/null +++ "b/test_notebook.ipynb" @@ -0,0 +1,3617 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "1dc105e0a29149ba8b646f0e46bba8db": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_afd89e11dd7546d4949321da8a5e27ff", + "IPY_MODEL_4cf46922d8cb4faf82275b6a723fa78c", + "IPY_MODEL_f50fc1856ff94bb28ab1b47050503e9c" + ], + "layout": "IPY_MODEL_0844c41065fb4d82a886cf020e7c6b58" + } + }, + "afd89e11dd7546d4949321da8a5e27ff": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9e1336e21abe47c28a08407d7fc04cae", + "placeholder": "​", + "style": "IPY_MODEL_9ea0bf46bac04c82b97a795a6bba829f", + "value": "config.json: 100%" + } + }, + "4cf46922d8cb4faf82275b6a723fa78c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], 
+ "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_72138e774ff44ad6a9c5c1cfa0b54113", + "max": 7338, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_151749c129f84de8a35ea6a04aed7f26", + "value": 7338 + } + }, + "f50fc1856ff94bb28ab1b47050503e9c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_db25ab7af40c4475acc5b815e837404e", + "placeholder": "​", + "style": "IPY_MODEL_000e4c425a834befb256ac2811ddd944", + "value": " 7.34k/7.34k [00:00<00:00, 394kB/s]" + } + }, + "0844c41065fb4d82a886cf020e7c6b58": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + 
"grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9e1336e21abe47c28a08407d7fc04cae": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9ea0bf46bac04c82b97a795a6bba829f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "72138e774ff44ad6a9c5c1cfa0b54113": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "151749c129f84de8a35ea6a04aed7f26": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "db25ab7af40c4475acc5b815e837404e": { + "model_module": "@jupyter-widgets/base", 
+ "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "000e4c425a834befb256ac2811ddd944": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "084508f9d92f4f6d9240013fdc8997ca": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c3e31f57c1b54affba9263a51a538609", + "IPY_MODEL_c3df42f2e57b47c08e182297a5e92bfd", + "IPY_MODEL_85669881366d44a7b355887ebbef3c23" + ], + "layout": "IPY_MODEL_f37cf46fdb154251ab601551be42d4a8" + } + }, + "c3e31f57c1b54affba9263a51a538609": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a4958a323880496f84c0cda36ed0fafd", + "placeholder": "​", + "style": "IPY_MODEL_2795d1dfe834418c9335a65284129a4f", + "value": "model.safetensors: 100%" + } + }, + "c3df42f2e57b47c08e182297a5e92bfd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e345836890264e929b6a465d7bf20b78", + "max": 3751321772, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_0d82a674efaa4a7a94de1c3308fe2d56", + "value": 3751321772 + } + }, + "85669881366d44a7b355887ebbef3c23": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + 
"_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_907e757aec2d48fcb61bd277fcd4fb71", + "placeholder": "​", + "style": "IPY_MODEL_57e0019906214ffb952ab6ffdefda922", + "value": " 3.75G/3.75G [00:29<00:00, 180MB/s]" + } + }, + "f37cf46fdb154251ab601551be42d4a8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a4958a323880496f84c0cda36ed0fafd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2795d1dfe834418c9335a65284129a4f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e345836890264e929b6a465d7bf20b78": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + 
"grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0d82a674efaa4a7a94de1c3308fe2d56": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "907e757aec2d48fcb61bd277fcd4fb71": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": 
null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "57e0019906214ffb952ab6ffdefda922": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ff0e9084f6ea47d9aa48a7cd7277f151": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f3ea3ae08746479fb0cf4d180b175302", + "IPY_MODEL_f804716c88cc47238790c01a30ba73df", + "IPY_MODEL_c6ea52dffc5b4b4e8471830d0f6fd69a" + ], + "layout": "IPY_MODEL_f61266500648479388516acda6f54d39" + } + }, + "f3ea3ae08746479fb0cf4d180b175302": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_bdf3fc82e04844f0aa4465d70c667d25", + "placeholder": "​", + "style": "IPY_MODEL_e1c9a766de204d379a9c02a5bd6d5e10", + "value": "generation_config.json: 100%" + } + }, + "f804716c88cc47238790c01a30ba73df": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_87bdf4a01aaa4983bd906a9c2b8be9db", + "max": 223, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_827fcc11fc8f4a1287ea417068c338b5", + "value": 223 + } + }, + "c6ea52dffc5b4b4e8471830d0f6fd69a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0b83eb781952498db57f06cbdd356d85", + "placeholder": "​", + "style": "IPY_MODEL_08979837c80940c0a168e6e2f1ec8ccc", + "value": " 223/223 [00:00<00:00, 17.0kB/s]" + } + }, + "f61266500648479388516acda6f54d39": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "bdf3fc82e04844f0aa4465d70c667d25": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e1c9a766de204d379a9c02a5bd6d5e10": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "87bdf4a01aaa4983bd906a9c2b8be9db": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "827fcc11fc8f4a1287ea417068c338b5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0b83eb781952498db57f06cbdd356d85": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "08979837c80940c0a168e6e2f1ec8ccc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + 
}, + "7d7f88b27323461da0a20f2c98632cf1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c851f10d02eb4516a9f45faaf1b72ad0", + "IPY_MODEL_7fc666136dce4732941f37e5ad4bb667", + "IPY_MODEL_dc48d1e2d5404b0db407abd817855ee7" + ], + "layout": "IPY_MODEL_389f8686300249e1b7d2057e4b803e76" + } + }, + "c851f10d02eb4516a9f45faaf1b72ad0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_7396e06208ad4bd1aeb471497856d184", + "placeholder": "​", + "style": "IPY_MODEL_942d523bbc14450eb4f26b096cb31dbe", + "value": "tokenizer_config.json: 100%" + } + }, + "7fc666136dce4732941f37e5ad4bb667": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0e78c9d3d8f943b596a1769073153877", + "max": 990, + "min": 0, + "orientation": 
"horizontal", + "style": "IPY_MODEL_f0c9de96a49e49339acd5295a27949e3", + "value": 990 + } + }, + "dc48d1e2d5404b0db407abd817855ee7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_48fbad129de24aa7bbd24956ed093e7d", + "placeholder": "​", + "style": "IPY_MODEL_120bf0b214a441d9beddf5cc2d29672c", + "value": " 990/990 [00:00<00:00, 77.0kB/s]" + } + }, + "389f8686300249e1b7d2057e4b803e76": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + 
"width": null + } + }, + "7396e06208ad4bd1aeb471497856d184": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "942d523bbc14450eb4f26b096cb31dbe": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0e78c9d3d8f943b596a1769073153877": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f0c9de96a49e49339acd5295a27949e3": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "48fbad129de24aa7bbd24956ed093e7d": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "120bf0b214a441d9beddf5cc2d29672c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ddc0650fe60d47ebb0534bb08f6d35dc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_55cd8af6e6ba4e9f9fa5d8d7526f13ff", + "IPY_MODEL_789a26caac5c4ddaae5a172a12d824fe", + "IPY_MODEL_79aa723e35e745ecb9fbd57a0946e24c" + ], + "layout": "IPY_MODEL_89958b4c86024acd9b0141078a05d264" + } + }, + "55cd8af6e6ba4e9f9fa5d8d7526f13ff": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + 
"model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_737a64890f99408fa5daa51934df792e", + "placeholder": "​", + "style": "IPY_MODEL_3dc95c4ce88640c8a72440a17d75950b", + "value": "tokenizer.model: 100%" + } + }, + "789a26caac5c4ddaae5a172a12d824fe": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b7e00e0de7ad46ae8228b95f8de3eeab", + "max": 1795391, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_950ce1075335428d9a419c7ad662a72a", + "value": 1795391 + } + }, + "79aa723e35e745ecb9fbd57a0946e24c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8e0eda96a9144ac995117007aafb7973", + "placeholder": "​", + "style": "IPY_MODEL_a53bffe501c344bfb888c269e6105311", + "value": " 1.80M/1.80M [00:00<00:00, 72.8MB/s]" + } + }, + 
"89958b4c86024acd9b0141078a05d264": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "737a64890f99408fa5daa51934df792e": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": 
null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3dc95c4ce88640c8a72440a17d75950b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b7e00e0de7ad46ae8228b95f8de3eeab": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + 
"object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "950ce1075335428d9a419c7ad662a72a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8e0eda96a9144ac995117007aafb7973": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a53bffe501c344bfb888c269e6105311": { + "model_module": 
"@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e0d722e8eefb47bfa733d04acb7dfe26": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_0ec3130f76354e1d88e9c6cf1684dfef", + "IPY_MODEL_be62cee63ea24543926adc3ef1931964", + "IPY_MODEL_00ffc95bbd7a41daace497357350b9f6" + ], + "layout": "IPY_MODEL_dcf8deffa17c4949a0c520d4a2e88ba7" + } + }, + "0ec3130f76354e1d88e9c6cf1684dfef": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_1f0ca07f78994acfaf73a21ac6024ad3", + "placeholder": "​", + "style": "IPY_MODEL_70e05ab4620040f39595d7f5f01c942c", + "value": "tokenizer.json: 100%" + } + }, + "be62cee63ea24543926adc3ef1931964": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", 
+ "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5eb3464a5da64ee0afaf937ad9fede13", + "max": 10272460, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_ab0600de0bc744c2961951c81f8f3464", + "value": 10272460 + } + }, + "00ffc95bbd7a41daace497357350b9f6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_811541ee9b314868aea0492c07e24686", + "placeholder": "​", + "style": "IPY_MODEL_f5f75b0f29bd40a3bc7a2f577847563f", + "value": " 10.3M/10.3M [00:01<00:00, 9.57MB/s]" + } + }, + "dcf8deffa17c4949a0c520d4a2e88ba7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": 
null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1f0ca07f78994acfaf73a21ac6024ad3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "70e05ab4620040f39595d7f5f01c942c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5eb3464a5da64ee0afaf937ad9fede13": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ab0600de0bc744c2961951c81f8f3464": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "811541ee9b314868aea0492c07e24686": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + 
"model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f5f75b0f29bd40a3bc7a2f577847563f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9ce18a4480e54ade87cab05118ff7b6c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", 
+ "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8a2a6a206b494ad3894905ee1ef36b3c", + "IPY_MODEL_288d3a7309fe42ae9142c1b41af6211e", + "IPY_MODEL_3224681a0e154e65905f67064caa9711" + ], + "layout": "IPY_MODEL_994961ff43444351ba3dd79f394ec752" + } + }, + "8a2a6a206b494ad3894905ee1ef36b3c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_19cbd35e15674da490dbf595931e74c8", + "placeholder": "​", + "style": "IPY_MODEL_78598a2a81c94a7eab378539f6219d82", + "value": "special_tokens_map.json: 100%" + } + }, + "288d3a7309fe42ae9142c1b41af6211e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0ecfbeae600e4495be8480be32af73db", + "max": 552, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f891346ff14b43179c5cc0fedbe3bcc6", + "value": 552 + } + }, + "3224681a0e154e65905f67064caa9711": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": 
null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6af2379bb3284ecf927aa0ff0c953826", + "placeholder": "​", + "style": "IPY_MODEL_a47f614043b84baab2763b1bde12d7bf", + "value": " 552/552 [00:00<00:00, 59.0kB/s]" + } + }, + "994961ff43444351ba3dd79f394ec752": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "19cbd35e15674da490dbf595931e74c8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": 
"LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "78598a2a81c94a7eab378539f6219d82": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0ecfbeae600e4495be8480be32af73db": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + 
"grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f891346ff14b43179c5cc0fedbe3bcc6": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "6af2379bb3284ecf927aa0ff0c953826": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a47f614043b84baab2763b1bde12d7bf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "cells": [ + { + "cell_type": "code", + "source": [ + "# @title 0. Installations\n", + "# Install necessary packages (run this cell once if needed)\n", + "\n", + "!pip install gradio torch torchvision torchaudio openai-whisper soundfile parler-tts transformers google-generativeai numpy librosa flash-attn" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "l7kKPw9qgBll", + "outputId": "cc6b0841-ef47-46b6-eaa1-7d0a9dd4a793" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting gradio\n", + " Downloading gradio-5.30.0-py3-none-any.whl.metadata (16 kB)\n", + "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.6.0+cu124)\n", + "Requirement already satisfied: torchvision in /usr/local/lib/python3.11/dist-packages (0.21.0+cu124)\n", + "Requirement already satisfied: torchaudio in /usr/local/lib/python3.11/dist-packages (2.6.0+cu124)\n", + "Collecting openai-whisper\n", + " Downloading openai-whisper-20240930.tar.gz (800 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m800.5/800.5 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m 
eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: soundfile in /usr/local/lib/python3.11/dist-packages (0.13.1)\n", + "Collecting parler-tts\n", + " Downloading parler_tts-0.2.3.tar.gz (80 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m80.2/80.2 kB\u001b[0m \u001b[31m7.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.51.3)\n", + "Requirement already satisfied: google-generativeai in /usr/local/lib/python3.11/dist-packages (0.8.5)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (2.0.2)\n", + "Requirement already satisfied: librosa in /usr/local/lib/python3.11/dist-packages (0.11.0)\n", + "Collecting flash-attn\n", + " Downloading flash_attn-2.7.4.post1.tar.gz (6.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m83.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Collecting aiofiles<25.0,>=22.0 (from gradio)\n", + " Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: anyio<5.0,>=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (4.9.0)\n", + "Collecting fastapi<1.0,>=0.115.2 (from gradio)\n", + " Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)\n", + "Collecting ffmpy (from gradio)\n", + " Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)\n", + "Collecting gradio-client==1.10.1 (from gradio)\n", + " Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)\n", + "Collecting groovy~=0.1 (from gradio)\n", + " Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)\n", + "Requirement already satisfied: httpx>=0.24.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.28.1)\n", + "Requirement already satisfied: huggingface-hub>=0.28.1 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.31.2)\n", + "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.1.6)\n", + "Requirement already satisfied: markupsafe<4.0,>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.0.2)\n", + "Requirement already satisfied: orjson~=3.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (3.10.18)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from gradio) (24.2)\n", + "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.2.2)\n", + "Requirement already satisfied: pillow<12.0,>=8.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (11.2.1)\n", + "Requirement already satisfied: pydantic<2.12,>=2.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (2.11.4)\n", + "Collecting pydub (from gradio)\n", + " Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", + "Collecting python-multipart>=0.0.18 (from gradio)\n", + " 
Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: pyyaml<7.0,>=5.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (6.0.2)\n", + "Collecting ruff>=0.9.3 (from gradio)\n", + " Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)\n", + "Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)\n", + " Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)\n", + "Collecting semantic-version~=2.0 (from gradio)\n", + " Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n", + "Collecting starlette<1.0,>=0.40.0 (from gradio)\n", + " Downloading starlette-0.46.2-py3-none-any.whl.metadata (6.2 kB)\n", + "Collecting tomlkit<0.14.0,>=0.12.0 (from gradio)\n", + " Downloading tomlkit-0.13.2-py3-none-any.whl.metadata (2.7 kB)\n", + "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.11/dist-packages (from gradio) (0.15.3)\n", + "Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.11/dist-packages (from gradio) (4.13.2)\n", + "Collecting uvicorn>=0.14.0 (from gradio)\n", + " Downloading uvicorn-0.34.2-py3-none-any.whl.metadata (6.5 kB)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.10.1->gradio) (2025.3.2)\n", + "Requirement already satisfied: websockets<16.0,>=10.0 in /usr/local/lib/python3.11/dist-packages (from gradio-client==1.10.1->gradio) (15.0.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch) (3.18.0)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)\n", + " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)\n", + " Downloading 
nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)\n", + " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n", + " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)\n", + " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)\n", + " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-curand-cu12==10.3.5.147 (from torch)\n", + " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)\n", + " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)\n", + " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", + "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch) (0.6.2)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", + "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)\n", + " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.2.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages 
(from torch) (1.13.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", + "Requirement already satisfied: numba in /usr/local/lib/python3.11/dist-packages (from openai-whisper) (0.60.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from openai-whisper) (4.67.1)\n", + "Requirement already satisfied: more-itertools in /usr/local/lib/python3.11/dist-packages (from openai-whisper) (10.7.0)\n", + "Requirement already satisfied: tiktoken in /usr/local/lib/python3.11/dist-packages (from openai-whisper) (0.9.0)\n", + "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.11/dist-packages (from soundfile) (1.17.1)\n", + "Collecting transformers\n", + " Downloading transformers-4.46.1-py3-none-any.whl.metadata (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.1/44.1 kB\u001b[0m \u001b[31m3.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: sentencepiece in /usr/local/lib/python3.11/dist-packages (from parler-tts) (0.2.0)\n", + "Collecting descript-audio-codec-unofficial (from parler-tts)\n", + " Downloading descript_audio_codec_unofficial-1.0.0.tar.gz (24 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting descript-audiotools-unofficial (from parler-tts)\n", + " Downloading descript_audiotools_unofficial-0.7.4.tar.gz (100 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m100.7/100.7 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: protobuf>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from parler-tts) (5.29.4)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.3)\n", + "Collecting tokenizers<0.21,>=0.20 (from transformers)\n", + " Downloading tokenizers-0.20.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n", + "Requirement already satisfied: google-ai-generativelanguage==0.6.15 in /usr/local/lib/python3.11/dist-packages (from google-generativeai) (0.6.15)\n", + "Requirement already satisfied: google-api-core in /usr/local/lib/python3.11/dist-packages (from google-generativeai) (2.24.2)\n", + "Requirement already satisfied: google-api-python-client in /usr/local/lib/python3.11/dist-packages (from google-generativeai) (2.169.0)\n", + "Requirement already satisfied: google-auth>=2.15.0 in /usr/local/lib/python3.11/dist-packages (from google-generativeai) (2.38.0)\n", + "Requirement already satisfied: proto-plus<2.0.0dev,>=1.22.3 in /usr/local/lib/python3.11/dist-packages (from google-ai-generativelanguage==0.6.15->google-generativeai) (1.26.1)\n", + "Requirement already satisfied: audioread>=2.1.9 in /usr/local/lib/python3.11/dist-packages (from librosa) (3.0.1)\n", + "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from librosa) (1.15.3)\n", + "Requirement already satisfied: scikit-learn>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from librosa) (1.6.1)\n", + "Requirement already satisfied: joblib>=1.0 in /usr/local/lib/python3.11/dist-packages (from librosa) (1.5.0)\n", + "Requirement already satisfied: decorator>=4.3.0 in 
/usr/local/lib/python3.11/dist-packages (from librosa) (4.4.2)\n", + "Requirement already satisfied: pooch>=1.1 in /usr/local/lib/python3.11/dist-packages (from librosa) (1.8.2)\n", + "Requirement already satisfied: soxr>=0.3.2 in /usr/local/lib/python3.11/dist-packages (from librosa) (0.5.0.post1)\n", + "Requirement already satisfied: lazy_loader>=0.1 in /usr/local/lib/python3.11/dist-packages (from librosa) (0.4)\n", + "Requirement already satisfied: msgpack>=1.0 in /usr/local/lib/python3.11/dist-packages (from librosa) (1.1.0)\n", + "Requirement already satisfied: einops in /usr/local/lib/python3.11/dist-packages (from flash-attn) (0.8.1)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (3.10)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio<5.0,>=3.0->gradio) (1.3.1)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.11/dist-packages (from cffi>=1.0->soundfile) (2.22)\n", + "Requirement already satisfied: googleapis-common-protos<2.0.0,>=1.56.2 in /usr/local/lib/python3.11/dist-packages (from google-api-core->google-generativeai) (1.70.0)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from google-auth>=2.15.0->google-generativeai) (5.5.2)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.11/dist-packages (from google-auth>=2.15.0->google-generativeai) (0.4.2)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.11/dist-packages (from google-auth>=2.15.0->google-generativeai) (4.9.1)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (2025.4.26)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx>=0.24.1->gradio) (1.0.9)\n", + "Requirement already satisfied: h11>=0.16 in 
/usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx>=0.24.1->gradio) (0.16.0)\n", + "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.11/dist-packages (from numba->openai-whisper) (0.43.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0,>=1.0->gradio) (2025.2)\n", + "Requirement already satisfied: platformdirs>=2.5.0 in /usr/local/lib/python3.11/dist-packages (from pooch>=1.1->librosa) (4.3.8)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.33.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (2.33.2)\n", + "Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<2.12,>=2.0->gradio) (0.4.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.2)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.4.0)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn>=1.1.0->librosa) (3.6.0)\n", + "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (8.2.0)\n", + "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (1.5.4)\n", + "Requirement already satisfied: rich>=10.11.0 in 
/usr/local/lib/python3.11/dist-packages (from typer<1.0,>=0.12->gradio) (13.9.4)\n", + "Collecting argbind>=0.3.7 (from descript-audio-codec-unofficial->parler-tts)\n", + " Downloading argbind-0.3.9.tar.gz (17 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting pyloudnorm (from descript-audiotools-unofficial->parler-tts)\n", + " Downloading pyloudnorm-0.1.1-py3-none-any.whl.metadata (5.6 kB)\n", + "Requirement already satisfied: importlib-resources in /usr/local/lib/python3.11/dist-packages (from descript-audiotools-unofficial->parler-tts) (6.5.2)\n", + "Collecting julius (from descript-audiotools-unofficial->parler-tts)\n", + " Downloading julius-0.2.7.tar.gz (59 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.6/59.6 kB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: ipython in /usr/local/lib/python3.11/dist-packages (from descript-audiotools-unofficial->parler-tts) (7.34.0)\n", + "Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from descript-audiotools-unofficial->parler-tts) (3.10.0)\n", + "Collecting pystoi (from descript-audiotools-unofficial->parler-tts)\n", + " Downloading pystoi-0.4.1-py2.py3-none-any.whl.metadata (4.0 kB)\n", + "Collecting torch_stoi (from descript-audiotools-unofficial->parler-tts)\n", + " Downloading torch_stoi-0.2.3-py3-none-any.whl.metadata (3.6 kB)\n", + "Collecting flatten-dict (from descript-audiotools-unofficial->parler-tts)\n", + " Downloading flatten_dict-0.4.2-py2.py3-none-any.whl.metadata (9.2 kB)\n", + "Collecting markdown2 (from descript-audiotools-unofficial->parler-tts)\n", + " Downloading markdown2-2.5.3-py3-none-any.whl.metadata (2.1 kB)\n", + "Collecting randomname (from descript-audiotools-unofficial->parler-tts)\n", + " Downloading randomname-0.2.1.tar.gz (64 
kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m64.2/64.2 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting protobuf>=4.0.0 (from parler-tts)\n", + " Downloading protobuf-4.25.7-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)\n", + "Requirement already satisfied: tensorboard in /usr/local/lib/python3.11/dist-packages (from descript-audiotools-unofficial->parler-tts) (2.18.0)\n", + "Requirement already satisfied: httplib2<1.0.0,>=0.19.0 in /usr/local/lib/python3.11/dist-packages (from google-api-python-client->google-generativeai) (0.22.0)\n", + "Requirement already satisfied: google-auth-httplib2<1.0.0,>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from google-api-python-client->google-generativeai) (0.2.0)\n", + "Requirement already satisfied: uritemplate<5,>=3.0.1 in /usr/local/lib/python3.11/dist-packages (from google-api-python-client->google-generativeai) (4.1.1)\n", + "Requirement already satisfied: docstring-parser in /usr/local/lib/python3.11/dist-packages (from argbind>=0.3.7->descript-audio-codec-unofficial->parler-tts) (0.16)\n", + "Requirement already satisfied: grpcio<2.0dev,>=1.33.2 in /usr/local/lib/python3.11/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-ai-generativelanguage==0.6.15->google-generativeai) (1.71.0)\n", + "Requirement already satisfied: grpcio-status<2.0.dev0,>=1.33.2 in /usr/local/lib/python3.11/dist-packages (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-ai-generativelanguage==0.6.15->google-generativeai) (1.71.0)\n", + "Requirement already satisfied: pyparsing!=3.0.0,!=3.0.1,!=3.0.2,!=3.0.3,<4,>=2.4.2 in /usr/local/lib/python3.11/dist-packages (from 
httplib2<1.0.0,>=0.19.0->google-api-python-client->google-generativeai) (3.2.3)\n", + "Requirement already satisfied: pyasn1<0.7.0,>=0.6.1 in /usr/local/lib/python3.11/dist-packages (from pyasn1-modules>=0.2.1->google-auth>=2.15.0->google-generativeai) (0.6.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3.0,>=1.0->gradio) (1.17.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.19.1)\n", + "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (75.2.0)\n", + "Collecting jedi>=0.16 (from ipython->descript-audiotools-unofficial->parler-tts)\n", + " Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: pickleshare in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (0.7.5)\n", + "Requirement already satisfied: traitlets>=4.2 in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (5.7.1)\n", + "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (3.0.51)\n", + "Requirement already satisfied: backcall in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (0.2.0)\n", + "Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.11/dist-packages (from ipython->descript-audiotools-unofficial->parler-tts) (0.1.7)\n", + "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.11/dist-packages (from 
ipython->descript-audiotools-unofficial->parler-tts) (4.9.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->descript-audiotools-unofficial->parler-tts) (1.3.2)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib->descript-audiotools-unofficial->parler-tts) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->descript-audiotools-unofficial->parler-tts) (4.58.0)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->descript-audiotools-unofficial->parler-tts) (1.4.8)\n", + "Requirement already satisfied: future>=0.16.0 in /usr/local/lib/python3.11/dist-packages (from pyloudnorm->descript-audiotools-unofficial->parler-tts) (1.0.0)\n", + "Collecting fire (from randomname->descript-audiotools-unofficial->parler-tts)\n", + " Downloading fire-0.7.0.tar.gz (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.2/87.2 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.11/dist-packages (from tensorboard->descript-audiotools-unofficial->parler-tts) (1.4.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.11/dist-packages (from tensorboard->descript-audiotools-unofficial->parler-tts) (3.8)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from tensorboard->descript-audiotools-unofficial->parler-tts) (0.7.2)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from tensorboard->descript-audiotools-unofficial->parler-tts) (3.1.3)\n", + "INFO: pip is looking at multiple versions of grpcio-status to determine which version is compatible with other requirements. This could take a while.\n", + "Collecting grpcio-status<2.0.dev0,>=1.33.2 (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.10.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,!=2.8.*,!=2.9.*,<3.0.0dev,>=1.34.1->google-ai-generativelanguage==0.6.15->google-generativeai)\n", + " Downloading grpcio_status-1.70.0-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.69.0-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.68.1-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.68.0-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.67.1-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.67.0-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.66.2-py3-none-any.whl.metadata (1.1 kB)\n", + "INFO: pip is still looking at multiple versions of grpcio-status to determine which version is compatible with other requirements. 
This could take a while.\n", + " Downloading grpcio_status-1.66.1-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.66.0-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.65.5-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.65.4-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.65.2-py3-none-any.whl.metadata (1.1 kB)\n", + "INFO: This is taking longer than usual. You might need to provide the dependency resolver with stricter constraints to reduce runtime. See https://pip.pypa.io/warnings/backtracking for guidance. If you want to abort this run, press Ctrl + C.\n", + " Downloading grpcio_status-1.65.1-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.64.3-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.64.1-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.64.0-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.63.2-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.63.0-py3-none-any.whl.metadata (1.1 kB)\n", + " Downloading grpcio_status-1.62.3-py3-none-any.whl.metadata (1.3 kB)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.4 in /usr/local/lib/python3.11/dist-packages (from jedi>=0.16->ipython->descript-audiotools-unofficial->parler-tts) (0.8.4)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n", + "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.11/dist-packages (from pexpect>4.3->ipython->descript-audiotools-unofficial->parler-tts) (0.7.0)\n", + "Requirement already satisfied: wcwidth in /usr/local/lib/python3.11/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython->descript-audiotools-unofficial->parler-tts) (0.2.13)\n", + "Requirement already satisfied: termcolor in 
/usr/local/lib/python3.11/dist-packages (from fire->randomname->descript-audiotools-unofficial->parler-tts) (3.1.0)\n", + "Downloading gradio-5.30.0-py3-none-any.whl (54.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.2/54.2 MB\u001b[0m \u001b[31m21.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading gradio_client-1.10.1-py3-none-any.whl (323 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m323.1/323.1 kB\u001b[0m \u001b[31m28.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m1.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m68.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m92.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m58.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading 
nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m35.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading transformers-4.46.1-py3-none-any.whl (10.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.0/10.0 MB\u001b[0m \u001b[31m132.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading aiofiles-24.1.0-py3-none-any.whl (15 kB)\n", + "Downloading fastapi-0.115.12-py3-none-any.whl (95 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.2/95.2 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading groovy-0.1.2-py3-none-any.whl (14 kB)\n", + "Downloading 
python_multipart-0.0.20-py3-none-any.whl (24 kB)\n", + "Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.6/11.6 MB\u001b[0m \u001b[31m100.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading safehttpx-0.1.6-py3-none-any.whl (8.7 kB)\n", + "Downloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", + "Downloading starlette-0.46.2-py3-none-any.whl (72 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.0/72.0 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tokenizers-0.20.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.0/3.0 MB\u001b[0m \u001b[31m83.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tomlkit-0.13.2-py3-none-any.whl (37 kB)\n", + "Downloading uvicorn-0.34.2-py3-none-any.whl (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.5/62.5 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading protobuf-4.25.7-cp37-abi3-manylinux2014_x86_64.whl (294 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━��━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.6/294.6 kB\u001b[0m \u001b[31m29.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading ffmpy-0.5.0-py3-none-any.whl (6.0 kB)\n", + "Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", + "Downloading flatten_dict-0.4.2-py2.py3-none-any.whl (9.7 kB)\n", + "Downloading markdown2-2.5.3-py3-none-any.whl (48 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.5/48.5 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading 
pyloudnorm-0.1.1-py3-none-any.whl (9.6 kB)\n", + "Downloading pystoi-0.4.1-py2.py3-none-any.whl (8.2 kB)\n", + "Downloading torch_stoi-0.2.3-py3-none-any.whl (8.1 kB)\n", + "Downloading grpcio_status-1.62.3-py3-none-any.whl (14 kB)\n", + "Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m83.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: openai-whisper, parler-tts, flash-attn, descript-audio-codec-unofficial, descript-audiotools-unofficial, argbind, julius, randomname, fire\n", + " Building wheel for openai-whisper (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for openai-whisper: filename=openai_whisper-20240930-py3-none-any.whl size=803404 sha256=cd5f700c94815efdc68bd77072a7f440edf76ce136ec56fd066eb5e0bc6fa119\n", + " Stored in directory: /root/.cache/pip/wheels/2f/f2/ce/6eb23db4091d026238ce76703bd66da60b969d70bcc81d5d3a\n", + " Building wheel for parler-tts (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for parler-tts: filename=parler_tts-0.2.3-py3-none-any.whl size=81609 sha256=be391f71bef44fa8593fa11a7f0c57454726ed568307e20f4b3030313cfcb0cd\n", + " Stored in directory: /root/.cache/pip/wheels/bb/94/4a/89d2bbf31af3caa0b79ef47a0823224b7e2ddae2febc10920b\n", + " Building wheel for flash-attn (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for flash-attn: filename=flash_attn-2.7.4.post1-cp311-cp311-linux_x86_64.whl size=187831595 sha256=58853b28a5a926cae14402bfd8d4d93a45ebf8f9e79533f37ab09d0d77a99c05\n", + " Stored in directory: /root/.cache/pip/wheels/3d/88/d8/284b89f56af7d5bf366b10d6b8e251ac8a7c7bf3f04203fb4f\n", + " Building wheel for descript-audio-codec-unofficial (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for descript-audio-codec-unofficial: filename=descript_audio_codec_unofficial-1.0.0-py3-none-any.whl size=27053 sha256=b1fb6e45123b5c5c473b08ecc87d859b8aa3bf8435ea81448ee418545db3c3c1\n", + " Stored in directory: /root/.cache/pip/wheels/c4/18/5f/17a643fe763770d2451bb1ee893c188fe5680288c28c238fcf\n", + " Building wheel for descript-audiotools-unofficial (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for descript-audiotools-unofficial: filename=descript_audiotools_unofficial-0.7.4-py2.py3-none-any.whl size=108048 sha256=ca8df6790be6b8ff67089d00e1153baef98cb15c575330f7ad8986ea5ba6ec16\n", + " Stored in directory: /root/.cache/pip/wheels/e6/ed/8f/fec8ded5f11f4b4a0bfd716b7516b7a09a9aacaf5d19acb3da\n", + " Building wheel for argbind (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for argbind: filename=argbind-0.3.9-py2.py3-none-any.whl size=11730 sha256=3daaedabc8350dfc3081472a99e21ee172756a00bd24d5c3bbe3ea028febb74b\n", + " Stored in directory: /root/.cache/pip/wheels/36/3a/34/e858fa3cf5f8c33a040734efcc17e95cb5cfd99c256a7fcecf\n", + " Building wheel for julius (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for julius: filename=julius-0.2.7-py3-none-any.whl size=21870 sha256=3d270315f111d7e918f02c638970614e44f36494c4bd87c8c9b2c13aa2333bca\n", + " Stored in directory: /root/.cache/pip/wheels/16/15/d4/edd724cefe78050a6ba3344b8b0c6672db829a799dbb9f81ff\n", + " Building wheel for randomname (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for randomname: filename=randomname-0.2.1-py3-none-any.whl size=89194 sha256=c1068a2a33e81ff31497cb97664c0c41eaeb63d922a3c04f4538bb4bb21e792c\n", + " Stored in directory: /root/.cache/pip/wheels/99/b3/ae/c137ed34d7c385b74ae440b4f008183264ebe466ea0341db09\n", + " Building wheel for fire (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=97d18f069a923dacda67ea757052748600a3fcc711d281d3767e92fc60b56dfd\n", + " Stored in directory: /root/.cache/pip/wheels/46/54/24/1624fd5b8674eb1188623f7e8e17cdf7c0f6c24b609dfb8a89\n", + "Successfully built openai-whisper parler-tts flash-attn descript-audio-codec-unofficial descript-audiotools-unofficial argbind julius randomname fire\n", + "Installing collected packages: pydub, uvicorn, tomlkit, semantic-version, ruff, python-multipart, protobuf, nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, markdown2, jedi, groovy, flatten-dict, fire, ffmpy, argbind, aiofiles, starlette, randomname, pystoi, pyloudnorm, nvidia-cusparse-cu12, nvidia-cudnn-cu12, tokenizers, safehttpx, nvidia-cusolver-cu12, grpcio-status, gradio-client, fastapi, transformers, gradio, openai-whisper, julius, flash-attn, torch_stoi, descript-audiotools-unofficial, descript-audio-codec-unofficial, parler-tts\n", + " Attempting uninstall: protobuf\n", + " Found existing installation: protobuf 5.29.4\n", + " Uninstalling protobuf-5.29.4:\n", + " Successfully uninstalled protobuf-5.29.4\n", + " Attempting uninstall: nvidia-nvjitlink-cu12\n", + " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", + " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", + " Attempting uninstall: nvidia-curand-cu12\n", + " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", + " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", + " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", + " Attempting uninstall: nvidia-cufft-cu12\n", + " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", + " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", + " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", + " Attempting 
uninstall: nvidia-cuda-runtime-cu12\n", + " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", + " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cuda-cupti-cu12\n", + " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", + " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", + " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", + " Attempting uninstall: nvidia-cublas-cu12\n", + " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", + " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", + " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", + " Attempting uninstall: nvidia-cusparse-cu12\n", + " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", + " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", + " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", + " Attempting uninstall: nvidia-cudnn-cu12\n", + " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", + " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", + " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", + " Attempting uninstall: tokenizers\n", + " Found existing installation: tokenizers 0.21.1\n", + " Uninstalling tokenizers-0.21.1:\n", + " Successfully uninstalled tokenizers-0.21.1\n", + " Attempting uninstall: nvidia-cusolver-cu12\n", + " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", + " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", + " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", + " Attempting uninstall: grpcio-status\n", + " Found existing installation: grpcio-status 1.71.0\n", + " Uninstalling grpcio-status-1.71.0:\n", + " Successfully uninstalled 
grpcio-status-1.71.0\n", + " Attempting uninstall: transformers\n", + " Found existing installation: transformers 4.51.3\n", + " Uninstalling transformers-4.51.3:\n", + " Successfully uninstalled transformers-4.51.3\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "ydf 0.11.0 requires protobuf<6.0.0,>=5.29.1, but you have protobuf 4.25.7 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed aiofiles-24.1.0 argbind-0.3.9 descript-audio-codec-unofficial-1.0.0 descript-audiotools-unofficial-0.7.4 fastapi-0.115.12 ffmpy-0.5.0 fire-0.7.0 flash-attn-2.7.4.post1 flatten-dict-0.4.2 gradio-5.30.0 gradio-client-1.10.1 groovy-0.1.2 grpcio-status-1.62.3 jedi-0.19.2 julius-0.2.7 markdown2-2.5.3 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127 openai-whisper-20240930 parler-tts-0.2.3 protobuf-4.25.7 pydub-0.25.1 pyloudnorm-0.1.1 pystoi-0.4.1 python-multipart-0.0.20 randomname-0.2.1 ruff-0.11.10 safehttpx-0.1.6 semantic-version-2.10.0 starlette-0.46.2 tokenizers-0.20.3 tomlkit-0.13.2 torch_stoi-0.2.3 transformers-4.46.1 uvicorn-0.34.2\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "Ld89BbIoap8b" + }, + "outputs": [], + "source": [ + "# @title 1. 
Setup and Imports\n", + "\n", + "import asyncio\n", + "import base64\n", + "import io\n", + "import logging\n", + "import os\n", + "from threading import Thread, Event\n", + "import time\n", + "import queue # For streamer's queue.Empty exception\n", + "\n", + "import numpy as np\n", + "import soundfile as sf\n", + "import torch\n", + "import whisper\n", + "from parler_tts import ParlerTTSForConditionalGeneration, ParlerTTSStreamer\n", + "from transformers import AutoTokenizer, GenerationConfig as HFGeLE\n", + "import google.generativeai as genai\n", + "# from google.colab import userdata\n", + "# from flash_attn_triton import FlashAttention\n", + "\n", + "import gradio as gr\n", + "\n", + "DEVICE = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "WHISPER_MODEL_SIZE = \"tiny\"\n", + "TTS_MODEL_NAME = \"ai4bharat/indic-parler-tts\"\n", + "attention_implementation = \"sdpa\" # Also try with flash_attention_2\n", + "GEMINI_MODEL_NAME_NOTEBOOK = \"gemini-1.5-flash-latest\"\n", + "GOOGLE_API_KEY = os.environ.get(\"GOOGLE_API_KEY\", \"\") # Read the key from the environment; never hard-code a real key in a committed notebook\n", + "torch_dtype_tts = torch.bfloat16 if DEVICE == \"cuda\" and torch.cuda.is_bf16_supported() else (torch.float16 if DEVICE == \"cuda\" else torch.float32)\n", + "torch_dtype_whisper = torch.float16 if DEVICE == \"cuda\" else torch.float32\n", + "# userdata.get('GOOGLE_API_KEY')\n", + "\n", + "TTS_STREAMING_PARAMS_NOTEBOOK = {\n", + " \"do_sample\": True,\n", + " \"temperature\": 1.0,\n", + " \"min_new_tokens\": 5,\n", + "}\n", + "\n", + "# --- Logging ---\n", + "logging.basicConfig(level=logging.INFO)\n", + "logger_nb = logging.getLogger(\"notebook_ai_pipeline\") # Use a specific logger for the notebook\n", + "logger_nb.setLevel(logging.INFO)\n", + "\n", + "\n", + "# --- Global Model Variables for Notebook ---\n", + "whisper_model_nb = None\n", + "gemini_model_instance_nb = None\n", + "tts_model_nb = None\n", + "tts_tokenizer_nb = None" + ] + }, + { + "cell_type": "code", + "source": [ + "# @title 2. 
Model Loading Functions\n", + "def load_all_resources_notebook():\n", + " global whisper_model_nb, tts_model_nb, tts_tokenizer_nb, gemini_model_instance_nb\n", + " logger_nb.info(f\"Notebook: Loading models. Whisper on {DEVICE} with {torch_dtype_whisper}, TTS on {DEVICE} with {torch_dtype_tts}\")\n", + "\n", + " if whisper_model_nb is None:\n", + " logger_nb.info(f\"Notebook: Loading Whisper model: {WHISPER_MODEL_SIZE}\")\n", + " whisper_model_nb = whisper.load_model(WHISPER_MODEL_SIZE, device=DEVICE)\n", + " logger_nb.info(\"Notebook: Whisper model loaded successfully.\")\n", + "\n", + " if tts_model_nb is None:\n", + " logger_nb.info(f\"Notebook: Loading IndicParler-TTS model: {TTS_MODEL_NAME}\")\n", + " tts_model_nb = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL_NAME).to(DEVICE, dtype=torch_dtype_tts)\n", + " tts_tokenizer_nb = AutoTokenizer.from_pretrained(TTS_MODEL_NAME)\n", + " if tts_tokenizer_nb and tts_tokenizer_nb.pad_token_id is not None:\n", + " TTS_STREAMING_PARAMS_NOTEBOOK[\"pad_token_id\"] = tts_tokenizer_nb.pad_token_id\n", + " logger_nb.info(f\"Notebook: IndicParler-TTS model loaded. Streaming params: {TTS_STREAMING_PARAMS_NOTEBOOK}\")\n", + "\n", + " if gemini_model_instance_nb is None:\n", + " if not GOOGLE_API_KEY:\n", + " logger_nb.warning(\"Notebook: GOOGLE_API_KEY not found or not replaced. 
LLM functionality will be limited.\")\n", + " else:\n", + " try:\n", + " genai.configure(api_key=GOOGLE_API_KEY)\n", + " gemini_model_instance_nb = genai.GenerativeModel(GEMINI_MODEL_NAME_NOTEBOOK)\n", + " logger_nb.info(f\"Notebook: Gemini API configured with model: {GEMINI_MODEL_NAME_NOTEBOOK}\")\n", + " except Exception as e:\n", + " logger_nb.error(f\"Notebook: Failed to configure Gemini API: {e}\", exc_info=True)\n", + " gemini_model_instance_nb = None\n", + " logger_nb.info(\"Notebook: All resources loaded (or attempted).\")" + ], + "metadata": { + "id": "T176tmyQ0DC8" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 3. Helper Functions for AI Pipeline\n", + "async def transcribe_audio_notebook(audio_input_tuple):\n", + " if not whisper_model_nb:\n", + " logger_nb.error(\"Notebook STT: Whisper model not loaded.\")\n", + " return \"Error: Whisper model not loaded.\"\n", + "\n", + " if audio_input_tuple is None:\n", + " logger_nb.warning(\"Notebook STT: No audio provided.\")\n", + " return \"No audio provided.\"\n", + "\n", + " sample_rate, audio_numpy = audio_input_tuple\n", + "\n", + " if audio_numpy is None or audio_numpy.size == 0:\n", + " logger_nb.warning(\"Notebook STT: Audio numpy array is empty.\")\n", + " return \"Empty audio received.\"\n", + "\n", + " # Ensure audio is mono float32, which is a common expectation\n", + " if audio_numpy.ndim > 1:\n", + " if audio_numpy.shape[0] == 2 and audio_numpy.ndim == 2:\n", + " audio_numpy = np.mean(audio_numpy, axis=0)\n", + " elif audio_numpy.shape[1] == 2 and audio_numpy.ndim == 2:\n", + " audio_numpy = np.mean(audio_numpy, axis=1)\n", + "\n", + " if audio_numpy.dtype != np.float32:\n", + " if np.issubdtype(audio_numpy.dtype, np.integer):\n", + " audio_numpy = audio_numpy.astype(np.float32) / np.iinfo(audio_numpy.dtype).max\n", + " else:\n", + " audio_numpy = audio_numpy.astype(np.float32)\n", + "\n", + " # Whisper expects audio at its own sampling rate 
(16 kHz) and does not resample NumPy input itself.\n", + " # transcribe() only resamples when it is given a file path (it decodes through ffmpeg),\n", + " # so an in-memory array recorded at another rate is converted to 16 kHz below.\n", + "\n", + " try:\n", + " logger_nb.info(f\"Notebook STT: Transcribing audio of shape {audio_numpy.shape} with original sample rate {sample_rate}\")\n", + "        if int(sample_rate) != 16000:\n", + "            from scipy.signal import resample_poly # local import: only needed when the recording is not already 16 kHz\n", + "            audio_numpy = resample_poly(audio_numpy, 16000, int(sample_rate)).astype(np.float32)\n", + " # Whisper's transcribe method can take a numpy array directly\n", + " result = whisper_model_nb.transcribe(audio_numpy, fp16=(DEVICE == \"cuda\" and torch_dtype_whisper == torch.float16))\n", + " transcribed_text = result[\"text\"].strip()\n", + " logger_nb.info(f\"Notebook STT: Transcription: {transcribed_text}\")\n", + " return transcribed_text if transcribed_text else \"Transcription resulted in empty text.\"\n", + " except Exception as e:\n", + " logger_nb.error(f\"Notebook STT: Error during transcription: {e}\", exc_info=True)\n", + " return f\"Error: transcription failed: {str(e)}\"\n", + "\n", + "async def generate_gemini_response_notebook(text: str):\n", + " if not gemini_model_instance_nb:\n", + " logger_nb.error(\"Notebook LLM: Gemini model instance not available.\")\n", + " return \"Sorry, the language model is currently unavailable (Gemini not configured).\"\n", + " if not isinstance(text, str) or not text.strip() or text.startswith(\"Error:\") or \"No audio provided\" in text or \"Empty audio\" in text:\n", + " logger_nb.warning(f\"Notebook LLM: Invalid input for Gemini: '{text}'. 
Skipping.\")\n", + " return \"LLM (Gemini) skipped due to prior error or no input.\"\n", + " try:\n", + " full_prompt = f\"User: {text}\\nAssistant:\"\n", + " logger_nb.info(f\"Notebook LLM: Sending prompt to Gemini: \\\"{full_prompt[:100]}...\\\"\")\n", + "\n", + " # Running blocking IO in an executor for async compatibility in notebook if needed\n", + " loop = asyncio.get_event_loop()\n", + " response = await loop.run_in_executor(None, gemini_model_instance_nb.generate_content, full_prompt)\n", + "\n", + " response_text = \"I'm sorry, I couldn't generate a response for that (Gemini).\"\n", + " if hasattr(response, 'text') and response.text:\n", + " response_text = response.text.strip()\n", + " elif hasattr(response, 'parts') and response.parts:\n", + " response_text = \"\".join(part.text for part in response.parts).strip()\n", + " elif response.candidates and response.candidates[0].content.parts:\n", + " response_text = response.candidates[0].content.parts[0].text.strip()\n", + " else:\n", + " safety_feedback = \"\"\n", + " if hasattr(response, 'prompt_feedback') and response.prompt_feedback:\n", + " safety_feedback = f\" Safety Feedback: {response.prompt_feedback}\"\n", + " elif response.candidates and hasattr(response.candidates[0], 'finish_reason') and response.candidates[0].finish_reason != \"STOP\":\n", + " safety_feedback = f\" Finish Reason: {response.candidates[0].finish_reason}\"\n", + " logger_nb.warning(f\"Notebook LLM: Gemini response might be empty or blocked.{safety_feedback}\")\n", + " logger_nb.info(f\"Notebook LLM: Gemini Response: {response_text}\")\n", + " return response_text\n", + " except Exception as e:\n", + " logger_nb.error(f\"Notebook LLM: Error during Gemini generation: {e}\", exc_info=True)\n", + " return f\"Sorry, I encountered an error trying to respond with Gemini: {str(e)}\"\n", + "\n", + "async def synthesize_speech_streaming_notebook(text: str, description: str = \"A clear, female voice speaking in English.\", play_steps_in_s: 
float = 0.4):\n", + " if not tts_model_nb or not tts_tokenizer_nb:\n", + " logger_nb.error(\"Notebook TTS: Model or tokenizer not loaded.\")\n", + " yield None, None # Yield None for sample_rate, None for chunk\n", + " return\n", + "\n", + " if not isinstance(text, str) or not text.strip() or text.startswith(\"Error:\") or \"LLM skipped\" in text or \"unavailable\" in text:\n", + " logger_nb.warning(f\"Notebook TTS: Invalid input text for TTS: '{text}'. Yielding no audio.\")\n", + " yield None, None\n", + " return\n", + "\n", + " streamer = None\n", + " thread = None\n", + " # This event is to signal the main loop that the generation thread is done/exited\n", + " thread_done_event = Event()\n", + "\n", + " try:\n", + " logger_nb.info(f\"Notebook TTS Streamer: Starting for text: \\\"{text[:50]}...\\\"\")\n", + "\n", + " if hasattr(tts_model_nb.config, 'audio_encoder') and hasattr(tts_model_nb.config.audio_encoder, 'sampling_rate'):\n", + " sampling_rate = tts_model_nb.config.audio_encoder.sampling_rate\n", + " else:\n", + " logger_nb.warning(\"Notebook TTS Streamer: Could not find sampling_rate, defaulting to 24000\")\n", + " sampling_rate = 24000\n", + "\n", + " try:\n", + " frame_rate = getattr(tts_model_nb.config.audio_encoder, 'frame_rate', 100)\n", + " except AttributeError:\n", + " logger_nb.warning(\"Notebook TTS Streamer: frame_rate not found, using default of 100 Hz.\")\n", + " frame_rate = 100\n", + "\n", + " play_steps = int(frame_rate * play_steps_in_s)\n", + " if play_steps == 0 : play_steps = 1\n", + "\n", + " logger_nb.info(f\"Notebook TTS Streamer: params: sampling_rate={sampling_rate}, frame_rate={frame_rate}, play_steps={play_steps}\")\n", + "\n", + " streamer = ParlerTTSStreamer(tts_model_nb, device=DEVICE, play_steps=play_steps)\n", + " description_inputs = tts_tokenizer_nb(description, return_tensors=\"pt\")\n", + " prompt_inputs = tts_tokenizer_nb(text, return_tensors=\"pt\")\n", + "\n", + " current_streaming_params = 
TTS_STREAMING_PARAMS_NOTEBOOK.copy()\n", + " if tts_tokenizer_nb.pad_token_id is not None: # Ensure pad_token_id is set if model expects it\n", + " current_streaming_params[\"pad_token_id\"] = tts_tokenizer_nb.pad_token_id\n", + "\n", + " thread_generation_kwargs = {\n", + " \"input_ids\": description_inputs.input_ids.to(DEVICE),\n", + " \"prompt_input_ids\": prompt_inputs.input_ids.to(DEVICE),\n", + " \"attention_mask\": description_inputs.attention_mask.to(DEVICE) if hasattr(description_inputs, 'attention_mask') else None,\n", + " \"streamer\": streamer,\n", + " **current_streaming_params\n", + " }\n", + " if thread_generation_kwargs[\"attention_mask\"] is None:\n", + " del thread_generation_kwargs[\"attention_mask\"]\n", + "\n", + " def _generate_in_thread_notebook():\n", + " try:\n", + " logger_nb.info(f\"Notebook TTS generation thread: Started for text \\\"{text[:30]}...\\\"\")\n", + " with torch.no_grad():\n", + " tts_model_nb.generate(**thread_generation_kwargs)\n", + " logger_nb.info(f\"Notebook TTS generation thread: Finished model.generate() for text \\\"{text[:30]}...\\\"\")\n", + " except Exception as e_thread:\n", + " logger_nb.error(f\"Notebook TTS generation thread: Error: {e_thread}\", exc_info=True)\n", + " finally:\n", + " if streamer: streamer.end()\n", + " logger_nb.info(f\"Notebook TTS generation thread: Called streamer.end() for text \\\"{text[:30]}...\\\"\")\n", + " thread_done_event.set()\n", + "\n", + " thread = Thread(target=_generate_in_thread_notebook)\n", + " thread.daemon = True\n", + " thread.start()\n", + "\n", + " loop = asyncio.get_event_loop() # Get current loop for notebook\n", + " streamer_iter_count = 0\n", + "\n", + " # Yield sample rate once at the beginning\n", + " yield sampling_rate, None # Signal sample rate, no audio chunk yet\n", + "\n", + " while not thread_done_event.is_set():\n", + " audio_chunk_tensor = None\n", + " try:\n", + " logger_nb.debug(f\"Notebook TTS Streamer: Attempting to get chunk 
{streamer_iter_count}...\")\n", + " # Use run_in_executor for the blocking queue get\n", + " audio_chunk_tensor = await loop.run_in_executor(None, lambda: streamer.audio_queue.get(timeout=0.1)) # Shorter timeout\n", + "\n", + " if audio_chunk_tensor is None:\n", + " logger_nb.info(\"Notebook TTS Streamer: Yielded None, assuming end from producer.\")\n", + " # thread_done_event might not be set yet if this None is the final sentinel\n", + " if not streamer.is_active: # Check if streamer itself thinks it's done\n", + " break\n", + " continue # If streamer is active but yielded None, it's unusual, maybe wait\n", + "\n", + " if not isinstance(audio_chunk_tensor, torch.Tensor) or audio_chunk_tensor.numel() == 0:\n", + " logger_nb.debug(\"Notebook TTS Streamer: Yielded empty or non-tensor chunk.\")\n", + " await asyncio.sleep(0.01) # Brief sleep\n", + " continue\n", + "\n", + " audio_chunk_np = audio_chunk_tensor.cpu().to(torch.float32).numpy().squeeze()\n", + " if audio_chunk_np.size == 0:\n", + " continue\n", + "\n", + " # For Gradio, we yield the numpy array directly with the sample rate\n", + " yield None, audio_chunk_np # No sample rate on subsequent chunks, only audio\n", + " streamer_iter_count += 1\n", + "\n", + " except queue.Empty: # Timeout from streamer.audio_queue.get\n", + " logger_nb.debug(\"Notebook TTS Streamer: Queue empty, checking thread status.\")\n", + " if not thread.is_alive() and not thread_done_event.is_set(): # Thread died before signaling\n", + " logger_nb.warning(\"Notebook TTS Streamer: Generation thread died unexpectedly. 
Ending stream.\")\n", + " thread_done_event.set()\n", + " continue # Loop again to check thread_done_event\n", + " except StopIteration: # Should not happen if streamer.end() is used correctly by thread\n", + " logger_nb.info(\"Notebook TTS Streamer: Finished (StopIteration).\")\n", + " thread_done_event.set()\n", + " break\n", + " except Exception as e_stream_iter:\n", + " logger_nb.error(f\"Notebook TTS Streamer: Error iterating streamer: {e_stream_iter}\", exc_info=True)\n", + " thread_done_event.set()\n", + " break\n", + "\n", + " logger_nb.info(f\"Notebook TTS Streamer: Finished iteration. Yielded {streamer_iter_count} chunks.\")\n", + "\n", + " except Exception as e:\n", + " logger_nb.error(f\"Notebook TTS Streamer: Error in main function: {e}\", exc_info=True)\n", + " yield None, None\n", + " finally:\n", + " logger_nb.info(f\"Notebook TTS Streamer: Exiting for text \\\"{text[:50]}...\\\".\")\n", + " if streamer: streamer.end()\n", + " if thread and thread.is_alive():\n", + " logger_nb.info(\"Notebook TTS Streamer: Waiting for thread in finally...\")\n", + " thread.join(timeout=2.0)\n", + " if thread.is_alive():\n", + " logger_nb.warning(\"Notebook TTS Streamer: Thread still alive after join timeout.\")\n", + " yield None, None # Signal end of stream" + ], + "metadata": { + "id": "DFYsuydk0Lxm" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 4. 
Gradio Interface Definition and Pipeline\n", + "\n", + "# Load models once when this cell is run (or re-run)\n", + "if whisper_model_nb is None or tts_model_nb is None or gemini_model_instance_nb is None: # Basic check to prevent re-loading if cell is run multiple times\n", + " load_all_resources_notebook()\n", + "\n", + "async def full_ai_pipeline_notebook(audio_input_microphone):\n", + " \"\"\"\n", + " Gradio function that processes audio input through STT, LLM (Gemini),\n", + " and yields streaming TTS audio chunks for Gradio's streaming audio output.\n", + " \"\"\"\n", + " logger_nb.info(\"Gradio Pipeline: Started.\")\n", + "\n", + " # 1. STT\n", + " if audio_input_microphone is None:\n", + " yield \"Please provide audio input.\", \"Recording not provided.\", (TARGET_SAMPLE_RATE, np.array([0.0], dtype=np.float32)) # Empty audio\n", + " return\n", + "\n", + " transcribed_text = await transcribe_audio_notebook(audio_input_microphone)\n", + " logger_nb.info(f\"Gradio Pipeline: Transcription: {transcribed_text}\")\n", + " yield transcribed_text, \"Processing with LLM...\", (TARGET_SAMPLE_RATE, np.array([0.0], dtype=np.float32)) # Update UI\n", + "\n", + " # 2. LLM (Gemini)\n", + " if transcribed_text.startswith(\"Error:\") or \"No audio provided\" in transcribed_text or \"Empty audio\" in transcribed_text:\n", + " llm_response_text = \"Cannot proceed with LLM due to STT error.\"\n", + " else:\n", + " llm_response_text = await generate_gemini_response_notebook(transcribed_text)\n", + " logger_nb.info(f\"Gradio Pipeline: LLM Response: {llm_response_text}\")\n", + " yield transcribed_text, llm_response_text, (TARGET_SAMPLE_RATE, np.array([0.0], dtype=np.float32)) # Update UI\n", + "\n", + " # 3. 
TTS Streaming\n", + " if llm_response_text.startswith(\"Error:\") or \"LLM skipped\" in llm_response_text or \"unavailable\" in llm_response_text:\n", + " logger_nb.warning(\"Gradio Pipeline: Skipping TTS due to LLM error.\")\n", + " final_llm_text_with_tts_status = f\"{llm_response_text} (TTS Skipped)\"\n", + " yield transcribed_text, final_llm_text_with_tts_status, (TARGET_SAMPLE_RATE, np.array([0.0], dtype=np.float32))\n", + " return\n", + "\n", + " tts_description = \"A clear, female voice speaking in English.\"\n", + "\n", + " # For Gradio's streaming audio output, we yield (sample_rate, chunk_np_array)\n", + " # The first yield should be (sample_rate, None) to set the rate.\n", + " # Subsequent yields are (None, chunk_np_array).\n", + " # Final yield is (None, None) to signal end.\n", + "\n", + " first_chunk = True\n", + " async for sr, audio_chunk_np in synthesize_speech_streaming_notebook(llm_response_text, tts_description):\n", + " if sr is not None and first_chunk: # First yield with sample rate\n", + " logger_nb.info(f\"Gradio Pipeline: TTS Stream - Yielding sample rate {sr}\")\n", + " yield transcribed_text, llm_response_text, (sr, None)\n", + " first_chunk = False\n", + " elif audio_chunk_np is not None: # Subsequent audio chunks\n", + " logger_nb.debug(f\"Gradio Pipeline: TTS Stream - Yielding audio chunk of shape {audio_chunk_np.shape}\")\n", + " yield transcribed_text, llm_response_text, (None, audio_chunk_np)\n", + " elif sr is None and audio_chunk_np is None and not first_chunk: # End of stream signal from generator\n", + " logger_nb.info(\"Gradio Pipeline: TTS Stream - Signalling end of stream.\")\n", + " yield transcribed_text, llm_response_text, (None, None)\n", + " break\n", + "\n", + " logger_nb.info(\"Gradio Pipeline: Finished.\")\n", + "\n", + "\n", + "# Define Gradio Interface\n", + "# Ensure this cell is run after defining all functions and loading models.\n", + "with gr.Blocks(title=\"Notebook Conversational AI\") as demo_notebook:\n", + 
" gr.Markdown(\"# Conversational AI in Jupyter Notebook\")\n", + " gr.Markdown(\"Uses AI4Bharat IndicParler-TTS (Streaming), Gemini LLM, and Whisper STT.\")\n", + "\n", + " with gr.Row():\n", + " mic_input = gr.Audio(sources=[\"microphone\"], type=\"numpy\", label=\"Speak Here\", streaming=False)\n", + " # `streaming=True` for gr.Audio input is for continuous input,\n", + " # not directly related to output streaming here. We process after recording stops.\n", + "\n", + " submit_button = gr.Button(\"Process Speech\")\n", + "\n", + " with gr.Accordion(\"Conversation Log\", open=True):\n", + " stt_output = gr.Textbox(label=\"You Said (Transcription)\", lines=2, interactive=False)\n", + " llm_output = gr.Textbox(label=\"Assistant's Response (Text)\", lines=4, interactive=False)\n", + " # For streaming audio output in Gradio, the component itself handles accumulation\n", + " tts_audio_output = gr.Audio(label=\"Assistant's Speech (Streaming)\", streaming=True, autoplay=False)\n", + "\n", + " submit_button.click(\n", + " fn=full_ai_pipeline_notebook,\n", + " inputs=[mic_input],\n", + " outputs=[stt_output, llm_output, tts_audio_output]\n", + " )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000, + "referenced_widgets": [ + "1dc105e0a29149ba8b646f0e46bba8db", + "afd89e11dd7546d4949321da8a5e27ff", + "4cf46922d8cb4faf82275b6a723fa78c", + "f50fc1856ff94bb28ab1b47050503e9c", + "0844c41065fb4d82a886cf020e7c6b58", + "9e1336e21abe47c28a08407d7fc04cae", + "9ea0bf46bac04c82b97a795a6bba829f", + "72138e774ff44ad6a9c5c1cfa0b54113", + "151749c129f84de8a35ea6a04aed7f26", + "db25ab7af40c4475acc5b815e837404e", + "000e4c425a834befb256ac2811ddd944", + "084508f9d92f4f6d9240013fdc8997ca", + "c3e31f57c1b54affba9263a51a538609", + "c3df42f2e57b47c08e182297a5e92bfd", + "85669881366d44a7b355887ebbef3c23", + "f37cf46fdb154251ab601551be42d4a8", + "a4958a323880496f84c0cda36ed0fafd", + "2795d1dfe834418c9335a65284129a4f", + "e345836890264e929b6a465d7bf20b78", 
+ "0d82a674efaa4a7a94de1c3308fe2d56", + "907e757aec2d48fcb61bd277fcd4fb71", + "57e0019906214ffb952ab6ffdefda922", + "ff0e9084f6ea47d9aa48a7cd7277f151", + "f3ea3ae08746479fb0cf4d180b175302", + "f804716c88cc47238790c01a30ba73df", + "c6ea52dffc5b4b4e8471830d0f6fd69a", + "f61266500648479388516acda6f54d39", + "bdf3fc82e04844f0aa4465d70c667d25", + "e1c9a766de204d379a9c02a5bd6d5e10", + "87bdf4a01aaa4983bd906a9c2b8be9db", + "827fcc11fc8f4a1287ea417068c338b5", + "0b83eb781952498db57f06cbdd356d85", + "08979837c80940c0a168e6e2f1ec8ccc", + "7d7f88b27323461da0a20f2c98632cf1", + "c851f10d02eb4516a9f45faaf1b72ad0", + "7fc666136dce4732941f37e5ad4bb667", + "dc48d1e2d5404b0db407abd817855ee7", + "389f8686300249e1b7d2057e4b803e76", + "7396e06208ad4bd1aeb471497856d184", + "942d523bbc14450eb4f26b096cb31dbe", + "0e78c9d3d8f943b596a1769073153877", + "f0c9de96a49e49339acd5295a27949e3", + "48fbad129de24aa7bbd24956ed093e7d", + "120bf0b214a441d9beddf5cc2d29672c", + "ddc0650fe60d47ebb0534bb08f6d35dc", + "55cd8af6e6ba4e9f9fa5d8d7526f13ff", + "789a26caac5c4ddaae5a172a12d824fe", + "79aa723e35e745ecb9fbd57a0946e24c", + "89958b4c86024acd9b0141078a05d264", + "737a64890f99408fa5daa51934df792e", + "3dc95c4ce88640c8a72440a17d75950b", + "b7e00e0de7ad46ae8228b95f8de3eeab", + "950ce1075335428d9a419c7ad662a72a", + "8e0eda96a9144ac995117007aafb7973", + "a53bffe501c344bfb888c269e6105311", + "e0d722e8eefb47bfa733d04acb7dfe26", + "0ec3130f76354e1d88e9c6cf1684dfef", + "be62cee63ea24543926adc3ef1931964", + "00ffc95bbd7a41daace497357350b9f6", + "dcf8deffa17c4949a0c520d4a2e88ba7", + "1f0ca07f78994acfaf73a21ac6024ad3", + "70e05ab4620040f39595d7f5f01c942c", + "5eb3464a5da64ee0afaf937ad9fede13", + "ab0600de0bc744c2961951c81f8f3464", + "811541ee9b314868aea0492c07e24686", + "f5f75b0f29bd40a3bc7a2f577847563f", + "9ce18a4480e54ade87cab05118ff7b6c", + "8a2a6a206b494ad3894905ee1ef36b3c", + "288d3a7309fe42ae9142c1b41af6211e", + "3224681a0e154e65905f67064caa9711", + "994961ff43444351ba3dd79f394ec752", + 
"19cbd35e15674da490dbf595931e74c8", + "78598a2a81c94a7eab378539f6219d82", + "0ecfbeae600e4495be8480be32af73db", + "f891346ff14b43179c5cc0fedbe3bcc6", + "6af2379bb3284ecf927aa0ff0c953826", + "a47f614043b84baab2763b1bde12d7bf" + ] + }, + "id": "fkIzYB790HF7", + "outputId": "81578f07-edc2-4185-a7fb-dccea98d1227" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "INFO:notebook_ai_pipeline:Notebook: Loading models. Whisper on cuda with torch.float16, TTS on cuda with torch.bfloat16\n", + "INFO:notebook_ai_pipeline:Notebook: Loading Whisper model: tiny\n", + "100%|█████████████████████████████████████| 72.1M/72.1M [00:01<00:00, 50.2MiB/s]\n", + "INFO:notebook_ai_pipeline:Notebook: Whisper model loaded successfully.\n", + "INFO:notebook_ai_pipeline:Notebook: Loading IndicParler-TTS model: ai4bharat/indic-parler-tts\n", + "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "config.json: 0%| | 0.00/7.34k [00:00 is overwritten by shared text_encoder config: T5Config {\n", + " \"_name_or_path\": \"google/flan-t5-large\",\n", + " \"architectures\": [\n", + " \"T5ForConditionalGeneration\"\n", + " ],\n", + " \"classifier_dropout\": 0.0,\n", + " \"d_ff\": 2816,\n", + " \"d_kv\": 64,\n", + " \"d_model\": 1024,\n", + " \"decoder_start_token_id\": 0,\n", + " \"dense_act_fn\": \"gelu_new\",\n", + " \"dropout_rate\": 0.1,\n", + " \"eos_token_id\": 1,\n", 
+ " \"feed_forward_proj\": \"gated-gelu\",\n", + " \"initializer_factor\": 1.0,\n", + " \"is_encoder_decoder\": true,\n", + " \"is_gated_act\": true,\n", + " \"layer_norm_epsilon\": 1e-06,\n", + " \"model_type\": \"t5\",\n", + " \"n_positions\": 512,\n", + " \"num_decoder_layers\": 24,\n", + " \"num_heads\": 16,\n", + " \"num_layers\": 24,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 0,\n", + " \"relative_attention_max_distance\": 128,\n", + " \"relative_attention_num_buckets\": 32,\n", + " \"tie_word_embeddings\": false,\n", + " \"transformers_version\": \"4.46.1\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 32128\n", + "}\n", + "\n", + "WARNING:parler_tts.modeling_parler_tts:Config of the audio_encoder: is overwritten by shared audio_encoder config: DacConfig {\n", + " \"_name_or_path\": \"ylacombe/dac_44khz\",\n", + " \"architectures\": [\n", + " \"DacModel\"\n", + " ],\n", + " \"codebook_dim\": 8,\n", + " \"codebook_loss_weight\": 1.0,\n", + " \"codebook_size\": 1024,\n", + " \"commitment_loss_weight\": 0.25,\n", + " \"decoder_hidden_size\": 1536,\n", + " \"downsampling_ratios\": [\n", + " 2,\n", + " 4,\n", + " 8,\n", + " 8\n", + " ],\n", + " \"encoder_hidden_size\": 64,\n", + " \"hidden_size\": 1024,\n", + " \"hop_length\": 512,\n", + " \"model_type\": \"dac\",\n", + " \"n_codebooks\": 9,\n", + " \"quantizer_dropout\": 0.0,\n", + " \"sampling_rate\": 44100,\n", + " \"torch_dtype\": \"float32\",\n", + " \"transformers_version\": \"4.46.1\",\n", + " \"upsampling_ratios\": [\n", + " 8,\n", + " 8,\n", + " 4,\n", + " 2\n", + " ]\n", + "}\n", + "\n", + "WARNING:parler_tts.modeling_parler_tts:Config of the decoder: is overwritten by shared decoder config: ParlerTTSDecoderConfig {\n", + " \"_name_or_path\": \"/fsx/yoach/tmp/artefacts/parler-tts-mini-v2-empty/decoder\",\n", + " \"activation_dropout\": 0.0,\n", + " \"activation_function\": \"gelu\",\n", + " \"add_cross_attention\": true,\n", + " \"architectures\": [\n", + " 
\"ParlerTTSForCausalLM\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 1025,\n", + " \"codebook_weights\": null,\n", + " \"cross_attention_implementation_strategy\": null,\n", + " \"delay_strategy\": \"delay\",\n", + " \"dropout\": 0.1,\n", + " \"eos_token_id\": 1024,\n", + " \"ffn_dim\": 4096,\n", + " \"hidden_size\": 1024,\n", + " \"initializer_factor\": 0.02,\n", + " \"is_decoder\": true,\n", + " \"layerdrop\": 0.0,\n", + " \"max_position_embeddings\": 4096,\n", + " \"model_type\": \"parler_tts_decoder\",\n", + " \"num_attention_heads\": 16,\n", + " \"num_codebooks\": 9,\n", + " \"num_cross_attention_key_value_heads\": 16,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_key_value_heads\": 16,\n", + " \"pad_token_id\": 1024,\n", + " \"rope_embeddings\": false,\n", + " \"rope_theta\": 10000.0,\n", + " \"scale_embedding\": false,\n", + " \"tie_word_embeddings\": false,\n", + " \"torch_dtype\": \"float32\",\n", + " \"transformers_version\": \"4.46.1\",\n", + " \"use_cache\": true,\n", + " \"use_fused_lm_heads\": true,\n", + " \"vocab_size\": 1088\n", + "}\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "generation_config.json: 0%| | 0.00/223 [00:00" + ], + "text/html": [ + "
" + ] + }, + "metadata": {} + } + ] + } + ] +} \ No newline at end of file