diff --git "a/BanglaGemma9b_GGUF.ipynb" "b/BanglaGemma9b_GGUF.ipynb"
new file mode 100644--- /dev/null
+++ "b/BanglaGemma9b_GGUF.ipynb"
@@ -0,0 +1,7803 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "IqM-T1RTzY6C"
+ },
+ "source": [
+ "To run this, press \"*Runtime*\" and press \"*Run all*\" on a **free** Tesla T4 Google Colab instance!\n",
+ "\n",
+ "Join Discord if you need help + ⭐ Star us on Github ⭐\n",
+ "\n",
+ "To install Unsloth on your own computer, follow the installation instructions on our Github page [here](https://github.com/unslothai/unsloth?tab=readme-ov-file#-installation-instructions).\n",
+ "\n",
+ "You will learn how to do [data prep](#Data), how to [train](#Train), how to [run the model](#Inference), & [how to save it](#Save) (eg for Llama.cpp).\n",
+ "\n",
+ "**[NEW] Try 2x faster inference in a free Colab for Llama-3.1 8b Instruct [here](https://colab.research.google.com/drive/1T-YBVfnphoVc8E2E854qF3jdia2Ll2W2?usp=sharing)**\n",
+ "\n",
+ "**[NEW] Finetuning Mistral Small 22b fits in a 16GB GPU!**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "2eSvM9zX_2d3"
+ },
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "!pip install unsloth\n",
+ "# Also get the latest nightly Unsloth!\n",
+ "!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "r2v_X2fA0Df5"
+ },
+ "source": [
+ "* We support Llama, Mistral, Phi-3, Gemma, Yi, DeepSeek, Qwen, TinyLlama, Vicuna, Open Hermes etc\n",
+ "* We support 16bit LoRA or 4bit QLoRA. Both 2x faster.\n",
+ "* `max_seq_length` can be set to anything, since we do automatic RoPE Scaling via [kaiokendev's](https://kaiokendev.github.io/til) method.\n",
+ "* [**NEW**] We make Gemma-2 9b / 27b **2x faster**! See our [Gemma-2 9b notebook](https://colab.research.google.com/drive/1vIrqH5uYDQwsJ4-OO3DErvuv4pBgVwk4?usp=sharing)\n",
+ "* [**NEW**] To finetune and auto export to Ollama, try our [Ollama notebook](https://colab.research.google.com/drive/1WZDi7APtQ9VsvOrQSSC5DDtxq159j8iZ?usp=sharing)\n",
+ "* [**NEW**] We make Mistral NeMo 12B 2x faster and fit in under 12GB of VRAM! [Mistral NeMo notebook](https://colab.research.google.com/drive/17d3U-CAIwzmbDRqbZ9NnpHxCkmXB6LZ0?usp=sharing)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 403,
+ "referenced_widgets": [
+ "51f1e28d282645a58c8b783f4f60cfc2",
+ "7ca5730f63b4420cab8b124a17aaeb27",
+ "64747677b60c489ebd3e769272533b3f",
+ "5c5c498bd046409c860372110e523c7c",
+ "ae087aee8e96402681d22d892d6cd476",
+ "f340840186b4458dbff47afe987f1f59",
+ "723b04a16f3a402589fdb9463834f3d1",
+ "e50f33072d6d454b98c6607f9e847401",
+ "3f97e7bd5fff420c80615d676e1648f0",
+ "e7f6f37bf5b6483788e50c112b7ef007",
+ "aa566decf6b4412caa2988fa623900ea",
+ "b90fe47ddb6240bd90ddd1705a9f3fc9",
+ "ca20885d9adc4815b5418073a7930f8f",
+ "95aa990d762d428d93cef2834fb86c8a",
+ "c2d228de02d14c6c8f780048b1ccc088",
+ "c56ce88b9fab4475af5fafbc7a845010",
+ "3e94165c4ab0471db8fb1fbd5b5bac0d",
+ "74c3cbb850e44a4c9eb8283080ba075e",
+ "20d356533da04942856986a33e7a99fb",
+ "3cdf1d5b878b41838f0ec2b4d877e97a",
+ "a2cde30a94d3462488bcb33693e3e274",
+ "f04ed92a356c489a9877f82b05bb330f",
+ "fe0cef5f02ca4e5e95b06356b8286fbe",
+ "6db31893e3f84043b5abc6a24bac8228",
+ "fd5eccb2370b40b58eea5c9f0d868e36",
+ "90762bb3d5fd4f4db67f3a8a11434689",
+ "004f3ec8f7a545c4bc54484dcb3022bb",
+ "a6500ce74ca54e0ca650851502b14644",
+ "de38fc3f3df348f29528d9acd6b9d981",
+ "a3c7c2459ad14e9c81d7422d7e83393f",
+ "fdd24808ed23442998104b5b28370aa6",
+ "2e6a26fb12084d5487525f5e78ab5ac8",
+ "8e082fef631b4eeab73c02e181f5690c",
+ "b57e30ad94fa4b739d32c9553f5aee29",
+ "4677d087bf6b40a3a4915ac7481f6e8d",
+ "90c9301c342846729db7e3c6dfe5b849",
+ "c7977b2008c2476596c5351012e710b6",
+ "dcb2b3f1102a44429e62828b99ed39ab",
+ "673da437f86a4371b7e3913a66de835a",
+ "358035cd9f6943aeadc4cba1964109a6",
+ "edb08520684f4f83a7094599ed55cb37",
+ "c2185ae3f8aa4f3488e0bd7257664e26",
+ "71f71606c101414bae187de7f145ea43",
+ "bf1ea3ec39db442d91f74fdcfd1c5ac3",
+ "62a6bc239405496ca1e451fbda8787f3",
+ "7af5367620b64131a6fef8c2864d0d28",
+ "d9f230d8474c40fc995c67c4f1eeb86a",
+ "596156e7bb7346c1808ed960997a5159",
+ "418ece30091b4d66aca4df6367e0bec5",
+ "b1610596162844658f4ac1893f0fdd40",
+ "87ccf938ee7641acb94c6050bb7c4b20",
+ "d1462aa795714430bfee51674a619527",
+ "cd98cb1f265448cf90adfd4fb3362b0d",
+ "780d3c81c8e2461694df4d515d381d9d",
+ "f5b0174aa23e432896d0dfe37387036b",
+ "a80caed5f8af41ca99576d9daa68c6f6",
+ "262070892253448793aba4d048f40c08",
+ "e5486d352f314f45b663a6472d6ff885",
+ "f033347d7cdb4f38a3eb3e05f546e438",
+ "d1549b76e8ff4d69b17f9a0831b43551",
+ "45d4f27475294750aff2487353c8105e",
+ "3926bab2dbad4e5fb4362ee96d6fdd67",
+ "e885fb98968949589006001c2f84a8eb",
+ "1fa73eafabb14c73aaee39354c62477f",
+ "2279b927aab74513aa1f6efb2c66c426",
+ "b44759c58b284a5a950350a2cf82c4e6"
+ ]
+ },
+ "id": "QmUBVEnvCDJv",
+ "outputId": "55acd488-9a43-4d68-8b55-0e3061ff247f"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
+ "==((====))== Unsloth 2024.10.7: Fast Gemma2 patching. Transformers = 4.44.2.\n",
+ " \\\\ /| GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.\n",
+ "O^O/ \\_/ \\ Pytorch: 2.5.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.\n",
+ "\\ / Bfloat16 = FALSE. FA [Xformers = 0.0.28.post2. FA2 = False]\n",
+ " \"-____-\" Free Apache license: http://github.com/unslothai/unsloth\n",
+ "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "51f1e28d282645a58c8b783f4f60cfc2",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "model.safetensors: 0%| | 0.00/6.13G [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b90fe47ddb6240bd90ddd1705a9f3fc9",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "generation_config.json: 0%| | 0.00/190 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "fe0cef5f02ca4e5e95b06356b8286fbe",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer_config.json: 0%| | 0.00/46.4k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b57e30ad94fa4b739d32c9553f5aee29",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer.model: 0%| | 0.00/4.24M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "62a6bc239405496ca1e451fbda8787f3",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "special_tokens_map.json: 0%| | 0.00/636 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a80caed5f8af41ca99576d9daa68c6f6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "tokenizer.json: 0%| | 0.00/17.5M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Unsloth: We fixed a gradient accumulation bug, but it seems like you don't have the latest transformers version!\n",
+ "Please update transformers, TRL and unsloth via:\n",
+ "`pip install --upgrade --no-cache-dir unsloth git+https://github.com/huggingface/transformers.git git+https://github.com/huggingface/trl.git`\n"
+ ]
+ }
+ ],
+ "source": [
+ "from unsloth import FastLanguageModel\n",
+ "import torch\n",
+ "import os\n",
+ "max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!\n",
+ "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+ "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+ "\n",
+ "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+ "    model_name = \"unsloth/gemma-2-9b\",\n",
+ "    max_seq_length = max_seq_length,\n",
+ "    dtype = dtype,\n",
+ "    load_in_4bit = load_in_4bit,\n",
+ "    # Never hardcode credentials in a notebook; set HF_TOKEN in the environment\n",
+ "    # (e.g. via Colab secrets) before running this cell.\n",
+ "    token = os.environ.get(\"HF_TOKEN\"),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "SXd9bTZd1aaL"
+ },
+ "source": [
+ "We now add LoRA adapters so we only need to update 1 to 10% of all parameters!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "6bZsfBuZDeCL",
+ "outputId": "083cf8e8-fb6f-4209-b76e-d36ac8af7cae"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Unsloth 2024.10.7 patched 42 layers with 42 QKV layers, 42 O layers and 42 MLP layers.\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Attach LoRA adapters: only the small low-rank adapter weights are trained,\n",
+ "# the 4-bit base weights stay frozen.\n",
+ "model = FastLanguageModel.get_peft_model(\n",
+ "    model,\n",
+ "    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+ "    # Apply LoRA to every attention projection (QKVO) and MLP projection:\n",
+ "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+ "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
+ "    lora_alpha = 16,\n",
+ "    lora_dropout = 0, # Supports any, but = 0 is optimized\n",
+ "    bias = \"none\",    # Supports any, but = \"none\" is optimized\n",
+ "    # [NEW] \"unsloth\" uses 30% less VRAM, fits 2x larger batch sizes!\n",
+ "    use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n",
+ "    random_state = 3407,\n",
+ "    use_rslora = False,  # We support rank stabilized LoRA\n",
+ "    loftq_config = None, # And LoftQ\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "vITh0KVJ10qX"
+ },
+ "source": [
+ "\n",
+ "### Data Prep\n",
+ "We now use the Alpaca dataset from [yahma](https://huggingface.co/datasets/yahma/alpaca-cleaned), which is a filtered version of 52K of the original [Alpaca dataset](https://crfm.stanford.edu/2023/03/13/alpaca.html). You can replace this code section with your own data prep.\n",
+ "\n",
+ "**[NOTE]** To train only on completions (ignoring the user's input) read TRL's docs [here](https://huggingface.co/docs/trl/sft_trainer#train-on-completions-only).\n",
+ "\n",
+ "**[NOTE]** Remember to add the **EOS_TOKEN** to the tokenized output!! Otherwise you'll get infinite generations!\n",
+ "\n",
+ "If you want to use the `mistral3` template for ShareGPT datasets, try our conversational [notebook](https://colab.research.google.com/drive/1XamvWYinY6FOSX9GLvnqSjjsNflxdhNc?usp=sharing).\n",
+ "\n",
+ "For text completions like novel writing, try this [notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 177,
+ "referenced_widgets": [
+ "3fddd29878ba408db098fb05db157710",
+ "b542e0276854449d9ec4bed67279d037",
+ "c3143dca63c6445fb7aa06d7d764d7a9",
+ "e5c89397eb1a42bb895a4c540db2df1c",
+ "ae243226e393499bac22c08d2b3d9570",
+ "265fecfcffb44db580c066a04b5ea37b",
+ "81b34a02e519484c965524acbe252807",
+ "9179bf6bb49f477b9d9e5eb2f8015aaa",
+ "d6dff4e305aa46fbaa9375133356378a",
+ "e2844ec735d4420391b8ed1b9a932949",
+ "d1ebb90d4a8e4656941f47d644013204",
+ "902ea19651c546de8c19414daf6a053a",
+ "4e40665bc93a414c87c089a3a0bb4008",
+ "854179dbb6854ac5bb3d7240dcc3cb0b",
+ "7b64ff6c0203472391e90ce30ed4165f",
+ "17c55c44d870452c81d902b74c8cce79",
+ "2b0e9c589f9848f2aeec3a97dacf2dc5",
+ "a62221e4825a48caa9ef7f906fd43748",
+ "f9ee25b240f74c21adfa24ce54659efd",
+ "5b795861641e488fb6a47f88860a9ccd",
+ "1f6d972a105a46438f51566f5a24cf84",
+ "7c86fc2b3b0d4708bc2e55801894e37e",
+ "f8a68ea30ebd4251931cf4d7b5be62a9",
+ "f2f000db73f34f468b1c549c8422743a",
+ "1ce1838f9eb34f74a615ad82cab78274",
+ "0afe6c4f57a643159bc51aa36f099f61",
+ "cc0bc8033830406a942b67c4cbbc5d28",
+ "0bf1d81abe6f4a3493c29810857fc8dd",
+ "f31165c434c7427fb4d26ea2af0feda9",
+ "0a4506749df0400480090d3127285ed6",
+ "641f767ab42e4190bdd3d0abfe851301",
+ "e0211c2ff4fb46aaa85bf681c004a04c",
+ "a593571b38dd4fd696e3d4778d2a9f03",
+ "8c398b847644448e9d75135f3200f156",
+ "63f16ab5e462454d88927b21df4427aa",
+ "afb69e965b33472a8de2739c1cdff1e9",
+ "5455311519604e9993d537555f372a0b",
+ "a398a05038394c7b853237d751a0bbdd",
+ "68b2f241810746b7973e2b94ba4c0122",
+ "e4fd6646d36e4bce817e1e28dd99dc51",
+ "c6ede16c623b49c7b4916e5ce4799125",
+ "9a6fe19da592481bbe762912bc45bbed",
+ "9439a307ffdd4705b7a1affb46d0fb71",
+ "6a62779572c6495fb2594270742b6e58",
+ "871336e6e4134fb28bd3b2fa606059cc",
+ "0a8ca1638fe248d0856fd4c385f9a70b",
+ "93a7d487abb0476383c7e57a3da1f851",
+ "372011973a8e46a2888bf4299b042aa0",
+ "2d5c3406ed7e4e03af04711751debd71",
+ "afc36d622583404b942709b58027ffd2",
+ "8b74367efcea4a50be0c0b205dc1dd47",
+ "97d31d1c17d248f3b2dffe59143a9797",
+ "c7fbd851c32746d3a2a0e69b411b2121",
+ "cc0cb8826ef3428389ed6dfff6717d95",
+ "57f60ec03bf14970b020302f317ea97a"
+ ]
+ },
+ "id": "LjY75GoYUCB8",
+ "outputId": "062127be-de34-40cb-b112-d799a1873d64"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3fddd29878ba408db098fb05db157710",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "README.md: 0%| | 0.00/450 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "902ea19651c546de8c19414daf6a053a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "train-00000-of-00002.parquet: 0%| | 0.00/158M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f8a68ea30ebd4251931cf4d7b5be62a9",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "train-00001-of-00002.parquet: 0%| | 0.00/144M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8c398b847644448e9d75135f3200f156",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Generating train split: 0%| | 0/172026 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "871336e6e4134fb28bd3b2fa606059cc",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/172026 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "alpaca_prompt = \"\"\"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
+ "\n",
+ "### Instruction:\n",
+ "{}\n",
+ "\n",
+ "### Input:\n",
+ "{}\n",
+ "\n",
+ "### Response:\n",
+ "{}\"\"\"\n",
+ "\n",
+ "EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN\n",
+ "def formatting_prompts_func(examples):\n",
+ "    \"\"\"Format a batch of instruction/input/output triples into Alpaca-style prompts.\n",
+ "\n",
+ "    Returns a dict with a single \"text\" column, as expected by Dataset.map(batched=True).\n",
+ "    \"\"\"\n",
+ "    instructions = examples[\"instruction\"]\n",
+ "    inputs = examples[\"input\"]\n",
+ "    outputs = examples[\"output\"]\n",
+ "    texts = []\n",
+ "    # input_text, not input: avoid shadowing the Python builtin input().\n",
+ "    for instruction, input_text, output in zip(instructions, inputs, outputs):\n",
+ "        # Must add EOS_TOKEN, otherwise your generation will go on forever!\n",
+ "        text = alpaca_prompt.format(instruction, input_text, output) + EOS_TOKEN\n",
+ "        texts.append(text)\n",
+ "    return { \"text\" : texts, }\n",
+ "\n",
+ "from datasets import load_dataset\n",
+ "dataset = load_dataset(\"BanglaLLM/bangla-alpaca-orca\", split = \"train\")\n",
+ "dataset = dataset.map(formatting_prompts_func, batched = True,)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "idAEIeSQ3xdS"
+ },
+ "source": [
+ "\n",
+ "### Train the model\n",
+ "Now let's use Huggingface TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). We do 200 steps to speed things up, but you can set `num_train_epochs=1` for a full run and set `max_steps=None`. We also support TRL's `DPOTrainer`!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 66,
+ "referenced_widgets": [
+ "dd252c6d1d59418aa0f5b7c469351dee",
+ "1949ce4af8c94c0ba7e3ac9d8df6b332",
+ "e096cf56562a4e7281681be173d51b09",
+ "89202b77af2a47b196cc8723c846e891",
+ "3cd94a9a96894e51a652076762478155",
+ "bac3b0bec13d492b86a5c65a0bb5b96f",
+ "8adaf5cc36a3456094e077eca79c8b7e",
+ "98cc2108542f443eb242a45fc671afef",
+ "90620de3bb6a467d92b92622e5dfb0c5",
+ "6f7ed19f4b77411c88223d59fa50d13a",
+ "9d319b570bd64f0e9176817d577bc020"
+ ]
+ },
+ "id": "95_Nn-89DhsL",
+ "outputId": "d644b905-6f99-42e2-8539-1bf9173a04bd"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "dd252c6d1d59418aa0f5b7c469351dee",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map (num_proc=2): 0%| | 0/172026 [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "max_steps is given, it will override any value given in num_train_epochs\n"
+ ]
+ }
+ ],
+ "source": [
+ "from trl import SFTTrainer\n",
+ "from transformers import TrainingArguments\n",
+ "from unsloth import is_bfloat16_supported\n",
+ "\n",
+ "# Supervised fine-tuning: tokenizes the \"text\" column produced above and trains\n",
+ "# the LoRA adapters for max_steps optimizer steps.\n",
+ "trainer = SFTTrainer(\n",
+ "    model = model,\n",
+ "    tokenizer = tokenizer,\n",
+ "    train_dataset = dataset,\n",
+ "    dataset_text_field = \"text\",\n",
+ "    max_seq_length = max_seq_length,\n",
+ "    dataset_num_proc = 2,\n",
+ "    packing = False, # Can make training 5x faster for short sequences.\n",
+ "    args = TrainingArguments(\n",
+ "        per_device_train_batch_size = 1,\n",
+ "        gradient_accumulation_steps = 4, # effective batch size = 1 * 4 = 4\n",
+ "        warmup_steps = 5,\n",
+ "        # num_train_epochs = 1, # Set this for 1 full training run.\n",
+ "        max_steps = 200, # overrides num_train_epochs when set\n",
+ "        learning_rate = 2e-4,\n",
+ "        # Prefer bf16 on GPUs that support it (Ampere+); fall back to fp16 (e.g. T4).\n",
+ "        fp16 = not is_bfloat16_supported(),\n",
+ "        bf16 = is_bfloat16_supported(),\n",
+ "        logging_steps = 1,\n",
+ "        optim = \"adamw_8bit\",\n",
+ "        weight_decay = 0.01,\n",
+ "        lr_scheduler_type = \"linear\",\n",
+ "        seed = 3407,\n",
+ "        output_dir = \"outputs\",\n",
+ "        report_to = \"none\", # Use this for WandB etc\n",
+ "    ),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "2ejIt2xSNKKp",
+ "outputId": "d558ea2b-76a1-46ba-b01a-3206deae32f4"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GPU = Tesla T4. Max memory = 14.748 GB.\n",
+ "6.576 GB of memory reserved.\n"
+ ]
+ }
+ ],
+ "source": [
+ "#@title Show current memory stats\n",
+ "gpu_stats = torch.cuda.get_device_properties(0)\n",
+ "start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
+ "max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)\n",
+ "print(f\"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.\")\n",
+ "print(f\"{start_gpu_memory} GB of memory reserved.\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "yqxqAZ7KJ4oL",
+ "outputId": "7a7a385a-b90b-4e69-83b7-0f19d2f978ee"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n",
+ "`pip install --upgrade --no-cache-dir unsloth git+https://github.com/huggingface/transformers.git git+https://github.com/huggingface/trl.git`\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+ " \\\\ /| Num examples = 172,026 | Num Epochs = 1\n",
+ "O^O/ \\_/ \\ Batch size per device = 1 | Gradient Accumulation steps = 4\n",
+ "\\ / Total batch size = 4 | Total steps = 200\n",
+ " \"-____-\" Number of trainable parameters = 54,018,048\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ "
\n",
+ " [200/200 35:17, Epoch 0/1]\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Step | \n",
+ " Training Loss | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 1.701200 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1.496500 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1.836600 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1.300100 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 1.385700 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 1.406400 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 1.361900 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 1.255600 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 1.073400 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 1.055400 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 0.924000 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 0.660100 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 1.054300 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 0.642100 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 1.163400 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 1.049700 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 1.200700 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 0.638300 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 0.920000 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 0.508700 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 1.129800 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 0.805900 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 0.588500 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " 0.876600 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " 0.920100 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " 1.080800 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " 1.081600 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " 0.944700 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " 0.940600 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " 0.942900 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " 0.718600 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " 0.577500 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " 0.764700 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " 1.111100 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " 1.084600 | \n",
+ "
\n",
+ " \n",
+ " 36 | \n",
+ " 0.978700 | \n",
+ "
\n",
+ " \n",
+ " 37 | \n",
+ " 0.765000 | \n",
+ "
\n",
+ " \n",
+ " 38 | \n",
+ " 0.895000 | \n",
+ "
\n",
+ " \n",
+ " 39 | \n",
+ " 0.792800 | \n",
+ "
\n",
+ " \n",
+ " 40 | \n",
+ " 0.727800 | \n",
+ "
\n",
+ " \n",
+ " 41 | \n",
+ " 0.849400 | \n",
+ "
\n",
+ " \n",
+ " 42 | \n",
+ " 0.775200 | \n",
+ "
\n",
+ " \n",
+ " 43 | \n",
+ " 0.710300 | \n",
+ "
\n",
+ " \n",
+ " 44 | \n",
+ " 1.014700 | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " 1.042400 | \n",
+ "
\n",
+ " \n",
+ " 46 | \n",
+ " 1.225500 | \n",
+ "
\n",
+ " \n",
+ " 47 | \n",
+ " 0.571200 | \n",
+ "
\n",
+ " \n",
+ " 48 | \n",
+ " 1.098000 | \n",
+ "
\n",
+ " \n",
+ " 49 | \n",
+ " 0.872600 | \n",
+ "
\n",
+ " \n",
+ " 50 | \n",
+ " 0.741700 | \n",
+ "
\n",
+ " \n",
+ " 51 | \n",
+ " 0.979600 | \n",
+ "
\n",
+ " \n",
+ " 52 | \n",
+ " 0.999200 | \n",
+ "
\n",
+ " \n",
+ " 53 | \n",
+ " 0.556200 | \n",
+ "
\n",
+ " \n",
+ " 54 | \n",
+ " 0.660700 | \n",
+ "
\n",
+ " \n",
+ " 55 | \n",
+ " 0.784900 | \n",
+ "
\n",
+ " \n",
+ " 56 | \n",
+ " 0.940400 | \n",
+ "
\n",
+ " \n",
+ " 57 | \n",
+ " 0.701900 | \n",
+ "
\n",
+ " \n",
+ " 58 | \n",
+ " 0.968700 | \n",
+ "
\n",
+ " \n",
+ " 59 | \n",
+ " 0.682900 | \n",
+ "
\n",
+ " \n",
+ " 60 | \n",
+ " 0.840300 | \n",
+ "
\n",
+ " \n",
+ " 61 | \n",
+ " 0.526800 | \n",
+ "
\n",
+ " \n",
+ " 62 | \n",
+ " 0.961600 | \n",
+ "
\n",
+ " \n",
+ " 63 | \n",
+ " 0.754700 | \n",
+ "
\n",
+ " \n",
+ " 64 | \n",
+ " 1.092100 | \n",
+ "
\n",
+ " \n",
+ " 65 | \n",
+ " 0.929000 | \n",
+ "
\n",
+ " \n",
+ " 66 | \n",
+ " 0.804800 | \n",
+ "
\n",
+ " \n",
+ " 67 | \n",
+ " 1.272900 | \n",
+ "
\n",
+ " \n",
+ " 68 | \n",
+ " 1.062800 | \n",
+ "
\n",
+ " \n",
+ " 69 | \n",
+ " 1.383400 | \n",
+ "
\n",
+ " \n",
+ " 70 | \n",
+ " 1.233700 | \n",
+ "
\n",
+ " \n",
+ " 71 | \n",
+ " 1.016000 | \n",
+ "
\n",
+ " \n",
+ " 72 | \n",
+ " 0.744300 | \n",
+ "
\n",
+ " \n",
+ " 73 | \n",
+ " 0.800700 | \n",
+ "
\n",
+ " \n",
+ " 74 | \n",
+ " 1.008500 | \n",
+ "
\n",
+ " \n",
+ " 75 | \n",
+ " 0.906300 | \n",
+ "
\n",
+ " \n",
+ " 76 | \n",
+ " 0.766700 | \n",
+ "
\n",
+ " \n",
+ " 77 | \n",
+ " 1.090200 | \n",
+ "
\n",
+ " \n",
+ " 78 | \n",
+ " 0.807400 | \n",
+ "
\n",
+ " \n",
+ " 79 | \n",
+ " 0.550700 | \n",
+ "
\n",
+ " \n",
+ " 80 | \n",
+ " 0.553800 | \n",
+ "
\n",
+ " \n",
+ " 81 | \n",
+ " 0.999900 | \n",
+ "
\n",
+ " \n",
+ " 82 | \n",
+ " 1.292100 | \n",
+ "
\n",
+ " \n",
+ " 83 | \n",
+ " 1.061900 | \n",
+ "
\n",
+ " \n",
+ " 84 | \n",
+ " 1.047400 | \n",
+ "
\n",
+ " \n",
+ " 85 | \n",
+ " 0.734200 | \n",
+ "
\n",
+ " \n",
+ " 86 | \n",
+ " 0.391800 | \n",
+ "
\n",
+ " \n",
+ " 87 | \n",
+ " 0.702700 | \n",
+ "
\n",
+ " \n",
+ " 88 | \n",
+ " 0.687700 | \n",
+ "
\n",
+ " \n",
+ " 89 | \n",
+ " 0.822200 | \n",
+ "
\n",
+ " \n",
+ " 90 | \n",
+ " 0.705000 | \n",
+ "
\n",
+ " \n",
+ " 91 | \n",
+ " 0.763900 | \n",
+ "
\n",
+ " \n",
+ " 92 | \n",
+ " 0.236300 | \n",
+ "
\n",
+ " \n",
+ " 93 | \n",
+ " 0.749500 | \n",
+ "
\n",
+ " \n",
+ " 94 | \n",
+ " 0.445200 | \n",
+ "
\n",
+ " \n",
+ " 95 | \n",
+ " 0.500800 | \n",
+ "
\n",
+ " \n",
+ " 96 | \n",
+ " 0.877400 | \n",
+ "
\n",
+ " \n",
+ " 97 | \n",
+ " 0.884400 | \n",
+ "
\n",
+ " \n",
+ " 98 | \n",
+ " 0.887000 | \n",
+ "
\n",
+ " \n",
+ " 99 | \n",
+ " 0.889900 | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " 0.895900 | \n",
+ "
\n",
+ " \n",
+ " 101 | \n",
+ " 1.042100 | \n",
+ "
\n",
+ " \n",
+ " 102 | \n",
+ " 1.052900 | \n",
+ "
\n",
+ " \n",
+ " 103 | \n",
+ " 0.953700 | \n",
+ "
\n",
+ " \n",
+ " 104 | \n",
+ " 0.752700 | \n",
+ "
\n",
+ " \n",
+ " 105 | \n",
+ " 0.921000 | \n",
+ "
\n",
+ " \n",
+ " 106 | \n",
+ " 0.897100 | \n",
+ "
\n",
+ " \n",
+ " 107 | \n",
+ " 0.784500 | \n",
+ "
\n",
+ " \n",
+ " 108 | \n",
+ " 0.712600 | \n",
+ "
\n",
+ " \n",
+ " 109 | \n",
+ " 0.716700 | \n",
+ "
\n",
+ " \n",
+ " 110 | \n",
+ " 1.199900 | \n",
+ "
\n",
+ " \n",
+ " 111 | \n",
+ " 0.844600 | \n",
+ "
\n",
+ " \n",
+ " 112 | \n",
+ " 0.810800 | \n",
+ "
\n",
+ " \n",
+ " 113 | \n",
+ " 0.704900 | \n",
+ "
\n",
+ " \n",
+ " 114 | \n",
+ " 1.119300 | \n",
+ "
\n",
+ " \n",
+ " 115 | \n",
+ " 0.408600 | \n",
+ "
\n",
+ " \n",
+ " 116 | \n",
+ " 0.431300 | \n",
+ "
\n",
+ " \n",
+ " 117 | \n",
+ " 1.093200 | \n",
+ "
\n",
+ " \n",
+ " 118 | \n",
+ " 0.649600 | \n",
+ "
\n",
+ " \n",
+ " 119 | \n",
+ " 0.685300 | \n",
+ "
\n",
+ " \n",
+ " 120 | \n",
+ " 1.326500 | \n",
+ "
\n",
+ " \n",
+ " 121 | \n",
+ " 0.722300 | \n",
+ "
\n",
+ " \n",
+ " 122 | \n",
+ " 0.580700 | \n",
+ "
\n",
+ " \n",
+ " 123 | \n",
+ " 0.890100 | \n",
+ "
\n",
+ " \n",
+ " 124 | \n",
+ " 0.722200 | \n",
+ "
\n",
+ " \n",
+ " 125 | \n",
+ " 0.901900 | \n",
+ "
\n",
+ " \n",
+ " 126 | \n",
+ " 0.383200 | \n",
+ "
\n",
+ " \n",
+ " 127 | \n",
+ " 0.765700 | \n",
+ "
\n",
+ " \n",
+ " 128 | \n",
+ " 1.099800 | \n",
+ "
\n",
+ " \n",
+ " 129 | \n",
+ " 1.230900 | \n",
+ "
\n",
+ " \n",
+ " 130 | \n",
+ " 1.045700 | \n",
+ "
\n",
+ " \n",
+ " 131 | \n",
+ " 0.643400 | \n",
+ "
\n",
+ " \n",
+ " 132 | \n",
+ " 1.044200 | \n",
+ "
\n",
+ " \n",
+ " 133 | \n",
+ " 0.984500 | \n",
+ "
\n",
+ " \n",
+ " 134 | \n",
+ " 1.070600 | \n",
+ "
\n",
+ " \n",
+ " 135 | \n",
+ " 1.073700 | \n",
+ "
\n",
+ " \n",
+ " 136 | \n",
+ " 0.388500 | \n",
+ "
\n",
+ " \n",
+ " 137 | \n",
+ " 0.962500 | \n",
+ "
\n",
+ " \n",
+ " 138 | \n",
+ " 1.048300 | \n",
+ "
\n",
+ " \n",
+ " 139 | \n",
+ " 0.661400 | \n",
+ "
\n",
+ " \n",
+ " 140 | \n",
+ " 0.906000 | \n",
+ "
\n",
+ " \n",
+ " 141 | \n",
+ " 0.725700 | \n",
+ "
\n",
+ " \n",
+ " 142 | \n",
+ " 0.888300 | \n",
+ "
\n",
+ " \n",
+ " 143 | \n",
+ " 0.254600 | \n",
+ "
\n",
+ " \n",
+ " 144 | \n",
+ " 0.824500 | \n",
+ "
\n",
+ " \n",
+ " 145 | \n",
+ " 0.814300 | \n",
+ "
\n",
+ " \n",
+ " 146 | \n",
+ " 0.965900 | \n",
+ "
\n",
+ " \n",
+ " 147 | \n",
+ " 0.719700 | \n",
+ "
\n",
+ " \n",
+ " 148 | \n",
+ " 1.137200 | \n",
+ "
\n",
+ " \n",
+ " 149 | \n",
+ " 0.745100 | \n",
+ "
\n",
+ " \n",
+ " 150 | \n",
+ " 0.972400 | \n",
+ "
\n",
+ " \n",
+ " 151 | \n",
+ " 0.530900 | \n",
+ "
\n",
+ " \n",
+ " 152 | \n",
+ " 0.816800 | \n",
+ "
\n",
+ " \n",
+ " 153 | \n",
+ " 0.740300 | \n",
+ "
\n",
+ " \n",
+ " 154 | \n",
+ " 0.808000 | \n",
+ "
\n",
+ " \n",
+ " 155 | \n",
+ " 1.164000 | \n",
+ "
\n",
+ " \n",
+ " 156 | \n",
+ " 0.523100 | \n",
+ "
\n",
+ " \n",
+ " 157 | \n",
+ " 1.065800 | \n",
+ "
\n",
+ " \n",
+ " 158 | \n",
+ " 1.191600 | \n",
+ "
\n",
+ " \n",
+ " 159 | \n",
+ " 0.865600 | \n",
+ "
\n",
+ " \n",
+ " 160 | \n",
+ " 0.839400 | \n",
+ "
\n",
+ " \n",
+ " 161 | \n",
+ " 0.975000 | \n",
+ "
\n",
+ " \n",
+ " 162 | \n",
+ " 0.614300 | \n",
+ "
\n",
+ " \n",
+ " 163 | \n",
+ " 1.052100 | \n",
+ "
\n",
+ " \n",
+ " 164 | \n",
+ " 0.889800 | \n",
+ "
\n",
+ " \n",
+ " 165 | \n",
+ " 0.402000 | \n",
+ "
\n",
+ " \n",
+ " 166 | \n",
+ " 0.633400 | \n",
+ "
\n",
+ " \n",
+ " 167 | \n",
+ " 0.800300 | \n",
+ "
\n",
+ " \n",
+ " 168 | \n",
+ " 0.973800 | \n",
+ "
\n",
+ " \n",
+ " 169 | \n",
+ " 0.466100 | \n",
+ "
\n",
+ " \n",
+ " 170 | \n",
+ " 0.877100 | \n",
+ "
\n",
+ " \n",
+ " 171 | \n",
+ " 0.752700 | \n",
+ "
\n",
+ " \n",
+ " 172 | \n",
+ " 1.166300 | \n",
+ "
\n",
+ " \n",
+ " 173 | \n",
+ " 0.919500 | \n",
+ "
\n",
+ " \n",
+ " 174 | \n",
+ " 0.701400 | \n",
+ "
\n",
+ " \n",
+ " 175 | \n",
+ " 0.902800 | \n",
+ "
\n",
+ " \n",
+ " 176 | \n",
+ " 0.895900 | \n",
+ "
\n",
+ " \n",
+ " 177 | \n",
+ " 0.808900 | \n",
+ "
\n",
+ " \n",
+ " 178 | \n",
+ " 0.631700 | \n",
+ "
\n",
+ " \n",
+ " 179 | \n",
+ " 0.588300 | \n",
+ "
\n",
+ " \n",
+ " 180 | \n",
+ " 0.901700 | \n",
+ "
\n",
+ " \n",
+ " 181 | \n",
+ " 1.015800 | \n",
+ "
\n",
+ " \n",
+ " 182 | \n",
+ " 0.893900 | \n",
+ "
\n",
+ " \n",
+ " 183 | \n",
+ " 0.726100 | \n",
+ "
\n",
+ " \n",
+ " 184 | \n",
+ " 0.814900 | \n",
+ "
\n",
+ " \n",
+ " 185 | \n",
+ " 0.589000 | \n",
+ "
\n",
+ " \n",
+ " 186 | \n",
+ " 0.728600 | \n",
+ "
\n",
+ " \n",
+ " 187 | \n",
+ " 0.884300 | \n",
+ "
\n",
+ " \n",
+ " 188 | \n",
+ " 0.791000 | \n",
+ "
\n",
+ " \n",
+ " 189 | \n",
+ " 0.917300 | \n",
+ "
\n",
+ " \n",
+ " 190 | \n",
+ " 0.954500 | \n",
+ "
\n",
+ " \n",
+ " 191 | \n",
+ " 1.196100 | \n",
+ "
\n",
+ " \n",
+ " 192 | \n",
+ " 0.870400 | \n",
+ "
\n",
+ " \n",
+ " 193 | \n",
+ " 0.949800 | \n",
+ "
\n",
+ " \n",
+ " 194 | \n",
+ " 0.982200 | \n",
+ "
\n",
+ " \n",
+ " 195 | \n",
+ " 0.965000 | \n",
+ "
\n",
+ " \n",
+ " 196 | \n",
+ " 1.317000 | \n",
+ "
\n",
+ " \n",
+ " 197 | \n",
+ " 0.497100 | \n",
+ "
\n",
+ " \n",
+ " 198 | \n",
+ " 0.655100 | \n",
+ "
\n",
+ " \n",
+ " 199 | \n",
+ " 1.060100 | \n",
+ "
\n",
+ " \n",
+ " 200 | \n",
+ " 0.994400 | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "trainer_stats = trainer.train()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "pCqnaKmlO1U9",
+ "outputId": "fb7f67f1-f97e-4cef-b87f-ac93b53950c4"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "2194.7755 seconds used for training.\n",
+ "36.58 minutes used for training.\n",
+ "Peak reserved memory = 10.35 GB.\n",
+ "Peak reserved memory for training = 3.774 GB.\n",
+ "Peak reserved memory % of max memory = 70.179 %.\n",
+ "Peak reserved memory for training % of max memory = 25.59 %.\n"
+ ]
+ }
+ ],
+ "source": [
+ "#@title Show final memory and time stats\n",
+ "used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)\n",
+ "used_memory_for_lora = round(used_memory - start_gpu_memory, 3)\n",
+ "used_percentage = round(used_memory /max_memory*100, 3)\n",
+ "lora_percentage = round(used_memory_for_lora/max_memory*100, 3)\n",
+ "print(f\"{trainer_stats.metrics['train_runtime']} seconds used for training.\")\n",
+ "print(f\"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.\")\n",
+ "print(f\"Peak reserved memory = {used_memory} GB.\")\n",
+ "print(f\"Peak reserved memory for training = {used_memory_for_lora} GB.\")\n",
+ "print(f\"Peak reserved memory % of max memory = {used_percentage} %.\")\n",
+ "print(f\"Peak reserved memory for training % of max memory = {lora_percentage} %.\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ekOmTR1hSNcr"
+ },
+ "source": [
+ "\n",
+ "### Inference\n",
+ "Let's run the model! You can change the instruction and input - leave the output blank!\n",
+ "\n",
+ "**[NEW] Try 2x faster inference in a free Colab for Llama-3.1 8b Instruct [here](https://colab.research.google.com/drive/1T-YBVfnphoVc8E2E854qF3jdia2Ll2W2?usp=sharing)**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "kR3gIAX-SM2q",
+ "outputId": "1c987413-c7c4-4c8d-a807-c3793139f58e"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nপ্যারিসের একটি বিখ্যাত লম্বা টাওয়ার কি?\\n\\n### Input:\\n\\n\\n### Response:\\nপ্যারিসের একটি বিখ্যাত লম্বা টাওয়ার হল ইয়ারা টাওয়ার। এটি প্যারিসের 16তম জেলায় অবস্থিত এবং এটি প্যারিসের সবচে']"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# alpaca_prompt = Copied from above\n",
+ "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
+ "inputs = tokenizer(\n",
+ "[\n",
+ " alpaca_prompt.format(\n",
+ " \"প্যারিসের একটি বিখ্যাত লম্বা টাওয়ার কি?\", # instruction\n",
+ " \"\", # input\n",
+ " \"\", # output - leave this blank for generation!\n",
+ " )\n",
+ "], return_tensors = \"pt\").to(\"cuda\")\n",
+ "\n",
+ "outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)\n",
+ "tokenizer.batch_decode(outputs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "CrSvZObor0lY"
+ },
+ "source": [
+ " You can also use a `TextStreamer` for continuous inference - so you can see the generation token by token, instead of waiting the whole time!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "e2pEuRb1r2Vg",
+ "outputId": "6a62a532-f947-4063-d9be-fe381711bfa4"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
+ "\n",
+ "### Instruction:\n",
+ "ক্রমাগত ফিবোনাচি সিকোয়েন্স করবেন\n",
+ "\n",
+ "### Input:\n",
+ "1, 1, 2, 3, 5, 8\n",
+ "\n",
+ "### Response:\n",
+ "ক্রমাগত ফিবোনাচি সিকোয়েন্স হল একটি সিকোয়েন্স যা প্রতিটি সংখ্যাটি এর আগের দুটি সংখ্যার যোগফলের সমান। প্রদত্ত সিকোয়েন্সে, প্রথম দুটি সংখ্যা 1 এবং 1। পরবর্তী সংখ্যাটি 1 এবং 1 এর যোগফল, যা 2। তৃতীয় সংখ্যাটি 1 এবং 2 এর যোগফল, যা \n"
+ ]
+ }
+ ],
+ "source": [
+ "# alpaca_prompt = Copied from above\n",
+ "FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
+ "inputs = tokenizer(\n",
+ "[\n",
+ " alpaca_prompt.format(\n",
+ " \"ক্রমাগত ফিবোনাচি সিকোয়েন্স করবেন\", # instruction\n",
+ " \"1, 1, 2, 3, 5, 8\", # input\n",
+ " \"\", # output - leave this blank for generation!\n",
+ " )\n",
+ "], return_tensors = \"pt\").to(\"cuda\")\n",
+ "\n",
+ "from transformers import TextStreamer\n",
+ "text_streamer = TextStreamer(tokenizer)\n",
+ "_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "uMuVrWbjAzhc"
+ },
+ "source": [
+ "\n",
+ "### Saving, loading finetuned models\n",
+ "To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.\n",
+ "\n",
+ "**[NOTE]** This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "upcOlWe7A1vc"
+ },
+ "outputs": [],
+ "source": [
+ "# # model.save_pretrained(\"BanglaGemma9b_GGUF\") # Local saving\n",
+ "# # tokenizer.save_pretrained(\"BanglaGemma9b_GGUF\")\n",
+ "# model.push_to_hub(\"vaugheu/BanglaGemma9b_GGUF\", token = \"hf_\") # Online saving\n",
+ "# tokenizer.push_to_hub(\"vaugheu/BanglaGemma9b_GGUF\", token = \"hf_\") # Online saving"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "AEEcJ4qfC7Lp"
+ },
+ "source": [
+ "Now if you want to load the LoRA adapters we just saved for inference, set `False` to `True`:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "MKX_XKs_BNZR",
+ "outputId": "77981de7-6c2c-46ea-e978-64d59285a969"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n",
+ "\n",
+ "### Instruction:\n",
+ "প্যারিসের একটি বিখ্যাত লম্বা টাওয়ার কি?\n",
+ "\n",
+ "### Input:\n",
+ "\n",
+ "\n",
+ "### Response:\n",
+ "প্যারিসের একটি বিখ্যাত লম্বা টাওয়ার হল ইয়ারা টাওয়ার। এটি প্যারিসের 16তম জেলায় অবস্থিত এবং এটি প্যারিসের সবচেয়ে উঁচু ভবন। এটি 1973 সালে নির্মিত হয়েছিল এবং এটি 187 মিটার (614 ফুট) উঁচু।\n"
+ ]
+ }
+ ],
+ "source": [
+ "if False:\n",
+ " from unsloth import FastLanguageModel\n",
+ " model, tokenizer = FastLanguageModel.from_pretrained(\n",
+ " model_name = \"BanglaGemma9b_GGUF\", # YOUR MODEL YOU USED FOR TRAINING\n",
+ " max_seq_length = max_seq_length,\n",
+ " dtype = dtype,\n",
+ " load_in_4bit = load_in_4bit,\n",
+ " )\n",
+ " FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
+ "\n",
+ "# alpaca_prompt = You MUST copy from above!\n",
+ "\n",
+ "inputs = tokenizer(\n",
+ "[\n",
+ " alpaca_prompt.format(\n",
+ " \"প্যারিসের একটি বিখ্যাত লম্বা টাওয়ার কি?\", # instruction\n",
+ " \"\", # input\n",
+ " \"\", # output - leave this blank for generation!\n",
+ " )\n",
+ "], return_tensors = \"pt\").to(\"cuda\")\n",
+ "\n",
+ "from transformers import TextStreamer\n",
+ "text_streamer = TextStreamer(tokenizer)\n",
+ "_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "QQMjaNrjsU5_"
+ },
+ "source": [
+ "You can also use Hugging Face's `AutoModelForPeftCausalLM`. Only use this if you do not have `unsloth` installed. It can be hopelessly slow, since `4bit` model downloading is not supported, and Unsloth's **inference is 2x faster**."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "yFfaXG0WsQuE"
+ },
+ "outputs": [],
+ "source": [
+ "if False:\n",
+ " # I highly do NOT suggest - use Unsloth if possible\n",
+ " from peft import AutoPeftModelForCausalLM\n",
+ " from transformers import AutoTokenizer\n",
+ " model = AutoPeftModelForCausalLM.from_pretrained(\n",
+ " \"BanglaGemma9b_GGUF\", # YOUR MODEL YOU USED FOR TRAINING\n",
+ " load_in_4bit = load_in_4bit,\n",
+ " )\n",
+ " tokenizer = AutoTokenizer.from_pretrained(\"BanglaGemma9b_GGUF\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "f422JgM9sdVT"
+ },
+ "source": [
+ "### Saving to float16 for VLLM\n",
+ "\n",
+ "We also support saving to `float16` directly. Select `merged_16bit` for float16 or `merged_4bit` for int4. We also allow `lora` adapters as a fallback. Use `push_to_hub_merged` to upload to your Hugging Face account! You can go to https://huggingface.co/settings/tokens for your personal tokens."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "iHjt_SMYsd3P"
+ },
+ "outputs": [],
+ "source": [
+ "# Merge to 16bit\n",
+ "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n",
+ "if False: model.push_to_hub_merged(\"vaugheu/BanglaGemma9b_GGUF\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n",
+ "\n",
+ "# Merge to 4bit\n",
+ "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n",
+ "if False: model.push_to_hub_merged(\"vaugheu/BanglaGemma9b_GGUF\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n",
+ "\n",
+ "# Just LoRA adapters\n",
+ "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"lora\",)\n",
+ "if False: model.push_to_hub_merged(\"vaugheu/BanglaGemma9b_GGUF\", tokenizer, save_method = \"lora\", token = \"\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "TCv4vXHd61i7"
+ },
+ "source": [
+ "### GGUF / llama.cpp Conversion\n",
+ "To save to `GGUF` / `llama.cpp`, we support it natively now! We clone `llama.cpp` and we default save it to `q8_0`. We allow all methods like `q4_k_m`. Use `save_pretrained_gguf` for local saving and `push_to_hub_gguf` for uploading to HF.\n",
+ "\n",
+ "Some supported quant methods (full list on our [Wiki page](https://github.com/unslothai/unsloth/wiki#gguf-quantization-options)):\n",
+ "* `q8_0` - Fast conversion. High resource use, but generally acceptable.\n",
+ "* `q4_k_m` - Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q4_K.\n",
+ "* `q5_k_m` - Recommended. Uses Q6_K for half of the attention.wv and feed_forward.w2 tensors, else Q5_K.\n",
+ "\n",
+ "[**NEW**] To finetune and auto export to Ollama, try our [Ollama notebook](https://colab.research.google.com/drive/1WZDi7APtQ9VsvOrQSSC5DDtxq159j8iZ?usp=sharing)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000,
+ "referenced_widgets": [
+ "57125af4f34d49d0b58888ad9e21415d",
+ "bbf22f44362b4805a884fc3d8b2d9d17",
+ "65d84eea4ef64e1ab9267c5f8fc25600",
+ "58fdbca58f604e949f2b079dae3a33c6",
+ "69593167eac74b069ba9ea11b5a2df98",
+ "7b57b77c678c489b8fd28bf9ecf220b8",
+ "5bcff80df0554cac936d1cd5a12c7330",
+ "8100ac5e848d465b82435f46917e89b9",
+ "75e4ed63347846dab807a972dbbd8f4b",
+ "23c50152d387453cbb40d588ccdba734",
+ "14db196d277e48a7bde44205351d354d",
+ "45c4bfa78e31480d890c5ccd2d05cd73",
+ "67188a4a909a4ea8a48cb3919507daa7",
+ "1b6dfa7096fc4a13845b76a861e0fbf3",
+ "7a4f7546d7d846f69cea34560da96a6f",
+ "485ed3f8011e4fdebeeb180db45df130",
+ "0335fd59b20f40039e53bfafd4a9f014",
+ "fd276879b0c7416caffb6ca0b87f7079",
+ "368ddc19d093459daa15702474b7e9f3",
+ "6121fa09c08d40f1ac39f58900b453f8",
+ "369758bdc0ec4a9ba753c5164c83e4d5",
+ "d4dcefae7523463bb2d836869e89bd69"
+ ]
+ },
+ "id": "FqfebeAdT073",
+ "outputId": "60525780-d801-48ca-845d-dff01a3f2c81"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Unsloth: You have 1 CPUs. Using `safe_serialization` is 10x slower.\n",
+ "We shall switch to Pytorch saving, which will take 3 minutes and not 30 minutes.\n",
+ "To force `safe_serialization`, set it to `None` instead.\n",
+ "Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded\n",
+ "model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.\n",
+ "Unsloth: Will remove a cached repo with size 6.1G\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unsloth: Merging 4bit and LoRA weights to 16bit...\n",
+ "Unsloth: Will use up to 5.37 out of 12.67 RAM for saving.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " 31%|███ | 13/42 [00:01<00:02, 10.60it/s]We will save to Disk and not RAM now.\n",
+ "100%|██████████| 42/42 [03:53<00:00, 5.57s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... This might take 5 minutes for Llama-7b...\n",
+ "Unsloth: Saving model/pytorch_model-00001-of-00004.bin...\n",
+ "Unsloth: Saving model/pytorch_model-00002-of-00004.bin...\n",
+ "Unsloth: Saving model/pytorch_model-00003-of-00004.bin...\n",
+ "Unsloth: Saving model/pytorch_model-00004-of-00004.bin...\n",
+ "Done.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Unsloth: Converting gemma2 model. Can use fast conversion = False.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n",
+ " \\\\ /| [0] Installing llama.cpp will take 3 minutes.\n",
+ "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits will take 3 minutes.\n",
+ "\\ / [2] Converting GGUF 16bits to ['q8_0'] will take 10 minutes each.\n",
+ " \"-____-\" In total, you will have to wait at least 16 minutes.\n",
+ "\n",
+ "Unsloth: [0] Installing llama.cpp. This will take 3 minutes...\n",
+ "Unsloth: [1] Converting model at model into q8_0 GGUF format.\n",
+ "The output location will be /content/model/unsloth.Q8_0.gguf\n",
+ "This will take 3 minutes...\n",
+ "INFO:hf-to-gguf:Loading model: model\n",
+ "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n",
+ "INFO:hf-to-gguf:Exporting model...\n",
+ "INFO:hf-to-gguf:gguf: loading model weight map from 'pytorch_model.bin.index.json'\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00001-of-00004.bin'\n",
+ "INFO:hf-to-gguf:token_embd.weight, torch.float16 --> Q8_0, shape = {3584, 256000}\n",
+ "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.0.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.0.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.1.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.1.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.2.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.2.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.3.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.3.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.4.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.4.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.5.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.5.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.6.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.6.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00002-of-00004.bin'\n",
+ "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.7.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.7.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.8.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.8.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.9.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.9.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.10.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.10.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.11.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.11.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.12.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.12.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.13.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.13.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.14.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.14.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.15.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.15.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.16.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.16.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.17.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.17.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.18.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.18.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.19.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.19.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00003-of-00004.bin'\n",
+ "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.20.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.20.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.21.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.21.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.22.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.22.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.23.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.23.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.24.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.24.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.25.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.25.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.26.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.26.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.27.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.27.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.28.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.28.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.28.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.28.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.28.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.28.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.28.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.28.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.28.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.28.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.28.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.29.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.29.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.29.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.29.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.29.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.29.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.29.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.29.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.29.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.29.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.29.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.30.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.30.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.30.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.30.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.30.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.30.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.30.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.30.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.30.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.30.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.30.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.31.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.31.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.31.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.31.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.31.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.31.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.31.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.31.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.31.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.31.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.31.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.32.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.32.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.32.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.32.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.32.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.32.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00004-of-00004.bin'\n",
+ "INFO:hf-to-gguf:blk.32.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.32.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.32.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.32.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.32.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.33.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.33.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.33.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.33.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.33.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.33.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.33.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.33.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.33.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.33.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.33.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.34.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.34.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.34.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.34.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.34.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.34.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.34.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.34.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.34.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.34.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.34.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.35.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.35.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.35.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.35.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.35.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.35.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.35.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.35.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.35.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.35.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.35.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.36.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.36.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.36.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.36.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.36.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.36.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.36.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.36.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.36.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.36.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.36.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.37.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.37.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.37.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.37.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.37.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.37.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.37.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.37.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.37.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.37.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.37.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.38.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.38.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.38.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.38.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.38.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.38.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.38.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.38.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.38.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.38.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.38.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.39.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.39.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.39.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.39.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.39.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.39.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.39.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.39.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.39.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.39.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.39.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.40.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.40.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.40.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.40.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.40.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.40.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.40.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.40.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.40.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.40.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.40.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.41.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.41.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.41.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.41.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.41.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.41.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.41.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.41.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.41.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.41.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.41.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:output_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:Set meta model\n",
+ "INFO:hf-to-gguf:Set model parameters\n",
+ "INFO:hf-to-gguf:Set model tokenizer\n",
+ "INFO:gguf.vocab:Setting special token type bos to 2\n",
+ "INFO:gguf.vocab:Setting special token type eos to 1\n",
+ "INFO:gguf.vocab:Setting special token type unk to 3\n",
+ "INFO:gguf.vocab:Setting special token type pad to 0\n",
+ "INFO:gguf.vocab:Setting add_bos_token to True\n",
+ "INFO:gguf.vocab:Setting add_eos_token to False\n",
+ "INFO:hf-to-gguf:Set model quantization version\n",
+ "INFO:gguf.gguf_writer:Writing the following files:\n",
+ "INFO:gguf.gguf_writer:/content/model/unsloth.Q8_0.gguf: n_tensors = 464, total_size = 9.8G\n",
+ "Writing: 100%|██████████| 9.82G/9.82G [03:34<00:00, 45.8Mbyte/s]\n",
+ "INFO:hf-to-gguf:Model successfully exported to /content/model/unsloth.Q8_0.gguf\n",
+ "Unsloth: Conversion completed! Output location: /content/model/unsloth.Q8_0.gguf\n",
+ "Unsloth: Merging 4bit and LoRA weights to 16bit...\n",
+ "Unsloth: Will use up to 5.48 out of 12.67 RAM for saving.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "100%|██████████| 42/42 [02:17<00:00, 3.28s/it]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unsloth: Saving tokenizer... Done.\n",
+ "Unsloth: Saving model... This might take 5 minutes for Llama-7b...\n",
+ "Unsloth: Saving vaugheu/BanglaGemma9b_GGUF/pytorch_model-00001-of-00004.bin...\n",
+ "Unsloth: Saving vaugheu/BanglaGemma9b_GGUF/pytorch_model-00002-of-00004.bin...\n",
+ "Unsloth: Saving vaugheu/BanglaGemma9b_GGUF/pytorch_model-00003-of-00004.bin...\n",
+ "Unsloth: Saving vaugheu/BanglaGemma9b_GGUF/pytorch_model-00004-of-00004.bin...\n",
+ "Done.\n",
+ "==((====))== Unsloth: Conversion from QLoRA to GGUF information\n",
+ " \\\\ /| [0] Installing llama.cpp will take 3 minutes.\n",
+ "O^O/ \\_/ \\ [1] Converting HF to GGUF 16bits will take 3 minutes.\n",
+ "\\ / [2] Converting GGUF 16bits to ['q8_0'] will take 10 minutes each.\n",
+ " \"-____-\" In total, you will have to wait at least 16 minutes.\n",
+ "\n",
+ "Unsloth: [0] Installing llama.cpp. This will take 3 minutes...\n",
+ "Unsloth: [1] Converting model at vaugheu/BanglaGemma9b_GGUF into q8_0 GGUF format.\n",
+ "The output location will be /content/vaugheu/BanglaGemma9b_GGUF/unsloth.Q8_0.gguf\n",
+ "This will take 3 minutes...\n",
+ "INFO:hf-to-gguf:Loading model: BanglaGemma9b_GGUF\n",
+ "INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only\n",
+ "INFO:hf-to-gguf:Exporting model...\n",
+ "INFO:hf-to-gguf:gguf: loading model weight map from 'pytorch_model.bin.index.json'\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00001-of-00004.bin'\n",
+ "INFO:hf-to-gguf:token_embd.weight, torch.float16 --> Q8_0, shape = {3584, 256000}\n",
+ "INFO:hf-to-gguf:blk.0.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.0.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.0.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.0.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.0.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.0.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.1.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.1.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.1.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.1.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.1.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.1.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.1.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.1.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.2.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.2.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.2.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.2.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.2.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.2.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.2.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.2.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.3.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.3.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.3.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.3.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.3.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.3.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.3.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.3.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.4.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.4.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.4.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.4.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.4.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.4.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.4.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.4.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.5.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.5.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.5.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.5.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.5.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.5.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.5.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.5.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.6.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.6.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.6.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.6.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.6.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.6.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.6.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.6.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.7.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.7.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.7.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.7.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00002-of-00004.bin'\n",
+ "INFO:hf-to-gguf:blk.7.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.7.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.7.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.7.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.7.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.8.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.8.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.8.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.8.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.8.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.8.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.8.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.8.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.9.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.9.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.9.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.9.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.9.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.9.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.9.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.9.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.10.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.10.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.10.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.10.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.10.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.10.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.10.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.10.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.11.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.11.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.11.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.11.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.11.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.11.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.11.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.11.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.12.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.12.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.12.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.12.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.12.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.12.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.12.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.12.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.13.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.13.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.13.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.13.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.13.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.13.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.13.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.13.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.14.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.14.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.14.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.14.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.14.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.14.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.14.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.14.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.15.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.15.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.15.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.15.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.15.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.15.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.15.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.15.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.16.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.16.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.16.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.16.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.16.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.16.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.16.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.16.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.17.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.17.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.17.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.17.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.17.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.17.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.17.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.17.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.18.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.18.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.18.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.18.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.18.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.18.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.18.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.18.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.19.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.19.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.19.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.19.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.19.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.19.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.19.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.19.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.20.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.20.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.20.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.20.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00003-of-00004.bin'\n",
+ "INFO:hf-to-gguf:blk.20.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.20.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.20.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.20.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.20.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.21.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.21.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.21.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.21.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.21.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.21.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.21.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.21.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.22.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.22.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.22.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.22.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.22.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.22.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.22.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.22.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.23.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.23.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.23.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.23.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.23.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.23.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.23.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.23.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.24.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.24.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.24.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.24.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.24.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.24.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.24.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.24.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.25.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.25.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.25.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.25.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.25.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.25.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.25.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.25.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.26.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.26.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.26.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.26.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.26.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.26.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.26.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.26.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.27.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.27.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.27.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.27.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.27.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.27.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.27.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.27.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.28.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.28.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.28.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.28.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.28.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.28.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.28.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.28.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.28.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.28.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.28.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.29.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.29.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.29.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.29.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.29.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.29.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.29.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.29.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.29.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.29.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.29.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.30.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.30.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.30.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.30.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.30.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.30.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.30.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.30.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.30.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.30.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.30.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.31.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.31.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.31.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.31.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.31.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.31.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.31.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.31.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.31.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.31.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.31.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.32.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.32.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.32.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.32.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.32.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.32.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:gguf: loading model part 'pytorch_model-00004-of-00004.bin'\n",
+ "INFO:hf-to-gguf:blk.32.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.32.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.32.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.32.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.32.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.33.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.33.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.33.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.33.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.33.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.33.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.33.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.33.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.33.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.33.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.33.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.34.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.34.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.34.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.34.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.34.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.34.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.34.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.34.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.34.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.34.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.34.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.35.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.35.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.35.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.35.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.35.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.35.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.35.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.35.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.35.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.35.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.35.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.36.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.36.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.36.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.36.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.36.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.36.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.36.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.36.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.36.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.36.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.36.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.37.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.37.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.37.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.37.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.37.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.37.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.37.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.37.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.37.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.37.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.37.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.38.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.38.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.38.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.38.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.38.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.38.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.38.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.38.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.38.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.38.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.38.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.39.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.39.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.39.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.39.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.39.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.39.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.39.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.39.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.39.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.39.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.39.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.40.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.40.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.40.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.40.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.40.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.40.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.40.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.40.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.40.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.40.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.40.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.41.attn_q.weight, torch.float16 --> Q8_0, shape = {3584, 4096}\n",
+ "INFO:hf-to-gguf:blk.41.attn_k.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.41.attn_v.weight, torch.float16 --> Q8_0, shape = {3584, 2048}\n",
+ "INFO:hf-to-gguf:blk.41.attn_output.weight, torch.float16 --> Q8_0, shape = {4096, 3584}\n",
+ "INFO:hf-to-gguf:blk.41.ffn_gate.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.41.ffn_up.weight, torch.float16 --> Q8_0, shape = {3584, 14336}\n",
+ "INFO:hf-to-gguf:blk.41.ffn_down.weight, torch.float16 --> Q8_0, shape = {14336, 3584}\n",
+ "INFO:hf-to-gguf:blk.41.attn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.41.post_attention_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.41.ffn_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:blk.41.post_ffw_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:output_norm.weight, torch.float16 --> F32, shape = {3584}\n",
+ "INFO:hf-to-gguf:Set meta model\n",
+ "INFO:hf-to-gguf:Set model parameters\n",
+ "INFO:hf-to-gguf:Set model tokenizer\n",
+ "INFO:gguf.vocab:Setting special token type bos to 2\n",
+ "INFO:gguf.vocab:Setting special token type eos to 1\n",
+ "INFO:gguf.vocab:Setting special token type unk to 3\n",
+ "INFO:gguf.vocab:Setting special token type pad to 0\n",
+ "INFO:gguf.vocab:Setting add_bos_token to True\n",
+ "INFO:gguf.vocab:Setting add_eos_token to False\n",
+ "INFO:hf-to-gguf:Set model quantization version\n",
+ "INFO:gguf.gguf_writer:Writing the following files:\n",
+ "INFO:gguf.gguf_writer:/content/vaugheu/BanglaGemma9b_GGUF/unsloth.Q8_0.gguf: n_tensors = 464, total_size = 9.8G\n",
+ "Writing: 100%|██████████| 9.82G/9.82G [03:38<00:00, 44.8Mbyte/s]\n",
+ "INFO:hf-to-gguf:Model successfully exported to /content/vaugheu/BanglaGemma9b_GGUF/unsloth.Q8_0.gguf\n",
+ "Unsloth: Conversion completed! Output location: /content/vaugheu/BanglaGemma9b_GGUF/unsloth.Q8_0.gguf\n",
+ "Unsloth: Uploading GGUF to Huggingface Hub...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "57125af4f34d49d0b58888ad9e21415d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "45c4bfa78e31480d890c5ccd2d05cd73",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "unsloth.Q8_0.gguf: 0%| | 0.00/9.83G [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Saved GGUF to https://huggingface.co/vaugheu/BanglaGemma9b_GGUF\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Save to 8bit Q8_0\n",
+ "if True: model.save_pretrained_gguf(\"model\", tokenizer,)\n",
+ "# Remember to go to https://huggingface.co/settings/tokens for a token!\n",
+ "# And change hf to your username!\n",
+ "if True: model.push_to_hub_gguf(\"vaugheu/BanglaGemma9b_GGUF\", tokenizer, token = \"hf_\")\n",
+ "\n",
+ "# Save to 16bit GGUF\n",
+ "if False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"f16\")\n",
+ "if False: model.push_to_hub_gguf(\"vaugheu/BanglaGemma9b_GGUF\", tokenizer, quantization_method = \"f16\", token = \"\")\n",
+ "\n",
+ "# Save to q4_k_m GGUF\n",
+ "if False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"q4_k_m\")\n",
+ "if False: model.push_to_hub_gguf(\"vaugheu/BanglaGemma9b_GGUF\", tokenizer, quantization_method = \"q4_k_m\", token = \"\")\n",
+ "\n",
+ "# Save to multiple GGUF options - much faster if you want multiple!\n",
+ "if False:\n",
+ " model.push_to_hub_gguf(\n",
+ " \"vaugheu/BanglaGemma9b_GGUF\", # Change hf to your username!\n",
+ " tokenizer,\n",
+ " quantization_method = [\"q4_k_m\", \"q8_0\", \"q5_k_m\",],\n",
+ " token = \"\", # Get a token at https://huggingface.co/settings/tokens\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "bDp0zNpwe6U_"
+ },
+ "source": [
+ "Now, use the `model-unsloth.gguf` file or `model-unsloth-Q4_K_M.gguf` file in `llama.cpp` or a UI based system like `GPT4All`. You can install GPT4All by going [here](https://gpt4all.io/index.html).\n",
+ "\n",
+ "**[NEW] Try 2x faster inference in a free Colab for Llama-3.1 8b Instruct [here](https://colab.research.google.com/drive/1T-YBVfnphoVc8E2E854qF3jdia2Ll2W2?usp=sharing)**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Zt9CHJqO6p30"
+ },
+ "source": [
+ "And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/u54VK8m8tk) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord!\n",
+ "\n",
+ "Some other links:\n",
+ "1. Zephyr DPO 2x faster [free Colab](https://colab.research.google.com/drive/15vttTpzzVXv_tJwEk-hIcQ0S9FcEWvwP?usp=sharing)\n",
+ "2. Llama 7b 2x faster [free Colab](https://colab.research.google.com/drive/1lBzz5KeZJKXjvivbYvmGarix9Ao6Wxe5?usp=sharing)\n",
+ "3. TinyLlama 4x faster full Alpaca 52K in 1 hour [free Colab](https://colab.research.google.com/drive/1AZghoNBQaMDgWJpi4RbffGM1h6raLUj9?usp=sharing)\n",
+ "4. CodeLlama 34b 2x faster [A100 on Colab](https://colab.research.google.com/drive/1y7A0AxE3y8gdj4AVkl2aZX47Xu3P1wJT?usp=sharing)\n",
+ "5. Mistral 7b [free Kaggle version](https://www.kaggle.com/code/danielhanchen/kaggle-mistral-7b-unsloth-notebook)\n",
+ "6. We also did a [blog](https://huggingface.co/blog/unsloth-trl) with 🤗 HuggingFace, and we're in the TRL [docs](https://huggingface.co/docs/trl/main/en/sft_trainer#accelerate-fine-tuning-2x-using-unsloth)!\n",
+ "7. `ChatML` for ShareGPT datasets, [conversational notebook](https://colab.research.google.com/drive/1Aau3lgPzeZKQ-98h69CCu1UJcvIBLmy2?usp=sharing)\n",
+ "8. Text completions like novel writing [notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing)\n",
+ "9. [**NEW**] We make Phi-3 Medium / Mini **2x faster**! See our [Phi-3 Medium notebook](https://colab.research.google.com/drive/1hhdhBa1j_hsymiW9m-WzxQtgqTH_NHqi?usp=sharing)\n",
+ "10. [**NEW**] We make Gemma-2 9b / 27b **2x faster**! See our [Gemma-2 9b notebook](https://colab.research.google.com/drive/1vIrqH5uYDQwsJ4-OO3DErvuv4pBgVwk4?usp=sharing)\n",
+ "11. [**NEW**] To finetune and auto export to Ollama, try our [Ollama notebook](https://colab.research.google.com/drive/1WZDi7APtQ9VsvOrQSSC5DDtxq159j8iZ?usp=sharing)\n",
+ "12. [**NEW**] We make Mistral NeMo 12B 2x faster and fit in under 12GB of VRAM! [Mistral NeMo notebook](https://colab.research.google.com/drive/17d3U-CAIwzmbDRqbZ9NnpHxCkmXB6LZ0?usp=sharing)\n",
+ "\n",
+ "\n",
+ "

\n",
+ "

\n",
+ "

Support our work if you can! Thanks!\n",
+ "
"
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "004f3ec8f7a545c4bc54484dcb3022bb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0335fd59b20f40039e53bfafd4a9f014": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0a4506749df0400480090d3127285ed6": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0a8ca1638fe248d0856fd4c385f9a70b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_afc36d622583404b942709b58027ffd2",
+ "placeholder": "",
+ "style": "IPY_MODEL_8b74367efcea4a50be0c0b205dc1dd47",
+ "value": "Map: 100%"
+ }
+ },
+ "0afe6c4f57a643159bc51aa36f099f61": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e0211c2ff4fb46aaa85bf681c004a04c",
+ "placeholder": "",
+ "style": "IPY_MODEL_a593571b38dd4fd696e3d4778d2a9f03",
+ "value": " 144M/144M [00:01<00:00, 100MB/s]"
+ }
+ },
+ "0bf1d81abe6f4a3493c29810857fc8dd": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "14db196d277e48a7bde44205351d354d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "17c55c44d870452c81d902b74c8cce79": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1949ce4af8c94c0ba7e3ac9d8df6b332": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_bac3b0bec13d492b86a5c65a0bb5b96f",
+ "placeholder": "",
+ "style": "IPY_MODEL_8adaf5cc36a3456094e077eca79c8b7e",
+ "value": "Map (num_proc=2): 100%"
+ }
+ },
+ "1b6dfa7096fc4a13845b76a861e0fbf3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_368ddc19d093459daa15702474b7e9f3",
+ "max": 9827148032,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_6121fa09c08d40f1ac39f58900b453f8",
+ "value": 9827148032
+ }
+ },
+ "1ce1838f9eb34f74a615ad82cab78274": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "danger",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0a4506749df0400480090d3127285ed6",
+ "max": 143810826,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_641f767ab42e4190bdd3d0abfe851301",
+ "value": 143810813
+ }
+ },
+ "1f6d972a105a46438f51566f5a24cf84": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "1fa73eafabb14c73aaee39354c62477f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "20d356533da04942856986a33e7a99fb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2279b927aab74513aa1f6efb2c66c426": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "23c50152d387453cbb40d588ccdba734": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "262070892253448793aba4d048f40c08": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_45d4f27475294750aff2487353c8105e",
+ "placeholder": "",
+ "style": "IPY_MODEL_3926bab2dbad4e5fb4362ee96d6fdd67",
+ "value": "tokenizer.json: 100%"
+ }
+ },
+ "265fecfcffb44db580c066a04b5ea37b": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2b0e9c589f9848f2aeec3a97dacf2dc5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2d5c3406ed7e4e03af04711751debd71": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "2e6a26fb12084d5487525f5e78ab5ac8": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "358035cd9f6943aeadc4cba1964109a6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "368ddc19d093459daa15702474b7e9f3": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "369758bdc0ec4a9ba753c5164c83e4d5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "372011973a8e46a2888bf4299b042aa0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_cc0cb8826ef3428389ed6dfff6717d95",
+ "placeholder": "",
+ "style": "IPY_MODEL_57f60ec03bf14970b020302f317ea97a",
+ "value": " 172026/172026 [00:16<00:00, 3697.29 examples/s]"
+ }
+ },
+ "3926bab2dbad4e5fb4362ee96d6fdd67": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "3cd94a9a96894e51a652076762478155": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3cdf1d5b878b41838f0ec2b4d877e97a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "3e94165c4ab0471db8fb1fbd5b5bac0d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "3f97e7bd5fff420c80615d676e1648f0": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "3fddd29878ba408db098fb05db157710": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_b542e0276854449d9ec4bed67279d037",
+ "IPY_MODEL_c3143dca63c6445fb7aa06d7d764d7a9",
+ "IPY_MODEL_e5c89397eb1a42bb895a4c540db2df1c"
+ ],
+ "layout": "IPY_MODEL_ae243226e393499bac22c08d2b3d9570"
+ }
+ },
+ "418ece30091b4d66aca4df6367e0bec5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "45c4bfa78e31480d890c5ccd2d05cd73": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_67188a4a909a4ea8a48cb3919507daa7",
+ "IPY_MODEL_1b6dfa7096fc4a13845b76a861e0fbf3",
+ "IPY_MODEL_7a4f7546d7d846f69cea34560da96a6f"
+ ],
+ "layout": "IPY_MODEL_485ed3f8011e4fdebeeb180db45df130"
+ }
+ },
+ "45d4f27475294750aff2487353c8105e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4677d087bf6b40a3a4915ac7481f6e8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_673da437f86a4371b7e3913a66de835a",
+ "placeholder": "",
+ "style": "IPY_MODEL_358035cd9f6943aeadc4cba1964109a6",
+ "value": "tokenizer.model: 100%"
+ }
+ },
+ "485ed3f8011e4fdebeeb180db45df130": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "4e40665bc93a414c87c089a3a0bb4008": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2b0e9c589f9848f2aeec3a97dacf2dc5",
+ "placeholder": "",
+ "style": "IPY_MODEL_a62221e4825a48caa9ef7f906fd43748",
+ "value": "train-00000-of-00002.parquet: 100%"
+ }
+ },
+ "51f1e28d282645a58c8b783f4f60cfc2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_7ca5730f63b4420cab8b124a17aaeb27",
+ "IPY_MODEL_64747677b60c489ebd3e769272533b3f",
+ "IPY_MODEL_5c5c498bd046409c860372110e523c7c"
+ ],
+ "layout": "IPY_MODEL_ae087aee8e96402681d22d892d6cd476"
+ }
+ },
+ "5455311519604e9993d537555f372a0b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9439a307ffdd4705b7a1affb46d0fb71",
+ "placeholder": "",
+ "style": "IPY_MODEL_6a62779572c6495fb2594270742b6e58",
+ "value": " 172026/172026 [00:15<00:00, 14572.28 examples/s]"
+ }
+ },
+ "57125af4f34d49d0b58888ad9e21415d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_bbf22f44362b4805a884fc3d8b2d9d17",
+ "IPY_MODEL_65d84eea4ef64e1ab9267c5f8fc25600",
+ "IPY_MODEL_58fdbca58f604e949f2b079dae3a33c6"
+ ],
+ "layout": "IPY_MODEL_69593167eac74b069ba9ea11b5a2df98"
+ }
+ },
+ "57f60ec03bf14970b020302f317ea97a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "58fdbca58f604e949f2b079dae3a33c6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_23c50152d387453cbb40d588ccdba734",
+ "placeholder": "",
+ "style": "IPY_MODEL_14db196d277e48a7bde44205351d354d",
+ "value": " 1/1 [01:26<00:00, 86.02s/it]"
+ }
+ },
+ "596156e7bb7346c1808ed960997a5159": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_780d3c81c8e2461694df4d515d381d9d",
+ "placeholder": "",
+ "style": "IPY_MODEL_f5b0174aa23e432896d0dfe37387036b",
+ "value": " 636/636 [00:00<00:00, 44.2kB/s]"
+ }
+ },
+ "5b795861641e488fb6a47f88860a9ccd": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "5bcff80df0554cac936d1cd5a12c7330": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "5c5c498bd046409c860372110e523c7c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e7f6f37bf5b6483788e50c112b7ef007",
+ "placeholder": "",
+ "style": "IPY_MODEL_aa566decf6b4412caa2988fa623900ea",
+ "value": " 6.13G/6.13G [00:47<00:00, 62.0MB/s]"
+ }
+ },
+ "6121fa09c08d40f1ac39f58900b453f8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "62a6bc239405496ca1e451fbda8787f3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_7af5367620b64131a6fef8c2864d0d28",
+ "IPY_MODEL_d9f230d8474c40fc995c67c4f1eeb86a",
+ "IPY_MODEL_596156e7bb7346c1808ed960997a5159"
+ ],
+ "layout": "IPY_MODEL_418ece30091b4d66aca4df6367e0bec5"
+ }
+ },
+ "63f16ab5e462454d88927b21df4427aa": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_68b2f241810746b7973e2b94ba4c0122",
+ "placeholder": "",
+ "style": "IPY_MODEL_e4fd6646d36e4bce817e1e28dd99dc51",
+ "value": "Generating train split: 100%"
+ }
+ },
+ "641f767ab42e4190bdd3d0abfe851301": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "64747677b60c489ebd3e769272533b3f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "danger",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e50f33072d6d454b98c6607f9e847401",
+ "max": 6130708044,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3f97e7bd5fff420c80615d676e1648f0",
+ "value": 6130707460
+ }
+ },
+ "65d84eea4ef64e1ab9267c5f8fc25600": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_8100ac5e848d465b82435f46917e89b9",
+ "max": 1,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_75e4ed63347846dab807a972dbbd8f4b",
+ "value": 1
+ }
+ },
+ "67188a4a909a4ea8a48cb3919507daa7": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0335fd59b20f40039e53bfafd4a9f014",
+ "placeholder": "",
+ "style": "IPY_MODEL_fd276879b0c7416caffb6ca0b87f7079",
+ "value": "unsloth.Q8_0.gguf: "
+ }
+ },
+ "673da437f86a4371b7e3913a66de835a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "68b2f241810746b7973e2b94ba4c0122": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "69593167eac74b069ba9ea11b5a2df98": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "6a62779572c6495fb2594270742b6e58": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "6db31893e3f84043b5abc6a24bac8228": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a6500ce74ca54e0ca650851502b14644",
+ "placeholder": "",
+ "style": "IPY_MODEL_de38fc3f3df348f29528d9acd6b9d981",
+ "value": "tokenizer_config.json: 100%"
+ }
+ },
+ "6f7ed19f4b77411c88223d59fa50d13a": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "71f71606c101414bae187de7f145ea43": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "723b04a16f3a402589fdb9463834f3d1": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "74c3cbb850e44a4c9eb8283080ba075e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "75e4ed63347846dab807a972dbbd8f4b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "780d3c81c8e2461694df4d515d381d9d": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7a4f7546d7d846f69cea34560da96a6f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_369758bdc0ec4a9ba753c5164c83e4d5",
+ "placeholder": "",
+ "style": "IPY_MODEL_d4dcefae7523463bb2d836869e89bd69",
+ "value": " 9.84G/? [01:25<00:00, 730MB/s]"
+ }
+ },
+ "7af5367620b64131a6fef8c2864d0d28": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_b1610596162844658f4ac1893f0fdd40",
+ "placeholder": "",
+ "style": "IPY_MODEL_87ccf938ee7641acb94c6050bb7c4b20",
+ "value": "special_tokens_map.json: 100%"
+ }
+ },
+ "7b57b77c678c489b8fd28bf9ecf220b8": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "7b64ff6c0203472391e90ce30ed4165f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_1f6d972a105a46438f51566f5a24cf84",
+ "placeholder": "",
+ "style": "IPY_MODEL_7c86fc2b3b0d4708bc2e55801894e37e",
+ "value": " 158M/158M [00:01<00:00, 158MB/s]"
+ }
+ },
+ "7c86fc2b3b0d4708bc2e55801894e37e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "7ca5730f63b4420cab8b124a17aaeb27": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f340840186b4458dbff47afe987f1f59",
+ "placeholder": "",
+ "style": "IPY_MODEL_723b04a16f3a402589fdb9463834f3d1",
+ "value": "model.safetensors: 100%"
+ }
+ },
+ "8100ac5e848d465b82435f46917e89b9": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "81b34a02e519484c965524acbe252807": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "854179dbb6854ac5bb3d7240dcc3cb0b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "danger",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_f9ee25b240f74c21adfa24ce54659efd",
+ "max": 158165675,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_5b795861641e488fb6a47f88860a9ccd",
+ "value": 158165660
+ }
+ },
+ "871336e6e4134fb28bd3b2fa606059cc": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_0a8ca1638fe248d0856fd4c385f9a70b",
+ "IPY_MODEL_93a7d487abb0476383c7e57a3da1f851",
+ "IPY_MODEL_372011973a8e46a2888bf4299b042aa0"
+ ],
+ "layout": "IPY_MODEL_2d5c3406ed7e4e03af04711751debd71"
+ }
+ },
+ "87ccf938ee7641acb94c6050bb7c4b20": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "89202b77af2a47b196cc8723c846e891": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_6f7ed19f4b77411c88223d59fa50d13a",
+ "placeholder": "",
+ "style": "IPY_MODEL_9d319b570bd64f0e9176817d577bc020",
+ "value": " 172026/172026 [02:35<00:00, 788.86 examples/s]"
+ }
+ },
+ "8adaf5cc36a3456094e077eca79c8b7e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "8b74367efcea4a50be0c0b205dc1dd47": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "8c398b847644448e9d75135f3200f156": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_63f16ab5e462454d88927b21df4427aa",
+ "IPY_MODEL_afb69e965b33472a8de2739c1cdff1e9",
+ "IPY_MODEL_5455311519604e9993d537555f372a0b"
+ ],
+ "layout": "IPY_MODEL_a398a05038394c7b853237d751a0bbdd"
+ }
+ },
+ "8e082fef631b4eeab73c02e181f5690c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "902ea19651c546de8c19414daf6a053a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4e40665bc93a414c87c089a3a0bb4008",
+ "IPY_MODEL_854179dbb6854ac5bb3d7240dcc3cb0b",
+ "IPY_MODEL_7b64ff6c0203472391e90ce30ed4165f"
+ ],
+ "layout": "IPY_MODEL_17c55c44d870452c81d902b74c8cce79"
+ }
+ },
+ "90620de3bb6a467d92b92622e5dfb0c5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "90762bb3d5fd4f4db67f3a8a11434689": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2e6a26fb12084d5487525f5e78ab5ac8",
+ "placeholder": "",
+ "style": "IPY_MODEL_8e082fef631b4eeab73c02e181f5690c",
+ "value": " 46.4k/46.4k [00:00<00:00, 3.41MB/s]"
+ }
+ },
+ "90c9301c342846729db7e3c6dfe5b849": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_edb08520684f4f83a7094599ed55cb37",
+ "max": 4241003,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_c2185ae3f8aa4f3488e0bd7257664e26",
+ "value": 4241003
+ }
+ },
+ "9179bf6bb49f477b9d9e5eb2f8015aaa": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "93a7d487abb0476383c7e57a3da1f851": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_97d31d1c17d248f3b2dffe59143a9797",
+ "max": 172026,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_c7fbd851c32746d3a2a0e69b411b2121",
+ "value": 172026
+ }
+ },
+ "9439a307ffdd4705b7a1affb46d0fb71": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "95aa990d762d428d93cef2834fb86c8a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_20d356533da04942856986a33e7a99fb",
+ "max": 190,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_3cdf1d5b878b41838f0ec2b4d877e97a",
+ "value": 190
+ }
+ },
+ "97d31d1c17d248f3b2dffe59143a9797": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "98cc2108542f443eb242a45fc671afef": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "9a6fe19da592481bbe762912bc45bbed": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "9d319b570bd64f0e9176817d577bc020": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a2cde30a94d3462488bcb33693e3e274": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a398a05038394c7b853237d751a0bbdd": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a3c7c2459ad14e9c81d7422d7e83393f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a593571b38dd4fd696e3d4778d2a9f03": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a62221e4825a48caa9ef7f906fd43748": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "a6500ce74ca54e0ca650851502b14644": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "a80caed5f8af41ca99576d9daa68c6f6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_262070892253448793aba4d048f40c08",
+ "IPY_MODEL_e5486d352f314f45b663a6472d6ff885",
+ "IPY_MODEL_f033347d7cdb4f38a3eb3e05f546e438"
+ ],
+ "layout": "IPY_MODEL_d1549b76e8ff4d69b17f9a0831b43551"
+ }
+ },
+ "aa566decf6b4412caa2988fa623900ea": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "ae087aee8e96402681d22d892d6cd476": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "ae243226e393499bac22c08d2b3d9570": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "afb69e965b33472a8de2739c1cdff1e9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_c6ede16c623b49c7b4916e5ce4799125",
+ "max": 172026,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_9a6fe19da592481bbe762912bc45bbed",
+ "value": 172026
+ }
+ },
+ "afc36d622583404b942709b58027ffd2": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b1610596162844658f4ac1893f0fdd40": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "b44759c58b284a5a950350a2cf82c4e6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "b542e0276854449d9ec4bed67279d037": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_265fecfcffb44db580c066a04b5ea37b",
+ "placeholder": "",
+ "style": "IPY_MODEL_81b34a02e519484c965524acbe252807",
+ "value": "README.md: 100%"
+ }
+ },
+ "b57e30ad94fa4b739d32c9553f5aee29": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_4677d087bf6b40a3a4915ac7481f6e8d",
+ "IPY_MODEL_90c9301c342846729db7e3c6dfe5b849",
+ "IPY_MODEL_c7977b2008c2476596c5351012e710b6"
+ ],
+ "layout": "IPY_MODEL_dcb2b3f1102a44429e62828b99ed39ab"
+ }
+ },
+ "b90fe47ddb6240bd90ddd1705a9f3fc9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_ca20885d9adc4815b5418073a7930f8f",
+ "IPY_MODEL_95aa990d762d428d93cef2834fb86c8a",
+ "IPY_MODEL_c2d228de02d14c6c8f780048b1ccc088"
+ ],
+ "layout": "IPY_MODEL_c56ce88b9fab4475af5fafbc7a845010"
+ }
+ },
+ "bac3b0bec13d492b86a5c65a0bb5b96f": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "bbf22f44362b4805a884fc3d8b2d9d17": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_7b57b77c678c489b8fd28bf9ecf220b8",
+ "placeholder": "",
+ "style": "IPY_MODEL_5bcff80df0554cac936d1cd5a12c7330",
+ "value": "100%"
+ }
+ },
+ "bf1ea3ec39db442d91f74fdcfd1c5ac3": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "c2185ae3f8aa4f3488e0bd7257664e26": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "c2d228de02d14c6c8f780048b1ccc088": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a2cde30a94d3462488bcb33693e3e274",
+ "placeholder": "",
+ "style": "IPY_MODEL_f04ed92a356c489a9877f82b05bb330f",
+ "value": " 190/190 [00:00<00:00, 13.1kB/s]"
+ }
+ },
+ "c3143dca63c6445fb7aa06d7d764d7a9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_9179bf6bb49f477b9d9e5eb2f8015aaa",
+ "max": 450,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_d6dff4e305aa46fbaa9375133356378a",
+ "value": 450
+ }
+ },
+ "c56ce88b9fab4475af5fafbc7a845010": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c6ede16c623b49c7b4916e5ce4799125": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "c7977b2008c2476596c5351012e710b6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_71f71606c101414bae187de7f145ea43",
+ "placeholder": "",
+ "style": "IPY_MODEL_bf1ea3ec39db442d91f74fdcfd1c5ac3",
+ "value": " 4.24M/4.24M [00:00<00:00, 17.7MB/s]"
+ }
+ },
+ "c7fbd851c32746d3a2a0e69b411b2121": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "ca20885d9adc4815b5418073a7930f8f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_3e94165c4ab0471db8fb1fbd5b5bac0d",
+ "placeholder": "",
+ "style": "IPY_MODEL_74c3cbb850e44a4c9eb8283080ba075e",
+ "value": "generation_config.json: 100%"
+ }
+ },
+ "cc0bc8033830406a942b67c4cbbc5d28": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cc0cb8826ef3428389ed6dfff6717d95": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "cd98cb1f265448cf90adfd4fb3362b0d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "d1462aa795714430bfee51674a619527": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d1549b76e8ff4d69b17f9a0831b43551": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d1ebb90d4a8e4656941f47d644013204": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d4dcefae7523463bb2d836869e89bd69": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "d6dff4e305aa46fbaa9375133356378a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "d9f230d8474c40fc995c67c4f1eeb86a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_d1462aa795714430bfee51674a619527",
+ "max": 636,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_cd98cb1f265448cf90adfd4fb3362b0d",
+ "value": 636
+ }
+ },
+ "dcb2b3f1102a44429e62828b99ed39ab": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "dd252c6d1d59418aa0f5b7c469351dee": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_1949ce4af8c94c0ba7e3ac9d8df6b332",
+ "IPY_MODEL_e096cf56562a4e7281681be173d51b09",
+ "IPY_MODEL_89202b77af2a47b196cc8723c846e891"
+ ],
+ "layout": "IPY_MODEL_3cd94a9a96894e51a652076762478155"
+ }
+ },
+ "de38fc3f3df348f29528d9acd6b9d981": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "e0211c2ff4fb46aaa85bf681c004a04c": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e096cf56562a4e7281681be173d51b09": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_98cc2108542f443eb242a45fc671afef",
+ "max": 172026,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_90620de3bb6a467d92b92622e5dfb0c5",
+ "value": 172026
+ }
+ },
+ "e2844ec735d4420391b8ed1b9a932949": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e4fd6646d36e4bce817e1e28dd99dc51": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "e50f33072d6d454b98c6607f9e847401": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e5486d352f314f45b663a6472d6ff885": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e885fb98968949589006001c2f84a8eb",
+ "max": 17525357,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_1fa73eafabb14c73aaee39354c62477f",
+ "value": 17525357
+ }
+ },
+ "e5c89397eb1a42bb895a4c540db2df1c": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_e2844ec735d4420391b8ed1b9a932949",
+ "placeholder": "",
+ "style": "IPY_MODEL_d1ebb90d4a8e4656941f47d644013204",
+ "value": " 450/450 [00:00<00:00, 10.5kB/s]"
+ }
+ },
+ "e7f6f37bf5b6483788e50c112b7ef007": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "e885fb98968949589006001c2f84a8eb": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "edb08520684f4f83a7094599ed55cb37": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f033347d7cdb4f38a3eb3e05f546e438": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_2279b927aab74513aa1f6efb2c66c426",
+ "placeholder": "",
+ "style": "IPY_MODEL_b44759c58b284a5a950350a2cf82c4e6",
+ "value": " 17.5M/17.5M [00:00<00:00, 45.0MB/s]"
+ }
+ },
+ "f04ed92a356c489a9877f82b05bb330f": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "f2f000db73f34f468b1c549c8422743a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_0bf1d81abe6f4a3493c29810857fc8dd",
+ "placeholder": "",
+ "style": "IPY_MODEL_f31165c434c7427fb4d26ea2af0feda9",
+ "value": "train-00001-of-00002.parquet: 100%"
+ }
+ },
+ "f31165c434c7427fb4d26ea2af0feda9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "f340840186b4458dbff47afe987f1f59": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f5b0174aa23e432896d0dfe37387036b": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "f8a68ea30ebd4251931cf4d7b5be62a9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_f2f000db73f34f468b1c549c8422743a",
+ "IPY_MODEL_1ce1838f9eb34f74a615ad82cab78274",
+ "IPY_MODEL_0afe6c4f57a643159bc51aa36f099f61"
+ ],
+ "layout": "IPY_MODEL_cc0bc8033830406a942b67c4cbbc5d28"
+ }
+ },
+ "f9ee25b240f74c21adfa24ce54659efd": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "1.2.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "1.2.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "overflow_x": null,
+ "overflow_y": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "fd276879b0c7416caffb6ca0b87f7079": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "DescriptionStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "DescriptionStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "description_width": ""
+ }
+ },
+ "fd5eccb2370b40b58eea5c9f0d868e36": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_tooltip": null,
+ "layout": "IPY_MODEL_a3c7c2459ad14e9c81d7422d7e83393f",
+ "max": 46405,
+ "min": 0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_fdd24808ed23442998104b5b28370aa6",
+ "value": 46405
+ }
+ },
+ "fdd24808ed23442998104b5b28370aa6": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "1.2.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "fe0cef5f02ca4e5e95b06356b8286fbe": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "1.5.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "1.5.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "1.5.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_6db31893e3f84043b5abc6a24bac8228",
+ "IPY_MODEL_fd5eccb2370b40b58eea5c9f0d868e36",
+ "IPY_MODEL_90762bb3d5fd4f4db67f3a8a11434689"
+ ],
+ "layout": "IPY_MODEL_004f3ec8f7a545c4bc54484dcb3022bb"
+ }
+ }
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}