Commit bdfff0c · add bf16 to convert
Parent(s): da00d36
README.md CHANGED

@@ -21,7 +21,7 @@ Feel free to send in PRs or use this code however you'd like.\
 
 - [Manage branches (create/delete)](https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/manage%20branches.py)
 
-- [EXL2 Single Quant V3](https://colab.research.google.com/
+- [EXL2 Single Quant V3](https://colab.research.google.com/#fileId=https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/ipynb/EXL2_Private_Quant_V3.ipynb) **(COLAB)**
 
 ## work in progress/not tested (ordered by priority)
 
auto-exl2-upload/auto-exl2-upload.zip CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size 
+oid sha256:8957446b8346fe63db8344806062d6476f5e0ed438ac97ca3a37d06636141337
+size 8725
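Both `.zip` archives in this commit are tracked with Git LFS, so the diffs here cover the three-line pointer files rather than the archives themselves: `oid` is the SHA-256 of the new archive and `size` its byte count (the old values are truncated in this view). A quick sketch of checking a fetched archive against its pointer, using the values shown above:

```python
import hashlib

# Values copied from the new LFS pointer above.
expected_oid = "8957446b8346fe63db8344806062d6476f5e0ed438ac97ca3a37d06636141337"
expected_size = 8725

with open("auto-exl2-upload/auto-exl2-upload.zip", "rb") as f:
    data = f.read()

assert len(data) == expected_size, "size mismatch"
assert hashlib.sha256(data).hexdigest() == expected_oid, "oid mismatch"
print("archive matches its LFS pointer")
```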
auto-exl2-upload/exl2-quant.py CHANGED

@@ -118,17 +118,17 @@ bpwvalue = list(qnum.values())
 bpwvalue.sort()
 
 #ask to change repo visibility to public on hf hub
-priv2pub = input("Do you want to make the repo public after successful quants? (y/n): ")
+priv2pub = input("Do you want to make the repo public after successful quants? (y/n): ").lower()
 while priv2pub != 'y' and priv2pub != 'n':
-    priv2pub = input("Please enter 'y' or 'n': ")
+    priv2pub = input("Please enter 'y' or 'n': ").lower()
 clear_screen()
 
 #ask to delete original fp16 weights
-delmodel = input("Do you want to delete the original model? (Won't delete if paused or failed) (y/N): ")
+delmodel = input("Do you want to delete the original model? (Won't delete if paused or failed) (y/N): ").lower()
 if delmodel == '':
     delmodel = 'n'
 while delmodel != 'y' and delmodel != 'n':
-    delmodel = input("Please enter 'y' or 'n': ")
+    delmodel = input("Please enter 'y' or 'n': ").lower()
     if delmodel == '':
         delmodel = 'n'
 clear_screen()
@@ -143,12 +143,19 @@ if not os.path.exists(f"models{slsh}{model}{slsh}converted-st"): #check if model
 
 #convert to safetensors if bin
 if not glob.glob(f"models/{model}/*.safetensors"): #check if safetensors model exists
-    convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ")
+    convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ").lower()
     while convertst != 'y' and convertst != 'n':
-        convertst = input("Please enter 'y' or 'n': ")
+        convertst = input("Please enter 'y' or 'n': ").lower()
+    convusebf16 = input("Would you like to use bf16 loading? Will reduce ram usage (y/n): ").lower()
+    while convusebf16 != 'y' and convusebf16 != 'n':
+        convusebf16 = input("Please enter 'y' or 'n': ").lower()
+    if convusebf16 == 'y':
+        usingbf16 = "--bf16"
+    else:
+        usingbf16 = ""
     if convertst == 'y':
         print("Converting weights to safetensors, please wait...")
-        result = subprocess.run(f"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)
+        result = subprocess.run(f"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st {usingbf16}", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)
         if result.returncode != 0:
             print("Converting failed. Please look for a safetensors model or convert model manually.")
             sys.exit("Exiting...")
@@ -171,7 +178,6 @@ if repo_exists(f"{whoami().get('name', None)}/{modelname}-exl2") == False:
     with open('./README.md', 'w') as file:
         file.write(f"# Exl2 quants for [{modelname}](https://huggingface.co/{repo_url})\n\n")
         file.write("## Automatically quantized using the auto quant script from [hf-scripts](https://huggingface.co/anthonyg5005/hf-scripts)\n\n")
-        file.write(f"Would recommend {whoami().get('name', None)} to change up this README to include more info.\n\n")
         file.write("### BPW:\n\n")
         for bpw in bpwvalue:
             file.write(f"[{bpw}](https://huggingface.co/{whoami().get('name', None)}/{modelname}-exl2/tree/{bpw}bpw)\\\n")
@@ -208,6 +214,11 @@ for bpw in bpwvalue:
         create_branch(f"{whoami().get('name', None)}/{modelname}-exl2", branch=f"{bpw}bpw") #create branch
     except:
         print(f"Branch {bpw} already exists, trying upload...")
+    try:
+        os.remove(f"{model}-exl2-{bpw}bpw/README.md") #bypasses encode issue when uploading some models
+        print("Deleting model README.")
+    except:
+        print("Skipping README delete.")
     upload_folder(folder_path=f"{model}-exl2-{bpw}bpw", repo_id=f"{whoami().get('name', None)}/{modelname}-exl2", commit_message=f"Add quant for BPW {bpw}", revision=f"{bpw}bpw") #upload quantized model
     subprocess.run(f"{osrmd} {model}-exl2-{bpw}bpw-WD", shell=True) #remove working directory
     subprocess.run(f"{osrmd} {model}-exl2-{bpw}bpw", shell=True) #remove compile directory
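The new prompt wires a `--bf16` flag through to oobabooga's convert-to-safetensors.py. The RAM saving the prompt mentions comes from loading the checkpoint in a 16-bit dtype instead of 32-bit floats, and bfloat16 keeps float32's 8-bit exponent, so values don't overflow the way float16 can. Rough arithmetic for a hypothetical 7B-parameter model (figures below are illustrative, not from the commit):

```python
# Host RAM needed just to hold the weights during conversion.
params = 7_000_000_000  # hypothetical 7B-parameter model
print(f"float32:  {params * 4 / 2**30:.1f} GiB")  # ~26.1 GiB
print(f"bfloat16: {params * 2 / 2**30:.1f} GiB")  # ~13.0 GiB
```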
exl2-multi-quant-local/exl2-multi-quant-local.zip CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size 
+oid sha256:80ce84370ae0ee56854c6f5c6bee00ece8d3fd133e05acefde4b5c85de553057
+size 7520
exl2-multi-quant-local/exl2-quant.py CHANGED

@@ -90,11 +90,11 @@ bpwvalue = list(qnum.values())
 bpwvalue.sort()
 
 #ask to delete fp16 after done
-delmodel = input("Do you want to delete the original model? (Won't delete if paused or failed) (y/N): ")
+delmodel = input("Do you want to delete the original model? (Won't delete if paused or failed) (y/N): ").lower()
 if delmodel == '':
     delmodel = 'n'
 while delmodel != 'y' and delmodel != 'n':
-    delmodel = input("Please enter 'y' or 'n': ")
+    delmodel = input("Please enter 'y' or 'n': ").lower()
     if delmodel == '':
         delmodel = 'n'
 if delmodel == 'y':
@@ -112,12 +112,19 @@ if not os.path.exists(f"models{slsh}{model}{slsh}converted-st"): #check if model
 
 #convert to safetensors if bin
 if not glob.glob(f"models/{model}/*.safetensors"): #check if safetensors model exists
-    convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ")
+    convertst = input("Couldn't find safetensors model, do you want to convert to safetensors? (y/n): ").lower()
     while convertst != 'y' and convertst != 'n':
-        convertst = input("Please enter 'y' or 'n': ")
+        convertst = input("Please enter 'y' or 'n': ").lower()
+    convusebf16 = input("Would you like to use bf16 loading? Will reduce ram usage (y/n): ").lower()
+    while convusebf16 != 'y' and convusebf16 != 'n':
+        convusebf16 = input("Please enter 'y' or 'n': ").lower()
+    if convusebf16 == 'y':
+        usingbf16 = "--bf16"
+    else:
+        usingbf16 = ""
     if convertst == 'y':
         print("Converting weights to safetensors, please wait...")
-        result = subprocess.run(f"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)
+        result = subprocess.run(f"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st {usingbf16}", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)
         if result.returncode != 0:
             print("Converting failed. Please look for a safetensors model or convert model manually.")
             sys.exit("Exiting...")
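Both scripts now repeat the same lowercase-and-revalidate prompt loop in four places. Not part of this commit, but the pattern could live in one hypothetical helper; a sketch:

```python
def ask_yn(prompt: str, default: str | None = None) -> bool:
    """Ask until the user answers y or n; empty input takes the default if given."""
    while True:
        answer = input(prompt).lower()
        if answer == '' and default is not None:
            answer = default
        if answer in ('y', 'n'):
            return answer == 'y'
        prompt = "Please enter 'y' or 'n': "

# e.g. would replace the delmodel block:
# delmodel = ask_yn("Do you want to delete the original model? (y/N): ", default='n')
```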
ipynb/EXL2_Private_Quant_V3.ipynb CHANGED

@@ -1,23 +1,10 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": [],
-      "gpuType": "T4"
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    },
-    "accelerator": "GPU"
-  },
   "cells": [
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "Ku0ezvyD42ng"
+      },
       "source": [
         "#Quantizing huggingface models to exl2\n",
         "This version of my exl2 quantize colab creates a single quantizaion to upload privatly.\\\n",
@@ -27,10 +14,7 @@
         "#Outdated\n",
         "More recent stuff in [Anthonyg5005/hf-scripts](https://huggingface.co/Anthonyg5005/hf-scripts)\\\n",
         "If you need to quant a model to exl2 for free, check out the bot from the [Exllama Discord server](https://discord.gg/NSFwVuCjRq)"
-      ],
-      "metadata": {
-        "id": "Ku0ezvyD42ng"
-      }
+      ]
     },
     {
       "cell_type": "code",
@@ -57,6 +41,12 @@
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "8Hl3fQmRLybp"
+      },
+      "outputs": [],
       "source": [
         "#@title Login to HF (Required to upload files)\n",
         "#@markdown From my Colab/Kaggle login script on [Anthonyg5005/hf-scripts](https://huggingface.co/Anthonyg5005/hf-scripts/blob/main/HF%20Login%20Snippet%20Kaggle.py)\n",
@@ -110,16 +100,16 @@
         "    login(input(\"Enter your HuggingFace (WRITE) token: \"))\n",
         "    continue\n",
         "  break"
-      ],
-      "metadata": {
-        "cellView": "form",
-        "id": "8Hl3fQmRLybp"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "NI1LUMD7H-Zx"
+      },
+      "outputs": [],
       "source": [
         "#@title ##Choose HF model to download\n",
         "#@markdown ###Repo should be formatted as user/repo\n",
@@ -133,22 +123,22 @@
         "#@markdown Convert Pytorch weights to Safetensors\n",
         "convert_safetensors = False # @param {type:\"boolean\"}\n",
         "if convert_safetensors == True:\n",
-        "  !python convert-to-safetensors.py models/{model} --output models/{model}-st\n",
+        "  !python convert-to-safetensors.py models/{model} --output models/{model}-st --bf16 --max-shard-size 1GB\n",
         "  !rm -r models/{model}\n",
         "  !mv models/{model}-st models/{model}\n",
         "  print(\"Finished converting\")\n",
         "#@markdown If model files are stored in a pytorch .bin extention then enable convert_safetensors above.\\\n",
         "#@markdown "
-      ],
-      "metadata": {
-        "id": "NI1LUMD7H-Zx",
-        "cellView": "form"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "8anbEbGyNmBI"
+      },
+      "outputs": [],
       "source": [
         "#@title Quantize the model\n",
         "#@markdown ###Quantization time will last based on model size\n",
@@ -205,16 +195,16 @@
         "else:\n",
         "  quant = f\"convert.py -i models/{model} -o {model}-exl2-{BPW}bpw-WD -cf {model}-exl2-{BPW}bpw -b {BPW}\"\n",
         "!python {quant}"
-      ],
-      "metadata": {
-        "id": "8anbEbGyNmBI",
-        "cellView": "form"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "XORLS2uPrbma"
+      },
+      "outputs": [],
       "source": [
         "#@title Upload to huggingface privately\n",
         "#@markdown You may also set it to public but I'd recommend waiting for my next ipynb that will create mutliple quants and place them all into individual branches.\n",
@@ -225,13 +215,23 @@
         "create_repo(f\"{whoami().get('name', None)}/{model}-exl2-{BPW}bpw\", private=True)\n",
         "HfApi().upload_folder(folder_path=f\"{model}-exl2-{BPW}bpw\", repo_id=f\"{whoami().get('name', None)}/{model}-exl2-{BPW}bpw\", repo_type=\"model\", commit_message=\"Upload from Colab automation\")\n",
         "print(f\"uploaded to https://huggingface.co/{whoami().get('name', None)}/{model}-exl2-{BPW}bpw\")"
-      ],
-      "metadata": {
-        "cellView": "form",
-        "id": "XORLS2uPrbma"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     }
-  ]
-}
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "T4",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
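Most of this notebook diff is not a content change: the only edited cell source is the convert-to-safetensors line gaining `--bf16 --max-shard-size 1GB`. Everything else is key reordering, with per-cell `execution_count`/`metadata`/`outputs` moved before `source` and the top-level `metadata`/`nbformat` keys moved below `cells`, all alphabetized. That is exactly the layout a JSON writer with sorted keys produces; a sketch of one way to normalize a notebook like this (the canonicalization shown is an assumption about how the file was produced, not something stated in the commit):

```python
import json

# Rewrite a .ipynb with alphabetically sorted keys, which yields the
# "metadata before source, notebook metadata at the bottom" layout above.
def normalize(path: str) -> None:
    with open(path, encoding="utf-8") as f:
        nb = json.load(f)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(nb, f, indent=1, sort_keys=True, ensure_ascii=False)
        f.write("\n")

normalize("ipynb/EXL2_Private_Quant_V3.ipynb")
```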
ipynb/Multi_Quant_exl2.ipynb CHANGED

@@ -159,7 +159,7 @@
 "\n",
 "if not glob.glob(f\"models/{model}/*.safetensors\"): #check if safetensors model exists, if not try converting\n",
 "  print(\"Converting weights to safetensors, please wait...\")\n",
-"  result = subprocess.run(f\"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st\", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)\n",
+"  result = subprocess.run(f\"{pyt} convert-to-safetensors.py models{slsh}{model} --output models{slsh}{model}-st --max-shard-size 1GB --bf16\", shell=True) #convert to safetensors (Credit to oobabooga for this script as well)\n",
 "  if result.returncode != 0:\n",
 "    print(\"Converting failed. Please look for a safetensors/bin model.\")\n",
 "    sys.exit(\"Exiting...\")\n",
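Unlike the local scripts, the notebook variants also pin `--max-shard-size 1GB`: writing the converted weights as 1 GB shards keeps the peak memory of the save step low, which matters on Colab's small RAM allotment. For illustration, the equivalent knobs in transformers' own save path look like this (a sketch of the mechanism, not the converter script's actual code; the model path is a placeholder):

```python
import torch
from transformers import AutoModelForCausalLM

# Load in bf16 and re-save as safetensors shards capped at 1GB each.
model = AutoModelForCausalLM.from_pretrained(
    "models/some-model", torch_dtype=torch.bfloat16, low_cpu_mem_usage=True
)
model.save_pretrained("models/some-model-st", max_shard_size="1GB", safe_serialization=True)
```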