Virt-io committed on
Commit
a853257
1 Parent(s): d3d556d

Add new data option

Browse files
Files changed (1) hide show
  1. Imat_AutoGGUF.ipynb +4 -2
Imat_AutoGGUF.ipynb CHANGED
@@ -33,11 +33,13 @@
33
  "\n",
34
  "# @markdown ### ⚡ Quantization parameters\n",
35
  "MODEL_ID = \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\" # @param {type:\"string\"}\n",
36
- "IMATRIX_OPTION = 'Imatrix' # @param [\"Imatrix\", \"Imatrix-RP\"]\n",
37
  "if IMATRIX_OPTION == \"Imatrix\":\n",
38
  " IMATRIX = f\"Google-Colab-Imatrix-GGUF/Imatrix/imatrix.txt\"\n",
39
  "if IMATRIX_OPTION == \"Imatrix-RP\":\n",
40
  " IMATRIX = f\"Google-Colab-Imatrix-GGUF/Imatrix/imatrix-with-rp-data.txt\"\n",
 
 
41
  "print(IMATRIX)\n",
42
  "QUANTIZATION_METHODS = \"IQ4_NL, Q8_0\" # @param {type:\"string\"}\n",
43
  "QUANTIZATION_METHODS = QUANTIZATION_METHODS.replace(\" \", \"\").split(\",\")\n",
@@ -65,7 +67,7 @@
65
  "!pip install -r llama.cpp/requirements/requirements-convert.txt\n",
66
  "\n",
67
  "# Build llamacpp\n",
68
- "!cd llama.cpp && make clean && LLAMA_CUBLAS=1 LLAMA_CUDA_FORCE_MMQ=1 LLAMA_LTO=1 LLAMA_CUDA_DMMV_X=64 LLAMA_CUDA_MMV_Y=4 LLAMA_CUDA_KQUANTS_ITER=2 LLAMA_CUDA_F16=1 LLAMA_CUDA_DMMV_F16=1 make -j16\n",
69
  "\n",
70
  "# Convert to fp16\n",
71
  "fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.gguf\"\n",
 
33
  "\n",
34
  "# @markdown ### ⚡ Quantization parameters\n",
35
  "MODEL_ID = \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\" # @param {type:\"string\"}\n",
36
+ "IMATRIX_OPTION = 'Imatrix' # @param [\"Imatrix\", \"Imatrix-RP\", \"Imatrix-ChatML-test\"]\n",
37
  "if IMATRIX_OPTION == \"Imatrix\":\n",
38
  " IMATRIX = f\"Google-Colab-Imatrix-GGUF/Imatrix/imatrix.txt\"\n",
39
  "if IMATRIX_OPTION == \"Imatrix-RP\":\n",
40
  " IMATRIX = f\"Google-Colab-Imatrix-GGUF/Imatrix/imatrix-with-rp-data.txt\"\n",
41
+ "if IMATRIX_OPTION == \"Imatrix-ChatML-test\":\n",
42
+ " IMATRIX = f\"Google-Colab-Imatrix-GGUF/Imatrix/chatml_test.txt\"\n",
43
  "print(IMATRIX)\n",
44
  "QUANTIZATION_METHODS = \"IQ4_NL, Q8_0\" # @param {type:\"string\"}\n",
45
  "QUANTIZATION_METHODS = QUANTIZATION_METHODS.replace(\" \", \"\").split(\",\")\n",
 
67
  "!pip install -r llama.cpp/requirements/requirements-convert.txt\n",
68
  "\n",
69
  "# Build llamacpp\n",
70
+ "!cd llama.cpp && make clean && LLAMA_CUDA=1 LLAMA_LTO=1 LLAMA_CUDA_DMMV_X=64 LLAMA_CUDA_MMV_Y=4 LLAMA_CUDA_KQUANTS_ITER=2 LLAMA_CUDA_F16=1 LLAMA_CUDA_DMMV_F16=1 make -j16\n",
71
  "\n",
72
  "# Convert to fp16\n",
73
  "fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.gguf\"\n",