Squish42's picture
Initial commit
be6f120
raw
history blame
957 Bytes
from transformers import AutoTokenizer, TextGenerationPipeline
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
import logging
# Script body: load a local checkpoint, quantize it with AutoGPTQ using a
# single tokenized example, and save the quantized weights.

# Root logger: INFO level, timestamped records that include the logger name.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
)

# Where the original model is read from and where the quantized one is written.
pretrained_model_dir: str = "models/WizardLM-7B-Uncensored"
quantized_model_dir: str = "./"

# "quantize_config" is expanded into BaseQuantizeConfig keyword arguments;
# "use_safetensors" is forwarded to save_quantized at the end of the script.
config: dict = {
    "quantize_config": {
        "model_file_base_name": "WizardLM-7B-Uncensored",
        "bits": 8,
        "desc_act": False,
        "group_size": 128,
        "true_sequential": True,
    },
    "use_safetensors": True,
}

tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_dir,
    use_fast=True,
)

# The only example handed to quantize() is this one tokenized sentence.
examples: list[dict[str, list[int]]] = [
    tokenizer("It was a cold night"),
]

model = AutoGPTQForCausalLM.from_pretrained(
    pretrained_model_dir,
    BaseQuantizeConfig(**config["quantize_config"]),
)
model.quantize(examples)
model.save_quantized(
    quantized_model_dir,
    use_safetensors=config["use_safetensors"],
)