|
--- |
|
license: bigcode-openrail-m |
|
datasets: |
|
- WizardLM/WizardLM_evol_instruct_70k |
|
--- |
|
|
|
<font size=5>Here is an example showing how to use the model quantized with auto_gptq:</font> |
|
``` |
|
# pip install auto_gptq
from auto_gptq import AutoGPTQForCausalLM
from transformers import AutoTokenizer

# Hub repository that holds both the tokenizer files and the quantized weights.
_3BITS_MODEL_PATH_V1_ = 'GodRain/WizardCoder-15B-V1.1-3bit'

# The tokenizer and the quantized model are loaded from the same repository.
tokenizer = AutoTokenizer.from_pretrained(_3BITS_MODEL_PATH_V1_)
model = AutoGPTQForCausalLM.from_quantized(_3BITS_MODEL_PATH_V1_)

# NOTE: `evaluate` is the helper shown in the next code block of this README;
# define it before running the two lines below.
out = evaluate("Hello, tell me a story about sun", model=model, tokenizer=tokenizer)
# `evaluate` returns one decoded string per generated sequence.
print(out[0].strip())
|
``` |
|
|
|
``` |
|
import torch
from transformers import GenerationConfig


def evaluate(
    batch_data,
    tokenizer,
    model,
    temperature=1,
    top_p=0.9,
    top_k=40,
    num_beams=1,
    max_new_tokens=2048,
    **kwargs,
):
    """Generate text for ``batch_data`` and return the decoded sequences.

    Args:
        batch_data: Instruction(s) handed to ``generate_prompt`` to build the
            prompt text.
        tokenizer: Tokenizer used to encode the prompts and decode the output.
        model: Model exposing ``generate`` and ``device``.
        temperature: Forwarded to ``GenerationConfig``.
        top_p: Forwarded to ``GenerationConfig``.
        top_k: Forwarded to ``GenerationConfig``.
        num_beams: Forwarded to ``GenerationConfig``.
        max_new_tokens: Upper bound on the number of generated tokens.
        **kwargs: Extra options forwarded to ``GenerationConfig``.
            NOTE(review): ``do_sample`` is not set here, so the sampling
            knobs above presumably only take effect when the caller passes
            ``do_sample=True`` -- confirm against the transformers docs.

    Returns:
        A list of decoded strings (special tokens removed), one per
        generated sequence.
    """
    # NOTE: `generate_prompt` is not defined in this README; it must be
    # provided by the surrounding script (the instruction -> prompt template).
    prompts = generate_prompt(batch_data)
    # Prompts are truncated to at most 256 tokens.
    inputs = tokenizer(prompts, return_tensors="pt", max_length=256, truncation=True)
    # Use the model's own device instead of relying on an undefined global
    # `device`. Assumes the model exposes `.device` -- TODO confirm for the
    # auto_gptq wrapper in use.
    input_ids = inputs["input_ids"].to(model.device)
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        **kwargs,
    )
    with torch.no_grad():
        # Only `.sequences` is read below, so per-step scores are not
        # requested (they were computed but never used).
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            max_new_tokens=max_new_tokens,
        )
    return tokenizer.batch_decode(generation_output.sequences, skip_special_tokens=True)
|
``` |
|
|
|
|
|
Citation: |
|
``` |
|
@misc{xu2023wizardlm, |
|
title={WizardLM: Empowering Large Language Models to Follow Complex Instructions}, |
|
author={Can Xu and Qingfeng Sun and Kai Zheng and Xiubo Geng and Pu Zhao and Jiazhan Feng and Chongyang Tao and Daxin Jiang}, |
|
year={2023}, |
|
eprint={2304.12244}, |
|
archivePrefix={arXiv}, |
|
primaryClass={cs.CL} |
|
} |
|
``` |