File size: 2,761 Bytes
135440a 258bde0 ff57ab4 258bde0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
---
language:
- zh
pipeline_tag: text-generation
tags:
- llama2
---
This repository provides a 4-bit quantized version of the [yayi-7b-llama2 model](https://huggingface.co/wenge-research/yayi-7b-llama2) released by [wenge-research](https://www.wenge.com/). The quantization was performed with [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ).
## Usage Example
```python
import torch
from auto_gptq import AutoGPTQForCausalLM
from transformers import LlamaTokenizer, GenerationConfig
from transformers import StoppingCriteria, StoppingCriteriaList
# Hub repository id; the same value is passed to both loaders below.
pretrained_model_name_or_path = "zake7749/yayi-7b-llama2-4bit-autogptq"
# Load the tokenizer from that repository.
tokenizer = LlamaTokenizer.from_pretrained(pretrained_model_name_or_path)
# Load the quantized checkpoint through AutoGPTQ's `from_quantized` loader.
model = AutoGPTQForCausalLM.from_quantized(pretrained_model_name_or_path)
# Define the stopping criteria
class KeywordsStoppingCriteria(StoppingCriteria):
    """Stopping criterion that fires when the newest token is a stop id.

    Only the first sequence of the batch (``input_ids[0]``) is inspected.
    """

    def __init__(self, keywords_ids:list):
        # Token ids whose appearance as the latest token ends generation.
        self.keywords = keywords_ids

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the last token of the first sequence is a stop id.

        ``scores`` and any extra keyword arguments are accepted but not used.
        """
        newest_token = input_ids[0][-1]
        return newest_token in self.keywords
# Strings after which generation should halt: the chat-template tags used in
# the prompt below plus the "</s>" string.
stop_words = [
    "<|End|>",
    "<|YaYi|>",
    "<|Human|>",
    "</s>",
]
# Keep only the final id of each encoding (presumably so that any id the
# tokenizer prepends, such as BOS, is skipped -- confirm against the tokenizer).
stop_ids = [tokenizer.encode(marker)[-1] for marker in stop_words]
stop_criteria = KeywordsStoppingCriteria(stop_ids)
# Inference: wrap the user prompt in the chat template, sample a reply, and
# print only the newly generated text.
prompt = "你是谁?"
formatted_prompt = f"""<|System|>:
You are a helpful, respectful and honest assistant named YaYi developed by Beijing Wenge Technology Co.,Ltd. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<|Human|>:
{prompt}
<|YaYi|>:
"""
# Move the encoded prompt to the device the model lives on.
inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
# Take the LAST id of the encoding, exactly as the stop-word lookup does: the
# Llama tokenizer prepends a BOS id by default, so index 0 would be BOS rather
# than the "<|End|>" token.
eos_token_id = tokenizer.encode("<|End|>")[-1]
generation_config = GenerationConfig(
    eos_token_id=eos_token_id,
    pad_token_id=eos_token_id,
    do_sample=True,
    max_new_tokens=256,
    temperature=0.3,
    repetition_penalty=1.1,
    no_repeat_ngram_size=0
)
response = model.generate(**inputs, generation_config=generation_config, stopping_criteria=StoppingCriteriaList([stop_criteria]))
# Drop the prompt tokens so that only the continuation is decoded.
response = [response[0][len(inputs.input_ids[0]):]]
# Special tokens are kept in the decoded text (skip_special_tokens=False).
response_str = tokenizer.batch_decode(response, skip_special_tokens=False, clean_up_tokenization_spaces=False)[0]
print(response_str)
```
## License
Please refer to [YaYi/LICENSE_MODEL](https://github.com/wenge-research/YaYi/blob/main/LICENSE_MODEL).