---
library_name: peft
license: apache-2.0
pipeline_tag: text-generation
inference: false
tags:
- mistralai/Mistral-7B-Instruct-v0.1
- finetuned
---
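Install the dependencies; `transformers` and `peft` are installed from their GitHub main branches: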
```bash
pip install -q accelerate bitsandbytes trl datasets
pip install git+https://github.com/huggingface/transformers
pip install git+https://github.com/huggingface/peft.git
```
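The snippet below loads the Mistral-7B-Instruct base model in 4-bit with bitsandbytes, attaches the LoRA adapter `Ashishkr/mistral-medical-consultation`, and merges the adapter into the base weights: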
```python
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)
from peft import PeftModel

base_model_name = "mistralai/Mistral-7B-Instruct-v0.1"

################################################################################
# bitsandbytes parameters
################################################################################

# Load the base model in 4-bit precision
use_4bit = True

# Compute dtype for the 4-bit base model
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

# Load the entire model on GPU 0
device_map = {"": 0}

# Resolve the compute dtype string to a torch dtype
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16 (informational only)
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load the 4-bit base model and attach the LoRA adapter
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    quantization_config=bnb_config,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, "Ashishkr/mistral-medical-consultation")

# Fold the adapter into the base weights (recent peft releases can merge
# into 4-bit bitsandbytes layers)
model = model.merge_and_unload()

# Load the tokenizer and set up padding
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
```
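If you want to reuse the merged model without repeating the merge step, you can write it to disk. A minimal sketch, assuming your installed transformers/bitsandbytes versions support serializing the still-quantized weights; the directory name is illustrative:

```python
# Hypothetical output directory; pick any local path.
output_dir = "./mistral-7b-medical-merged"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
```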
```python
# Run a text-generation pipeline with the merged model
prompt = "I have had neck pain for two days."

pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=True,
    temperature=0.9,
    max_length=200,
)
response = pipe(f"<s>[INST] {prompt} [/INST]")
print(response[0]["generated_text"])
```
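The `[INST]` markers above apply Mistral's instruction format by hand; with a recent transformers release you can instead let the tokenizer build the prompt from its bundled chat template. A minimal sketch under that assumption:

```python
# Build the prompt from the tokenizer's chat template rather than
# hard-coding the [INST] markers (requires a recent transformers release).
messages = [{"role": "user", "content": "I have had neck pain for two days."}]
chat_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(pipe(chat_prompt)[0]["generated_text"])
```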