---
tags:
- text-generation
license: other
---
# Usage
```python
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the PEFT config, the base Llama-2 model, and attach the fine-tuned adapter
config = PeftConfig.from_pretrained("Ashishkr/llama2-qrecc-context-resolution")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
model = PeftModel.from_pretrained(model, "Ashishkr/llama2-qrecc-context-resolution").to(device)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
def response_generate(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    prompt: str,
    max_new_tokens: int = 128,
    temperature: float = 0.7,
):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(device)

    # Generate under bfloat16 autocast; use the detected device type so the
    # example also runs (slowly) on CPU-only machines
    with torch.autocast(device.type, dtype=torch.bfloat16):
        response = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,  # sampling must be enabled for `temperature` to take effect
            temperature=temperature,
            return_dict_in_generate=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    # The returned sequence includes the prompt tokens, so the decoded string
    # starts with the prompt followed by the model's continuation
    decoded_output = tokenizer.decode(
        response["sequences"][0],
        skip_special_tokens=True,
    )
    return decoded_output
prompt = """ Strictly use the context provided, to generate the repsonse. No additional information to be added. Re-write the user query using the context .
>>CONTEXT<<Where did jessica go to school? Where did she work at?>>USER<<What did she do next for work?>>REWRITE<<"""
response = response_generate(
    model,
    tokenizer,
    prompt,
    max_new_tokens=20,
    temperature=0.1,
)
print(response)
```
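
Because `generate` returns the prompt tokens together with the continuation, the decoded output above contains the full prompt followed by the rewritten query. The sketch below is an illustrative helper, not part of the model repo (the `rewrite_query` name and the marker-splitting step are assumptions): it builds the prompt in the `>>CONTEXT<< / >>USER<< / >>REWRITE<<` format and keeps only the text generated after `>>REWRITE<<`.

```python
# Illustrative wrapper around response_generate() defined above
def rewrite_query(context: str, user_query: str) -> str:
    # Reuse the instruction text and marker format from the example prompt
    prompt = (
        " Strictly use the context provided, to generate the repsonse. "
        "No additional information to be added. Re-write the user query using the context .\n"
        f">>CONTEXT<<{context}>>USER<<{user_query}>>REWRITE<<"
    )
    full_output = response_generate(model, tokenizer, prompt, max_new_tokens=20, temperature=0.1)
    # generate() echoes the prompt, so keep only what follows the >>REWRITE<< marker
    return full_output.split(">>REWRITE<<")[-1].strip()

print(rewrite_query(
    "Where did jessica go to school? Where did she work at?",
    "What did she do next for work?",
))
```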