migtissera
committed on
Update README.md
README.md CHANGED
@@ -23,4 +23,76 @@ Therefore, you have 13 apples today.
 
 You: How many oranges did I have on Saturday?
 You didn't have any oranges on Saturday because you bought 3 oranges yesterday.
 ```
+
+# Inference Code:
+
+```python
+# pip install accelerate
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+# Path to the merged Tess-v2.5 checkpoint; the commented lines are alternative checkpoints.
+model_path = "/home/migel/Tess-v2.5-Gemma-2-27b/merged"
+# model_path = "/home/migel/gemma-2-27b"
+# model_path = "google/gemma-2-27b-it"
+
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForCausalLM.from_pretrained(
+    model_path,
+    device_map="auto",
+    torch_dtype=torch.bfloat16
+)
+
+# Stop generating when the model closes its turn.
+terminators = [
+    tokenizer.convert_tokens_to_ids("<end_of_turn>"),
+]
+
+
+def generate_text(llm_prompt):
+    inputs = tokenizer.encode(llm_prompt, add_special_tokens=False, return_tensors="pt")
+    input_ids = inputs.to("cuda")
+    length = len(input_ids[0])
+
+    instance = {
+        "top_p": 1.0,
+        "temperature": 0.75,
+        "generate_len": 1024,
+        "top_k": 50,
+    }
+
+    generation = model.generate(
+        input_ids,
+        max_length=length + instance["generate_len"],
+        use_cache=True,
+        do_sample=True,
+        top_p=instance["top_p"],
+        temperature=instance["temperature"],
+        top_k=instance["top_k"],
+        num_return_sequences=1,
+        pad_token_id=tokenizer.eos_token_id,
+        eos_token_id=terminators,
+    )
+    # Decode only the newly generated tokens, not the prompt.
+    output = generation[0][length:]
+    string = tokenizer.decode(output, skip_special_tokens=True)
+    return string
+
+
+# conversation = f"""<|im_start|>system\nYou are Tesoro, a helpful AI assistant. You always provide detailed answers without hesitation. No follow-up questions necessary.<|im_end|>\n<|im_start|>user\n"""
+
+conversation = f"""<bos><start_of_turn>user\n"""
+
+while True:
+    user_input = input("You: ")
+    llm_prompt = f"{conversation}{user_input}<end_of_turn>\n<start_of_turn>model\n"
+    answer = generate_text(llm_prompt)
+    print(answer)
+    # Append the completed turn so the next prompt carries the full conversation history.
+    conversation = f"{llm_prompt}{answer}\n<start_of_turn>user\n"
+    json_data = {"prompt": user_input, "answer": answer}
+```
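
The loop above builds the Gemma `<start_of_turn>` prompt by hand. A minimal single-turn sketch of the same idea using `tokenizer.apply_chat_template` is shown below; it assumes the checkpoint's tokenizer ships a Gemma-style chat template (as `google/gemma-2-27b-it` does) and is only illustrative.

```python
# Minimal sketch: let the tokenizer's chat template build the prompt instead of
# concatenating <start_of_turn> strings by hand. Assumes the tokenizer ships a
# Gemma-style chat template (true for google/gemma-2-27b-it; check your checkpoint).
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_path = "google/gemma-2-27b-it"  # or a local checkpoint path, as in the snippet above

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path, device_map="auto", torch_dtype=torch.bfloat16
)

messages = [{"role": "user", "content": "How many oranges did I have on Saturday?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

outputs = model.generate(
    input_ids,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.75,
    top_p=1.0,
    top_k=50,
    eos_token_id=tokenizer.convert_tokens_to_ids("<end_of_turn>"),
    pad_token_id=tokenizer.eos_token_id,
)
# Print only the model's reply, skipping the prompt tokens.
print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True))
```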