Update README.md
README.md
CHANGED
@@ -106,133 +106,59 @@ This model is solely designed for research settings, and its testing has only been carried
out in such environments. It should not be used in downstream applications, as additional
analysis is needed to assess potential harm or bias in the proposed application.

**Before:**

## Getting started with Orca

**Inference with Hugging Face library**

```python
import torch
import transformers

if torch.cuda.is_available():
    torch.set_default_device("cuda")
else:
    torch.set_default_device("cpu")

model = transformers.AutoModelForCausalLM.from_pretrained("microsoft/Orca-2-13b", device_map='auto')

# https://github.com/huggingface/transformers/issues/27132
# please use the slow tokenizer since the fast and slow tokenizers produce different tokens
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "microsoft/Orca-2-13b",
    use_fast=False,
)

system_message = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
user_message = "How can you determine if a restaurant is popular among locals or mainly attracts tourists, and why might this information be useful?"

prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"

inputs = tokenizer(prompt, return_tensors='pt')
output_ids = model.generate(inputs["input_ids"])
answer = tokenizer.batch_decode(output_ids)[0]

print(answer)

# Continue the conversation with a second turn from the user
second_turn_user_message = "Give me a list of the key points of your first answer."

# add_special_tokens=False so that no extra bos token is inserted between the messages
second_turn_message_in_markup = f"\n<|im_start|>user\n{second_turn_user_message}<|im_end|>\n<|im_start|>assistant"
second_turn_tokens = tokenizer(second_turn_message_in_markup, return_tensors='pt', add_special_tokens=False)
second_turn_input = torch.cat([output_ids, second_turn_tokens['input_ids']], dim=1)

output_ids_2 = model.generate(second_turn_input)
second_turn_answer = tokenizer.batch_decode(output_ids_2)[0]

print(second_turn_answer)
```

**Safe inference with Azure AI Content Safety**

Azure AI Content Safety is a service
that uses AI to keep your content safe. By integrating Orca 2 with Azure AI Content Safety,
we can moderate the model output by scanning it for sexual content, violence, hate, and
self-harm with multiple severity levels and multi-lingual detection.

```python
import os
import math
import transformers
import torch

from azure.ai.contentsafety import ContentSafetyClient
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import HttpResponseError
from azure.ai.contentsafety.models import AnalyzeTextOptions

CONTENT_SAFETY_KEY = os.environ["CONTENT_SAFETY_KEY"]
CONTENT_SAFETY_ENDPOINT = os.environ["CONTENT_SAFETY_ENDPOINT"]

# We use Azure AI Content Safety to filter out any content that reaches the "Medium" threshold
# For more information: https://learn.microsoft.com/en-us/azure/ai-services/content-safety/
def should_filter_out(input_text, threshold=4):
    # Create a Content Safety client
    client = ContentSafetyClient(CONTENT_SAFETY_ENDPOINT, AzureKeyCredential(CONTENT_SAFETY_KEY))

    # Construct a request
    request = AnalyzeTextOptions(text=input_text)

    # Analyze text
    try:
        response = client.analyze_text(request)
    except HttpResponseError as e:
        print("Analyze text failed.")
        if e.error:
            print(f"Error code: {e.error.code}")
            print(f"Error message: {e.error.message}")
            raise
        print(e)
        raise

    categories = ["hate_result", "self_harm_result", "sexual_result", "violence_result"]
    max_score = -math.inf
    for category in categories:
        max_score = max(max_score, getattr(response, category).severity)

    return max_score >= threshold

model_path = 'microsoft/Orca-2-13b'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = transformers.AutoModelForCausalLM.from_pretrained(model_path)
model.to(device)

tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_path,
    model_max_length=4096,
    padding_side="right",
    use_fast=False,
    add_special_tokens=False,
)

prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"

inputs = tokenizer(prompt, return_tensors='pt')
inputs = inputs.to(device)

output_ids = model.generate(inputs["input_ids"], max_length=4096, do_sample=False, temperature=0.0, use_cache=True)
sequence_length = inputs["input_ids"].shape[1]
new_output_ids = output_ids[:, sequence_length:]
answers = tokenizer.batch_decode(new_output_ids, skip_special_tokens=True)
final_output = answers[0] if not should_filter_out(answers[0]) else "[Content Filtered]"

print(final_output)
```

**After:**

## Getting started with Orca-2-13b-awq

**Inference with Hugging Face library**

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model from Hugging Face Hub
quant_path = "oieieio/Orca-2-13b-awq"
tokenizer = AutoTokenizer.from_pretrained(quant_path)
model = AutoModelForCausalLM.from_pretrained(quant_path)

# Move the model to GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

# First turn of the conversation
system_message = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
user_message = "How can you determine if a restaurant is popular among locals or mainly attracts tourists, and why might this information be useful?"

prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"

# Encode the first prompt
inputs = tokenizer(prompt, return_tensors='pt').to(device)
output_ids = model.generate(inputs["input_ids"], max_length=512)

# Decode the first response
answer = tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Print the first response
print(answer)

# Second turn of the conversation
second_turn_user_message = "Give me a list of the key points of your first answer."

# Append the second turn message to the already generated ids without adding special tokens
second_turn_message_in_markup = f"\n<|im_start|>user\n{second_turn_user_message}<|im_end|>\n<|im_start|>assistant"
second_turn_tokens = tokenizer(second_turn_message_in_markup, return_tensors='pt', add_special_tokens=False).to(device)
second_turn_input_ids = torch.cat([output_ids, second_turn_tokens['input_ids']], dim=1)

# Generate the second response
output_ids_2 = model.generate(second_turn_input_ids, max_length=1024)

# Decode the second response
second_turn_answer = tokenizer.decode(output_ids_2[0], skip_special_tokens=True)

# Print the second response
print(second_turn_answer)
```
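
Note that decoding `output_ids[0]` returns the full transcript, prompt included. If you only want the newly generated reply, you can slice off the prompt tokens before decoding, the same trick used in the earlier content-safety example. A minimal sketch, reusing `model`, `tokenizer`, `device`, and `prompt` from the example above:

```python
# Minimal sketch: decode only the tokens generated after the prompt.
# Assumes model, tokenizer, device and prompt are defined as in the example above.
inputs = tokenizer(prompt, return_tensors='pt').to(device)
output_ids = model.generate(inputs["input_ids"], max_length=512)

prompt_length = inputs["input_ids"].shape[1]   # number of prompt tokens
reply_ids = output_ids[:, prompt_length:]      # keep only the newly generated tokens
reply = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
print(reply)
```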

## Citation
```bibtex