oieieio committed on
Commit
e8c0d0e
·
1 Parent(s): ca4892f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +32 -106
README.md CHANGED
@@ -106,133 +106,59 @@ This model is solely designed for research settings, and its testing has only be
106
  out in such environments. It should not be used in downstream applications, as additional
107
  analysis is needed to assess potential harm or bias in the proposed application.
108
 
109
- ## Getting started with Orca 2
110
 
111
  **Inference with Hugging Face library**
112
 
113
  ```python
114
  import torch
115
- import transformers
116
-
117
- if torch.cuda.is_available():
118
- torch.set_default_device("cuda")
119
- else:
120
- torch.set_default_device("cpu")
121
-
122
- model = transformers.AutoModelForCausalLM.from_pretrained("microsoft/Orca-2-13b", device_map='auto')
123
-
124
- # https://github.com/huggingface/transformers/issues/27132
125
- # please use the slow tokenizer since the fast and slow tokenizers produce different tokens
126
- tokenizer = transformers.AutoTokenizer.from_pretrained(
127
- "microsoft/Orca-2-13b",
128
- use_fast=False,
129
- )
130
 
 
 
 
 
 
 
 
 
 
 
131
  system_message = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
132
  user_message = "How can you determine if a restaurant is popular among locals or mainly attracts tourists, and why might this information be useful?"
133
 
134
- prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"
135
 
136
- inputs = tokenizer(prompt, return_tensors='pt')
137
- output_ids = model.generate(inputs["input_ids"],)
138
- answer = tokenizer.batch_decode(output_ids)[0]
139
 
 
 
 
 
140
  print(answer)
141
 
142
- # This example continues showing how to add a second turn message by the user to the conversation
143
  second_turn_user_message = "Give me a list of the key points of your first answer."
144
 
145
- # we set add_special_tokens=False because we don't want to automatically add a bos_token between messages
146
- second_turn_message_in_markup = f"\n<|im_start|>user\n{second_turn_user_message}<|im_end|>\n<|im_start|>assistant"
147
- second_turn_tokens = tokenizer(second_turn_message_in_markup, return_tensors='pt', add_special_tokens=False)
148
- second_turn_input = torch.cat([output_ids, second_turn_tokens['input_ids']], dim=1)
149
-
150
- output_ids_2 = model.generate(second_turn_input,)
151
- second_turn_answer = tokenizer.batch_decode(output_ids_2)[0]
152
-
153
- print(second_turn_answer)
154
- ```
155
-
156
-
157
- **Safe inference with Azure AI Content Safety**
158
 
159
- The usage of [Azure AI Content Safety](https://azure.microsoft.com/en-us/products/ai-services/ai-content-safety/) on top of model prediction is strongly encouraged
160
- and can help prevent content harms. Azure AI Content Safety is a content moderation platform
161
- that uses AI to keep your content safe. By integrating Orca 2 with Azure AI Content Safety,
162
- we can moderate the model output by scanning it for sexual content, violence, hate, and
163
- self-harm with multiple severity levels and multi-lingual detection.
164
 
165
- ```python
166
- import os
167
- import math
168
- import transformers
169
- import torch
170
-
171
- from azure.ai.contentsafety import ContentSafetyClient
172
- from azure.core.credentials import AzureKeyCredential
173
- from azure.core.exceptions import HttpResponseError
174
- from azure.ai.contentsafety.models import AnalyzeTextOptions
175
-
176
- CONTENT_SAFETY_KEY = os.environ["CONTENT_SAFETY_KEY"]
177
- CONTENT_SAFETY_ENDPOINT = os.environ["CONTENT_SAFETY_ENDPOINT"]
178
-
179
- # We use Azure AI Content Safety to filter out any content that reaches "Medium" threshold
180
- # For more information: https://learn.microsoft.com/en-us/azure/ai-services/content-safety/
181
- def should_filter_out(input_text, threshold=4):
182
- # Create a Content Safety client
183
- client = ContentSafetyClient(CONTENT_SAFETY_ENDPOINT, AzureKeyCredential(CONTENT_SAFETY_KEY))
184
-
185
- # Construct a request
186
- request = AnalyzeTextOptions(text=input_text)
187
-
188
- # Analyze text
189
- try:
190
- response = client.analyze_text(request)
191
- except HttpResponseError as e:
192
- print("Analyze text failed.")
193
- if e.error:
194
- print(f"Error code: {e.error.code}")
195
- print(f"Error message: {e.error.message}")
196
- raise
197
- print(e)
198
- raise
199
-
200
- categories = ["hate_result", "self_harm_result", "sexual_result", "violence_result"]
201
- max_score = -math.inf
202
- for category in categories:
203
- max_score = max(max_score, getattr(response, category).severity)
204
-
205
- return max_score >= threshold
206
-
207
- model_path = 'microsoft/Orca-2-13b'
208
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
209
- model = transformers.AutoModelForCausalLM.from_pretrained(model_path)
210
- model.to(device)
211
-
212
- tokenizer = transformers.AutoTokenizer.from_pretrained(
213
- model_path,
214
- model_max_length=4096,
215
- padding_side="right",
216
- use_fast=False,
217
- add_special_tokens=False,
218
- )
219
 
220
- system_message = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
221
- user_message = "\" \n :You can't just say, \"\"that's crap\"\" and remove it without gaining a consensus. You already know this, based on your block history. —/ \" \nIs the comment obscene? \nOptions : Yes, No."
222
-
223
- prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"
224
 
225
- inputs = tokenizer(prompt, return_tensors='pt')
226
- inputs = inputs.to(device)
227
 
228
- output_ids = model.generate(inputs["input_ids"], max_length=4096, do_sample=False, temperature=0.0, use_cache=True)
229
- sequence_length = inputs["input_ids"].shape[1]
230
- new_output_ids = output_ids[:, sequence_length:]
231
- answers = tokenizer.batch_decode(new_output_ids, skip_special_tokens=True)
232
- final_output = answers[0] if not should_filter_out(answers[0]) else "[Content Filtered]"
233
 
234
- print(final_output)
235
- ```
236
 
237
  ## Citation
238
  ```bibtex
 
106
  out in such environments. It should not be used in downstream applications, as additional
107
  analysis is needed to assess potential harm or bias in the proposed application.
108
 
109
+ ## Getting started with Orca-2-13b-awq
110
 
111
  **Inference with Hugging Face library**
112
 
113
  ```python
114
  import torch
115
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ # Load the tokenizer and model from Hugging Face Hub
118
+ quant_path = "oieieio/Orca-2-13b-awq"
119
+ tokenizer = AutoTokenizer.from_pretrained(quant_path)
120
+ model = AutoModelForCausalLM.from_pretrained(quant_path)
121
+
122
+ # Move the model to GPU if available
123
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
124
+ model.to(device)
125
+
126
+ # First turn of the conversation
127
  system_message = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."
128
  user_message = "How can you determine if a restaurant is popular among locals or mainly attracts tourists, and why might this information be useful?"
129
 
130
+ prompt = f"system\n{system_message}\nuser\n{user_message}\nassistant"
131
 
132
+ # Encode the first prompt
133
+ inputs = tokenizer(prompt, return_tensors='pt').to(device)
134
+ output_ids = model.generate(inputs["input_ids"], max_length=512)
135
 
136
+ # Decode the first response
137
+ answer = tokenizer.decode(output_ids[0], skip_special_tokens=True)
138
+
139
+ # Print the first response
140
  print(answer)
141
 
142
+ # Second turn of the conversation
143
  second_turn_user_message = "Give me a list of the key points of your first answer."
144
 
145
+ # Append the second turn message to the already generated ids without adding special tokens
146
+ second_turn_message_in_markup = f"\nuser\n{second_turn_user_message}\nassistant"
147
+ second_turn_tokens = tokenizer(second_turn_message_in_markup, return_tensors='pt', add_special_tokens=False).to(device)
148
+ second_turn_input_ids = torch.cat([output_ids, second_turn_tokens['input_ids']], dim=1)
 
 
 
 
 
 
 
 
 
149
 
150
+ # Generate the second response
151
+ output_ids_2 = model.generate(second_turn_input_ids, max_length=1024)
 
 
 
152
 
153
+ # Decode the second response
154
+ second_turn_answer = tokenizer.decode(output_ids_2[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
+ # Print the second response
157
+ print(second_turn_answer)
 
 
158
 
159
+ ```
 
160
 
 
 
 
 
 
161
 
 
 
162
 
163
  ## Citation
164
  ```bibtex