oieieio committed · Commit 066381b · 1 parent: 4b0da02

Update README.md

Files changed (1)
  1. README.md +75 -0
README.md CHANGED
@@ -114,6 +114,81 @@ analysis is needed to assess potential harm or bias in the proposed application.
  !pip install autoawq
  ```
 
+ ```bash
+ !pip install torch --upgrade --index-url https://download.pytorch.org/whl/cu121
+ ```
+
+ ```python
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ # Load the tokenizer and model
+ quant_path = "oieieio/Orca-2-13b-awq"
+ tokenizer = AutoTokenizer.from_pretrained(quant_path)
+ model = AutoModelForCausalLM.from_pretrained(quant_path)
+
+ # Move the model to GPU if available
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ model.to(device)
+
+ # Initial system message
+ system_message = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant..."
+
+ while True:
+     # User input
+     user_message = input("User: ")
+     if user_message.lower() == 'quit':
+         break
+
+     # Construct the prompt in Orca-2's ChatML-style format
+     prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"
+
+     # Encode and generate a response
+     inputs = tokenizer(prompt, return_tensors='pt').to(device)
+     output_ids = model.generate(**inputs, max_length=512)
+     answer = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+     # Print the response
+     print("AI: ", answer)
+ ```
+
+ ```python
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ # Load the tokenizer and model
+ quant_path = "oieieio/Orca-2-13b-awq"
+ tokenizer = AutoTokenizer.from_pretrained(quant_path)
+ model = AutoModelForCausalLM.from_pretrained(quant_path)
+
+ # Move the model to GPU if available
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ model.to(device)
+
+ # Initial system message
+ system_message = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant..."
+
+ while True:
+     user_message = input("User: ")
+     if user_message.lower() == 'quit':
+         break
+
+     # Construct the prompt in Orca-2's ChatML-style format
+     prompt = f"<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"
+     inputs = tokenizer(prompt, return_tensors='pt').to(device)
+
+     output_ids = model.generate(
+         **inputs,
+         max_new_tokens=50,   # Number of new tokens to generate
+         temperature=0.7,     # Sampling temperature; lower is less random
+         top_k=50,            # Consider only the 50 highest-probability tokens
+         top_p=0.95,          # Nucleus sampling: cumulative probability threshold
+         do_sample=True       # Sample instead of greedy decoding
+     )
+     answer = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+     print("AI: ", answer)
+ ```
+
  ```python
  import torch
  from transformers import AutoTokenizer, AutoModelForCausalLM
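
Since the README installs `autoawq`, the quantized checkpoint can also be loaded through AutoAWQ's native loader instead of plain `AutoModelForCausalLM`. A minimal sketch, assuming AutoAWQ's standard `AutoAWQForCausalLM.from_quantized` entry point; `fuse_layers` is its optional fused-kernel speed-up:

```python
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

quant_path = "oieieio/Orca-2-13b-awq"

# Load the AWQ-quantized weights with AutoAWQ's own loader;
# fuse_layers=True fuses attention/MLP modules for faster inference.
model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
tokenizer = AutoTokenizer.from_pretrained(quant_path)
```

The resulting model exposes the same `generate` interface used in the chat loops above.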
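
The chat loops above print each answer only once generation has finished. For token-by-token output, `transformers.TextStreamer` can be passed to `generate`; a self-contained sketch using the same model and ChatML-style prompt as the examples above (the prompt text here is only illustrative):

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

quant_path = "oieieio/Orca-2-13b-awq"
tokenizer = AutoTokenizer.from_pretrained(quant_path)
model = AutoModelForCausalLM.from_pretrained(quant_path)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

# Print tokens as they are generated; skip_prompt hides the echoed prompt.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

prompt = ("<|im_start|>system\nYou are Orca, a cautious assistant.<|im_end|>\n"
          "<|im_start|>user\nExplain AWQ quantization in one sentence.<|im_end|>\n"
          "<|im_start|>assistant")
inputs = tokenizer(prompt, return_tensors='pt').to(device)
model.generate(**inputs, max_new_tokens=256, do_sample=True,
               temperature=0.7, top_p=0.95, streamer=streamer)
```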