oieieio
/

Orca-2-13b-awq

Text Generation

text-generation-inference

4-bit precision

Model card Files Files and versions Community

oieieio committed on Dec 2, 2023

Commit

066381b

·

1 Parent(s): 4b0da02

Update README.md

Files changed (1) hide show

README.md +75 -0

README.md CHANGED Viewed

@@ -114,6 +114,81 @@ analysis is needed to assess potential harm or bias in the proposed application.
 !pip install autoawq
 ```
 ```python
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM

 !pip install autoawq
 ```
+```python
+!pip install torch --upgrade --index-url https://download.pytorch.org/whl/cu121
+```
+```python
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Load the tokenizer and model
+quant_path = "oieieio/Orca-2-13b-awq"
+tokenizer = AutoTokenizer.from_pretrained(quant_path)
+model = AutoModelForCausalLM.from_pretrained(quant_path)
+# Move the model to GPU if available
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+model.to(device)
+# Initial system message
+system_message = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant..."
+while True:
+    # User input
+    user_message = input("User: ")
+    if user_message.lower() == 'quit':
+        break
+    # Construct the prompt
+    prompt = f"system\n{system_message}\nuser\n{user_message}\nassistant"
+    # Encode and generate response
+    inputs = tokenizer(prompt, return_tensors='pt').to(device)
+    output_ids = model.generate(inputs["input_ids"], max_length=512)
+    answer = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    # Print the response
+    print("AI: ", answer)
+```
+```python
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Load the tokenizer and model
+quant_path = "oieieio/Orca-2-13b-awq"
+tokenizer = AutoTokenizer.from_pretrained(quant_path)
+model = AutoModelForCausalLM.from_pretrained(quant_path)
+# Move the model to GPU if available
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+model.to(device)
+# Initial system message
+system_message = "You are Orca, an AI language model created by Microsoft. You are a cautious assistant..."
+while True:
+    user_message = input("User: ")
+    if user_message.lower() == 'quit':
+        break
+    prompt = f"system\n{system_message}\nuser\n{user_message}\nassistant"
+    inputs = tokenizer(prompt, return_tensors='pt').to(device)
+    output_ids = model.generate(
+        inputs["input_ids"],
+        max_new_tokens=50,  # Adjust the number of generated tokens
+        temperature=0.7,    # Adjust for randomness
+        top_k=50,           # Adjust the number of highest probability tokens to consider
+        top_p=0.95,         # Adjust the cumulative probability threshold
+        do_sample=True     # Use sampling instead of greedy decoding
+    )
+    answer = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    print("AI: ", answer)
+```
 ```python
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM