Update README.md
README.md
This section provides a step-by-step guide to loading and using the model for generation.

```
pip install transformers torch peft
```
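
The 4-bit loading path below also relies on the `bitsandbytes` and `accelerate` packages, which the command above does not list; if your environment lacks them (an assumption about your setup), they can be installed the same way:

```
pip install bitsandbytes accelerate
```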

## Import Required Libraries

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
```

In this cell, we import the necessary classes from the Hugging Face Transformers package: the model and tokenizer loaders, plus `BitsAndBytesConfig` for quantization.

## Configure 4-bit Quantization

```python
# Configure quantization settings
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)
```

This cell configures 4-bit quantization to reduce the memory footprint of the model: weights are stored in the 4-bit NF4 format (with double quantization compressing the quantization constants as well), while matrix computations still run in float16.
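
The saving is easy to estimate. A back-of-envelope sketch with illustrative figures (assumed round numbers, not measurements from this repository):

```python
# Rough weight-memory estimate for a ~32B-parameter model
# (illustrative only; ignores the KV cache, activations, and
# quantization metadata such as the NF4 quant constants).
params = 32e9
print(f"float16 weights: ~{params * 2 / 1e9:.0f} GB")    # 2 bytes per parameter
print(f"nf4 weights:     ~{params * 0.5 / 1e9:.0f} GB")  # 4 bits per parameter
```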

## Load Model and Tokenizer

```python
# Load the model directly
model_name = "amornpan/V3_qwen2.5-32b-med-thai-optimized"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
```

In this cell, we load the pre-trained Qwen 2.5 32B Thai medical model with the 4-bit configuration and its matching tokenizer; `device_map="auto"` lets accelerate spread the quantized layers across the available devices.
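
To confirm what the quantized load actually used, two standard transformers/accelerate features can help; a minimal sanity check, assuming the cell above has completed:

```python
# Optional checks after loading: total memory taken by the model's
# parameters and buffers, and the device placement accelerate chose.
print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.1f} GB")
print(model.hf_device_map)
```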

## Create Testing Function

```python
# Function to test the model
def test_model(prompt, max_new_tokens=256):
    system_prompt = "You are a question answering assistant. Answer the question as truthful and helpful as possible. คุณคือผู้ช่วยตอบคำถาม จงตอบคำถามอย่างถูกต้องและมีประโยชน์ที่สุด"
    full_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    # Generate a response
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens
    )

    response = tokenizer.decode(generated_ids[0], skip_special_tokens=False)
    # Extract only the assistant's response
    assistant_response = response.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0]

    return assistant_response
```

This function handles the full generation round trip: it wraps the question in Qwen's ChatML prompt format, generates up to `max_new_tokens` new tokens, and extracts only the assistant's reply from the decoded output.
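
Instead of hand-building the ChatML string, the same prompt can usually be produced with the tokenizer's built-in chat template. A minimal sketch, assuming the checkpoint's tokenizer bundles Qwen's ChatML template (the base Qwen2.5 tokenizers do):

```python
# Sketch: build the prompt via apply_chat_template instead of an f-string.
# Uses `tokenizer` from the loading cell above; the messages here are
# illustrative stand-ins for system_prompt and prompt inside test_model().
messages = [
    {"role": "system", "content": "You are a question answering assistant."},
    {"role": "user", "content": "อาการของโรคเบาหวานมีอะไรบ้าง"},
]
full_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
```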

## Test with Example Question

```python
# Test with a single example
example_question = "อาการของโรคเบาหวานมีอะไรบ้าง"
print(f"\nคำถาม: {example_question}")
response = test_model(example_question)
```
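
The returned value is the assistant's answer only, so a final print completes the example; a minimal usage line, assuming the cells above have run (the label คำตอบ, "answer", mirrors the คำถาม label above and is an assumed addition):

```python
print(f"คำตอบ: {response}")  # print the model's answer to the example question
```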