amornpan committed on
Commit f474ba7 · verified · 1 Parent(s): 9286937

Update README.md

Files changed (1):
  1. README.md +35 -17
README.md CHANGED
@@ -124,35 +124,51 @@ This section provides a step-by-step guide to loading and using the model for ge
 pip install transformers torch peft
 ```

-## Import Libraries
+## Import Required Libraries

 ```python
-import os
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 ```

-## Load Model with Disk Offloading
+In this cell, we import the necessary libraries from the Hugging Face Transformers package.
+
+## Configure 4-bit Quantization

 ```python
-# Create a directory for offloading model weights
-os.makedirs("offload_dir", exist_ok=True)
-
-base_model_name = "amornpan/V3_qwen2.5-32b-med-thai-optimized"
+# Configure quantization settings
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype="float16",
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4"
+)
+```
+
+This cell configures 4-bit quantization to reduce the memory footprint of the model.
+
+## Load Model and Tokenizer
+
+```python
+# Load the model directly
+model_name = "amornpan/V3_qwen2.5-32b-med-thai-optimized"
 model = AutoModelForCausalLM.from_pretrained(
-    base_model_name,
+    model_name,
+    quantization_config=bnb_config,
     device_map="auto",
-    trust_remote_code=True,
-    offload_folder="offload_dir"  # Specify the offload directory
+    trust_remote_code=True
 )
-tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 ```

-## Define Test Function
+In this cell, we load the pre-trained Qwen 2.5 32B Thai medical model.
+
+## Create Testing Function

 ```python
+# Function to test the model
 def test_model(prompt, max_new_tokens=256):
     system_prompt = "You are a question answering assistant. Answer the question as truthful and helpful as possible. คุณคือผู้ช่วยตอบคำถาม จงตอบคำถามอย่างถูกต้องและมีประโยชน์ที่สุด"
-    full_prompt = f"<s><|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
+    full_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"

     inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

@@ -166,16 +182,18 @@ def test_model(prompt, max_new_tokens=256):
     )

     response = tokenizer.decode(generated_ids[0], skip_special_tokens=False)
-
-    assistant_response = response.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0]
+    # Extract only the assistant's response
+    assistant_response = response.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0]

     return assistant_response
 ```

-## Test the Model
+This function handles the generation of responses.
+
+## Test with Example Question

 ```python
-# ทดสอบโมเดล
+# Test with a single example
 example_question = "อาการของโรคเบาหวานมีอะไรบ้าง"
 print(f"\nคำถาม: {example_question}")
 response = test_model(example_question)
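
One practical note on the 4-bit configuration above: `load_in_4bit=True` is backed by the `bitsandbytes` package, and `device_map="auto"` relies on `accelerate`; neither is installed by the `pip install transformers torch peft` line, so add them with `pip install bitsandbytes accelerate`. The back-of-envelope sketch below (editor's arithmetic, not part of the commit) shows why NF4 matters for a 32B model:

```python
# Rough weight-memory estimate for a 32B-parameter model (editor's arithmetic,
# not from the commit): NF4 packs weights into ~0.5 bytes per parameter.
n_params = 32e9
gb = 1e9
print(f"float16: ~{n_params * 2 / gb:.0f} GB of weights")      # ~64 GB
print(f"4-bit NF4: ~{n_params * 0.5 / gb:.0f} GB of weights")  # ~16 GB, plus quantization overhead
```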
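After `from_pretrained` returns, it can be worth confirming where `accelerate` placed the layers and how large the quantized weights actually are. A minimal check, assuming the `model` object loaded above:

```python
# Show how accelerate distributed the model's layers across GPUs/CPU
# (hf_device_map is set when from_pretrained is called with device_map)
print(model.hf_device_map)

# Report the in-memory size of the loaded, quantized weights
print(f"Weights in memory: {model.get_memory_footprint() / 1e9:.1f} GB")
```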
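The `full_prompt` f-string in `test_model` hand-assembles Qwen's ChatML format. A less error-prone alternative (an editor's sketch, not what the commit uses) is the tokenizer's built-in chat template, which emits the same `<|im_start|>`/`<|im_end|>` structure:

```python
# Build the ChatML prompt via the tokenizer's chat template
# (assumes the tokenizer loaded above; the system prompt is the README's, truncated here)
messages = [
    {"role": "system", "content": "You are a question answering assistant. ..."},
    {"role": "user", "content": "อาการของโรคเบาหวานมีอะไรบ้าง"},
]
full_prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,             # return a string rather than token IDs
    add_generation_prompt=True  # append the assistant header so the model continues from it
)
```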
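Finally, a small usage sketch that extends the single-question test by looping `test_model` over several prompts; the second question is a hypothetical example added for illustration:

```python
# Run test_model over a list of questions (the second is a hypothetical example)
questions = [
    "อาการของโรคเบาหวานมีอะไรบ้าง",        # What are the symptoms of diabetes?
    "โรคความดันโลหิตสูงป้องกันได้อย่างไร",  # How can high blood pressure be prevented?
]
for q in questions:
    print(f"\nคำถาม: {q}")            # Question
    print(f"คำตอบ: {test_model(q)}")  # Answer
```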