Update README.md
README.md
This section provides a step-by-step guide to loading and using the model for generation.

```
pip install transformers torch peft
```
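
The 4-bit loading path below also relies on the `bitsandbytes` and `accelerate` packages, which the command above does not list; if your environment lacks them (an assumption about your setup), they can be installed the same way:

```
pip install bitsandbytes accelerate
```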

## Import Required Libraries

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
```

In this cell, we import the necessary classes from the Hugging Face Transformers package: the model and tokenizer loaders, plus `BitsAndBytesConfig` for quantization.

## Configure 4-bit Quantization

```python
# Configure quantization settings
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)
```

This cell configures 4-bit quantization to reduce the memory footprint of the model: weights are stored in the 4-bit NF4 format (with double quantization compressing the quantization constants as well), while matrix computations still run in float16.
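
The saving is easy to estimate. A back-of-envelope sketch with illustrative figures (assumed round numbers, not measurements from this repository):

```python
# Rough weight-memory estimate for a ~32B-parameter model
# (illustrative only; ignores the KV cache, activations, and
# quantization metadata such as the NF4 quant constants).
params = 32e9
print(f"float16 weights: ~{params * 2 / 1e9:.0f} GB")    # 2 bytes per parameter
print(f"nf4 weights:     ~{params * 0.5 / 1e9:.0f} GB")  # 4 bits per parameter
```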

## Load Model and Tokenizer

```python
# Load the model directly
model_name = "amornpan/V3_qwen2.5-32b-med-thai-optimized"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
```

In this cell, we load the pre-trained Qwen 2.5 32B Thai medical model with the 4-bit configuration and its matching tokenizer; `device_map="auto"` lets accelerate spread the quantized layers across the available devices.
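
To confirm what the quantized load actually used, two standard transformers/accelerate features can help; a minimal sanity check, assuming the cell above has completed:

```python
# Optional checks after loading: total memory taken by the model's
# parameters and buffers, and the device placement accelerate chose.
print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.1f} GB")
print(model.hf_device_map)
```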

## Create Testing Function

```python
# Function to test the model
def test_model(prompt, max_new_tokens=256):
    system_prompt = "You are a question answering assistant. Answer the question as truthful and helpful as possible. คุณคือผู้ช่วยตอบคำถาม จงตอบคำถามอย่างถูกต้องและมีประโยชน์ที่สุด"
    full_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

    # Generate a response
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens
    )

    response = tokenizer.decode(generated_ids[0], skip_special_tokens=False)
    # Extract only the assistant's response
    assistant_response = response.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0]

    return assistant_response
```

This function handles the full generation round trip: it wraps the question in Qwen's ChatML prompt format, generates up to `max_new_tokens` new tokens, and extracts only the assistant's reply from the decoded output.
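
Instead of hand-building the ChatML string, the same prompt can usually be produced with the tokenizer's built-in chat template. A minimal sketch, assuming the checkpoint's tokenizer bundles Qwen's ChatML template (the base Qwen2.5 tokenizers do):

```python
# Sketch: build the prompt via apply_chat_template instead of an f-string.
# Uses `tokenizer` from the loading cell above; the messages here are
# illustrative stand-ins for system_prompt and prompt inside test_model().
messages = [
    {"role": "system", "content": "You are a question answering assistant."},
    {"role": "user", "content": "อาการของโรคเบาหวานมีอะไรบ้าง"},
]
full_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
```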

## Test with Example Question

```python
# Test with a single example
example_question = "อาการของโรคเบาหวานมีอะไรบ้าง"
print(f"\nคำถาม: {example_question}")
response = test_model(example_question)
```
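
The returned value is the assistant's answer only, so a final print completes the example; a minimal usage line, assuming the cells above have run (the label คำตอบ, "answer", mirrors the คำถาม label above and is an assumed addition):

```python
print(f"คำตอบ: {response}")  # print the model's answer to the example question
```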