# Shedify / finetune_llama3.py
import os
import argparse
import json
from datetime import datetime
from typing import Dict, List, Any
try:
import datasets
from transformers import AutoTokenizer, TrainingArguments
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
from trl import SFTTrainer
import torch
except ImportError:
print("Installing required packages...")
    import subprocess
    import sys
    # Install into the active interpreter's environment rather than relying on
    # whichever "pip" happens to be first on PATH.
    subprocess.check_call([sys.executable, "-m", "pip", "install",
"transformers>=4.36.0",
"peft>=0.7.0",
"datasets>=2.14.0",
"accelerate>=0.25.0",
"trl>=0.7.1",
"bitsandbytes>=0.40.0",
"torch>=2.0.0"])
import datasets
from transformers import AutoTokenizer, TrainingArguments
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
from trl import SFTTrainer
import torch
def load_model_and_tokenizer(model_name_or_path: str,
adapter_path: str = None,
quantize: bool = True,
token: str = None):
"""
Load the model and tokenizer, with optional adapter and quantization.
This will load the model in 4-bit quantization by default (which is needed
for such a large model) and can optionally load an existing adapter.
"""
from transformers import BitsAndBytesConfig, AutoModelForCausalLM
print(f"Loading model: {model_name_or_path}")
# Configure for quantization
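    # NF4 4-bit weights with double quantization keep memory usage low while the
    # forward/backward passes compute in float16, which is what makes fine-tuning
    # a model of this size feasible on far less VRAM.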
quantization_config = BitsAndBytesConfig(
load_in_4bit=quantize,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_quant_type="nf4",
bnb_4bit_use_double_quant=True
) if quantize else None
# Load the model
model = AutoModelForCausalLM.from_pretrained(
model_name_or_path,
quantization_config=quantization_config,
device_map="auto",
token=token
)
# Load adapter if provided
if adapter_path:
print(f"Loading adapter from {adapter_path}")
from peft import PeftModel
model = PeftModel.from_pretrained(model, adapter_path)
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, token=token)
# Ensure we have a pad token
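    # (Llama tokenizers typically ship without a pad token, so EOS is reused for padding.)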
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
return model, tokenizer
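# Illustrative usage (the adapter directory below is a placeholder, not a real path):
#   model, tokenizer = load_model_and_tokenizer(
#       "nvidia/Llama-3_3-Nemotron-Super-49B-v1",
#       adapter_path="./my-previous-adapter",
#       quantize=True,
#   )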
def prepare_dataset(data_path: str):
"""Load and prepare datasets from JSON files."""
# Load datasets
if os.path.isdir(data_path):
train_path = os.path.join(data_path, "train.json")
val_path = os.path.join(data_path, "validation.json")
if not (os.path.exists(train_path) and os.path.exists(val_path)):
raise ValueError(f"Training data files not found in {data_path}")
else:
raise ValueError(f"Data path {data_path} is not a directory")
# Load JSON files
with open(train_path, 'r', encoding='utf-8') as f:
train_data = json.load(f)
with open(val_path, 'r', encoding='utf-8') as f:
val_data = json.load(f)
# Convert to datasets
train_dataset = datasets.Dataset.from_list(train_data)
eval_dataset = datasets.Dataset.from_list(val_data)
print(f"Loaded {len(train_dataset)} training examples and {len(eval_dataset)} validation examples")
return train_dataset, eval_dataset
def finetune(
model_name: str,
dataset_path: str,
output_dir: str,
hub_model_id: str = None,
hf_token: str = None,
use_peft: bool = True,
num_train_epochs: int = 3,
learning_rate: float = 2e-5,
bf16: bool = True,
quantize: bool = True,
max_seq_length: int = 2048,
gradient_accumulation_steps: int = 2
):
"""Fine-tune the model with PEFT on the provided dataset."""
# Set up output directory
if not output_dir:
output_dir = f"llama3-finetuned-{datetime.now().strftime('%Y-%m-%d-%H-%M')}"
os.makedirs(output_dir, exist_ok=True)
# Load datasets
train_dataset, eval_dataset = prepare_dataset(dataset_path)
# Load base model
model, tokenizer = load_model_and_tokenizer(
model_name,
quantize=quantize,
token=hf_token
)
# Set up PEFT configuration if using PEFT
if use_peft:
print("Setting up PEFT (Parameter-Efficient Fine-Tuning)")
# Prepare model for k-bit training if quantized
if quantize:
model = prepare_model_for_kbit_training(model)
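            # prepare_model_for_kbit_training freezes the quantized base weights,
            # upcasts a few layers (e.g. layer norms) to full precision for
            # stability, and enables input gradients so the LoRA adapters added
            # below can be trained on top of the 4-bit model.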
# Set up LoRA configuration
peft_config = LoraConfig(
r=16, # Rank dimension
lora_alpha=32, # Scale parameter
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM",
target_modules=[
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
)
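        # LoRA adapters are attached to every attention (q/k/v/o) and MLP
        # (gate/up/down) projection in each Llama-style block; only these
        # low-rank adapter weights are trained.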
else:
peft_config = None
# Training arguments
training_args = TrainingArguments(
output_dir=output_dir,
num_train_epochs=num_train_epochs,
per_device_train_batch_size=1, # Adjust based on GPU memory
gradient_accumulation_steps=gradient_accumulation_steps,
learning_rate=learning_rate,
weight_decay=0.01,
max_grad_norm=0.3,
logging_steps=10,
optim="paged_adamw_32bit",
lr_scheduler_type="cosine",
warmup_ratio=0.03,
evaluation_strategy="steps",
eval_steps=0.1, # Evaluate every 10% of training
save_strategy="steps",
save_steps=0.1, # Save every 10% of training
save_total_limit=3,
bf16=bf16, # Use bfloat16 precision if available
push_to_hub=bool(hub_model_id),
hub_model_id=hub_model_id,
hub_token=hf_token,
)
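    # With per_device_train_batch_size=1, the effective batch size per device is
    # the gradient_accumulation_steps value (2 by default). Note that newer
    # transformers releases renamed `evaluation_strategy` to `eval_strategy`;
    # adjust the argument above if you install a recent version.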
# Initialize the SFT trainer
trainer = SFTTrainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
peft_config=peft_config,
tokenizer=tokenizer,
max_seq_length=max_seq_length,
)
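    # NOTE: passing tokenizer/max_seq_length directly matches the trl 0.7-era
    # SFTTrainer API; later trl releases move these options into SFTConfig.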
# Train the model
print("Starting training...")
trainer.train()
# Save the fine-tuned model
print(f"Saving model to {output_dir}")
trainer.save_model()
# Push to hub if specified
if hub_model_id and hf_token:
print(f"Pushing model to Hugging Face Hub: {hub_model_id}")
trainer.push_to_hub()
return output_dir
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Fine-tune Llama 3.3 with your data")
parser.add_argument("--model_name", type=str, default="nvidia/Llama-3_3-Nemotron-Super-49B-v1",
help="Base model to fine-tune")
parser.add_argument("--dataset_path", type=str, required=True,
help="Path to the directory containing train.json and validation.json")
parser.add_argument("--output_dir", type=str, default=None,
help="Directory to save the fine-tuned model")
parser.add_argument("--hub_model_id", type=str, default=None,
help="Hugging Face Hub model ID to push the model to")
parser.add_argument("--hf_token", type=str, default=None,
help="Hugging Face token for accessing gated models and pushing to hub")
parser.add_argument("--no_peft", action='store_true',
help="Disable PEFT/LoRA (not recommended for large models)")
parser.add_argument("--no_quantize", action='store_true',
help="Disable quantization (requires much more VRAM)")
parser.add_argument("--no_bf16", action='store_true',
help="Disable bf16 precision")
parser.add_argument("--epochs", type=int, default=3,
help="Number of training epochs")
parser.add_argument("--learning_rate", type=float, default=2e-5,
help="Learning rate")
parser.add_argument("--max_seq_length", type=int, default=2048,
help="Maximum sequence length for training")
parser.add_argument("--gradient_accumulation_steps", type=int, default=2,
help="Gradient accumulation steps")
args = parser.parse_args()
# Get token from environment if not provided
hf_token = args.hf_token or os.environ.get("HF_TOKEN")
finetune(
model_name=args.model_name,
dataset_path=args.dataset_path,
output_dir=args.output_dir,
hub_model_id=args.hub_model_id,
hf_token=hf_token,
use_peft=not args.no_peft,
num_train_epochs=args.epochs,
learning_rate=args.learning_rate,
bf16=not args.no_bf16,
quantize=not args.no_quantize,
max_seq_length=args.max_seq_length,
gradient_accumulation_steps=args.gradient_accumulation_steps
)
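# Example invocation (paths, repo ID and token below are placeholders):
#   python finetune_llama3.py \
#       --dataset_path ./data \
#       --output_dir ./llama3-finetuned \
#       --hub_model_id your-username/llama3-finetuned \
#       --hf_token hf_xxx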