#!/bin/bash
# Cloud Deployment Script for SmolLM3 DPO Training
# This script sets up a cloud instance for training and uploading to Hugging Face
set -e # Exit on any error
echo "π Starting SmolLM3 DPO Cloud Deployment"
echo "=========================================="
# Configuration
MODEL_NAME="HuggingFaceTB/SmolLM3-3B"
DATASET_NAME="HuggingFaceTB/smoltalk"
EXPERIMENT_NAME="smollm3_dpo_6epochs"
REPO_NAME="your-username/smollm3-dpo-6epochs" # Change this to your username
TRACKIO_URL="https://your-trackio-space.hf.space" # Change this to your Trackio Space URL
HF_TOKEN="your_hf_token_here" # Change this to your HF token
# Training Configuration
BATCH_SIZE=2
GRADIENT_ACCUMULATION_STEPS=8
LEARNING_RATE=5e-6
MAX_EPOCHS=6
MAX_SEQ_LENGTH=4096
SAVE_STEPS=500
EVAL_STEPS=100
LOGGING_STEPS=10
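# With these defaults the effective batch size is
# BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS = 2 * 8 = 16 sequences per
# optimizer step; Step 10 below reuses this product to turn epochs into steps.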
echo "π Configuration:"
echo " Model: $MODEL_NAME"
echo " Dataset: $DATASET_NAME"
echo " Experiment: $EXPERIMENT_NAME"
echo " Repository: $REPO_NAME"
echo " Epochs: $MAX_EPOCHS"
echo " Batch Size: $BATCH_SIZE"
echo " Learning Rate: $LEARNING_RATE"
# Step 1: Update system and install dependencies
echo ""
echo "π§ Step 1: Installing system dependencies..."
sudo apt-get update
sudo apt-get install -y git curl wget unzip
# Step 2: Install Python and pip
echo ""
echo "π Step 2: Installing Python dependencies..."
sudo apt-get install -y python3 python3-pip python3-venv
# Step 3: Create virtual environment
echo ""
echo "π¦ Step 3: Setting up Python virtual environment..."
python3 -m venv smollm3_env
source smollm3_env/bin/activate
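# From here on, python and pip resolve inside smollm3_env, so the installs
# below leave the system Python untouched.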
# Step 4: Install PyTorch and CUDA
echo ""
echo "π₯ Step 4: Installing PyTorch with CUDA support..."
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
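# Note (added): the cu118 wheel index assumes the instance has an NVIDIA
# driver compatible with CUDA 11.8; on other driver versions, swap in the
# matching index URL (e.g. cu121). A quick post-install check, if wanted:
# python -c "import torch; print(torch.cuda.is_available())"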
# Step 5: Install project dependencies
echo ""
echo "π Step 5: Installing project dependencies..."
pip install -r requirements.txt
# Step 6: Install additional dependencies for DPO
echo ""
echo "π― Step 6: Installing DPO-specific dependencies..."
pip install trl>=0.7.0
pip install peft>=0.4.0
pip install accelerate>=0.20.0
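# Optional sanity check (not in the original flow): confirm the DPO stack
# imports cleanly before spending time on data preparation.
python -c "import trl, peft, accelerate; print('DPO deps OK, trl', trl.__version__)"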
# Step 7: Set up Hugging Face token
echo ""
echo "π Step 7: Setting up Hugging Face authentication..."
export HF_TOKEN="$HF_TOKEN"
huggingface-cli login --token "$HF_TOKEN"
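# Optional verification (assumes the huggingface_hub CLI installed above):
# prints the account the token resolves to, so a bad token fails here rather
# than at the upload step.
huggingface-cli whoami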
# Step 8: Create DPO configuration
echo ""
echo "βοΈ Step 8: Creating DPO configuration..."
cat > config/train_smollm3_dpo_6epochs.py << EOF
"""
SmolLM3 DPO Training Configuration - 6 Epochs
Optimized for cloud deployment
"""
from config.train_smollm3_dpo import SmolLM3DPOConfig
config = SmolLM3DPOConfig(
    # Model configuration
    model_name="$MODEL_NAME",
    max_seq_length=$MAX_SEQ_LENGTH,
    use_flash_attention=True,
    use_gradient_checkpointing=True,

    # Training configuration
    batch_size=$BATCH_SIZE,
    gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS,
    learning_rate=$LEARNING_RATE,
    weight_decay=0.01,
    warmup_steps=100,
    max_iters=None,  # Will be calculated based on epochs
    eval_interval=100,
    log_interval=10,
    save_interval=500,

    # DPO configuration
    beta=0.1,
    max_prompt_length=$((MAX_SEQ_LENGTH / 2)),

    # Optimizer configuration
    optimizer="adamw",
    beta1=0.9,
    beta2=0.95,
    eps=1e-8,

    # Scheduler configuration
    scheduler="cosine",
    min_lr=1e-6,

    # Mixed precision
    fp16=True,
    bf16=False,

    # Logging and saving
    save_steps=$SAVE_STEPS,
    eval_steps=$EVAL_STEPS,
    logging_steps=$LOGGING_STEPS,
    save_total_limit=3,

    # Evaluation
    eval_strategy="steps",
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    load_best_model_at_end=True,

    # Data configuration
    data_dir="smoltalk_dataset",
    train_file="train.json",
    validation_file="validation.json",

    # Chat template configuration
    use_chat_template=True,
    chat_template_kwargs={
        "enable_thinking": False,
        "add_generation_prompt": True
    },

    # Trackio monitoring configuration
    enable_tracking=True,
    trackio_url="$TRACKIO_URL",
    trackio_token=None,
    log_artifacts=True,
    log_metrics=True,
    log_config=True,
    experiment_name="$EXPERIMENT_NAME"
)
EOF
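# Quick check (added): byte-compile the generated config so a bad variable
# substitution fails here rather than partway into training; py_compile only
# parses the file, it does not execute the SmolLM3DPOConfig import.
python -m py_compile config/train_smollm3_dpo_6epochs.py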
# Step 9: Download and prepare dataset
echo ""
echo "π Step 9: Downloading and preparing dataset..."
python -c "
from datasets import load_dataset
import json
import os
# Load SmolTalk dataset
print('Loading SmolTalk dataset...')
dataset = load_dataset('$DATASET_NAME')
# Create dataset directory
os.makedirs('smoltalk_dataset', exist_ok=True)
# Convert to DPO format (preference pairs)
def convert_to_dpo_format(example):
# For SmolTalk, we'll create preference pairs based on response quality
# This is a simplified example - you may need to adjust based on your needs
return {
'prompt': example.get('prompt', ''),
'chosen': example.get('chosen', ''),
'rejected': example.get('rejected', '')
}
# Process train split
train_data = []
for example in dataset['train']:
dpo_example = convert_to_dpo_format(example)
if dpo_example['prompt'] and dpo_example['chosen'] and dpo_example['rejected']:
train_data.append(dpo_example)
# Process validation split
val_data = []
for example in dataset['validation']:
dpo_example = convert_to_dpo_format(example)
if dpo_example['prompt'] and dpo_example['chosen'] and dpo_example['rejected']:
val_data.append(dpo_example)
# Save to files
with open('smoltalk_dataset/train.json', 'w') as f:
json.dump(train_data, f, indent=2)
with open('smoltalk_dataset/validation.json', 'w') as f:
json.dump(val_data, f, indent=2)
print(f'Dataset prepared: {len(train_data)} train samples, {len(val_data)} validation samples')
"
# Step 10: Calculate training steps based on epochs
echo ""
echo "π Step 10: Calculating training parameters..."
TOTAL_SAMPLES=$(python -c "import json; data=json.load(open('smoltalk_dataset/train.json')); print(len(data))")
EFFECTIVE_BATCH_SIZE=$((BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS))
STEPS_PER_EPOCH=$((TOTAL_SAMPLES / EFFECTIVE_BATCH_SIZE))
MAX_STEPS=$((STEPS_PER_EPOCH * MAX_EPOCHS))
echo " Total samples: $TOTAL_SAMPLES"
echo " Effective batch size: $EFFECTIVE_BATCH_SIZE"
echo " Steps per epoch: $STEPS_PER_EPOCH"
echo " Total training steps: $MAX_STEPS"
# Step 11: Start DPO training
echo ""
echo "π― Step 11: Starting DPO training..."
python train.py config/train_smollm3_dpo_6epochs.py \
--dataset_dir smoltalk_dataset \
--out_dir /output-checkpoint \
--init_from scratch \
--max_iters $MAX_STEPS \
--batch_size $BATCH_SIZE \
--learning_rate $LEARNING_RATE \
--gradient_accumulation_steps $GRADIENT_ACCUMULATION_STEPS \
--max_seq_length $MAX_SEQ_LENGTH \
--save_steps $SAVE_STEPS \
--eval_steps $EVAL_STEPS \
--logging_steps $LOGGING_STEPS \
--enable_tracking \
--trackio_url "$TRACKIO_URL" \
--experiment_name "$EXPERIMENT_NAME"
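# Sanity check (added): make sure training actually left a checkpoint behind,
# since push_to_huggingface.py below is pointed at this directory.
[ -d /output-checkpoint ] || { echo "Missing /output-checkpoint after training" >&2; exit 1; }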
# Step 12: Push model to Hugging Face Hub
echo ""
echo "π€ Step 12: Pushing model to Hugging Face Hub..."
python push_to_huggingface.py /output-checkpoint "$REPO_NAME" \
--token "$HF_TOKEN" \
--trackio-url "$TRACKIO_URL" \
--experiment-name "$EXPERIMENT_NAME"
# Step 13: Test the uploaded model
echo ""
echo "π§ͺ Step 13: Testing uploaded model..."
python -c "
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
print('Loading uploaded model...')
model = AutoModelForCausalLM.from_pretrained('$REPO_NAME', torch_dtype=torch.float16, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained('$REPO_NAME')
print('Testing model generation...')
prompt = 'Hello, how are you?'
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
outputs = model.generate(**inputs, max_new_tokens=50, do_sample=True, temperature=0.7)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f'Prompt: {prompt}')
print(f'Response: {response}')
print('✅ Model test completed successfully!')
"
echo ""
echo "π Deployment completed successfully!"
echo "====================================="
echo "π Model: https://huggingface.co/$REPO_NAME"
echo "π Trackio: $TRACKIO_URL"
echo "π Experiment: $EXPERIMENT_NAME"
echo ""
echo "Next steps:"
echo "1. Monitor training progress in your Trackio Space"
echo "2. Check the model repository on Hugging Face Hub"
echo "3. Use the model in your applications" |