#!/bin/bash
# Cloud Deployment Script for SmolLM3 DPO Training
# Sets up a cloud instance, runs DPO training, and uploads the resulting model to the Hugging Face Hub

set -e  # Exit on any error

echo "πŸš€ Starting SmolLM3 DPO Cloud Deployment"
echo "=========================================="

# Configuration
MODEL_NAME="HuggingFaceTB/SmolLM3-3B"
DATASET_NAME="HuggingFaceTB/smoltalk"
EXPERIMENT_NAME="smollm3_dpo_6epochs"
REPO_NAME="your-username/smollm3-dpo-6epochs"  # Change this to your username
TRACKIO_URL="https://your-trackio-space.hf.space"  # Change this to your Trackio Space URL
HF_TOKEN="your_hf_token_here"  # Change this to your HF token

# Training Configuration
BATCH_SIZE=2
GRADIENT_ACCUMULATION_STEPS=8
LEARNING_RATE=5e-6
MAX_EPOCHS=6
MAX_SEQ_LENGTH=4096
SAVE_STEPS=500
EVAL_STEPS=100
LOGGING_STEPS=10

echo "πŸ“‹ Configuration:"
echo "  Model: $MODEL_NAME"
echo "  Dataset: $DATASET_NAME"
echo "  Experiment: $EXPERIMENT_NAME"
echo "  Repository: $REPO_NAME"
echo "  Epochs: $MAX_EPOCHS"
echo "  Batch Size: $BATCH_SIZE"
echo "  Learning Rate: $LEARNING_RATE"

# Step 1: Update system and install dependencies
echo ""
echo "πŸ”§ Step 1: Installing system dependencies..."
sudo apt-get update
sudo apt-get install -y git curl wget unzip

# Step 2: Install Python and pip
echo ""
echo "🐍 Step 2: Installing Python dependencies..."
sudo apt-get install -y python3 python3-pip python3-venv

# Step 3: Create virtual environment
echo ""
echo "πŸ“¦ Step 3: Setting up Python virtual environment..."
python3 -m venv smollm3_env
source smollm3_env/bin/activate

# Step 4: Install PyTorch and CUDA
echo ""
echo "πŸ”₯ Step 4: Installing PyTorch with CUDA support..."
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
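
# Quick sanity check (added): confirm PyTorch can see a CUDA device before
# continuing; DPO on a 3B model is impractical on CPU. Assumes an NVIDIA GPU instance.
python -c "import torch; assert torch.cuda.is_available(), 'No CUDA device visible'; print('CUDA OK:', torch.cuda.get_device_name(0))"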

# Step 5: Install project dependencies
echo ""
echo "πŸ“š Step 5: Installing project dependencies..."
pip install -r requirements.txt

# Step 6: Install additional dependencies for DPO
echo ""
echo "🎯 Step 6: Installing DPO-specific dependencies..."
pip install "trl>=0.7.0"
pip install "peft>=0.4.0"
pip install "accelerate>=0.20.0"

# Step 7: Set up Hugging Face token
echo ""
echo "πŸ”‘ Step 7: Setting up Hugging Face authentication..."
export HF_TOKEN="$HF_TOKEN"
huggingface-cli login --token "$HF_TOKEN"
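
# Verify authentication (added): whoami exits non-zero on an invalid token,
# so a bad credential fails here rather than at the final push step.
huggingface-cli whoami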

# Step 8: Create DPO configuration
echo ""
echo "βš™οΈ Step 8: Creating DPO configuration..."
cat > config/train_smollm3_dpo_6epochs.py << EOF
"""
SmolLM3 DPO Training Configuration - 6 Epochs
Optimized for cloud deployment
"""

from config.train_smollm3_dpo import SmolLM3DPOConfig

config = SmolLM3DPOConfig(
    # Model configuration
    model_name="$MODEL_NAME",
    max_seq_length=$MAX_SEQ_LENGTH,
    use_flash_attention=True,
    use_gradient_checkpointing=True,
    
    # Training configuration
    batch_size=$BATCH_SIZE,
    gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS,
    learning_rate=$LEARNING_RATE,
    weight_decay=0.01,
    warmup_steps=100,
    max_iters=None,  # Will be calculated based on epochs
    eval_interval=100,
    log_interval=10,
    save_interval=500,
    
    # DPO configuration
    beta=0.1,
    max_prompt_length=$((MAX_SEQ_LENGTH / 2)),
    
    # Optimizer configuration
    optimizer="adamw",
    beta1=0.9,
    beta2=0.95,
    eps=1e-8,
    
    # Scheduler configuration
    scheduler="cosine",
    min_lr=1e-6,
    
    # Mixed precision
    fp16=True,
    bf16=False,
    
    # Logging and saving
    save_steps=$SAVE_STEPS,
    eval_steps=$EVAL_STEPS,
    logging_steps=$LOGGING_STEPS,
    save_total_limit=3,
    
    # Evaluation
    eval_strategy="steps",
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    load_best_model_at_end=True,
    
    # Data configuration
    data_dir="smoltalk_dataset",
    train_file="train.json",
    validation_file="validation.json",
    
    # Chat template configuration
    use_chat_template=True,
    chat_template_kwargs={
        "enable_thinking": False,
        "add_generation_prompt": True
    },
    
    # Trackio monitoring configuration
    enable_tracking=True,
    trackio_url="$TRACKIO_URL",
    trackio_token=None,
    log_artifacts=True,
    log_metrics=True,
    log_config=True,
    experiment_name="$EXPERIMENT_NAME"
)
EOF
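
# Optional check (added): confirm the generated config file is syntactically
# valid Python before committing to a long training run.
python -c "import ast; ast.parse(open('config/train_smollm3_dpo_6epochs.py').read())"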

# Step 9: Download and prepare dataset
echo ""
echo "πŸ“Š Step 9: Downloading and preparing dataset..."
python -c "
from datasets import load_dataset
import json
import os

# Load SmolTalk dataset; SmolTalk is published as named configs, so a config
# name ('all' here) is passed. Drop it if your dataset has a single config.
print('Loading SmolTalk dataset...')
dataset = load_dataset('$DATASET_NAME', 'all')

# Create dataset directory
os.makedirs('smoltalk_dataset', exist_ok=True)

# Convert to DPO format (preference pairs)
def convert_to_dpo_format(example):
    # SmolTalk is a chat/SFT dataset, so most records do not carry
    # prompt/chosen/rejected fields; this simplified mapping keeps only
    # examples that already have them. For real DPO training you will
    # likely need a genuine preference dataset.
    return {
        'prompt': example.get('prompt', ''),
        'chosen': example.get('chosen', ''),
        'rejected': example.get('rejected', '')
    }

# Process train split
train_data = []
for example in dataset['train']:
    dpo_example = convert_to_dpo_format(example)
    if dpo_example['prompt'] and dpo_example['chosen'] and dpo_example['rejected']:
        train_data.append(dpo_example)

# Process validation split (fall back to 'test' when the dataset
# has no 'validation' split)
val_split = 'validation' if 'validation' in dataset else 'test'
val_data = []
for example in dataset[val_split]:
    dpo_example = convert_to_dpo_format(example)
    if dpo_example['prompt'] and dpo_example['chosen'] and dpo_example['rejected']:
        val_data.append(dpo_example)

# Save to files
with open('smoltalk_dataset/train.json', 'w') as f:
    json.dump(train_data, f, indent=2)

with open('smoltalk_dataset/validation.json', 'w') as f:
    json.dump(val_data, f, indent=2)

print(f'Dataset prepared: {len(train_data)} train samples, {len(val_data)} validation samples')
"

# Step 10: Calculate training steps based on epochs
echo ""
echo "πŸ“ˆ Step 10: Calculating training parameters..."
TOTAL_SAMPLES=$(python -c "import json; data=json.load(open('smoltalk_dataset/train.json')); print(len(data))")
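
# Guard (added): if the conversion step produced no preference pairs (likely
# when the source dataset lacks chosen/rejected fields), stop before the
# step arithmetic below operates on zeros.
if [[ "$TOTAL_SAMPLES" -eq 0 ]]; then
    echo "❌ No DPO samples in smoltalk_dataset/train.json; use a preference dataset." >&2
    exit 1
fi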
EFFECTIVE_BATCH_SIZE=$((BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS))
STEPS_PER_EPOCH=$((TOTAL_SAMPLES / EFFECTIVE_BATCH_SIZE))
MAX_STEPS=$((STEPS_PER_EPOCH * MAX_EPOCHS))

echo "  Total samples: $TOTAL_SAMPLES"
echo "  Effective batch size: $EFFECTIVE_BATCH_SIZE"
echo "  Steps per epoch: $STEPS_PER_EPOCH"
echo "  Total training steps: $MAX_STEPS"

# Step 11: Start DPO training
echo ""
echo "🎯 Step 11: Starting DPO training..."
python train.py config/train_smollm3_dpo_6epochs.py \
    --dataset_dir smoltalk_dataset \
    --out_dir /output-checkpoint \
    --init_from scratch \
    --max_iters $MAX_STEPS \
    --batch_size $BATCH_SIZE \
    --learning_rate $LEARNING_RATE \
    --gradient_accumulation_steps $GRADIENT_ACCUMULATION_STEPS \
    --max_seq_length $MAX_SEQ_LENGTH \
    --save_steps $SAVE_STEPS \
    --eval_steps $EVAL_STEPS \
    --logging_steps $LOGGING_STEPS \
    --enable_tracking \
    --trackio_url "$TRACKIO_URL" \
    --experiment_name "$EXPERIMENT_NAME"
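
# Belt-and-braces check (added): set -e aborts on a failed training command,
# but confirm a checkpoint directory exists before attempting the upload.
if [[ ! -d /output-checkpoint ]]; then
    echo "❌ /output-checkpoint not found; training did not produce a checkpoint." >&2
    exit 1
fi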

# Step 12: Push model to Hugging Face Hub
echo ""
echo "πŸ“€ Step 12: Pushing model to Hugging Face Hub..."
python push_to_huggingface.py /output-checkpoint "$REPO_NAME" \
    --token "$HF_TOKEN" \
    --trackio-url "$TRACKIO_URL" \
    --experiment-name "$EXPERIMENT_NAME"

# Step 13: Test the uploaded model
echo ""
echo "πŸ§ͺ Step 13: Testing uploaded model..."
python -c "
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

print('Loading uploaded model...')
model = AutoModelForCausalLM.from_pretrained('$REPO_NAME', torch_dtype=torch.float16, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained('$REPO_NAME')

print('Testing model generation...')
prompt = 'Hello, how are you?'
inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
outputs = model.generate(**inputs, max_new_tokens=50, do_sample=True, temperature=0.7)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(f'Prompt: {prompt}')
print(f'Response: {response}')
print('βœ… Model test completed successfully!')
"

echo ""
echo "πŸŽ‰ Deployment completed successfully!"
echo "====================================="
echo "πŸ“Š Model: https://huggingface.co/$REPO_NAME"
echo "πŸ“ˆ Trackio: $TRACKIO_URL"
echo "πŸ“‹ Experiment: $EXPERIMENT_NAME"
echo ""
echo "Next steps:"
echo "1. Monitor training progress in your Trackio Space"
echo "2. Check the model repository on Hugging Face Hub"
echo "3. Use the model in your applications"