#!/usr/bin/env python3
"""
Setup script for the interactive SmolLM3 end-to-end fine-tuning pipeline
Helps users prepare for the interactive launch script
"""

from pathlib import Path


def setup_launch_script():
    """Set up the launch.sh script with user configuration."""
    print("🚀 SmolLM3 Interactive End-to-End Fine-tuning Setup")
    print("=" * 60)
    print("\n📋 This setup will help you prepare for the interactive pipeline.")
    print("The launch script will now prompt you for all necessary information.")

    # Check that launch.sh exists before declaring the setup ready
    launch_path = Path("launch.sh")
    if not launch_path.exists():
        print("❌ launch.sh not found")
        return False

    print("\n✅ launch.sh found - no configuration needed!")
    print("The script is now interactive and will prompt you for all settings.")
    return True


def create_requirements_check():
    """Create a requirements check script"""
    check_script = """#!/usr/bin/env python3
\"\"\"
Requirements check for SmolLM3 fine-tuning
\"\"\"
import sys


def check_requirements():
    \"\"\"Check if all requirements are met\"\"\"
    print("🔍 Checking requirements...")

    # Check Python version
    if sys.version_info < (3, 8):
        print("❌ Python 3.8+ required")
        return False
    else:
        print(f"✅ Python {sys.version_info.major}.{sys.version_info.minor}")

    # Check required packages
    required_packages = [
        'torch',
        'transformers',
        'datasets',
        'accelerate',
        'trl',
        'huggingface_hub',
        'requests'
    ]
    missing_packages = []
    for package in required_packages:
        try:
            __import__(package)
            print(f"✅ {package}")
        except ImportError:
            print(f"❌ {package}")
            missing_packages.append(package)

    if missing_packages:
        print("\\n📦 Install missing packages:")
        print(f"pip install {' '.join(missing_packages)}")
        return False

    # Check CUDA
    try:
        import torch
        if torch.cuda.is_available():
            print(f"✅ CUDA available: {torch.cuda.get_device_name(0)}")
        else:
            print("⚠️ CUDA not available (training will be slower)")
    except Exception:
        print("⚠️ Could not check CUDA availability")

    print("\\n✅ All requirements met!")
    return True


if __name__ == "__main__":
    check_requirements()
"""

    with open("check_requirements.py", 'w') as f:
        f.write(check_script)

    print("✅ Created check_requirements.py")


def create_quick_start_guide():
    """Create a quick start guide"""
    guide = """# SmolLM3 Interactive Pipeline - Quick Start Guide
## 🚀 Quick Start
### 1. Check Requirements
```bash
python check_requirements.py
```
### 2. Run the Interactive Pipeline
```bash
chmod +x launch.sh
./launch.sh
```
## 📋 What the Interactive Pipeline Does
The pipeline will guide you through:
1. **Authentication** - Enter your HF username and token
2. **Configuration Selection** - Choose from predefined training configs:
   - Basic Training (SmolLM3 + SmolTalk)
   - H100 Lightweight (Rapid training on H100)
   - A100 Large Scale (SmolLM3 + OpenHermes-FR)
   - Multiple Passes (Extended training)
   - Custom Configuration (User-defined)
3. **Experiment Setup** - Configure experiment name and repositories
4. **Training Parameters** - Adjust batch size, learning rate, etc.
5. **Deployment** - Automatic Trackio Space and HF Dataset setup
6. **Training** - Monitored fine-tuning with real-time tracking
7. **Model Push** - Upload to HF Hub with documentation
## 🎯 Available Training Configurations
### 1. Basic Training (Default)
- **Model**: SmolLM3-3B
- **Dataset**: SmolTalk
- **Epochs**: 3
- **Batch Size**: 2
- **Learning Rate**: 5e-6
- **Best for**: Quick experiments, learning
### 2. H100 Lightweight (Rapid)
- **Model**: SmolLM3-3B
- **Dataset**: OpenHermes-FR (80K samples)
- **Epochs**: 1
- **Batch Size**: 16
- **Learning Rate**: 8e-6
- **Sequence Length**: 8192
- **Best for**: Rapid training on H100
### 3. A100 Large Scale
- **Model**: SmolLM3-3B
- **Dataset**: OpenHermes-FR
- **Epochs**: 1.3 passes
- **Batch Size**: 8
- **Learning Rate**: 5e-6
- **Sequence Length**: 8192
- **Best for**: High-performance training
### 4. Multiple Passes
- **Model**: SmolLM3-3B
- **Dataset**: OpenHermes-FR
- **Epochs**: 4 passes
- **Batch Size**: 6
- **Learning Rate**: 3e-6
- **Sequence Length**: 8192
- **Best for**: Thorough training
### 5. Custom Configuration
- **User-defined parameters**
- **Flexible model and dataset selection**
- **Custom training parameters**
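
When you choose the custom option, the launch script prompts for each value interactively. As a rough, illustrative sketch (the exact prompts and defaults come from the launch script itself, not from this guide), a custom run boils down to decisions like these:
```python
# Illustrative values only - the interactive launcher collects these via prompts.
custom_choices = {
    "model_name": "HuggingFaceTB/SmolLM3-3B",   # base model to fine-tune
    "dataset_name": "HuggingFaceTB/smoltalk",   # training dataset
    "num_train_epochs": 1,                      # passes over the data
    "per_device_train_batch_size": 2,           # lower this if you hit out-of-memory errors
    "gradient_accumulation_steps": 8,           # raise this to keep the effective batch size
    "learning_rate": 5e-6,
    "max_seq_length": 4096,
}
```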
## 🔧 Prerequisites
1. **Hugging Face Account**
   - Create an account at https://huggingface.co
   - Generate a token at https://huggingface.co/settings/tokens (a quick way to check the token is shown at the end of this section)
2. **System Requirements**
   - Python 3.8+
   - CUDA-compatible GPU (recommended)
   - 16GB+ RAM
   - 50GB+ storage
3. **Dependencies**
   - PyTorch with CUDA
   - Transformers
   - Datasets
   - Accelerate
   - TRL
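
To confirm that the token from step 1 works before launching, you can ask the Hub who it belongs to. This is an optional sanity check and assumes `huggingface_hub` is already installed:
```python
# Optional sanity check: verify your HF token before running ./launch.sh
from huggingface_hub import HfApi

info = HfApi().whoami(token="hf_...")  # paste your token here
print("Authenticated as:", info["name"])
```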
## 📊 Expected Outputs
After running the pipeline, you'll have:
- **Model Repository**: `https://huggingface.co/your-username/smollm3-finetuned-YYYYMMDD`
- **Trackio Space**: `https://huggingface.co/spaces/your-username/trackio-monitoring-YYYYMMDD`
- **Experiment Dataset**: `https://huggingface.co/datasets/your-username/trackio-experiments`
- **Training Summary**: `training_summary.md`
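
Once training finishes, the model repository can be loaded like any other Hub model. Illustrative sketch - replace the repo id with the one printed at the end of your run:
```python
# Load the fine-tuned model from the Hub (replace the repo id with your own)
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "your-username/smollm3-finetuned-YYYYMMDD"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)
```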
## 🛠️ Troubleshooting
### Common Issues
1. **HF Token Issues**
   ```bash
   huggingface-cli whoami
   ```
2. **CUDA Issues**
   ```bash
   python -c "import torch; print(torch.cuda.is_available())"
   ```
3. **Memory Issues** (see the worked example after this list)
   - Reduce the batch size in a custom configuration
   - Increase gradient accumulation steps
4. **Network Issues**
   - Check your internet connection
   - Verify HF token permissions
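
For the memory issues above, remember that the effective batch size is the per-device batch size times the gradient accumulation steps (times the number of GPUs), so you can trade one for the other without changing the optimization. A small worked example:
```python
# Halving the per-device batch size and doubling accumulation keeps the
# effective batch size (and therefore the training dynamics) the same.
per_device_batch_size = 4        # was 8, reduced to fit in GPU memory
gradient_accumulation_steps = 4  # was 2, doubled to compensate
num_gpus = 1
effective_batch_size = per_device_batch_size * gradient_accumulation_steps * num_gpus
print(effective_batch_size)  # 16 in both cases
```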
## 🎯 Tips for Success
1. **Start with Basic Training** for your first run
2. **Use H100 Lightweight** for rapid experiments on H100
3. **Use A100 Large Scale** for serious experiments
4. **Monitor in Trackio Space** for real-time progress
5. **Check logs** if something goes wrong
6. **Test the model** after training completes
## 📚 Support
- Check the troubleshooting section
- Review logs in `training.log`
- Monitor progress in Trackio Space
- Open an issue on GitHub
---
**Happy Fine-tuning! 🚀**
"""

    with open("QUICK_START_GUIDE.md", 'w') as f:
        f.write(guide)

    print("✅ Created QUICK_START_GUIDE.md")


def main():
    """Main setup function"""
    print("Welcome to SmolLM3 Interactive End-to-End Fine-tuning Setup!")
    print("This will help you prepare for the interactive pipeline.")

    if setup_launch_script():
        create_requirements_check()
        create_quick_start_guide()

        print("\n🎉 Setup completed successfully!")
        print("\n📋 Files created:")
        print(" - check_requirements.py (requirement checker)")
        print(" - QUICK_START_GUIDE.md (usage guide)")
        print("\n🚀 Ready to start training!")
        print("Next steps:")
        print("1. Run: python check_requirements.py")
        print("2. Run: chmod +x launch.sh")
        print("3. Run: ./launch.sh")
        print("4. Follow the interactive prompts")
        print("\n📚 For detailed information, see:")
        print(" - QUICK_START_GUIDE.md")
        print(" - README_END_TO_END.md")
    else:
        print("\n❌ Setup failed. Please check the messages above and try again.")


if __name__ == "__main__":
    main()