Spaces:
Running
Running
fix launch script deploy and refactors
Browse files- config/train_smollm3_h100_lightweight.py +2 -0
- docs/GIT_CONFIGURATION_GUIDE.md +258 -0
- launch.sh +62 -45
- scripts/dataset_tonic/setup_hf_dataset.py +18 -2
- scripts/trackio_tonic/deploy_trackio_space.py +69 -56
- scripts/training/train.py +6 -0
- src/data.py +31 -1
- src/train.py +4 -2
- templates/datasets/readme.md +95 -0
- templates/spaces/README.md +46 -0
- templates/spaces/{requirements_space.txt → requirements.txt} +0 -0
- test_pipeline.py +0 -260
- tests/test_deployment.py +167 -0
- test_formatting_fix.py → tests/test_formatting_fix.py +0 -0
- tests/test_pipeline.py +150 -0
- tests/test_readme_template.py +123 -0
- tests/test_simple_pipeline.py +130 -0
config/train_smollm3_h100_lightweight.py
CHANGED
|
@@ -56,6 +56,8 @@ config = SmolLM3Config(
|
|
| 56 |
target_field="completion",
|
| 57 |
filter_bad_entries=False,
|
| 58 |
bad_entry_field="bad_entry",
|
|
|
|
|
|
|
| 59 |
|
| 60 |
# Chat template configuration
|
| 61 |
use_chat_template=True,
|
|
|
|
| 56 |
target_field="completion",
|
| 57 |
filter_bad_entries=False,
|
| 58 |
bad_entry_field="bad_entry",
|
| 59 |
+
sample_size=80000, # 80K samples for lightweight training
|
| 60 |
+
sample_seed=42, # For reproducibility
|
| 61 |
|
| 62 |
# Chat template configuration
|
| 63 |
use_chat_template=True,
|
docs/GIT_CONFIGURATION_GUIDE.md
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Git Configuration Guide for Hugging Face Operations
|
| 2 |
+
|
| 3 |
+
This guide explains the correct way to configure git for Hugging Face Spaces deployment and model pushing operations.
|
| 4 |
+
|
| 5 |
+
## 🎯 **Overview**
|
| 6 |
+
|
| 7 |
+
When working with Hugging Face Spaces and model repositories, proper git configuration is essential for:
|
| 8 |
+
- Creating and deploying Spaces
|
| 9 |
+
- Pushing models to the Hub
|
| 10 |
+
- Managing experiment tracking datasets
|
| 11 |
+
- Ensuring proper authentication
|
| 12 |
+
- **Using the user's actual email address for proper git identity and commit attribution**
|
| 13 |
+
|
| 14 |
+
## ✅ **Correct Git Configuration**
|
| 15 |
+
|
| 16 |
+
### **1. Local vs Global Configuration**
|
| 17 |
+
|
| 18 |
+
**❌ Wrong (Current):**
|
| 19 |
+
```bash
|
| 20 |
+
git config --global user.email "[email protected]"
|
| 21 |
+
git config --global user.name "$HF_USERNAME"
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
**✅ Correct (Updated):**
|
| 25 |
+
```bash
|
| 26 |
+
# Get user's actual email address
|
| 27 |
+
read -p "Enter your email address for git configuration: " GIT_EMAIL
|
| 28 |
+
|
| 29 |
+
# Configure git locally for this project only
|
| 30 |
+
git config user.email "$GIT_EMAIL"
|
| 31 |
+
git config user.name "$HF_USERNAME"
|
| 32 |
+
|
| 33 |
+
# Verify configuration
|
| 34 |
+
git config user.email
|
| 35 |
+
git config user.name
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
### **2. Proper Authentication Setup**
|
| 39 |
+
|
| 40 |
+
**✅ Correct Authentication:**
|
| 41 |
+
```bash
|
| 42 |
+
# Login with token and add to git credentials
|
| 43 |
+
huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential
|
| 44 |
+
|
| 45 |
+
# Verify login
|
| 46 |
+
huggingface-cli whoami
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
### **3. Error Handling**
|
| 50 |
+
|
| 51 |
+
**✅ Robust Configuration:**
|
| 52 |
+
```bash
|
| 53 |
+
# Get user's email and configure git with error handling
|
| 54 |
+
read -p "Enter your email address for git configuration: " GIT_EMAIL
|
| 55 |
+
|
| 56 |
+
if git config user.email "$GIT_EMAIL" && \
|
| 57 |
+
git config user.name "$HF_USERNAME"; then
|
| 58 |
+
echo "β
Git configured successfully"
|
| 59 |
+
echo " Email: $(git config user.email)"
|
| 60 |
+
echo " Name: $(git config user.name)"
|
| 61 |
+
else
|
| 62 |
+
echo "β Failed to configure git"
|
| 63 |
+
exit 1
|
| 64 |
+
fi
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
## 🔧 **Why These Changes Matter**
|
| 68 |
+
|
| 69 |
+
### **1. Local Configuration Benefits**
|
| 70 |
+
- **Isolation**: Doesn't affect other projects on the system
|
| 71 |
+
- **Project-specific**: Each project can have different git settings
|
| 72 |
+
- **Cleaner**: No global state pollution
|
| 73 |
+
- **Safer**: Won't interfere with existing git configurations
|
| 74 |
+
|
| 75 |
+
### **2. User's Actual Email Address**
|
| 76 |
+
- **Professional**: Uses the user's real email address
|
| 77 |
+
- **Authentic**: Represents the actual user's identity
|
| 78 |
+
- **Consistent**: Matches the user's Hugging Face account
|
| 79 |
+
- **Best Practice**: Follows git configuration standards
|
| 80 |
+
|
| 81 |
+
### **3. Token-based Authentication**
|
| 82 |
+
- **Secure**: Uses HF token instead of username/password
|
| 83 |
+
- **Automated**: No manual password entry required
|
| 84 |
+
- **Persistent**: Credentials stored securely
|
| 85 |
+
- **Verified**: Includes verification steps
|
| 86 |
+
|
| 87 |
+
## 📋 **Implementation in Launch Script**
|
| 88 |
+
|
| 89 |
+
### **Updated Authentication Step:**
|
| 90 |
+
```bash
|
| 91 |
+
# Step 8: Authentication setup
|
| 92 |
+
print_step "Step 8: Authentication Setup"
|
| 93 |
+
echo "================================"
|
| 94 |
+
|
| 95 |
+
export HF_TOKEN="$HF_TOKEN"
|
| 96 |
+
export TRACKIO_DATASET_REPO="$TRACKIO_DATASET_REPO"
|
| 97 |
+
|
| 98 |
+
# Login to Hugging Face with token
|
| 99 |
+
print_info "Logging in to Hugging Face..."
|
| 100 |
+
if huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential; then
|
| 101 |
+
print_status "Successfully logged in to Hugging Face"
|
| 102 |
+
print_info "Username: $(huggingface-cli whoami)"
|
| 103 |
+
else
|
| 104 |
+
print_error "Failed to login to Hugging Face"
|
| 105 |
+
print_error "Please check your token and try again"
|
| 106 |
+
exit 1
|
| 107 |
+
fi
|
| 108 |
+
|
| 109 |
+
# Configure git for HF operations
|
| 110 |
+
print_step "Step 8.1: Git Configuration"
|
| 111 |
+
echo "================================"
|
| 112 |
+
|
| 113 |
+
print_info "Configuring git for Hugging Face operations..."
|
| 114 |
+
|
| 115 |
+
# Get user's email for git configuration
|
| 116 |
+
get_input "Enter your email address for git configuration" "" GIT_EMAIL
|
| 117 |
+
|
| 118 |
+
# Configure git locally (not globally) for this project
|
| 119 |
+
git config user.email "$GIT_EMAIL"
|
| 120 |
+
git config user.name "$HF_USERNAME"
|
| 121 |
+
|
| 122 |
+
# Verify git configuration
|
| 123 |
+
print_info "Verifying git configuration..."
|
| 124 |
+
if git config user.email && git config user.name; then
|
| 125 |
+
print_status "Git configured successfully"
|
| 126 |
+
print_info " Email: $(git config user.email)"
|
| 127 |
+
print_info " Name: $(git config user.name)"
|
| 128 |
+
else
|
| 129 |
+
print_error "Failed to configure git"
|
| 130 |
+
exit 1
|
| 131 |
+
fi
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
## 🚀 **Deployment Script Improvements**
|
| 135 |
+
|
| 136 |
+
### **Robust File Upload:**
|
| 137 |
+
```python
|
| 138 |
+
def upload_files(self) -> bool:
|
| 139 |
+
"""Upload necessary files to the Space"""
|
| 140 |
+
try:
|
| 141 |
+
print("Uploading files to Space...")
|
| 142 |
+
|
| 143 |
+
# Files to upload
|
| 144 |
+
files_to_upload = [
|
| 145 |
+
"app.py",
|
| 146 |
+
"requirements_space.txt",
|
| 147 |
+
"README.md"
|
| 148 |
+
]
|
| 149 |
+
|
| 150 |
+
# Check if we're in a git repository
|
| 151 |
+
try:
|
| 152 |
+
subprocess.run(["git", "status"], capture_output=True, check=True)
|
| 153 |
+
except subprocess.CalledProcessError:
|
| 154 |
+
print("β οΈ Not in a git repository, initializing...")
|
| 155 |
+
subprocess.run(["git", "init"], check=True)
|
| 156 |
+
subprocess.run(["git", "remote", "add", "origin", f"https://huggingface.co/spaces/{self.username}/{self.space_name}"], check=True)
|
| 157 |
+
|
| 158 |
+
# Add all files at once
|
| 159 |
+
existing_files = [f for f in files_to_upload if os.path.exists(f)]
|
| 160 |
+
if existing_files:
|
| 161 |
+
subprocess.run(["git", "add"] + existing_files, check=True)
|
| 162 |
+
subprocess.run(["git", "commit", "-m", "Initial Space setup"], check=True)
|
| 163 |
+
|
| 164 |
+
# Push to the space
|
| 165 |
+
try:
|
| 166 |
+
subprocess.run(["git", "push", "origin", "main"], check=True)
|
| 167 |
+
print(f"β
Uploaded {len(existing_files)} files")
|
| 168 |
+
except subprocess.CalledProcessError:
|
| 169 |
+
# Try pushing to master branch if main doesn't exist
|
| 170 |
+
subprocess.run(["git", "push", "origin", "master"], check=True)
|
| 171 |
+
print(f"β
Uploaded {len(existing_files)} files")
|
| 172 |
+
else:
|
| 173 |
+
print("β οΈ No files found to upload")
|
| 174 |
+
|
| 175 |
+
return True
|
| 176 |
+
|
| 177 |
+
except Exception as e:
|
| 178 |
+
print(f"β Error uploading files: {e}")
|
| 179 |
+
return False
|
| 180 |
+
```
|
| 181 |
+
|
| 182 |
+
## 🔍 **Troubleshooting**
|
| 183 |
+
|
| 184 |
+
### **Common Issues and Solutions:**
|
| 185 |
+
|
| 186 |
+
#### **1. Git Configuration Fails**
|
| 187 |
+
```bash
|
| 188 |
+
# Check current git config
|
| 189 |
+
git config --list
|
| 190 |
+
|
| 191 |
+
# Reset if needed
|
| 192 |
+
git config --unset user.email
|
| 193 |
+
git config --unset user.name
|
| 194 |
+
|
| 195 |
+
# Reconfigure
|
| 196 |
+
git config user.email "[email protected]"
|
| 197 |
+
git config user.name "your-username"
|
| 198 |
+
```
|
| 199 |
+
|
| 200 |
+
#### **2. Authentication Issues**
|
| 201 |
+
```bash
|
| 202 |
+
# Check HF login status
|
| 203 |
+
huggingface-cli whoami
|
| 204 |
+
|
| 205 |
+
# Re-login if needed
|
| 206 |
+
huggingface-cli logout
|
| 207 |
+
huggingface-cli login --token "your-token"
|
| 208 |
+
```
|
| 209 |
+
|
| 210 |
+
#### **3. Space Deployment Fails**
|
| 211 |
+
```bash
|
| 212 |
+
# Check git remote
|
| 213 |
+
git remote -v
|
| 214 |
+
|
| 215 |
+
# Re-add remote if needed
|
| 216 |
+
git remote remove origin
|
| 217 |
+
git remote add origin https://huggingface.co/spaces/username/space-name
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
## 📋 **Best Practices**
|
| 221 |
+
|
| 222 |
+
### **1. Always Use Local Configuration**
|
| 223 |
+
- Use `git config` without `--global` flag
|
| 224 |
+
- Keeps project configurations isolated
|
| 225 |
+
- Prevents conflicts with other projects
|
| 226 |
+
|
| 227 |
+
### **2. Verify Configuration**
|
| 228 |
+
- Always check that git config was successful
|
| 229 |
+
- Display configured values for verification
|
| 230 |
+
- Exit on failure to prevent downstream issues
|
| 231 |
+
|
| 232 |
+
### **3. Use Token-based Authentication**
|
| 233 |
+
- More secure than username/password
|
| 234 |
+
- Automatically handles credential storage
|
| 235 |
+
- Works well with CI/CD systems
|
| 236 |
+
|
| 237 |
+
### **4. Handle Errors Gracefully**
|
| 238 |
+
- Check return codes from git commands
|
| 239 |
+
- Provide clear error messages
|
| 240 |
+
- Exit early on critical failures
|
| 241 |
+
|
| 242 |
+
### **5. Test Configuration**
|
| 243 |
+
- Verify git config after setting it
|
| 244 |
+
- Test HF login before proceeding
|
| 245 |
+
- Validate remote repository access
|
| 246 |
+
|
| 247 |
+
## 🎯 **Summary**
|
| 248 |
+
|
| 249 |
+
The updated git configuration approach provides:
|
| 250 |
+
|
| 251 |
+
1. **✅ Better Isolation**: Local configuration doesn't affect system-wide settings
|
| 252 |
+
2. **✅ User's Actual Email**: Uses the user's real email address for proper git identity
|
| 253 |
+
3. **✅ Proper Authentication**: Token-based login with credential storage
|
| 254 |
+
4. **✅ Error Handling**: Robust verification and error reporting
|
| 255 |
+
5. **✅ Professional Setup**: Uses user's actual email and verification
|
| 256 |
+
6. **✅ Deployment Reliability**: Improved Space deployment with git repository handling
|
| 257 |
+
|
| 258 |
+
This ensures a more reliable and professional setup for Hugging Face operations in the SmolLM3 fine-tuning pipeline.
|
launch.sh
CHANGED
|
@@ -448,7 +448,41 @@ echo "================================"
|
|
| 448 |
|
| 449 |
export HF_TOKEN="$HF_TOKEN"
|
| 450 |
export TRACKIO_DATASET_REPO="$TRACKIO_DATASET_REPO"
|
| 451 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
|
| 453 |
# Step 9: Deploy Trackio Space
|
| 454 |
print_step "Step 9: Deploying Trackio Space"
|
|
@@ -482,14 +516,14 @@ echo "================================="
|
|
| 482 |
cd ../trackio_tonic
|
| 483 |
python configure_trackio.py
|
| 484 |
|
| 485 |
-
# Step 12:
|
| 486 |
-
print_step "Step 12:
|
| 487 |
-
echo "
|
| 488 |
|
| 489 |
cd ../..
|
| 490 |
-
|
| 491 |
|
| 492 |
-
# Step 13: Dataset
|
| 493 |
print_step "Step 13: Dataset Configuration"
|
| 494 |
echo "=================================="
|
| 495 |
|
|
@@ -499,57 +533,40 @@ if [ "$TRAINING_CONFIG_TYPE" = "H100 Lightweight (Rapid)" ]; then
|
|
| 499 |
print_info "Sample size: ${DATASET_SAMPLE_SIZE:-80000} (will be handled by data.py)"
|
| 500 |
fi
|
| 501 |
|
| 502 |
-
# Step 14:
|
| 503 |
-
print_step "Step 14:
|
| 504 |
-
echo "
|
| 505 |
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
echo " Sequence length: $MAX_SEQ_LENGTH"
|
| 512 |
-
echo " Training steps will be calculated by the training script"
|
| 513 |
|
| 514 |
# Step 15: Start training
|
| 515 |
print_step "Step 15: Starting Training"
|
| 516 |
echo "=============================="
|
| 517 |
|
| 518 |
-
print_info "
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
--
|
| 529 |
-
--out_dir /output-checkpoint \
|
| 530 |
-
--init_from scratch \
|
| 531 |
-
--batch_size $BATCH_SIZE \
|
| 532 |
-
--learning_rate $LEARNING_RATE \
|
| 533 |
-
--gradient_accumulation_steps $GRADIENT_ACCUMULATION_STEPS \
|
| 534 |
-
--max_seq_length $MAX_SEQ_LENGTH \
|
| 535 |
-
--save_steps $SAVE_STEPS \
|
| 536 |
-
--eval_steps $EVAL_STEPS \
|
| 537 |
-
--logging_steps $LOGGING_STEPS \
|
| 538 |
-
--enable_tracking \
|
| 539 |
-
--trackio_url "$TRACKIO_URL" \
|
| 540 |
-
--experiment_name "$EXPERIMENT_NAME" \
|
| 541 |
-
--hf_token "$HF_TOKEN" \
|
| 542 |
-
--dataset_repo "$TRACKIO_DATASET_REPO"
|
| 543 |
|
| 544 |
# Step 16: Push model to Hugging Face Hub
|
| 545 |
print_step "Step 16: Pushing Model to HF Hub"
|
| 546 |
echo "====================================="
|
| 547 |
|
| 548 |
-
print_info "
|
| 549 |
-
|
| 550 |
-
echo " Repository: $REPO_NAME"
|
| 551 |
|
| 552 |
-
# Run the
|
| 553 |
python scripts/model_tonic/push_to_huggingface.py /output-checkpoint "$REPO_NAME" \
|
| 554 |
--token "$HF_TOKEN" \
|
| 555 |
--trackio-url "$TRACKIO_URL" \
|
|
|
|
| 448 |
|
| 449 |
export HF_TOKEN="$HF_TOKEN"
|
| 450 |
export TRACKIO_DATASET_REPO="$TRACKIO_DATASET_REPO"
|
| 451 |
+
|
| 452 |
+
# Login to Hugging Face with token
|
| 453 |
+
print_info "Logging in to Hugging Face..."
|
| 454 |
+
if huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential; then
|
| 455 |
+
print_status "Successfully logged in to Hugging Face"
|
| 456 |
+
print_info "Username: $(huggingface-cli whoami)"
|
| 457 |
+
else
|
| 458 |
+
print_error "Failed to login to Hugging Face"
|
| 459 |
+
print_error "Please check your token and try again"
|
| 460 |
+
exit 1
|
| 461 |
+
fi
|
| 462 |
+
|
| 463 |
+
# Configure git for HF operations
|
| 464 |
+
print_step "Step 8.1: Git Configuration"
|
| 465 |
+
echo "================================"
|
| 466 |
+
|
| 467 |
+
print_info "Configuring git for Hugging Face operations..."
|
| 468 |
+
|
| 469 |
+
# Get user's email for git configuration
|
| 470 |
+
get_input "Enter the email you used to register your account at huggingface for git configuration" "" GIT_EMAIL
|
| 471 |
+
|
| 472 |
+
# Configure git locally (not globally) for this project
|
| 473 |
+
git config user.email "$GIT_EMAIL"
|
| 474 |
+
git config user.name "$HF_USERNAME"
|
| 475 |
+
|
| 476 |
+
# Verify git configuration
|
| 477 |
+
print_info "Verifying git configuration..."
|
| 478 |
+
if git config user.email && git config user.name; then
|
| 479 |
+
print_status "Git configured successfully"
|
| 480 |
+
print_info " Email: $(git config user.email)"
|
| 481 |
+
print_info " Name: $(git config user.name)"
|
| 482 |
+
else
|
| 483 |
+
print_error "Failed to configure git"
|
| 484 |
+
exit 1
|
| 485 |
+
fi
|
| 486 |
|
| 487 |
# Step 9: Deploy Trackio Space
|
| 488 |
print_step "Step 9: Deploying Trackio Space"
|
|
|
|
| 516 |
cd ../trackio_tonic
|
| 517 |
python configure_trackio.py
|
| 518 |
|
| 519 |
+
# Step 12: Training Configuration
|
| 520 |
+
print_step "Step 12: Training Configuration"
|
| 521 |
+
echo "==================================="
|
| 522 |
|
| 523 |
cd ../..
|
| 524 |
+
print_info "Using existing configuration file: $CONFIG_FILE"
|
| 525 |
|
| 526 |
+
# Step 13: Dataset Configuration
|
| 527 |
print_step "Step 13: Dataset Configuration"
|
| 528 |
echo "=================================="
|
| 529 |
|
|
|
|
| 533 |
print_info "Sample size: ${DATASET_SAMPLE_SIZE:-80000} (will be handled by data.py)"
|
| 534 |
fi
|
| 535 |
|
| 536 |
+
# Step 14: Training Parameters
|
| 537 |
+
print_step "Step 14: Training Parameters"
|
| 538 |
+
echo "================================"
|
| 539 |
|
| 540 |
+
print_info "Training parameters will be loaded from configuration file"
|
| 541 |
+
print_info "Model: $MODEL_NAME"
|
| 542 |
+
print_info "Dataset: $DATASET_NAME"
|
| 543 |
+
print_info "Batch size: $BATCH_SIZE"
|
| 544 |
+
print_info "Learning rate: $LEARNING_RATE"
|
|
|
|
|
|
|
| 545 |
|
| 546 |
# Step 15: Start training
|
| 547 |
print_step "Step 15: Starting Training"
|
| 548 |
echo "=============================="
|
| 549 |
|
| 550 |
+
print_info "Starting training with configuration: $CONFIG_FILE"
|
| 551 |
+
print_info "Experiment: $EXPERIMENT_NAME"
|
| 552 |
+
print_info "Output: /output-checkpoint"
|
| 553 |
+
print_info "Trackio: $TRACKIO_URL"
|
| 554 |
+
|
| 555 |
+
# Run the simpler training script
|
| 556 |
+
python scripts/training/train.py \
|
| 557 |
+
--config "$CONFIG_FILE" \
|
| 558 |
+
--experiment-name "$EXPERIMENT_NAME" \
|
| 559 |
+
--output-dir /output-checkpoint \
|
| 560 |
+
--trackio-url "$TRACKIO_URL"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 561 |
|
| 562 |
# Step 16: Push model to Hugging Face Hub
|
| 563 |
print_step "Step 16: Pushing Model to HF Hub"
|
| 564 |
echo "====================================="
|
| 565 |
|
| 566 |
+
print_info "Pushing model to: $REPO_NAME"
|
| 567 |
+
print_info "Checkpoint: /output-checkpoint"
|
|
|
|
| 568 |
|
| 569 |
+
# Run the push script
|
| 570 |
python scripts/model_tonic/push_to_huggingface.py /output-checkpoint "$REPO_NAME" \
|
| 571 |
--token "$HF_TOKEN" \
|
| 572 |
--trackio-url "$TRACKIO_URL" \
|
scripts/dataset_tonic/setup_hf_dataset.py
CHANGED
|
@@ -6,6 +6,7 @@ Setup script for Hugging Face Dataset repository for Trackio experiments
|
|
| 6 |
import os
|
| 7 |
import json
|
| 8 |
from datetime import datetime
|
|
|
|
| 9 |
from datasets import Dataset
|
| 10 |
from huggingface_hub import HfApi
|
| 11 |
|
|
@@ -249,16 +250,31 @@ def setup_trackio_dataset():
|
|
| 249 |
# Create dataset
|
| 250 |
dataset = Dataset.from_list(initial_experiments)
|
| 251 |
|
| 252 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
api = HfApi(token=hf_token)
|
| 254 |
dataset.push_to_hub(
|
| 255 |
dataset_repo,
|
| 256 |
token=hf_token,
|
| 257 |
-
private=True # Make it private for security
|
|
|
|
| 258 |
)
|
| 259 |
|
| 260 |
print(f"β
Successfully created dataset: {dataset_repo}")
|
| 261 |
print(f"π Added {len(initial_experiments)} experiments")
|
|
|
|
|
|
|
| 262 |
print("π Dataset is private (only accessible with your token)")
|
| 263 |
print("\nπ― Next steps:")
|
| 264 |
print("1. Set HF_TOKEN in your Hugging Face Space environment")
|
|
|
|
| 6 |
import os
|
| 7 |
import json
|
| 8 |
from datetime import datetime
|
| 9 |
+
from pathlib import Path
|
| 10 |
from datasets import Dataset
|
| 11 |
from huggingface_hub import HfApi
|
| 12 |
|
|
|
|
| 250 |
# Create dataset
|
| 251 |
dataset = Dataset.from_list(initial_experiments)
|
| 252 |
|
| 253 |
+
# Get the project root directory (3 levels up from this script)
|
| 254 |
+
project_root = Path(__file__).parent.parent.parent
|
| 255 |
+
templates_dir = project_root / "templates" / "datasets"
|
| 256 |
+
readme_path = templates_dir / "readme.md"
|
| 257 |
+
|
| 258 |
+
# Read README content if it exists
|
| 259 |
+
readme_content = None
|
| 260 |
+
if readme_path.exists():
|
| 261 |
+
with open(readme_path, 'r', encoding='utf-8') as f:
|
| 262 |
+
readme_content = f.read()
|
| 263 |
+
print(f"β
Found README template: {readme_path}")
|
| 264 |
+
|
| 265 |
+
# Push to HF Hub with README
|
| 266 |
api = HfApi(token=hf_token)
|
| 267 |
dataset.push_to_hub(
|
| 268 |
dataset_repo,
|
| 269 |
token=hf_token,
|
| 270 |
+
private=True, # Make it private for security
|
| 271 |
+
readme_content=readme_content # Include README if available
|
| 272 |
)
|
| 273 |
|
| 274 |
print(f"β
Successfully created dataset: {dataset_repo}")
|
| 275 |
print(f"π Added {len(initial_experiments)} experiments")
|
| 276 |
+
if readme_content:
|
| 277 |
+
print("π Included README from templates")
|
| 278 |
print("π Dataset is private (only accessible with your token)")
|
| 279 |
print("\nπ― Next steps:")
|
| 280 |
print("1. Set HF_TOKEN in your Hugging Face Space environment")
|
scripts/trackio_tonic/deploy_trackio_space.py
CHANGED
|
@@ -61,22 +61,55 @@ class TrackioSpaceDeployer:
|
|
| 61 |
try:
|
| 62 |
print("Uploading files to Space...")
|
| 63 |
|
| 64 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
files_to_upload = [
|
| 66 |
"app.py",
|
| 67 |
-
"
|
| 68 |
-
"README.md"
|
| 69 |
]
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
else:
|
| 79 |
-
print(f"β οΈ File not found: {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
return True
|
| 82 |
|
|
@@ -89,20 +122,28 @@ class TrackioSpaceDeployer:
|
|
| 89 |
try:
|
| 90 |
print("Configuring Space settings...")
|
| 91 |
|
| 92 |
-
#
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
title: Trackio Tonic
|
| 107 |
emoji: π
|
| 108 |
colorFrom: indigo
|
|
@@ -119,39 +160,11 @@ short_description: trackio for training monitoring
|
|
| 119 |
|
| 120 |
A Gradio interface for experiment tracking and monitoring.
|
| 121 |
|
| 122 |
-
## Features
|
| 123 |
-
|
| 124 |
-
- Create and manage experiments
|
| 125 |
-
- Log training metrics and parameters
|
| 126 |
-
- View experiment details and results
|
| 127 |
-
- Update experiment status
|
| 128 |
-
|
| 129 |
-
## Usage
|
| 130 |
-
|
| 131 |
-
1. Create a new experiment using the "Create Experiment" tab
|
| 132 |
-
2. Log metrics during training using the "Log Metrics" tab
|
| 133 |
-
3. View experiment details using the "View Experiments" tab
|
| 134 |
-
4. Update experiment status using the "Update Status" tab
|
| 135 |
-
|
| 136 |
-
## Integration
|
| 137 |
-
|
| 138 |
-
To connect your training script to this Trackio Space:
|
| 139 |
-
|
| 140 |
-
```python
|
| 141 |
-
from monitoring import SmolLM3Monitor
|
| 142 |
-
|
| 143 |
-
monitor = SmolLM3Monitor(
|
| 144 |
-
experiment_name="my_experiment",
|
| 145 |
-
trackio_url="{self.space_url}",
|
| 146 |
-
enable_tracking=True
|
| 147 |
-
)
|
| 148 |
-
```
|
| 149 |
-
|
| 150 |
Visit: {self.space_url}
|
| 151 |
"""
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
f.
|
| 155 |
|
| 156 |
return True
|
| 157 |
|
|
|
|
| 61 |
try:
|
| 62 |
print("Uploading files to Space...")
|
| 63 |
|
| 64 |
+
# Get the project root directory (3 levels up from this script)
|
| 65 |
+
project_root = Path(__file__).parent.parent.parent
|
| 66 |
+
templates_dir = project_root / "templates" / "spaces"
|
| 67 |
+
|
| 68 |
+
# Files to upload from templates/spaces
|
| 69 |
files_to_upload = [
|
| 70 |
"app.py",
|
| 71 |
+
"requirements.txt"
|
|
|
|
| 72 |
]
|
| 73 |
|
| 74 |
+
# README.md will be created by configure_space method
|
| 75 |
+
|
| 76 |
+
# Copy files from templates/spaces to current directory
|
| 77 |
+
copied_files = []
|
| 78 |
+
for file_name in files_to_upload:
|
| 79 |
+
source_path = templates_dir / file_name
|
| 80 |
+
if source_path.exists():
|
| 81 |
+
import shutil
|
| 82 |
+
shutil.copy2(source_path, file_name)
|
| 83 |
+
copied_files.append(file_name)
|
| 84 |
+
print(f"β
Copied {file_name} from templates")
|
| 85 |
else:
|
| 86 |
+
print(f"β οΈ File not found: {source_path}")
|
| 87 |
+
|
| 88 |
+
# Check if we're in a git repository
|
| 89 |
+
try:
|
| 90 |
+
subprocess.run(["git", "status"], capture_output=True, check=True)
|
| 91 |
+
except subprocess.CalledProcessError:
|
| 92 |
+
print("β οΈ Not in a git repository, initializing...")
|
| 93 |
+
subprocess.run(["git", "init"], check=True)
|
| 94 |
+
subprocess.run(["git", "remote", "add", "origin", f"https://huggingface.co/spaces/{self.username}/{self.space_name}"], check=True)
|
| 95 |
+
|
| 96 |
+
# Add all files at once
|
| 97 |
+
existing_files = [f for f in files_to_upload if os.path.exists(f)]
|
| 98 |
+
if existing_files:
|
| 99 |
+
subprocess.run(["git", "add"] + existing_files, check=True)
|
| 100 |
+
subprocess.run(["git", "add", "README.md"], check=True) # Add README.md that was created in configure_space
|
| 101 |
+
subprocess.run(["git", "commit", "-m", "Initial Space setup"], check=True)
|
| 102 |
+
|
| 103 |
+
# Push to the space
|
| 104 |
+
try:
|
| 105 |
+
subprocess.run(["git", "push", "origin", "main"], check=True)
|
| 106 |
+
print(f"β
Uploaded {len(existing_files)} files")
|
| 107 |
+
except subprocess.CalledProcessError:
|
| 108 |
+
# Try pushing to master branch if main doesn't exist
|
| 109 |
+
subprocess.run(["git", "push", "origin", "master"], check=True)
|
| 110 |
+
print(f"β
Uploaded {len(existing_files)} files")
|
| 111 |
+
else:
|
| 112 |
+
print("β οΈ No files found to upload")
|
| 113 |
|
| 114 |
return True
|
| 115 |
|
|
|
|
| 122 |
try:
|
| 123 |
print("Configuring Space settings...")
|
| 124 |
|
| 125 |
+
# Get the project root directory (3 levels up from this script)
|
| 126 |
+
project_root = Path(__file__).parent.parent.parent
|
| 127 |
+
templates_dir = project_root / "templates" / "spaces"
|
| 128 |
+
readme_template_path = templates_dir / "README.md"
|
| 129 |
+
|
| 130 |
+
# Read README template if it exists
|
| 131 |
+
if readme_template_path.exists():
|
| 132 |
+
with open(readme_template_path, 'r', encoding='utf-8') as f:
|
| 133 |
+
readme_template = f.read()
|
| 134 |
+
|
| 135 |
+
# Replace placeholder with actual space URL
|
| 136 |
+
readme_content = readme_template.replace("{SPACE_URL}", self.space_url)
|
| 137 |
+
|
| 138 |
+
# Write README.md for the space
|
| 139 |
+
with open("README.md", "w", encoding='utf-8') as f:
|
| 140 |
+
f.write(readme_content)
|
| 141 |
+
|
| 142 |
+
print(f"β
Created README.md from template")
|
| 143 |
+
else:
|
| 144 |
+
print(f"β οΈ README template not found: {readme_template_path}")
|
| 145 |
+
# Fallback to basic README
|
| 146 |
+
basic_readme = f"""---
|
| 147 |
title: Trackio Tonic
|
| 148 |
emoji: π
|
| 149 |
colorFrom: indigo
|
|
|
|
| 160 |
|
| 161 |
A Gradio interface for experiment tracking and monitoring.
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
Visit: {self.space_url}
|
| 164 |
"""
|
| 165 |
+
with open("README.md", "w", encoding='utf-8') as f:
|
| 166 |
+
f.write(basic_readme)
|
| 167 |
+
print(f"β
Created basic README.md")
|
| 168 |
|
| 169 |
return True
|
| 170 |
|
scripts/training/train.py
CHANGED
|
@@ -63,11 +63,13 @@ def main():
|
|
| 63 |
try:
|
| 64 |
from config.train_smollm3_openhermes_fr_a100_large import get_config as get_large_config
|
| 65 |
from config.train_smollm3_openhermes_fr_a100_multiple_passes import get_config as get_multiple_passes_config
|
|
|
|
| 66 |
|
| 67 |
# Map config files to their respective functions
|
| 68 |
config_map = {
|
| 69 |
"config/train_smollm3_openhermes_fr_a100_large.py": get_large_config,
|
| 70 |
"config/train_smollm3_openhermes_fr_a100_multiple_passes.py": get_multiple_passes_config,
|
|
|
|
| 71 |
}
|
| 72 |
|
| 73 |
if args.config in config_map:
|
|
@@ -81,6 +83,7 @@ def main():
|
|
| 81 |
print("Available configurations:")
|
| 82 |
print(" - config/train_smollm3_openhermes_fr_a100_large.py (Large batch, 1.3 passes)")
|
| 83 |
print(" - config/train_smollm3_openhermes_fr_a100_multiple_passes.py (Multiple passes, 4 epochs)")
|
|
|
|
| 84 |
return 1
|
| 85 |
|
| 86 |
# Override experiment name if provided
|
|
@@ -124,6 +127,9 @@ def main():
|
|
| 124 |
|
| 125 |
# Import and run training
|
| 126 |
try:
|
|
|
|
|
|
|
|
|
|
| 127 |
from train import main as train_main
|
| 128 |
|
| 129 |
# Set up training arguments - config is positional, not --config
|
|
|
|
| 63 |
try:
|
| 64 |
from config.train_smollm3_openhermes_fr_a100_large import get_config as get_large_config
|
| 65 |
from config.train_smollm3_openhermes_fr_a100_multiple_passes import get_config as get_multiple_passes_config
|
| 66 |
+
from config.train_smollm3_h100_lightweight import config as h100_lightweight_config
|
| 67 |
|
| 68 |
# Map config files to their respective functions
|
| 69 |
config_map = {
|
| 70 |
"config/train_smollm3_openhermes_fr_a100_large.py": get_large_config,
|
| 71 |
"config/train_smollm3_openhermes_fr_a100_multiple_passes.py": get_multiple_passes_config,
|
| 72 |
+
"config/train_smollm3_h100_lightweight.py": lambda x: h100_lightweight_config,
|
| 73 |
}
|
| 74 |
|
| 75 |
if args.config in config_map:
|
|
|
|
| 83 |
print("Available configurations:")
|
| 84 |
print(" - config/train_smollm3_openhermes_fr_a100_large.py (Large batch, 1.3 passes)")
|
| 85 |
print(" - config/train_smollm3_openhermes_fr_a100_multiple_passes.py (Multiple passes, 4 epochs)")
|
| 86 |
+
print(" - config/train_smollm3_h100_lightweight.py (H100 lightweight, 80K samples)")
|
| 87 |
return 1
|
| 88 |
|
| 89 |
# Override experiment name if provided
|
|
|
|
| 127 |
|
| 128 |
# Import and run training
|
| 129 |
try:
|
| 130 |
+
# Add src directory to path
|
| 131 |
+
src_path = str(Path(__file__).parent.parent.parent / "src")
|
| 132 |
+
sys.path.insert(0, src_path)
|
| 133 |
from train import main as train_main
|
| 134 |
|
| 135 |
# Set up training arguments - config is positional, not --config
|
src/data.py
CHANGED
|
@@ -24,7 +24,9 @@ class SmolLM3Dataset:
|
|
| 24 |
use_chat_template: bool = True,
|
| 25 |
chat_template_kwargs: Optional[Dict] = None,
|
| 26 |
filter_bad_entries: bool = False,
|
| 27 |
-
bad_entry_field: str = "bad_entry"
|
|
|
|
|
|
|
| 28 |
):
|
| 29 |
self.data_path = data_path
|
| 30 |
self.tokenizer = tokenizer
|
|
@@ -33,6 +35,8 @@ class SmolLM3Dataset:
|
|
| 33 |
self.chat_template_kwargs = chat_template_kwargs or {}
|
| 34 |
self.filter_bad_entries = filter_bad_entries
|
| 35 |
self.bad_entry_field = bad_entry_field
|
|
|
|
|
|
|
| 36 |
|
| 37 |
# Load and process dataset
|
| 38 |
self.dataset = self._load_dataset()
|
|
@@ -89,6 +93,32 @@ class SmolLM3Dataset:
|
|
| 89 |
filtered_size = len(dataset[split])
|
| 90 |
logger.info("Filtered %s: %d -> %d samples", split, original_size, filtered_size)
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
# If only 'train' split exists, create validation and test splits
|
| 93 |
if ("train" in dataset) and ("validation" not in dataset or "test" not in dataset):
|
| 94 |
logger.info("Automatically splitting train into train/validation/test (98/1/1)")
|
|
|
|
| 24 |
use_chat_template: bool = True,
|
| 25 |
chat_template_kwargs: Optional[Dict] = None,
|
| 26 |
filter_bad_entries: bool = False,
|
| 27 |
+
bad_entry_field: str = "bad_entry",
|
| 28 |
+
sample_size: Optional[int] = None,
|
| 29 |
+
sample_seed: int = 42
|
| 30 |
):
|
| 31 |
self.data_path = data_path
|
| 32 |
self.tokenizer = tokenizer
|
|
|
|
| 35 |
self.chat_template_kwargs = chat_template_kwargs or {}
|
| 36 |
self.filter_bad_entries = filter_bad_entries
|
| 37 |
self.bad_entry_field = bad_entry_field
|
| 38 |
+
self.sample_size = sample_size
|
| 39 |
+
self.sample_seed = sample_seed
|
| 40 |
|
| 41 |
# Load and process dataset
|
| 42 |
self.dataset = self._load_dataset()
|
|
|
|
| 93 |
filtered_size = len(dataset[split])
|
| 94 |
logger.info("Filtered %s: %d -> %d samples", split, original_size, filtered_size)
|
| 95 |
|
| 96 |
+
# Apply sampling if requested
|
| 97 |
+
if self.sample_size is not None and "train" in dataset:
|
| 98 |
+
logger.info(f"Sampling {self.sample_size} random samples from {len(dataset['train'])} total samples")
|
| 99 |
+
import random
|
| 100 |
+
random.seed(self.sample_seed)
|
| 101 |
+
|
| 102 |
+
# Sample indices
|
| 103 |
+
total_samples = len(dataset["train"])
|
| 104 |
+
if self.sample_size > total_samples:
|
| 105 |
+
logger.warning(f"Requested sample size ({self.sample_size}) is larger than dataset size ({total_samples}). Using all samples.")
|
| 106 |
+
sampled_indices = list(range(total_samples))
|
| 107 |
+
else:
|
| 108 |
+
sampled_indices = random.sample(range(total_samples), self.sample_size)
|
| 109 |
+
|
| 110 |
+
# Apply sampling to train split
|
| 111 |
+
dataset["train"] = dataset["train"].select(sampled_indices)
|
| 112 |
+
logger.info(f"Sampled {len(dataset['train'])} train samples")
|
| 113 |
+
|
| 114 |
+
# Also sample validation if it exists and is large
|
| 115 |
+
if "validation" in dataset and len(dataset["validation"]) > 1000:
|
| 116 |
+
val_sample_size = min(1000, len(dataset["validation"]))
|
| 117 |
+
logger.info(f"Sampling {val_sample_size} validation samples from {len(dataset['validation'])} total")
|
| 118 |
+
val_sampled_indices = random.sample(range(len(dataset["validation"])), val_sample_size)
|
| 119 |
+
dataset["validation"] = dataset["validation"].select(val_sampled_indices)
|
| 120 |
+
logger.info(f"Sampled {len(dataset['validation'])} validation samples")
|
| 121 |
+
|
| 122 |
# If only 'train' split exists, create validation and test splits
|
| 123 |
if ("train" in dataset) and ("validation" not in dataset or "test" not in dataset):
|
| 124 |
logger.info("Automatically splitting train into train/validation/test (98/1/1)")
|
src/train.py
CHANGED
|
@@ -183,13 +183,15 @@ def main():
|
|
| 183 |
dataset_path = os.path.join('/input', args.dataset_dir)
|
| 184 |
logger.info(f"Using local dataset: {dataset_path}")
|
| 185 |
|
| 186 |
-
# Load dataset with filtering options
|
| 187 |
dataset = SmolLM3Dataset(
|
| 188 |
data_path=dataset_path,
|
| 189 |
tokenizer=model.tokenizer,
|
| 190 |
max_seq_length=args.max_seq_length,
|
| 191 |
filter_bad_entries=getattr(config, 'filter_bad_entries', False),
|
| 192 |
-
bad_entry_field=getattr(config, 'bad_entry_field', 'bad_entry')
|
|
|
|
|
|
|
| 193 |
)
|
| 194 |
|
| 195 |
# Initialize trainer
|
|
|
|
| 183 |
dataset_path = os.path.join('/input', args.dataset_dir)
|
| 184 |
logger.info(f"Using local dataset: {dataset_path}")
|
| 185 |
|
| 186 |
+
# Load dataset with filtering options and sampling
|
| 187 |
dataset = SmolLM3Dataset(
|
| 188 |
data_path=dataset_path,
|
| 189 |
tokenizer=model.tokenizer,
|
| 190 |
max_seq_length=args.max_seq_length,
|
| 191 |
filter_bad_entries=getattr(config, 'filter_bad_entries', False),
|
| 192 |
+
bad_entry_field=getattr(config, 'bad_entry_field', 'bad_entry'),
|
| 193 |
+
sample_size=getattr(config, 'sample_size', None),
|
| 194 |
+
sample_seed=getattr(config, 'sample_seed', 42)
|
| 195 |
)
|
| 196 |
|
| 197 |
# Initialize trainer
|
templates/datasets/readme.md
CHANGED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
dataset_info:
|
| 3 |
+
features:
|
| 4 |
+
- name: experiment_id
|
| 5 |
+
dtype: string
|
| 6 |
+
- name: name
|
| 7 |
+
dtype: string
|
| 8 |
+
- name: description
|
| 9 |
+
dtype: string
|
| 10 |
+
- name: created_at
|
| 11 |
+
dtype: string
|
| 12 |
+
- name: status
|
| 13 |
+
dtype: string
|
| 14 |
+
- name: metrics
|
| 15 |
+
dtype: string
|
| 16 |
+
- name: parameters
|
| 17 |
+
dtype: string
|
| 18 |
+
- name: artifacts
|
| 19 |
+
dtype: string
|
| 20 |
+
- name: logs
|
| 21 |
+
dtype: string
|
| 22 |
+
- name: last_updated
|
| 23 |
+
dtype: string
|
| 24 |
+
splits:
|
| 25 |
+
- name: train
|
| 26 |
+
num_bytes: 4945
|
| 27 |
+
num_examples: 2
|
| 28 |
+
download_size: 15529
|
| 29 |
+
dataset_size: 4945
|
| 30 |
+
configs:
|
| 31 |
+
- config_name: default
|
| 32 |
+
data_files:
|
| 33 |
+
- split: train
|
| 34 |
+
path: data/train-*
|
| 35 |
+
tags:
|
| 36 |
+
- trackio
|
| 37 |
+
- tonic
|
| 38 |
+
- experiment tracking
|
| 39 |
+
---
|
| 40 |
+
|
| 41 |
+
# Trackio Experiments Dataset
|
| 42 |
+
|
| 43 |
+
This dataset stores experiment tracking data for ML training runs, particularly focused on SmolLM3 fine-tuning experiments.
|
| 44 |
+
|
| 45 |
+
## Dataset Structure
|
| 46 |
+
|
| 47 |
+
The dataset contains the following columns:
|
| 48 |
+
|
| 49 |
+
- **experiment_id**: Unique identifier for each experiment
|
| 50 |
+
- **name**: Human-readable name for the experiment
|
| 51 |
+
- **description**: Detailed description of the experiment
|
| 52 |
+
- **created_at**: Timestamp when the experiment was created
|
| 53 |
+
- **status**: Current status (running, completed, failed, paused)
|
| 54 |
+
- **metrics**: JSON string containing training metrics over time
|
| 55 |
+
- **parameters**: JSON string containing experiment configuration
|
| 56 |
+
- **artifacts**: JSON string containing experiment artifacts
|
| 57 |
+
- **logs**: JSON string containing experiment logs
|
| 58 |
+
- **last_updated**: Timestamp of last update
|
| 59 |
+
|
| 60 |
+
## Usage
|
| 61 |
+
|
| 62 |
+
This dataset is automatically used by the Trackio monitoring system to store and retrieve experiment data. It provides persistent storage for experiment tracking across different training runs.
|
| 63 |
+
|
| 64 |
+
## Integration
|
| 65 |
+
|
| 66 |
+
The dataset is used by:
|
| 67 |
+
- Trackio Spaces for experiment visualization
|
| 68 |
+
- Training scripts for logging metrics and parameters
|
| 69 |
+
- Monitoring systems for experiment tracking
|
| 70 |
+
|
| 71 |
+
## Privacy
|
| 72 |
+
|
| 73 |
+
This dataset is private by default to ensure experiment data security. Only users with appropriate permissions can access the data.
|
| 74 |
+
|
| 75 |
+
## Examples
|
| 76 |
+
|
| 77 |
+
### Sample Experiment Entry
|
| 78 |
+
```json
|
| 79 |
+
{
|
| 80 |
+
"experiment_id": "exp_20250720_130853",
|
| 81 |
+
"name": "smollm3_finetune",
|
| 82 |
+
"description": "SmolLM3 fine-tuning experiment",
|
| 83 |
+
"created_at": "2025-07-20T11:20:01.780908",
|
| 84 |
+
"status": "running",
|
| 85 |
+
"metrics": "[{\"timestamp\": \"2025-07-20T11:20:01.780908\", \"step\": 25, \"metrics\": {\"loss\": 1.1659, \"accuracy\": 0.759}}]",
|
| 86 |
+
"parameters": "{\"model_name\": \"HuggingFaceTB/SmolLM3-3B\", \"batch_size\": 8, \"learning_rate\": 3.5e-06}",
|
| 87 |
+
"artifacts": "[]",
|
| 88 |
+
"logs": "[]",
|
| 89 |
+
"last_updated": "2025-07-20T11:20:01.780908"
|
| 90 |
+
}
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
## License
|
| 94 |
+
|
| 95 |
+
This dataset is part of the Trackio experiment tracking system and follows the same license as the main project.
|
templates/spaces/README.md
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Trackio Tonic
|
| 3 |
+
emoji: π
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: yellow
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.38.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: true
|
| 10 |
+
license: mit
|
| 11 |
+
short_description: trackio for training monitoring
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# Trackio Experiment Tracking
|
| 15 |
+
|
| 16 |
+
A Gradio interface for experiment tracking and monitoring.
|
| 17 |
+
|
| 18 |
+
## Features
|
| 19 |
+
|
| 20 |
+
- Create and manage experiments
|
| 21 |
+
- Log training metrics and parameters
|
| 22 |
+
- View experiment details and results
|
| 23 |
+
- Update experiment status
|
| 24 |
+
|
| 25 |
+
## Usage
|
| 26 |
+
|
| 27 |
+
1. Create a new experiment using the "Create Experiment" tab
|
| 28 |
+
2. Log metrics during training using the "Log Metrics" tab
|
| 29 |
+
3. View experiment details using the "View Experiments" tab
|
| 30 |
+
4. Update experiment status using the "Update Status" tab
|
| 31 |
+
|
| 32 |
+
## Integration
|
| 33 |
+
|
| 34 |
+
To connect your training script to this Trackio Space:
|
| 35 |
+
|
| 36 |
+
```python
|
| 37 |
+
from monitoring import SmolLM3Monitor
|
| 38 |
+
|
| 39 |
+
monitor = SmolLM3Monitor(
|
| 40 |
+
experiment_name="my_experiment",
|
| 41 |
+
trackio_url="{SPACE_URL}",
|
| 42 |
+
enable_tracking=True
|
| 43 |
+
)
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
Visit: {SPACE_URL}
|
templates/spaces/{requirements_space.txt → requirements.txt}
RENAMED
|
File without changes
|
test_pipeline.py
DELETED
|
@@ -1,260 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Test script for the SmolLM3 end-to-end pipeline
|
| 4 |
-
Verifies all components are working correctly
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
-
import os
|
| 8 |
-
import sys
|
| 9 |
-
import subprocess
|
| 10 |
-
import importlib
|
| 11 |
-
from pathlib import Path
|
| 12 |
-
|
| 13 |
-
def test_imports():
|
| 14 |
-
"""Test that all required modules can be imported"""
|
| 15 |
-
print("π Testing imports...")
|
| 16 |
-
|
| 17 |
-
required_modules = [
|
| 18 |
-
'torch',
|
| 19 |
-
'transformers',
|
| 20 |
-
'datasets',
|
| 21 |
-
'accelerate',
|
| 22 |
-
'trl',
|
| 23 |
-
'huggingface_hub',
|
| 24 |
-
'requests'
|
| 25 |
-
]
|
| 26 |
-
|
| 27 |
-
failed_imports = []
|
| 28 |
-
for module in required_modules:
|
| 29 |
-
try:
|
| 30 |
-
importlib.import_module(module)
|
| 31 |
-
print(f"✅ {module}")
|
| 32 |
-
except ImportError as e:
|
| 33 |
-
print(f"β {module}: {e}")
|
| 34 |
-
failed_imports.append(module)
|
| 35 |
-
|
| 36 |
-
if failed_imports:
|
| 37 |
-
print(f"\nβ Failed imports: {failed_imports}")
|
| 38 |
-
return False
|
| 39 |
-
|
| 40 |
-
print("✅ All imports successful")
|
| 41 |
-
return True
|
| 42 |
-
|
| 43 |
-
def test_local_modules():
|
| 44 |
-
"""Test local module imports"""
|
| 45 |
-
print("\nπ Testing local modules...")
|
| 46 |
-
|
| 47 |
-
# Add src to path
|
| 48 |
-
sys.path.append('src')
|
| 49 |
-
|
| 50 |
-
local_modules = [
|
| 51 |
-
'config',
|
| 52 |
-
'model',
|
| 53 |
-
'data',
|
| 54 |
-
'trainer',
|
| 55 |
-
'monitoring'
|
| 56 |
-
]
|
| 57 |
-
|
| 58 |
-
failed_imports = []
|
| 59 |
-
for module in local_modules:
|
| 60 |
-
try:
|
| 61 |
-
importlib.import_module(module)
|
| 62 |
-
print(f"✅ {module}")
|
| 63 |
-
except ImportError as e:
|
| 64 |
-
print(f"β {module}: {e}")
|
| 65 |
-
failed_imports.append(module)
|
| 66 |
-
|
| 67 |
-
if failed_imports:
|
| 68 |
-
print(f"\nβ Failed local imports: {failed_imports}")
|
| 69 |
-
return False
|
| 70 |
-
|
| 71 |
-
print("✅ All local modules imported successfully")
|
| 72 |
-
return True
|
| 73 |
-
|
| 74 |
-
def test_scripts():
|
| 75 |
-
"""Test script availability"""
|
| 76 |
-
print("\nπ Testing scripts...")
|
| 77 |
-
|
| 78 |
-
required_scripts = [
|
| 79 |
-
'scripts/trackio_tonic/deploy_trackio_space.py',
|
| 80 |
-
'scripts/trackio_tonic/configure_trackio.py',
|
| 81 |
-
'scripts/dataset_tonic/setup_hf_dataset.py',
|
| 82 |
-
'scripts/model_tonic/push_to_huggingface.py',
|
| 83 |
-
'src/train.py'
|
| 84 |
-
]
|
| 85 |
-
|
| 86 |
-
missing_scripts = []
|
| 87 |
-
for script in required_scripts:
|
| 88 |
-
if Path(script).exists():
|
| 89 |
-
print(f"✅ {script}")
|
| 90 |
-
else:
|
| 91 |
-
print(f"β {script}")
|
| 92 |
-
missing_scripts.append(script)
|
| 93 |
-
|
| 94 |
-
if missing_scripts:
|
| 95 |
-
print(f"\nβ Missing scripts: {missing_scripts}")
|
| 96 |
-
return False
|
| 97 |
-
|
| 98 |
-
print("✅ All scripts found")
|
| 99 |
-
return True
|
| 100 |
-
|
| 101 |
-
def test_configs():
|
| 102 |
-
"""Test configuration files"""
|
| 103 |
-
print("\nπ Testing configurations...")
|
| 104 |
-
|
| 105 |
-
config_dir = Path('config')
|
| 106 |
-
if not config_dir.exists():
|
| 107 |
-
print("β config directory not found")
|
| 108 |
-
return False
|
| 109 |
-
|
| 110 |
-
config_files = list(config_dir.glob('*.py'))
|
| 111 |
-
if not config_files:
|
| 112 |
-
print("β No configuration files found")
|
| 113 |
-
return False
|
| 114 |
-
|
| 115 |
-
print(f"✅ Found {len(config_files)} configuration files:")
|
| 116 |
-
for config in config_files:
|
| 117 |
-
print(f" - {config.name}")
|
| 118 |
-
|
| 119 |
-
return True
|
| 120 |
-
|
| 121 |
-
def test_requirements():
|
| 122 |
-
"""Test requirements files"""
|
| 123 |
-
print("\nπ Testing requirements...")
|
| 124 |
-
|
| 125 |
-
requirements_dir = Path('requirements')
|
| 126 |
-
if not requirements_dir.exists():
|
| 127 |
-
print("β requirements directory not found")
|
| 128 |
-
return False
|
| 129 |
-
|
| 130 |
-
req_files = list(requirements_dir.glob('*.txt'))
|
| 131 |
-
if not req_files:
|
| 132 |
-
print("β No requirements files found")
|
| 133 |
-
return False
|
| 134 |
-
|
| 135 |
-
print(f"✅ Found {len(req_files)} requirements files:")
|
| 136 |
-
for req in req_files:
|
| 137 |
-
print(f" - {req.name}")
|
| 138 |
-
|
| 139 |
-
return True
|
| 140 |
-
|
| 141 |
-
def test_cuda():
|
| 142 |
-
"""Test CUDA availability"""
|
| 143 |
-
print("\nπ Testing CUDA...")
|
| 144 |
-
|
| 145 |
-
try:
|
| 146 |
-
import torch
|
| 147 |
-
if torch.cuda.is_available():
|
| 148 |
-
device_count = torch.cuda.device_count()
|
| 149 |
-
device_name = torch.cuda.get_device_name(0)
|
| 150 |
-
print(f"✅ CUDA available: {device_count} device(s)")
|
| 151 |
-
print(f" - Device 0: {device_name}")
|
| 152 |
-
else:
|
| 153 |
-
print("β οΈ CUDA not available (training will be slower)")
|
| 154 |
-
except Exception as e:
|
| 155 |
-
print(f"β CUDA test failed: {e}")
|
| 156 |
-
return False
|
| 157 |
-
|
| 158 |
-
return True
|
| 159 |
-
|
| 160 |
-
def test_hf_token():
|
| 161 |
-
"""Test Hugging Face token"""
|
| 162 |
-
print("\nπ Testing HF token...")
|
| 163 |
-
|
| 164 |
-
token = os.environ.get('HF_TOKEN')
|
| 165 |
-
if not token:
|
| 166 |
-
print("β οΈ HF_TOKEN not set (will be prompted during setup)")
|
| 167 |
-
return True
|
| 168 |
-
|
| 169 |
-
try:
|
| 170 |
-
result = subprocess.run(
|
| 171 |
-
['huggingface-cli', 'whoami'],
|
| 172 |
-
capture_output=True,
|
| 173 |
-
text=True,
|
| 174 |
-
timeout=10
|
| 175 |
-
)
|
| 176 |
-
|
| 177 |
-
if result.returncode == 0:
|
| 178 |
-
username = result.stdout.strip()
|
| 179 |
-
print(f"✅ HF token valid: {username}")
|
| 180 |
-
return True
|
| 181 |
-
else:
|
| 182 |
-
print(f"β HF token invalid: {result.stderr}")
|
| 183 |
-
return False
|
| 184 |
-
except Exception as e:
|
| 185 |
-
print(f"β HF token test failed: {e}")
|
| 186 |
-
return False
|
| 187 |
-
|
| 188 |
-
def test_pipeline_components():
|
| 189 |
-
"""Test individual pipeline components"""
|
| 190 |
-
print("\nπ Testing pipeline components...")
|
| 191 |
-
|
| 192 |
-
# Test setup script
|
| 193 |
-
if Path('setup_launch.py').exists():
|
| 194 |
-
print("✅ setup_launch.py found")
|
| 195 |
-
else:
|
| 196 |
-
print("β setup_launch.py not found")
|
| 197 |
-
return False
|
| 198 |
-
|
| 199 |
-
# Test launch script
|
| 200 |
-
if Path('launch.sh').exists():
|
| 201 |
-
print("✅ launch.sh found")
|
| 202 |
-
else:
|
| 203 |
-
print("β launch.sh not found")
|
| 204 |
-
return False
|
| 205 |
-
|
| 206 |
-
# Test README
|
| 207 |
-
if Path('README_END_TO_END.md').exists():
|
| 208 |
-
print("✅ README_END_TO_END.md found")
|
| 209 |
-
else:
|
| 210 |
-
print("β README_END_TO_END.md not found")
|
| 211 |
-
return False
|
| 212 |
-
|
| 213 |
-
return True
|
| 214 |
-
|
| 215 |
-
def main():
|
| 216 |
-
"""Run all tests"""
|
| 217 |
-
print("🧪 SmolLM3 End-to-End Pipeline Test")
|
| 218 |
-
print("=" * 50)
|
| 219 |
-
|
| 220 |
-
tests = [
|
| 221 |
-
test_imports,
|
| 222 |
-
test_local_modules,
|
| 223 |
-
test_scripts,
|
| 224 |
-
test_configs,
|
| 225 |
-
test_requirements,
|
| 226 |
-
test_cuda,
|
| 227 |
-
test_hf_token,
|
| 228 |
-
test_pipeline_components
|
| 229 |
-
]
|
| 230 |
-
|
| 231 |
-
passed = 0
|
| 232 |
-
total = len(tests)
|
| 233 |
-
|
| 234 |
-
for test in tests:
|
| 235 |
-
try:
|
| 236 |
-
if test():
|
| 237 |
-
passed += 1
|
| 238 |
-
except Exception as e:
|
| 239 |
-
print(f"β Test failed with exception: {e}")
|
| 240 |
-
|
| 241 |
-
print(f"\nπ Test Results: {passed}/{total} passed")
|
| 242 |
-
|
| 243 |
-
if passed == total:
|
| 244 |
-
print("π All tests passed! Pipeline is ready to use.")
|
| 245 |
-
print("\nπ Next steps:")
|
| 246 |
-
print("1. Run: python setup_launch.py")
|
| 247 |
-
print("2. Run: chmod +x launch.sh")
|
| 248 |
-
print("3. Run: ./launch.sh")
|
| 249 |
-
else:
|
| 250 |
-
print("β Some tests failed. Please fix the issues before running the pipeline.")
|
| 251 |
-
print("\n🔧 Common fixes:")
|
| 252 |
-
print("1. Install missing packages: pip install -r requirements/requirements_core.txt")
|
| 253 |
-
print("2. Set HF_TOKEN environment variable")
|
| 254 |
-
print("3. Check CUDA installation")
|
| 255 |
-
|
| 256 |
-
return passed == total
|
| 257 |
-
|
| 258 |
-
if __name__ == "__main__":
|
| 259 |
-
success = main()
|
| 260 |
-
sys.exit(0 if success else 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_deployment.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test script to verify deployment scripts work correctly
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# Add project root to path
|
| 11 |
+
# This file lives in tests/, so the repository root (which holds templates/
# and scripts/) is one directory above this file's own directory.
project_root = Path(__file__).resolve().parent.parent
|
| 12 |
+
sys.path.insert(0, str(project_root))
|
| 13 |
+
|
| 14 |
+
def test_templates_exist():
|
| 15 |
+
"""Test that all required template files exist"""
|
| 16 |
+
print("π Testing template files...")
|
| 17 |
+
|
| 18 |
+
# Check spaces templates
|
| 19 |
+
spaces_dir = project_root / "templates" / "spaces"
|
| 20 |
+
spaces_files = ["app.py", "requirements.txt", "README.md"]
|
| 21 |
+
|
| 22 |
+
for file_name in spaces_files:
|
| 23 |
+
file_path = spaces_dir / file_name
|
| 24 |
+
if file_path.exists():
|
| 25 |
+
print(f"✅ {file_path}")
|
| 26 |
+
else:
|
| 27 |
+
print(f"❌ {file_path} not found")
|
| 28 |
+
return False
|
| 29 |
+
|
| 30 |
+
# Check datasets templates
|
| 31 |
+
datasets_dir = project_root / "templates" / "datasets"
|
| 32 |
+
datasets_files = ["readme.md"]
|
| 33 |
+
|
| 34 |
+
for file_name in datasets_files:
|
| 35 |
+
file_path = datasets_dir / file_name
|
| 36 |
+
if file_path.exists():
|
| 37 |
+
print(f"✅ {file_path}")
|
| 38 |
+
else:
|
| 39 |
+
print(f"❌ {file_path} not found")
|
| 40 |
+
return False
|
| 41 |
+
|
| 42 |
+
return True
|
| 43 |
+
|
| 44 |
+
def test_deployment_scripts():
|
| 45 |
+
"""Test that deployment scripts can import required modules"""
|
| 46 |
+
print("\nπ Testing deployment scripts...")
|
| 47 |
+
|
| 48 |
+
try:
|
| 49 |
+
# Test space deployment script
|
| 50 |
+
from scripts.trackio_tonic.deploy_trackio_space import TrackioSpaceDeployer
|
| 51 |
+
print("✅ deploy_trackio_space.py imports successfully")
|
| 52 |
+
|
| 53 |
+
# Test dataset setup script
|
| 54 |
+
from scripts.dataset_tonic.setup_hf_dataset import setup_trackio_dataset
|
| 55 |
+
print("✅ setup_hf_dataset.py imports successfully")
|
| 56 |
+
|
| 57 |
+
return True
|
| 58 |
+
|
| 59 |
+
except Exception as e:
|
| 60 |
+
print(f"❌ Deployment script test failed: {e}")
|
| 61 |
+
return False
|
| 62 |
+
|
| 63 |
+
def test_file_copying():
|
| 64 |
+
"""Test that file copying logic works"""
|
| 65 |
+
print("\nπ Testing file copying logic...")
|
| 66 |
+
|
| 67 |
+
try:
|
| 68 |
+
# Test space deployment file copying
|
| 69 |
+
from scripts.trackio_tonic.deploy_trackio_space import TrackioSpaceDeployer
|
| 70 |
+
|
| 71 |
+
# Create a mock deployer
|
| 72 |
+
deployer = TrackioSpaceDeployer("test-space", "test-user", "test-token")
|
| 73 |
+
|
| 74 |
+
# Test that templates directory exists
|
| 75 |
+
project_root = Path(__file__).parent
|
| 76 |
+
templates_dir = project_root / "templates" / "spaces"
|
| 77 |
+
|
| 78 |
+
if templates_dir.exists():
|
| 79 |
+
print(f"✅ Templates directory exists: {templates_dir}")
|
| 80 |
+
|
| 81 |
+
# Check that required files exist
|
| 82 |
+
for file_name in ["app.py", "requirements.txt", "README.md"]:
|
| 83 |
+
file_path = templates_dir / file_name
|
| 84 |
+
if file_path.exists():
|
| 85 |
+
print(f"✅ Template file exists: {file_path}")
|
| 86 |
+
else:
|
| 87 |
+
print(f"❌ Template file missing: {file_path}")
|
| 88 |
+
return False
|
| 89 |
+
else:
|
| 90 |
+
print(f"❌ Templates directory missing: {templates_dir}")
|
| 91 |
+
return False
|
| 92 |
+
|
| 93 |
+
return True
|
| 94 |
+
|
| 95 |
+
except Exception as e:
|
| 96 |
+
print(f"❌ File copying test failed: {e}")
|
| 97 |
+
return False
|
| 98 |
+
|
| 99 |
+
def test_readme_inclusion():
|
| 100 |
+
"""Test that README inclusion logic works"""
|
| 101 |
+
print("\nπ Testing README inclusion...")
|
| 102 |
+
|
| 103 |
+
try:
|
| 104 |
+
# Test dataset README inclusion
|
| 105 |
+
from scripts.dataset_tonic.setup_hf_dataset import setup_trackio_dataset
|
| 106 |
+
|
| 107 |
+
# Check that README template exists
|
| 108 |
+
project_root = Path(__file__).parent
|
| 109 |
+
readme_path = project_root / "templates" / "datasets" / "readme.md"
|
| 110 |
+
|
| 111 |
+
if readme_path.exists():
|
| 112 |
+
print(f"✅ README template exists: {readme_path}")
|
| 113 |
+
|
| 114 |
+
# Check README content
|
| 115 |
+
with open(readme_path, 'r', encoding='utf-8') as f:
|
| 116 |
+
content = f.read()
|
| 117 |
+
if len(content.strip()) > 0:
|
| 118 |
+
print(f"✅ README has content ({len(content)} characters)")
|
| 119 |
+
else:
|
| 120 |
+
print(f"⚠️ README is empty")
|
| 121 |
+
else:
|
| 122 |
+
print(f"❌ README template missing: {readme_path}")
|
| 123 |
+
return False
|
| 124 |
+
|
| 125 |
+
return True
|
| 126 |
+
|
| 127 |
+
except Exception as e:
|
| 128 |
+
print(f"❌ README inclusion test failed: {e}")
|
| 129 |
+
return False
|
| 130 |
+
|
| 131 |
+
def main():
|
| 132 |
+
"""Run all tests"""
|
| 133 |
+
print("π Testing Deployment Scripts")
|
| 134 |
+
print("=" * 50)
|
| 135 |
+
|
| 136 |
+
tests = [
|
| 137 |
+
test_templates_exist,
|
| 138 |
+
test_deployment_scripts,
|
| 139 |
+
test_file_copying,
|
| 140 |
+
test_readme_inclusion
|
| 141 |
+
]
|
| 142 |
+
|
| 143 |
+
passed = 0
|
| 144 |
+
total = len(tests)
|
| 145 |
+
|
| 146 |
+
for test in tests:
|
| 147 |
+
if test():
|
| 148 |
+
passed += 1
|
| 149 |
+
else:
|
| 150 |
+
print(f"❌ Test failed: {test.__name__}")
|
| 151 |
+
|
| 152 |
+
print(f"\n{'='*50}")
|
| 153 |
+
print(f"π Test Results: {passed}/{total} tests passed")
|
| 154 |
+
|
| 155 |
+
if passed == total:
|
| 156 |
+
print("π All tests passed! Deployment scripts are ready to use.")
|
| 157 |
+
print("\nπ Deployment workflow:")
|
| 158 |
+
print("1. Space deployment will copy files from templates/spaces/")
|
| 159 |
+
print("2. Dataset creation will include README from templates/datasets/")
|
| 160 |
+
print("3. Both scripts will properly upload all required files")
|
| 161 |
+
return 0
|
| 162 |
+
else:
|
| 163 |
+
print("❌ Some tests failed. Please fix the issues before deployment.")
|
| 164 |
+
return 1
|
| 165 |
+
|
| 166 |
+
if __name__ == "__main__":
|
| 167 |
+
exit(main())
|
test_formatting_fix.py → tests/test_formatting_fix.py
RENAMED
|
File without changes
|
tests/test_pipeline.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Quick test script to verify pipeline components
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# Add project root to path
|
| 11 |
+
# This file lives in tests/, so the repository root (which holds src/ and
# config/) is one directory above this file's own directory.
project_root = Path(__file__).resolve().parent.parent
|
| 12 |
+
sys.path.insert(0, str(project_root))
|
| 13 |
+
|
| 14 |
+
def test_imports():
|
| 15 |
+
"""Test that all required modules can be imported"""
|
| 16 |
+
print("π Testing imports...")
|
| 17 |
+
|
| 18 |
+
try:
|
| 19 |
+
from src.config import get_config
|
| 20 |
+
print("✅ src.config imported successfully")
|
| 21 |
+
except ImportError as e:
|
| 22 |
+
print(f"❌ Failed to import src.config: {e}")
|
| 23 |
+
return False
|
| 24 |
+
|
| 25 |
+
try:
|
| 26 |
+
from src.model import SmolLM3Model
|
| 27 |
+
print("✅ src.model imported successfully")
|
| 28 |
+
except ImportError as e:
|
| 29 |
+
print(f"❌ Failed to import src.model: {e}")
|
| 30 |
+
return False
|
| 31 |
+
|
| 32 |
+
try:
|
| 33 |
+
from src.data import SmolLM3Dataset
|
| 34 |
+
print("✅ src.data imported successfully")
|
| 35 |
+
except ImportError as e:
|
| 36 |
+
print(f"❌ Failed to import src.data: {e}")
|
| 37 |
+
return False
|
| 38 |
+
|
| 39 |
+
try:
|
| 40 |
+
from src.trainer import SmolLM3Trainer
|
| 41 |
+
print("✅ src.trainer imported successfully")
|
| 42 |
+
except ImportError as e:
|
| 43 |
+
print(f"❌ Failed to import src.trainer: {e}")
|
| 44 |
+
return False
|
| 45 |
+
|
| 46 |
+
try:
|
| 47 |
+
from src.monitoring import create_monitor_from_config
|
| 48 |
+
print("✅ src.monitoring imported successfully")
|
| 49 |
+
except ImportError as e:
|
| 50 |
+
print(f"❌ Failed to import src.monitoring: {e}")
|
| 51 |
+
return False
|
| 52 |
+
|
| 53 |
+
return True
|
| 54 |
+
|
| 55 |
+
def test_config_loading():
|
| 56 |
+
"""Test that configuration files can be loaded"""
|
| 57 |
+
print("\nπ Testing config loading...")
|
| 58 |
+
|
| 59 |
+
config_files = [
|
| 60 |
+
"config/train_smollm3_h100_lightweight.py",
|
| 61 |
+
"config/train_smollm3_openhermes_fr_a100_large.py",
|
| 62 |
+
"config/train_smollm3.py"
|
| 63 |
+
]
|
| 64 |
+
|
| 65 |
+
for config_file in config_files:
|
| 66 |
+
if os.path.exists(config_file):
|
| 67 |
+
try:
|
| 68 |
+
config = get_config(config_file)
|
| 69 |
+
print(f"✅ {config_file} loaded successfully")
|
| 70 |
+
print(f" Model: {config.model_name}")
|
| 71 |
+
print(f" Batch size: {config.batch_size}")
|
| 72 |
+
if hasattr(config, 'sample_size') and config.sample_size:
|
| 73 |
+
print(f" Sample size: {config.sample_size}")
|
| 74 |
+
except Exception as e:
|
| 75 |
+
print(f"❌ Failed to load {config_file}: {e}")
|
| 76 |
+
return False
|
| 77 |
+
else:
|
| 78 |
+
print(f"⚠️ {config_file} not found")
|
| 79 |
+
|
| 80 |
+
return True
|
| 81 |
+
|
| 82 |
+
def test_dataset_sampling():
    """Smoke-test dataset sampling through ``SmolLM3Dataset``.

    Fetches a 100-row slice of ``legmlai/openhermes-fr`` to confirm the
    dataset is reachable, loads the SmolLM3 tokenizer, then builds a
    ``SmolLM3Dataset`` with ``sample_size=50`` and checks that the training
    split it returns is non-empty and no larger than the requested sample.
    Requires the ``datasets`` and ``transformers`` packages and access to
    the Hugging Face Hub (or a populated local cache).

    Returns:
        bool: True when every step succeeds, False on any exception or when
        the training split has an unexpected size.
    """
    # NOTE(review): "π" stands in for an emoji garbled in extraction; restore it from the repository copy.
    print("\nπ Testing dataset sampling...")

    sample_size = 50

    try:
        from datasets import load_dataset
        from transformers import AutoTokenizer

        # Load a small slice first; this only verifies the dataset is reachable.
        print("Loading test dataset...")
        dataset = load_dataset("legmlai/openhermes-fr", split="train[:100]")
        print(f"Loaded {len(dataset)} samples")

        # Test tokenizer
        tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM3-3B")
        print("✅ Tokenizer loaded successfully")

        # Test dataset with sampling
        from src.data import SmolLM3Dataset

        # NOTE(review): the handler receives the dataset name, not the
        # 100-row slice above, so it presumably loads the data itself.
        # Confirm against src/data.py.
        dataset_handler = SmolLM3Dataset(
            data_path="legmlai/openhermes-fr",
            tokenizer=tokenizer,
            max_seq_length=1024,
            sample_size=sample_size,
            sample_seed=42,
        )

        train_dataset = dataset_handler.get_train_dataset()
        num_samples = len(train_dataset)

        # Without this check the test would pass even if sampling were ignored.
        if not 0 < num_samples <= sample_size:
            print(
                f"❌ Dataset sampling returned {num_samples} samples "
                f"(expected between 1 and {sample_size})"
            )
            return False

        print(f"✅ Dataset sampling works: {num_samples} samples")
        return True

    except Exception as e:
        print(f"❌ Dataset sampling test failed: {e}")
        return False
|
| 118 |
+
|
| 119 |
+
def main():
    """Run every pipeline component check and print a summary.

    Returns:
        int: 0 when all checks pass, 1 otherwise, so the value can be used
        directly as the process exit code.
    """
    # NOTE(review): "π" stands in for emoji garbled in extraction; restore them from the repository copy.
    print("π Testing SmolLM3 Pipeline Components")
    print("=" * 50)

    tests = [
        test_imports,
        test_config_loading,
        test_dataset_sampling,
    ]

    passed = 0
    total = len(tests)

    for test in tests:
        if test():
            passed += 1
        else:
            print(f"❌ Test failed: {test.__name__}")

    print(f"\n{'='*50}")
    print(f"π Test Results: {passed}/{total} tests passed")

    if passed == total:
        print("π All tests passed! Pipeline is ready to run.")
        return 0

    print("❌ Some tests failed. Please fix the issues before running the pipeline.")
    return 1
|
| 148 |
+
|
| 149 |
+
if __name__ == "__main__":
    # exit() is a helper injected by the site module for interactive use;
    # raising SystemExit works even when site is not loaded.
    raise SystemExit(main())
|
tests/test_readme_template.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Test script to verify README template replacement works correctly
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# Add project root to path. This file lives in tests/, so the repository
# root is two levels above it, not one.
project_root = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(project_root))
|
| 13 |
+
|
| 14 |
+
def test_readme_template():
    """Check that the Space README template exists and its placeholder can be filled.

    Reads ``templates/spaces/README.md`` under the module-level
    ``project_root`` and substitutes a sample URL for ``{SPACE_URL}``.

    Returns:
        bool: True when the template is found and, after substitution, the
        placeholder is gone and the sample URL is present. False otherwise,
        including on any exception.
    """
    # NOTE(review): "π" stands in for an emoji garbled in extraction; restore it from the repository copy.
    print("π Testing README template replacement...")

    try:
        # Get template path
        templates_dir = project_root / "templates" / "spaces"
        readme_template_path = templates_dir / "README.md"

        if not readme_template_path.exists():
            print(f"❌ README template not found: {readme_template_path}")
            return False

        # Read template
        template_content = readme_template_path.read_text(encoding='utf-8')
        print(f"✅ README template loaded ({len(template_content)} characters)")

        # Test placeholder replacement
        test_space_url = "https://huggingface.co/spaces/test-user/test-space"
        replaced_content = template_content.replace("{SPACE_URL}", test_space_url)

        if "{SPACE_URL}" in replaced_content:
            print("❌ Placeholder replacement failed")
            return False

        # Also catches a template that never contained the placeholder.
        if test_space_url not in replaced_content:
            print("❌ Space URL not found in replaced content")
            return False

        print("✅ Placeholder replacement works correctly")
        print(f"✅ Space URL: {test_space_url}")
        return True

    except Exception as e:
        print(f"❌ README template test failed: {e}")
        return False
|
| 53 |
+
|
| 54 |
+
def test_deployment_readme():
    """Check that the deployment script imports and the README template is available.

    Instantiates ``TrackioSpaceDeployer`` with dummy credentials as an
    import/constructor smoke check, then verifies that
    ``templates/spaces/README.md`` exists under the repository root. A
    template without the ``{SPACE_URL}`` placeholder only produces a warning.

    Returns:
        bool: True when the deployer can be constructed and the template
        file exists, False otherwise (including on any exception).
    """
    # NOTE(review): "π" stands in for an emoji garbled in extraction; restore it from the repository copy.
    print("\nπ Testing deployment script README usage...")

    try:
        from scripts.trackio_tonic.deploy_trackio_space import TrackioSpaceDeployer

        # Constructing the deployer is the check; the instance is not used.
        TrackioSpaceDeployer("test-space", "test-user", "test-token")

        # This file lives in tests/, so the repository root is two levels up.
        repo_root = Path(__file__).resolve().parent.parent
        templates_dir = repo_root / "templates" / "spaces"
        readme_template_path = templates_dir / "README.md"

        if not readme_template_path.exists():
            print(f"❌ README template missing: {readme_template_path}")
            return False

        print(f"✅ README template exists: {readme_template_path}")

        # Test reading template
        content = readme_template_path.read_text(encoding='utf-8')
        if "{SPACE_URL}" in content:
            print("✅ Template contains placeholder")
        else:
            print("⚠️ Template missing placeholder")

        return True

    except Exception as e:
        print(f"❌ Deployment README test failed: {e}")
        return False
|
| 88 |
+
|
| 89 |
+
def main():
    """Run the README template checks and print a summary.

    Returns:
        int: 0 when all checks pass, 1 otherwise, so the value can be used
        directly as the process exit code.
    """
    # NOTE(review): "π" stands in for emoji garbled in extraction; restore them from the repository copy.
    print("π Testing README Template System")
    print("=" * 50)

    tests = [
        test_readme_template,
        test_deployment_readme,
    ]

    passed = 0
    total = len(tests)

    for test in tests:
        if test():
            passed += 1
        else:
            print(f"❌ Test failed: {test.__name__}")

    print(f"\n{'='*50}")
    print(f"π Test Results: {passed}/{total} tests passed")

    if passed == total:
        print("π All tests passed! README template system is working correctly.")
        print("\nπ Template workflow:")
        print("1. README template is read from templates/spaces/README.md")
        print("2. {SPACE_URL} placeholder is replaced with actual space URL")
        print("3. Customized README is written to the space")
        return 0

    print("❌ Some tests failed. Please fix the issues before deployment.")
    return 1
|
| 121 |
+
|
| 122 |
+
if __name__ == "__main__":
    # exit() is a helper injected by the site module for interactive use;
    # raising SystemExit works even when site is not loaded.
    raise SystemExit(main())
|
tests/test_simple_pipeline.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Simple test script for the simplified pipeline approach
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
# Add project root to path. This file lives in tests/, so the repository
# root is two levels above it, not one.
project_root = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(project_root))
|
| 13 |
+
|
| 14 |
+
def test_simple_training_script():
    """Check that the training entry point and the H100 lightweight config import cleanly.

    Returns:
        bool: True when both imports succeed and the config exposes
        ``model_name``, ``batch_size`` and ``sample_size``; False on any
        exception.
    """
    # NOTE(review): "π" stands in for an emoji garbled in extraction; restore it from the repository copy.
    print("π Testing simplified training script...")

    try:
        # Importing is the check itself; train_main is deliberately unused.
        from scripts.training.train import main as train_main  # noqa: F401
        print("✅ Training script imported successfully")

        # Test config loading
        from config.train_smollm3_h100_lightweight import config as h100_config
        print("✅ H100 lightweight config loaded successfully")
        print(f" Model: {h100_config.model_name}")
        print(f" Batch size: {h100_config.batch_size}")
        print(f" Sample size: {h100_config.sample_size}")

        return True

    except Exception as e:
        print(f"❌ Training script test failed: {e}")
        return False
|
| 35 |
+
|
| 36 |
+
def test_config_files():
    """Check that all required config files exist.

    Paths are relative to the current working directory, so run this from
    the repository root. Every file is checked and reported, so a single
    run lists all missing files rather than only the first.

    Returns:
        bool: True when every config file exists, False otherwise.
    """
    # NOTE(review): "π" stands in for an emoji garbled in extraction; restore it from the repository copy.
    print("\nπ Testing config files...")

    config_files = [
        "config/train_smollm3_h100_lightweight.py",
        "config/train_smollm3_openhermes_fr_a100_large.py",
        "config/train_smollm3_openhermes_fr_a100_multiple_passes.py",
    ]

    all_present = True
    for config_file in config_files:
        if os.path.exists(config_file):
            print(f"✅ {config_file}")
        else:
            print(f"❌ {config_file} not found")
            all_present = False

    return all_present
|
| 54 |
+
|
| 55 |
+
def test_scripts():
    """Check that all required pipeline scripts exist.

    Paths are relative to the current working directory, so run this from
    the repository root. Every file is checked and reported, so a single
    run lists all missing scripts rather than only the first.

    Returns:
        bool: True when every script exists, False otherwise.
    """
    # NOTE(review): "π" stands in for an emoji garbled in extraction; restore it from the repository copy.
    print("\nπ Testing scripts...")

    script_files = [
        "scripts/training/train.py",
        "scripts/trackio_tonic/deploy_trackio_space.py",
        "scripts/trackio_tonic/configure_trackio.py",
        "scripts/dataset_tonic/setup_hf_dataset.py",
        "scripts/model_tonic/push_to_huggingface.py",
    ]

    all_present = True
    for script_file in script_files:
        if os.path.exists(script_file):
            print(f"✅ {script_file}")
        else:
            print(f"❌ {script_file} not found")
            all_present = False

    return all_present
|
| 75 |
+
|
| 76 |
+
def test_launch_script():
    """Check that ``launch.sh`` exists in the current working directory.

    A script that exists but lacks the executable bit only produces a
    warning with the ``chmod`` hint; it does not fail the check.

    Returns:
        bool: True when ``launch.sh`` exists, False otherwise.
    """
    # NOTE(review): "π" stands in for an emoji garbled in extraction; restore it from the repository copy.
    print("\nπ Testing launch script...")

    launch_script = "launch.sh"
    if not os.path.exists(launch_script):
        print(f"❌ {launch_script} not found")
        return False

    print(f"✅ {launch_script} exists")

    # Check if it's executable
    if os.access(launch_script, os.X_OK):
        print(f"✅ {launch_script} is executable")
    else:
        print(f"⚠️ {launch_script} is not executable (run: chmod +x launch.sh)")

    return True
|
| 94 |
+
|
| 95 |
+
def main():
    """Run the simplified-pipeline checks and print a summary.

    Returns:
        int: 0 when all checks pass, 1 otherwise, so the value can be used
        directly as the process exit code.
    """
    # NOTE(review): "π" stands in for emoji garbled in extraction; restore them from the repository copy.
    print("π Testing Simplified SmolLM3 Pipeline")
    print("=" * 50)

    tests = [
        test_simple_training_script,
        test_config_files,
        test_scripts,
        test_launch_script,
    ]

    passed = 0
    total = len(tests)

    for test in tests:
        if test():
            passed += 1
        else:
            print(f"❌ Test failed: {test.__name__}")

    print(f"\n{'='*50}")
    print(f"π Test Results: {passed}/{total} tests passed")

    if passed == total:
        print("π All tests passed! Simplified pipeline is ready to run.")
        print("\nπ To run the pipeline:")
        print("1. chmod +x launch.sh")
        print("2. ./launch.sh")
        return 0

    print("❌ Some tests failed. Please fix the issues before running the pipeline.")
    return 1
|
| 128 |
+
|
| 129 |
+
if __name__ == "__main__":
    # exit() is a helper injected by the site module for interactive use;
    # raising SystemExit works even when site is not loaded.
    raise SystemExit(main())
|