File size: 6,072 Bytes
6f0279c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#!/usr/bin/env python3
"""
Test script for Trackio interface
Demonstrates how to use the enhanced monitoring interface
"""

import requests
import json
import time
from datetime import datetime

def _simulate_step_metrics(index, step, total_steps, base_loss, base_lr):
    """Build the synthetic metrics dict for one logged training step.

    Args:
        index: Zero-based position of this step in the logged sequence; it
            drives the small periodic jitter terms.
        step: Training step number being simulated.
        total_steps: Step count that corresponds to 100% progress.
        base_loss: Loss value at step 0 before jitter is applied.
        base_lr: Learning rate before any decay is applied.

    Returns:
        A dict with the rounded ``loss``, ``accuracy``, ``learning_rate``,
        ``gpu_memory_gb``, ``gpu_utilization_percent`` and
        ``training_time_per_step`` values plus the raw ``step``.
    """
    progress = step / total_steps
    remaining = 1 - progress

    # Loss falls linearly towards 10% of base_loss; the jitter term cycles
    # through -1/0/+1 and shrinks as progress approaches 1.
    loss = base_loss * (0.1 + 0.9 * remaining) + 0.1 * remaining * (index % 3 - 1)

    # Accuracy rises linearly with progress, with a bump on every other sample.
    accuracy = 0.2 + 0.7 * progress + 0.05 * (index % 2)

    # Learning rate is multiplied by 0.9 once per 200 steps.
    lr = base_lr * (0.9 ** (step // 200))

    # Fake hardware readings that simply cycle with the sample index.
    gpu_memory = 20 + 5 * (0.8 + 0.2 * (index % 4) / 4)
    gpu_utilization = 85 + 10 * (index % 3 - 1)
    training_time = 0.4 + 0.2 * (index % 2)

    return {
        "loss": round(loss, 4),
        "accuracy": round(accuracy, 4),
        "learning_rate": round(lr, 8),
        "gpu_memory_gb": round(gpu_memory, 2),
        "gpu_utilization_percent": round(gpu_utilization, 1),
        "training_time_per_step": round(training_time, 3),
        "step": step,
    }


def test_trackio_interface(
    trackio_url="https://tonic-test-trackio-test.hf.space",
    step_delay=0.1,
):
    """Walk through a simulated Trackio session for a SmolLM3 training run.

    Nothing is sent over the network: the experiment, its parameters, the
    per-step metrics and the final results are generated locally and printed
    to stdout so the expected workflow can be demonstrated end to end.

    Args:
        trackio_url: URL of the Trackio Space shown in the closing summary.
            Replace the default with your own Space URL.
        step_delay: Seconds to pause after each simulated metrics line. Pass
            ``0`` to skip the pause entirely.
    """
    total_steps = 1000
    base_loss = 2.5
    base_lr = 3.5e-6

    print("🚀 Testing Trackio Interface")
    print("=" * 50)

    # Step 1: Create an experiment
    print("\n1. Creating experiment...")
    experiment_name = "smollm3_openhermes_fr_balanced_test"
    experiment_description = "SmolLM3 fine-tuning on OpenHermes-FR dataset with balanced A100 configuration"

    # For demonstration the API calls are only simulated; a real integration
    # would issue HTTP requests to the Trackio Space instead.
    print(f"✅ Created experiment: {experiment_name}")
    print(f"   Description: {experiment_description}")
    experiment_id = f"exp_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    print(f"   Experiment ID: {experiment_id}")

    # Step 2: Log parameters
    print("\n2. Logging experiment parameters...")
    parameters = {
        "model_name": "HuggingFaceTB/SmolLM3-3B",
        "dataset_name": "legmlai/openhermes-fr",
        "batch_size": 8,
        "gradient_accumulation_steps": 16,
        "effective_batch_size": 128,
        "learning_rate": base_lr,
        "max_iters": 18000,
        "max_seq_length": 12288,
        "mixed_precision": "bf16",
        "use_flash_attention": True,
        "use_gradient_checkpointing": False,
        "optimizer": "adamw_torch",
        "scheduler": "cosine",
        "warmup_steps": 1200,
        "save_steps": 2000,
        "eval_steps": 1000,
        "logging_steps": 25,
        "no_think_system_message": True,
    }

    print("✅ Logged parameters:")
    for key, value in parameters.items():
        print(f"   {key}: {value}")

    # Step 3: Simulate training metrics
    print("\n3. Simulating training metrics...")

    # One sample every 50 steps over the simulated run.
    for index, step in enumerate(range(0, total_steps, 50)):
        metrics = _simulate_step_metrics(index, step, total_steps, base_loss, base_lr)

        print(
            f"   Step {step}: Loss={metrics['loss']:.4f}, "
            f"Accuracy={metrics['accuracy']:.4f}, "
            f"LR={metrics['learning_rate']:.2e}"
        )

        # In a real run this is where the metrics would be POSTed to the
        # Trackio Space (for example a JSON payload carrying experiment_id,
        # metrics and step); the exact endpoint depends on the deployed Space.

        if step_delay > 0:
            time.sleep(step_delay)  # Simulate processing time

    # Step 4: Log final results
    print("\n4. Logging final results...")
    final_results = {
        "final_loss": 0.234,
        "final_accuracy": 0.892,
        "total_training_time_hours": 4.5,
        "total_steps": total_steps,
        "model_size_gb": 6.2,
        "training_completed": True,
        "checkpoint_path": "./outputs/balanced/checkpoint-1000",
    }

    print("✅ Final results:")
    for key, value in final_results.items():
        print(f"   {key}: {value}")

    # Step 5: Update experiment status
    print("\n5. Updating experiment status...")
    status = "completed"
    print(f"✅ Experiment status updated to: {status}")

    print("\n" + "=" * 50)
    print("🎉 Test completed successfully!")
    print(f"📊 View your experiment at: {trackio_url}")
    print(f"🔍 Experiment ID: {experiment_id}")
    print("\nNext steps:")
    print("1. Visit your Trackio Space")
    print("2. Go to 'View Experiments' tab")
    print("3. Enter the experiment ID to see details")
    print("4. Go to 'Visualizations' tab to see plots")
    print("5. Use 'Demo Data' tab to generate more test data")

def show_interface_features():
    """Print the feature list of the enhanced Trackio interface and usage tips.

    Writes only to stdout and returns ``None``.
    """
    print("\n📊 Enhanced Trackio Interface Features")
    print("=" * 50)

    features = (
        "Create experiments with detailed descriptions",
        "Log comprehensive training parameters",
        "Real-time metrics visualization with Plotly",
        "Multiple metric types: loss, accuracy, learning rate, GPU metrics",
        "Experiment comparison across multiple runs",
        "Demo data generation for testing",
        "Formatted experiment details with emojis and structure",
        "Status tracking (running, completed, failed, paused)",
        "Interactive plots with hover information",
        "Comprehensive experiment overview with statistics",
    )

    # Every feature line carries the same check-mark prefix.
    for feature in features:
        print(f"✅ {feature}")

    print("\n🎯 How to use with your SmolLM3 training:")
    usage_steps = (
        "1. Start your training with the monitoring enabled",
        "2. Visit your Trackio Space during training",
        "3. Watch real-time loss curves and metrics",
        "4. Compare different training runs",
        "5. Track GPU utilization and system metrics",
    )
    for usage_step in usage_steps:
        print(usage_step)

if __name__ == "__main__":
    # Script entry point: run the simulated session walkthrough first, then
    # print the feature overview.
    for _demo in (test_trackio_interface, show_interface_features):
        _demo()