File size: 7,138 Bytes
75bcdb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#!/usr/bin/env python3
"""
Diagnostic script for Trackio Space issues
Helps debug dataset loading and API client issues
"""

import os
import sys
import logging

# Make the project's modules importable when this script is run directly.
# Each entry is pushed to the front of sys.path, so the Trackio Space
# template directory (inserted last) is searched before the src directory.
_SCRIPT_DIR = os.path.dirname(__file__)
for _parts in (('..', 'src'), ('..', 'templates', 'spaces', 'trackio')):
    sys.path.insert(0, os.path.join(_SCRIPT_DIR, *_parts))

# Configure the root logger for INFO and above with a timestamped format.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
logger = logging.getLogger(__name__)

def test_dataset_manager():
    """Exercise TrackioDatasetManager against the configured dataset repo.

    Reads ``HF_TOKEN`` and ``TRACKIO_DATASET_REPO`` (default
    ``tonic/trackio-experiments``) from the environment, loads the existing
    experiments, then upserts a uniquely named diagnostic experiment and
    reloads the experiments to log the new count.

    NOTE(review): a successful upsert presumably persists the diagnostic
    experiment in the dataset; nothing in this function removes it again.

    Returns:
        bool: True when the upsert reports success. False when ``HF_TOKEN``
        is missing, ``dataset_utils`` cannot be imported, the upsert reports
        failure, or any other exception is raised.
    """
    from datetime import datetime

    try:
        from dataset_utils import TrackioDatasetManager

        # Credentials and target repo come from the environment.
        hf_token = os.environ.get('HF_TOKEN')
        dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', 'tonic/trackio-experiments')

        if not hf_token:
            logger.warning("⚠️ HF_TOKEN not found in environment")
            return False

        logger.info(f"πŸ”§ Testing dataset manager with repo: {dataset_repo}")

        manager = TrackioDatasetManager(dataset_repo, hf_token)

        # Read path: load whatever is already stored.
        experiments = manager.load_existing_experiments()
        logger.info(f"πŸ“Š Loaded {len(experiments)} experiments from dataset")

        # Stamp the sample with the actual run time instead of a fixed date.
        # The format (YYYY-MM-DDTHH:MM:SS, no timezone) matches the literal
        # this script used previously.
        timestamp = datetime.now().isoformat(timespec='seconds')

        # A random 32-bit suffix keeps repeated runs from colliding on the id.
        unique_suffix = int.from_bytes(os.urandom(4), 'big')
        sample_experiment = {
            'experiment_id': f'test_diagnostic_{unique_suffix}',
            'name': 'Diagnostic Test Experiment',
            'description': 'Test experiment created by diagnostic script',
            'created_at': timestamp,
            'status': 'completed',
            'metrics': '[]',
            'parameters': '{"test": true}',
            'artifacts': '[]',
            'logs': '[]',
            'last_updated': timestamp,
        }

        # Write path: upsert the sample experiment.
        logger.info("πŸ§ͺ Testing experiment upsert...")
        success = manager.upsert_experiment(sample_experiment)

        if not success:
            logger.error("❌ Failed to upsert test experiment")
            return False

        logger.info("βœ… Dataset manager working correctly")

        # Reload so the log shows the experiment count after the write.
        experiments_after = manager.load_existing_experiments()
        logger.info(f"πŸ“Š After upsert: {len(experiments_after)} experiments")

        return True

    except ImportError as e:
        logger.error(f"❌ Failed to import dataset_utils: {e}")
        return False
    except Exception as e:
        logger.error(f"❌ Dataset manager test failed: {e}")
        return False

def test_trackio_space():
    """Check that a TrackioSpace object can be constructed and inspected.

    Imports ``TrackioSpace`` from the ``app`` module, instantiates it with no
    arguments and logs its experiment count, whether a dataset manager and an
    HF token are present, and the dataset repo it points at.

    Returns:
        bool: True when all of the above completes; False if an ImportError
        or any other exception is raised along the way.
    """
    try:
        from app import TrackioSpace

        logger.info("πŸ§ͺ Testing TrackioSpace initialization...")

        trackio = TrackioSpace()

        experiment_count = len(trackio.experiments)
        logger.info(f"πŸ“Š TrackioSpace initialized with {experiment_count} experiments")

        manager_state = 'Yes' if trackio.dataset_manager else 'No'
        logger.info(f"πŸ›‘οΈ Dataset manager available: {manager_state}")

        token_state = 'Yes' if trackio.hf_token else 'No'
        logger.info(f"πŸ”‘ HF Token available: {token_state}")

        repo_name = trackio.dataset_repo
        logger.info(f"πŸ“‚ Dataset repo: {repo_name}")
    except ImportError as import_error:
        logger.error(f"❌ Failed to import TrackioSpace: {import_error}")
        return False
    except Exception as error:
        logger.error(f"❌ TrackioSpace test failed: {error}")
        return False
    else:
        return True

def test_environment():
    """Log the state of the environment variables this tooling reads.

    Reports ``HF_TOKEN``, ``TRACKIO_DATASET_REPO``, ``TRACKIO_URL`` and
    ``SPACE_ID``: set variables are logged at INFO, unset or empty ones at
    WARNING. Variables whose name contains ``TOKEN`` are treated as secrets
    and are never logged in full: only the first 8 characters are shown, and
    only when the value is long enough (more than 16 characters) that the
    prefix reveals less than half of it; shorter secrets are logged as
    ``***``.

    Also logs whether ``SPACE_ID`` is set, which this script uses as the
    signal for running on HF Spaces.

    Returns:
        bool: Always True; this check only reports.
    """
    logger.info("πŸ” Checking environment configuration...")

    visible_prefix = 8
    names = ('HF_TOKEN', 'TRACKIO_DATASET_REPO', 'TRACKIO_URL', 'SPACE_ID')
    env_vars = {name: os.environ.get(name) for name in names}

    for var, value in env_vars.items():
        if not value:
            logger.warning(f"⚠️ {var}: Not set")
            continue

        if 'TOKEN' not in var:
            masked_value = value
        elif len(value) > 2 * visible_prefix:
            masked_value = value[:visible_prefix] + '...'
        else:
            # Too short to show a prefix without exposing most of the secret.
            masked_value = '***'
        logger.info(f"βœ… {var}: {masked_value}")

    # A non-empty SPACE_ID is taken to mean the script runs on HF Spaces.
    is_hf_spaces = bool(env_vars['SPACE_ID'])
    logger.info(f"πŸš€ Running on HF Spaces: {'Yes' if is_hf_spaces else 'No'}")

    return True

def fix_common_issues():
    """Log hints for two common misconfigurations; nothing is modified.

    Checks that ``TRACKIO_DATASET_REPO`` (default
    ``tonic/trackio-experiments``) contains a ``/``, and that ``TRACKIO_URL``
    (default ``https://tonic-test-trackio-test.hf.space``) does not start
    with a doubled scheme such as ``https://https://``. For a doubled scheme
    the corrected URL is logged as a suggestion.
    """
    logger.info("πŸ’‘ Common issue fixes:")

    # Dataset repo: expected in 'username/dataset-name' form.
    repo_id = os.environ.get('TRACKIO_DATASET_REPO', 'tonic/trackio-experiments')
    if '/' in repo_id:
        logger.info(f"βœ… Dataset repo format looks good: {repo_id}")
    else:
        logger.warning(f"⚠️ Dataset repo format issue: {repo_id} should be 'username/dataset-name'")

    # Space URL: look for an accidentally duplicated scheme prefix.
    url = os.environ.get('TRACKIO_URL', 'https://tonic-test-trackio-test.hf.space')
    scheme_repairs = (
        ('https://https://', 'https://'),
        ('http://http://', 'http://'),
    )
    doubled_prefixes = tuple(doubled for doubled, _ in scheme_repairs)

    if not url.startswith(doubled_prefixes):
        logger.info(f"βœ… Trackio URL format looks good: {url}")
        return

    logger.warning(f"⚠️ URL format issue detected: {url}")
    repaired = url
    for doubled, single in scheme_repairs:
        repaired = repaired.replace(doubled, single)
    logger.info(f"πŸ’‘ Fixed URL should be: {repaired}")

def main():
    """Run every diagnostic step in order and log a summary.

    Steps: environment check, dataset manager test, TrackioSpace test, and
    the common-issue hints, each followed by a divider line.

    Returns:
        bool: False if an exception escapes any step, True otherwise. Note
        that True is returned even when individual checks report issues.
    """
    logger.info("πŸ”§ Starting Trackio Space diagnostics...")
    logger.info("=" * 60)

    divider = "-" * 40

    try:
        test_environment()
        logger.info(divider)

        manager_healthy = test_dataset_manager()
        logger.info(divider)

        space_healthy = test_trackio_space()
        logger.info(divider)

        fix_common_issues()
        logger.info(divider)

        # Summary of the two checks that produce a pass/fail result.
        logger.info("πŸ“‹ DIAGNOSTIC SUMMARY:")
        manager_verdict = 'βœ… OK' if manager_healthy else '❌ Issues'
        logger.info(f"Dataset Manager: {manager_verdict}")
        space_verdict = 'βœ… OK' if space_healthy else '❌ Issues'
        logger.info(f"TrackioSpace: {space_verdict}")

        if not (manager_healthy and space_healthy):
            logger.warning("⚠️ Some issues detected. Check the logs above for details.")
        else:
            all_clear_lines = (
                "πŸŽ‰ All systems appear to be working correctly!",
                "πŸ’‘ The issues in the logs might be related to:",
                "   - Empty dataset (expected for new setup)",
                "   - API client URL formatting (being auto-fixed)",
                "   - Remote data access (falling back to local data)",
            )
            for line in all_clear_lines:
                logger.info(line)

    except Exception as error:
        logger.error(f"❌ Diagnostic script failed: {error}")
        return False

    return True

if __name__ == "__main__":
    # Propagate the outcome to the caller's shell: a falsy result from main()
    # becomes exit status 1 instead of being silently discarded.
    sys.exit(0 if main() else 1)