Spaces:
Running
Running
File size: 7,138 Bytes
75bcdb3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
#!/usr/bin/env python3
"""
Diagnostic script for Trackio Space issues
Helps debug dataset loading and API client issues
"""
import os
import sys
import logging
# Put the sibling ``src`` directory and the Trackio Space template directory at
# the front of the import path (template directory first) so that modules
# located there can be imported by the checks below.
sys.path[:0] = [
    os.path.join(os.path.dirname(__file__), '..', 'templates', 'spaces', 'trackio'),
    os.path.join(os.path.dirname(__file__), '..', 'src'),
]

# Timestamped INFO-level logging for every diagnostic message
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
def test_dataset_manager():
    """Check that TrackioDatasetManager can read from and write to the dataset.

    Reads ``HF_TOKEN`` and ``TRACKIO_DATASET_REPO`` (default
    ``tonic/trackio-experiments``) from the environment, loads the existing
    experiments, passes one uniquely named sample experiment to
    ``upsert_experiment`` and reloads the dataset to report the new count.

    Note:
        A successful run leaves a ``test_diagnostic_*`` record in the
        dataset; this function does not remove it.

    Returns:
        bool: True when the upsert reports success. False when ``HF_TOKEN``
        is missing, ``dataset_utils`` cannot be imported, the upsert reports
        failure, or any other exception is raised (logged with traceback).
    """
    from datetime import datetime

    try:
        from dataset_utils import TrackioDatasetManager

        # Credentials and target repository come from the environment
        hf_token = os.environ.get('HF_TOKEN')
        dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', 'tonic/trackio-experiments')

        if not hf_token:
            logger.warning("⚠️ HF_TOKEN not found in environment")
            return False

        logger.info(f"🔧 Testing dataset manager with repo: {dataset_repo}")

        manager = TrackioDatasetManager(dataset_repo, hf_token)

        # Read path: load whatever is already stored
        experiments = manager.load_existing_experiments()
        logger.info(f"📊 Loaded {len(experiments)} experiments from dataset")

        # Stamp the sample with the actual creation time instead of a fixed
        # date, keeping the same naive ISO-8601 shape (YYYY-MM-DDTHH:MM:SS).
        now = datetime.now().isoformat(timespec='seconds')

        # A random 32-bit suffix keeps repeated runs from colliding on the id
        sample_experiment = {
            'experiment_id': f"test_diagnostic_{int.from_bytes(os.urandom(4), 'big')}",
            'name': 'Diagnostic Test Experiment',
            'description': 'Test experiment created by diagnostic script',
            'created_at': now,
            'status': 'completed',
            'metrics': '[]',
            'parameters': '{"test": true}',
            'artifacts': '[]',
            'logs': '[]',
            'last_updated': now,
        }

        # Write path: upsert the sample record
        logger.info("🧪 Testing experiment upsert...")
        success = manager.upsert_experiment(sample_experiment)

        if not success:
            logger.error("❌ Failed to upsert test experiment")
            return False

        logger.info("✅ Dataset manager working correctly")

        # Reload to report the experiment count after the write
        experiments_after = manager.load_existing_experiments()
        logger.info(f"📊 After upsert: {len(experiments_after)} experiments")
        return True

    except ImportError as e:
        logger.error(f"❌ Failed to import dataset_utils: {e}")
        return False
    except Exception as e:
        # Diagnostic boundary: keep going, but record the traceback so the
        # failure can actually be debugged from the log.
        logger.exception(f"❌ Dataset manager test failed: {e}")
        return False
def test_trackio_space():
    """Check that ``TrackioSpace`` can be imported and constructed.

    Imports ``TrackioSpace`` from the ``app`` module, instantiates it with no
    arguments and logs its experiment count, whether a dataset manager and an
    HF token are present, and the configured dataset repository.

    Returns:
        bool: True when the instance was created and inspected. False when
        the import fails or any other exception is raised (logged with
        traceback).
    """
    try:
        from app import TrackioSpace

        logger.info("🧪 Testing TrackioSpace initialization...")

        space = TrackioSpace()

        # Report the state the instance ended up in after initialization
        logger.info(f"📊 TrackioSpace initialized with {len(space.experiments)} experiments")
        logger.info(f"🛡️ Dataset manager available: {'Yes' if space.dataset_manager else 'No'}")
        logger.info(f"🔑 HF Token available: {'Yes' if space.hf_token else 'No'}")
        logger.info(f"📊 Dataset repo: {space.dataset_repo}")

        return True

    except ImportError as e:
        logger.error(f"❌ Failed to import TrackioSpace: {e}")
        return False
    except Exception as e:
        # Diagnostic boundary: keep going, but record the traceback so the
        # failure can actually be debugged from the log.
        logger.exception(f"❌ TrackioSpace test failed: {e}")
        return False
def test_environment():
    """Log which Trackio-related environment variables are set.

    Inspects ``HF_TOKEN``, ``TRACKIO_DATASET_REPO``, ``TRACKIO_URL`` and
    ``SPACE_ID``. A variable whose name contains ``TOKEN`` is never logged in
    full: values longer than 8 characters are shown as their first 8
    characters followed by ``...``; shorter values are shown as ``***``.
    A non-empty ``SPACE_ID`` is reported as "running on HF Spaces".

    Returns:
        bool: Always True; this check only reports, it never fails.
    """
    logger.info("🔍 Checking environment configuration...")

    env_vars = {
        name: os.environ.get(name)
        for name in ('HF_TOKEN', 'TRACKIO_DATASET_REPO', 'TRACKIO_URL', 'SPACE_ID')
    }

    for var, value in env_vars.items():
        if not value:
            logger.warning(f"⚠️ {var}: Not set")
            continue

        if 'TOKEN' in var:
            # Secrets must not reach the log in clear text, whatever their
            # length; a short value is hidden entirely.
            shown = value[:8] + '...' if len(value) > 8 else '***'
        else:
            shown = value
        logger.info(f"✅ {var}: {shown}")

    # A non-empty SPACE_ID is treated as "running on HF Spaces"
    is_hf_spaces = bool(env_vars['SPACE_ID'])
    logger.info(f"🚀 Running on HF Spaces: {'Yes' if is_hf_spaces else 'No'}")

    return True
def fix_common_issues():
    """Log suggested fixes for common Trackio configuration mistakes.

    Two environment settings are inspected; nothing is modified:

    * ``TRACKIO_DATASET_REPO`` (default ``tonic/trackio-experiments``) is
      expected to look like ``username/dataset-name``: exactly one ``/`` with
      a non-empty part on each side.
    * ``TRACKIO_URL`` (default ``https://tonic-test-trackio-test.hf.space``)
      must not start with a repeated scheme such as ``https://https://``.
      When it does, the URL with the repeated prefix collapsed to a single
      scheme is logged as the suggested value.
    """
    logger.info("💡 Common issue fixes:")

    # Dataset repository: require "<owner>/<name>" with both parts present
    dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', 'tonic/trackio-experiments')
    owner, _, name = dataset_repo.partition('/')
    if owner and name and '/' not in name:
        logger.info(f"✅ Dataset repo format looks good: {dataset_repo}")
    else:
        logger.warning(f"⚠️ Dataset repo format issue: {dataset_repo} should be 'username/dataset-name'")

    # Trackio URL: detect a scheme that was prepended more than once
    trackio_url = os.environ.get('TRACKIO_URL', 'https://tonic-test-trackio-test.hf.space')
    if trackio_url.startswith(('https://https://', 'http://http://')):
        logger.warning(f"⚠️ URL format issue detected: {trackio_url}")

        # Strip one copy at a time so that three or more repeats also
        # collapse to a single scheme.
        fixed_url = trackio_url
        for scheme in ('https://', 'http://'):
            while fixed_url.startswith(scheme * 2):
                fixed_url = fixed_url[len(scheme):]
        logger.info(f"💡 Fixed URL should be: {fixed_url}")
    else:
        logger.info(f"✅ Trackio URL format looks good: {trackio_url}")
def main():
    """Run every diagnostic check in order and log a summary.

    Order: environment report, dataset manager check, TrackioSpace check,
    common-issue suggestions, then the summary.

    Returns:
        bool: True only when both the dataset manager check and the
        TrackioSpace check passed. False when either reported issues or when
        the run was aborted by an unexpected exception (logged with
        traceback).
    """
    section_break = "-" * 40

    logger.info("🔧 Starting Trackio Space diagnostics...")
    logger.info("=" * 60)

    try:
        # Environment report (informational only)
        test_environment()
        logger.info(section_break)

        dataset_manager_ok = test_dataset_manager()
        logger.info(section_break)

        trackio_space_ok = test_trackio_space()
        logger.info(section_break)

        # Configuration suggestions (informational only)
        fix_common_issues()
        logger.info(section_break)

        logger.info("📋 DIAGNOSTIC SUMMARY:")
        logger.info(f"Dataset Manager: {'✅ OK' if dataset_manager_ok else '❌ Issues'}")
        logger.info(f"TrackioSpace: {'✅ OK' if trackio_space_ok else '❌ Issues'}")

        all_ok = bool(dataset_manager_ok and trackio_space_ok)
        if all_ok:
            logger.info("🎉 All systems appear to be working correctly!")
            logger.info("💡 The issues in the logs might be related to:")
            logger.info(" - Empty dataset (expected for new setup)")
            logger.info(" - API client URL formatting (being auto-fixed)")
            logger.info(" - Remote data access (falling back to local data)")
        else:
            logger.warning("⚠️ Some issues detected. Check the logs above for details.")

    except Exception as e:
        # Top-level boundary: report the crash with its traceback
        logger.exception(f"❌ Diagnostic script failed: {e}")
        return False

    # Report the real outcome so callers can tell a clean run from a failed one
    return all_ok
if __name__ == "__main__":
    # Exit non-zero when main() reports failure so shells and CI can react
    sys.exit(0 if main() else 1)
|