Spaces:
Running
Running
File size: 11,451 Bytes
ebe598e c417358 ebe598e c417358 40fd629 c417358 ebe598e 75bcdb3 2da5c04 75bcdb3 3c37508 c417358 2da5c04 c417358 2da5c04 ebe598e eb9e91f ebe598e eb9e91f 2da5c04 3c37508 ebe598e 75bcdb3 ebe598e 75bcdb3 3c37508 75bcdb3 3c37508 75bcdb3 3c37508 ebe598e eb9e91f ebe598e 2da5c04 ebe598e 2da5c04 3c37508 ebe598e 2da5c04 ebe598e 2da5c04 3c37508 2da5c04 ebe598e 2da5c04 3c37508 ebe598e eb9e91f ebe598e eb9e91f ebe598e 3c37508 2da5c04 ebe598e 75bcdb3 ebe598e eb9e91f ebe598e 3c37508 ebe598e 75bcdb3 3c37508 ebe598e 2da5c04 ebe598e 2da5c04 ebe598e 2da5c04 ebe598e 2da5c04 ebe598e 2da5c04 ebe598e 2da5c04 ebe598e 2da5c04 ebe598e 2da5c04 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 |
#!/usr/bin/env python3
"""
Configuration script for Trackio environment variables
"""
import json
import os
import subprocess
from datetime import datetime
from typing import Optional
def get_username_from_token(token: str) -> Optional[str]:
    """Resolve the Hugging Face username that owns ``token``.

    ``HfApi.whoami`` is tried first. When that call raises, or its response
    carries no usable username, the lookup is delegated to
    ``get_username_from_cli``.

    Args:
        token: Hugging Face access token to identify.

    Returns:
        The username, or ``None`` when neither the API nor the CLI fallback
        produces one.
    """
    try:
        # Imported lazily so that a missing huggingface_hub takes the same
        # CLI fallback path as a failing API call.
        from huggingface_hub import HfApi

        user_info = HfApi(token=token).whoami()

        # The response shape is not assumed: accept a mapping (probing a few
        # candidate keys) or a bare string; anything else counts as "no name".
        if isinstance(user_info, dict):
            username = (
                user_info.get('name')
                or user_info.get('username')
                or user_info.get('user')
            )
        elif isinstance(user_info, str):
            username = user_info
        else:
            username = None

        if username:
            print(f"✅ Got username from API: {username}")
            return username

        print("⚠️ Could not get username from API, trying CLI...")
        return get_username_from_cli(token)
    except Exception as e:
        # Deliberately broad: this is a best-effort lookup and any API-side
        # failure should fall through to the CLI instead of aborting.
        print(f"⚠️ API whoami failed: {e}")
        print("⚠️ Trying CLI fallback...")
        return get_username_from_cli(token)
def get_username_from_cli(token: str) -> Optional[str]:
    """Look up the username for ``token`` by running ``hf whoami``.

    The token is handed to the child process through its environment only;
    this process's own ``os.environ`` is left untouched, so the secret does
    not leak into unrelated subprocesses started later.

    Args:
        token: Hugging Face access token exported to the CLI as ``HF_TOKEN``.

    Returns:
        The command's stripped standard output when it exits with status 0
        and prints something; ``None`` on a non-zero exit, empty output, or
        any error while launching the command (missing binary, timeout, ...).
    """
    try:
        result = subprocess.run(
            ["hf", "whoami"],
            capture_output=True,
            text=True,
            timeout=30,
            # Copy of the current environment plus the token, scoped to the
            # child process only.
            env={**os.environ, 'HF_TOKEN': token},
        )
    except Exception as e:
        # Best-effort fallback: report the problem and signal "unknown".
        print(f"⚠️ CLI fallback failed: {e}")
        return None

    if result.returncode != 0:
        print(f"⚠️ CLI whoami failed: {result.stderr}")
        return None

    username = result.stdout.strip()
    if not username:
        print("⚠️ CLI returned empty username")
        return None

    print(f"✅ Got username from CLI: {username}")
    return username
def configure_trackio():
    """Report, validate and persist the Trackio environment configuration.

    Reads ``HF_TOKEN``, ``TRACKIO_DATASET_REPO``, ``SPACE_ID`` and
    ``TRACKIO_URL`` from the environment, prints their status, probes the
    dataset repository when a token is present, and writes a summary to
    ``trackio_config.json`` in the current working directory. The raw token
    is never printed or written; only whether it is set.
    """
    # NOTE(review): the decorative section prefixes in the messages below
    # ("π...") are mis-encoded in the file as received. The intended glyphs
    # could not be determined, so those literals are left exactly as found.
    print("π§ Trackio Configuration")
    print("=" * 40)

    # Single-token approach: the same HF_TOKEN is used for everything.
    hf_token = os.environ.get('HF_TOKEN')

    username = 'unknown'
    if hf_token:
        resolved = get_username_from_token(hf_token)
        if resolved:
            username = resolved
            print(f"✅ Authenticated as: {username}")
        else:
            print("⚠️ Could not determine username from token")

    # Default the dataset repository to the user's namespace if not specified.
    dataset_repo = os.environ.get('TRACKIO_DATASET_REPO', f'{username}/trackio-experiments')

    # Never expose raw tokens in logs; only track presence.
    current_config = {
        'HF_TOKEN': 'Set' if hf_token else 'Not set',
        'TRACKIO_DATASET_REPO': dataset_repo,
        'SPACE_ID': os.environ.get('SPACE_ID', 'Not set'),
        'TRACKIO_URL': os.environ.get('TRACKIO_URL', 'Not set'),
    }

    print("π Current Configuration:")
    for key, value in current_config.items():
        status = "✅" if value != "Not set" else "❌"
        print(f" {status} {key}: {value}")

    print("\nπ― Configuration Options:")
    print("1. Set HF_TOKEN - Main token (starts as write, switches to read after training)")
    print("2. Set TRACKIO_DATASET_REPO - Dataset repository (optional)")
    print("3. Set SPACE_ID - HF Space ID (auto-detected)")
    print("4. Set TRACKIO_URL - Trackio Space URL (auto-detected)")

    # A SPACE_ID in the environment is taken to mean "running on HF Spaces".
    if os.environ.get('SPACE_ID'):
        print("\nπ Running on Hugging Face Spaces")
        print(f" Space ID: {os.environ.get('SPACE_ID')}")

    print("\nπ Configuration Validation:")
    if current_config['HF_TOKEN'] != 'Not set':
        print("✅ HF_TOKEN is set")
        print(" This allows training operations and dataset access")
        print(" Note: Token will be automatically switched from write to read after training")
    else:
        print("❌ HF_TOKEN is not set")
        print(" Please set HF_TOKEN for training operations")
        print(" Get your token from: https://huggingface.co/settings/tokens")

    print(f"π Dataset Repository: {dataset_repo}")

    # The dataset can only be probed when a token is available.
    if hf_token:
        _test_dataset_access(dataset_repo, hf_token)
    else:
        print("\nπ§ͺ Dataset Access Test:")
        print("❌ Cannot test dataset access - no valid token set")

    # Do not persist raw tokens to disk; store only a presence flag.
    config_file = "trackio_config.json"
    config_data = {
        'hf_token_set': bool(hf_token),
        'dataset_repo': current_config['TRACKIO_DATASET_REPO'],
        'space_id': current_config['SPACE_ID'],
        'trackio_url': current_config['TRACKIO_URL'],
        'username': username,
        'last_updated': datetime.now().isoformat(),
        'notes': 'Trackio configuration - HF_TOKEN starts as write token, switches to read token after training',
    }
    with open(config_file, 'w', encoding='utf-8') as f:
        json.dump(config_data, f, indent=2)
    print(f"\nπΎ Configuration saved to: {config_file}")

    print("\nπ Environment Variables for HF Space:")
    print("=" * 50)
    print(f"HF_TOKEN={'Set' if hf_token else 'Not set'}")
    print(f"TRACKIO_DATASET_REPO={current_config['TRACKIO_DATASET_REPO']}")
    if current_config['TRACKIO_URL'] != 'Not set':
        print(f"TRACKIO_URL={current_config['TRACKIO_URL']}")

    print("\nπ― Next Steps:")
    print("1. HF_TOKEN will be automatically set during deployment (starts as write token)")
    print("2. HF_TOKEN will be automatically switched to read token after training")
    print("3. Optionally set TRACKIO_DATASET_REPO to use a different dataset")
    print("4. Deploy your updated app.py to the Space")
    print("5. Run setup_hf_dataset.py if you haven't created the dataset yet")

    _print_usage_examples(username)


def _test_dataset_access(dataset_repo, token):
    """Probe ``dataset_repo`` on the Hub with ``token`` and print the outcome."""
    print("\nπ§ͺ Testing Dataset Access...")
    try:
        from datasets import load_dataset
        from huggingface_hub import HfApi

        api = HfApi(token=token)
        try:
            # Raises if the repository cannot be found or accessed.
            api.repo_info(repo_id=dataset_repo, repo_type="dataset")
            print(f"✅ Dataset repository exists: {dataset_repo}")

            dataset = load_dataset(dataset_repo, token=token)
            print(f"✅ Successfully loaded dataset: {dataset_repo}")

            if 'train' in dataset:
                train_split = dataset['train']
                experiment_count = len(train_split)
                print(f"π Found {experiment_count} experiments in dataset")

                if experiment_count > 0:
                    print("π¬ Sample experiments:")
                    # Index row by row: slicing a split (``[:3]``) yields a
                    # dict of columns, so iterating it would walk column
                    # names instead of experiment rows.
                    for i in range(min(3, experiment_count)):
                        row = train_split[i]
                        exp_id = row.get('experiment_id', 'Unknown')
                        name = row.get('name', 'Unnamed')
                        print(f" {i+1}. {exp_id}: {name}")
        except Exception as repo_error:
            # A missing repository is expected before first setup, so it is
            # reported as a warning rather than an error.
            if "404" in str(repo_error) or "not found" in str(repo_error).lower():
                print(f"⚠️ Dataset repository '{dataset_repo}' doesn't exist yet")
                print(" This is normal if you haven't created the dataset yet")
                print(" Run setup_hf_dataset.py to create the dataset")
            else:
                print(f"❌ Error accessing dataset repository: {repo_error}")
                print(" Check that your token has read permissions")
    except ImportError:
        print("❌ Required packages not available")
        print(" Install with: pip install datasets huggingface_hub")
    except Exception as e:
        print(f"❌ Failed to load dataset: {e}")
        print(" This might be normal if the dataset doesn't exist yet")
        print(" Run setup_hf_dataset.py to create the dataset")


def _print_usage_examples(username):
    """Print example ``TRACKIO_DATASET_REPO`` values built from ``username``."""
    examples = (
        ("Default Dataset", f"{username}/trackio-experiments",
         "Default dataset for your experiments"),
        ("Personal Dataset", f"{username}/trackio-experiments",
         "Your personal experiment dataset"),
        ("Team Dataset", "your-org/team-experiments",
         "Shared dataset for team experiments"),
        ("Project Dataset", f"{username}/smollm3-experiments",
         "Dataset specific to SmolLM3 experiments"),
    )
    print("\nπ Usage Examples")
    print("=" * 30)
    for index, (title, repo, description) in enumerate(examples, 1):
        # Blank line between entries, none after the last one.
        if index > 1:
            print()
        print(f"{index}. {title}")
        print(f" Repository: {repo}")
        print(f" Description: {description}")
        print(f" Set with: TRACKIO_DATASET_REPO={repo}")
def show_usage_examples():
    """Print a numbered list of sample ``TRACKIO_DATASET_REPO`` settings.

    Each entry shows a title, the repository id, a short description and the
    environment-variable assignment that selects it, followed by a blank line.
    """
    # NOTE(review): the prefix in the heading below is mis-encoded in the
    # file as received; it is reproduced unchanged.
    catalogue = [
        dict(
            name='Default Dataset',
            repo='your-username/trackio-experiments',
            description='Default dataset for your experiments',
            env_var='TRACKIO_DATASET_REPO=your-username/trackio-experiments',
        ),
        dict(
            name='Personal Dataset',
            repo='your-username/trackio-experiments',
            description='Your personal experiment dataset',
            env_var='TRACKIO_DATASET_REPO=your-username/trackio-experiments',
        ),
        dict(
            name='Team Dataset',
            repo='your-org/team-experiments',
            description='Shared dataset for team experiments',
            env_var='TRACKIO_DATASET_REPO=your-org/team-experiments',
        ),
        dict(
            name='Project Dataset',
            repo='your-username/smollm3-experiments',
            description='Dataset specific to SmolLM3 experiments',
            env_var='TRACKIO_DATASET_REPO=your-username/smollm3-experiments',
        ),
    ]

    print("\nπ Usage Examples")
    print("=" * 30)

    position = 0
    for entry in catalogue:
        position += 1
        # One call per entry; the trailing empty item produces the blank
        # separator line after each example.
        print(
            f"{position}. {entry['name']}",
            f" Repository: {entry['repo']}",
            f" Description: {entry['description']}",
            f" Set with: {entry['env_var']}",
            "",
            sep="\n",
        )
# Script entry point: run the interactive configuration report when the file
# is executed directly (not when it is imported).
if __name__ == "__main__":
    configure_trackio()