Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Fix script to manually add missing experiments to trackio_experiments.json | |
| """ | |
| import json | |
| import os | |
| from datetime import datetime | |
def _shared_parameters(experiment_name):
    """Return a fresh copy of the training configuration logged for both runs.

    The two recovered runs differ only in ``experiment_name``. The dict is
    rebuilt on every call so the experiments never share mutable state.
    """
    return {
        'model_name': 'HuggingFaceTB/SmolLM3-3B',
        'max_seq_length': 12288,
        'use_flash_attention': True,
        'use_gradient_checkpointing': False,
        'batch_size': 8,
        'gradient_accumulation_steps': 16,
        'learning_rate': 3.5e-06,
        'weight_decay': 0.01,
        'warmup_steps': 1200,
        'max_iters': 18000,
        'eval_interval': 1000,
        'log_interval': 25,
        'save_interval': 2000,
        'optimizer': 'adamw_torch',
        'beta1': 0.9,
        'beta2': 0.999,
        'eps': 1e-08,
        'scheduler': 'cosine',
        'min_lr': 3.5e-07,
        'fp16': False,
        'bf16': True,
        'ddp_backend': 'nccl',
        'ddp_find_unused_parameters': False,
        'save_steps': 2000,
        'eval_steps': 1000,
        'logging_steps': 25,
        'save_total_limit': 5,
        'eval_strategy': 'steps',
        'metric_for_best_model': 'eval_loss',
        'greater_is_better': False,
        'load_best_model_at_end': True,
        'data_dir': None,
        'train_file': None,
        'validation_file': None,
        'test_file': None,
        'use_chat_template': True,
        'chat_template_kwargs': {
            'add_generation_prompt': True,
            'no_think_system_message': True,
        },
        'enable_tracking': True,
        'trackio_url': 'https://tonic-test-trackio-test.hf.space',
        'trackio_token': None,
        'log_artifacts': True,
        'log_metrics': True,
        'log_config': True,
        'experiment_name': experiment_name,
        'dataset_name': 'legmlai/openhermes-fr',
        'dataset_split': 'train',
        'input_field': 'prompt',
        'target_field': 'accepted_completion',
        'filter_bad_entries': True,
        'bad_entry_field': 'bad_entry',
        'packing': False,
        'max_prompt_length': 12288,
        'max_completion_length': 8192,
        'truncation': True,
        'dataloader_num_workers': 10,
        'dataloader_pin_memory': True,
        'dataloader_prefetch_factor': 3,
        'max_grad_norm': 1.0,
        'group_by_length': True,
    }


def _experiment_record(exp_id, name, created_at, metrics):
    """Assemble one experiment entry in the layout stored in the data file."""
    return {
        'id': exp_id,
        'name': name,
        'description': 'SmolLM3 fine-tuning experiment',
        'created_at': created_at,
        'status': 'running',
        'metrics': metrics,
        'parameters': _shared_parameters(name),
        'artifacts': [],
        'logs': [],
    }


def _recovered_experiments():
    """Return the experiments transcribed from the logs, keyed by experiment id."""
    first_run_metrics = [
        {
            'timestamp': '2025-07-20T11:20:01.780908',
            'step': 25,
            'metrics': {
                'loss': 1.1659,
                'grad_norm': 10.3125,
                'learning_rate': 7e-08,
                'num_tokens': 1642080.0,
                'mean_token_accuracy': 0.75923578992486,
                'epoch': 0.004851130919895701,
            },
        },
        {
            'timestamp': '2025-07-20T11:26:39.042155',
            'step': 50,
            'metrics': {
                'loss': 1.165,
                'grad_norm': 10.75,
                'learning_rate': 1.4291666666666667e-07,
                'num_tokens': 3324682.0,
                'mean_token_accuracy': 0.7577659255266189,
                'epoch': 0.009702261839791402,
            },
        },
        {
            'timestamp': '2025-07-20T11:33:16.203045',
            'step': 75,
            'metrics': {
                'loss': 1.1639,
                'grad_norm': 10.6875,
                'learning_rate': 2.1583333333333334e-07,
                'num_tokens': 4987941.0,
                'mean_token_accuracy': 0.7581205774843692,
                'epoch': 0.014553392759687101,
            },
        },
        {
            'timestamp': '2025-07-20T11:39:53.453917',
            'step': 100,
            'metrics': {
                'loss': 1.1528,
                'grad_norm': 10.75,
                'learning_rate': 2.8875e-07,
                'num_tokens': 6630190.0,
                'mean_token_accuracy': 0.7614579878747463,
                'epoch': 0.019404523679582803,
            },
        },
    ]

    # The second run logged one training record and one system-resource
    # record, both at step 25.
    second_run_metrics = [
        {
            'timestamp': '2025-07-20T11:54:31.993219',
            'step': 25,
            'metrics': {
                'loss': 1.166,
                'grad_norm': 10.375,
                'learning_rate': 7e-08,
                'num_tokens': 1642080.0,
                'mean_token_accuracy': 0.7590958896279335,
                'epoch': 0.004851130919895701,
            },
        },
        {
            'timestamp': '2025-07-20T11:54:33.589487',
            'step': 25,
            'metrics': {
                'gpu_0_memory_allocated': 17.202261447906494,
                'gpu_0_memory_reserved': 75.474609375,
                'gpu_0_utilization': 0,
                'cpu_percent': 2.7,
                'memory_percent': 10.1,
            },
        },
    ]

    return {
        'exp_20250720_130853': _experiment_record(
            'exp_20250720_130853',
            'petite-elle-l-aime-3',
            '2025-07-20T11:20:01.780908',
            first_run_metrics,
        ),
        'exp_20250720_134319': _experiment_record(
            'exp_20250720_134319',
            'petite-elle-l-aime-3-1',
            '2025-07-20T11:54:31.993219',
            second_run_metrics,
        ),
    }


def add_missing_experiments(data_file="trackio_experiments.json"):
    """Add the missing experiments from the logs to the data file.

    Loads ``data_file`` if it exists (otherwise starts from an empty
    structure), inserts the two recovered experiments, marks the later one as
    the current experiment, refreshes ``last_updated`` and writes the result
    back as indented JSON. An entry already stored under one of the two
    experiment ids is overwritten; all other entries are kept.

    Args:
        data_file: Path of the JSON data file to patch. Defaults to
            ``trackio_experiments.json`` in the current working directory.

    Raises:
        json.JSONDecodeError: If ``data_file`` exists but is not valid JSON.
            The file is left untouched in that case.
    """
    # Open directly instead of checking for existence first, so there is no
    # window in which the file can disappear between the check and the read.
    try:
        with open(data_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        data = {
            'experiments': {},
            'current_experiment': None,
            'last_updated': datetime.now().isoformat(),
        }

    # An existing file may lack the 'experiments' key; create it on demand
    # rather than failing with KeyError.
    experiments = data.setdefault('experiments', {})

    recovered = _recovered_experiments()
    experiments.update(recovered)

    # Update metadata
    data['current_experiment'] = 'exp_20250720_134319'
    data['last_updated'] = datetime.now().isoformat()

    # Write to a sibling temp file and swap it in, so a failure while
    # serialising can never leave the real data file truncated.
    tmp_path = os.fspath(data_file) + '.tmp'
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        os.replace(tmp_path, data_file)
    finally:
        # Only still present when the dump or the swap failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    print("✅ Added missing experiments to trackio_experiments.json")
    print(f"📊 Total experiments: {len(experiments)}")
    print("🔬 Experiments added:")
    for exp_id, record in recovered.items():
        print(f" - {exp_id} ({record['name']})")
    print("\n🎯 You can now view these experiments in the Trackio interface!")
# Script entry point: patch the data file only when this module is executed
# directly, never as a side effect of importing it.
if __name__ == "__main__":
    add_missing_experiments()