#!/usr/bin/env python3
"""
Test script for the SmolLM3 end-to-end pipeline
Verifies all components are working correctly
"""

import os
import sys
import subprocess
import importlib
from pathlib import Path


def _failed_imports(modules) -> list:
    """Import each named module, print a status line, and return the names that failed.

    Only ``ImportError`` is treated as a failed import; any other exception
    raised while importing propagates to the caller.
    """
    failed = []
    for module in modules:
        try:
            importlib.import_module(module)
        except ImportError as e:
            print(f"❌ {module}: {e}")
            failed.append(module)
        else:
            print(f"✅ {module}")
    return failed


def _report_files(directory: str, pattern: str, kind: str) -> bool:
    """List files matching ``pattern`` directly inside ``directory``.

    Args:
        directory: Directory path, resolved against the current working directory.
        pattern: Glob pattern applied inside ``directory`` (e.g. ``'*.py'``).
        kind: Word used in the printed messages (e.g. ``'configuration'``).

    Returns:
        True when the directory exists and at least one file matches.
    """
    folder = Path(directory)
    if not folder.exists():
        print(f"❌ {directory} directory not found")
        return False

    # Sorted so the listing is the same on every run and every filesystem.
    matches = sorted(folder.glob(pattern))
    if not matches:
        print(f"❌ No {kind} files found")
        return False

    print(f"✅ Found {len(matches)} {kind} files:")
    for match in matches:
        print(f" - {match.name}")
    return True


def test_imports() -> bool:
    """Test that all required modules can be imported"""
    print("🔍 Testing imports...")

    required_modules = [
        'torch',
        'transformers',
        'datasets',
        'accelerate',
        'trl',
        'huggingface_hub',
        'requests',
    ]

    failed_imports = _failed_imports(required_modules)
    if failed_imports:
        print(f"\n❌ Failed imports: {failed_imports}")
        return False

    print("✅ All imports successful")
    return True


def test_local_modules() -> bool:
    """Test local module imports

    Makes ``src`` (relative to the current working directory) importable and
    then tries to import each local module by name.
    """
    print("\n🔍 Testing local modules...")

    # Add src to path, but only once so repeated calls do not grow sys.path.
    if 'src' not in sys.path:
        sys.path.append('src')

    local_modules = [
        'config',
        'model',
        'data',
        'trainer',
        'monitoring',
    ]

    failed_imports = _failed_imports(local_modules)
    if failed_imports:
        print(f"\n❌ Failed local imports: {failed_imports}")
        return False

    print("✅ All local modules imported successfully")
    return True


def test_scripts() -> bool:
    """Test script availability

    Every path is checked relative to the current working directory; all of
    them are reported before the result is returned.
    """
    print("\n🔍 Testing scripts...")

    required_scripts = [
        'scripts/trackio_tonic/deploy_trackio_space.py',
        'scripts/trackio_tonic/configure_trackio.py',
        'scripts/dataset_tonic/setup_hf_dataset.py',
        'scripts/model_tonic/push_to_huggingface.py',
        'src/train.py',
    ]

    missing_scripts = []
    for script in required_scripts:
        if Path(script).exists():
            print(f"✅ {script}")
        else:
            print(f"❌ {script}")
            missing_scripts.append(script)

    if missing_scripts:
        print(f"\n❌ Missing scripts: {missing_scripts}")
        return False

    print("✅ All scripts found")
    return True


def test_configs() -> bool:
    """Test configuration files"""
    print("\n🔍 Testing configurations...")
    return _report_files('config', '*.py', 'configuration')


def test_requirements() -> bool:
    """Test requirements files"""
    print("\n🔍 Testing requirements...")
    return _report_files('requirements', '*.txt', 'requirements')


def test_cuda() -> bool:
    """Test CUDA availability

    A missing GPU is reported as a warning and still counts as a pass; only
    an exception while probing torch/CUDA makes this check fail.
    """
    print("\n🔍 Testing CUDA...")

    try:
        import torch
        if torch.cuda.is_available():
            device_count = torch.cuda.device_count()
            device_name = torch.cuda.get_device_name(0)
            print(f"✅ CUDA available: {device_count} device(s)")
            print(f" - Device 0: {device_name}")
        else:
            print("⚠️ CUDA not available (training will be slower)")
    except Exception as e:
        print(f"❌ CUDA test failed: {e}")
        return False

    return True


def test_hf_token() -> bool:
    """Test Hugging Face token

    Returns True when ``HF_TOKEN`` is unset (only a warning is printed) or
    when ``huggingface-cli whoami`` exits with status 0. Returns False when
    the command exits non-zero or cannot be run at all.
    """
    print("\n🔍 Testing HF token...")

    token = os.environ.get('HF_TOKEN')
    if not token:
        print("⚠️ HF_TOKEN not set (will be prompted during setup)")
        return True

    try:
        result = subprocess.run(
            ['huggingface-cli', 'whoami'],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except Exception as e:
        # Covers a missing CLI executable and the 10 s timeout, among others.
        print(f"❌ HF token test failed: {e}")
        return False

    if result.returncode == 0:
        username = result.stdout.strip()
        print(f"✅ HF token valid: {username}")
        return True

    print(f"❌ HF token invalid: {result.stderr}")
    return False


def test_pipeline_components() -> bool:
    """Test individual pipeline components

    Checks, in order, the setup script, the launch script and the README in
    the current working directory, and stops at the first one that is missing.
    """
    print("\n🔍 Testing pipeline components...")

    for component in ('setup_launch.py', 'launch.sh', 'README_END_TO_END.md'):
        if not Path(component).exists():
            print(f"❌ {component} not found")
            return False
        print(f"✅ {component} found")

    return True


def main() -> bool:
    """Run all tests

    Returns:
        True when every check passed. A check that raises is reported and
        counted as not passed, so one crash never aborts the remaining checks.
    """
    print("🧪 SmolLM3 End-to-End Pipeline Test")
    print("=" * 50)

    tests = [
        test_imports,
        test_local_modules,
        test_scripts,
        test_configs,
        test_requirements,
        test_cuda,
        test_hf_token,
        test_pipeline_components,
    ]

    passed = 0
    total = len(tests)

    for test in tests:
        try:
            if test():
                passed += 1
        except Exception as e:
            print(f"❌ Test failed with exception: {e}")

    print(f"\n📊 Test Results: {passed}/{total} passed")

    if passed == total:
        print("🎉 All tests passed! Pipeline is ready to use.")
        print("\n🚀 Next steps:")
        print("1. Run: python setup_launch.py")
        print("2. Run: chmod +x launch.sh")
        print("3. Run: ./launch.sh")
    else:
        print("❌ Some tests failed. Please fix the issues before running the pipeline.")
        print("\n🔧 Common fixes:")
        print("1. Install missing packages: pip install -r requirements/requirements_core.txt")
        print("2. Set HF_TOKEN environment variable")
        print("3. Check CUDA installation")

    return passed == total


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)