"""Create the project's data directories and run the data collection script."""

import logging
import os
import subprocess
import sys
from pathlib import Path
from typing import Union

# Add src directory to Python path
sys.path.append(os.path.join(os.path.dirname(__file__), 'src'))

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def setup_data_directories(base_dir: Union[str, Path] = ".") -> None:
    """Create the data directories, each holding a ``.gitkeep`` file.

    Creates ``data/raw``, ``data/processed`` and ``data/synthetic``
    (including missing parents) and touches a ``.gitkeep`` file inside
    each one. Directories and files that already exist are left alone,
    so the function can be called repeatedly.

    Args:
        base_dir: Directory under which the ``data/...`` tree is created.
            Defaults to the current working directory.
    """
    directories = (
        "data/raw",
        "data/processed",
        "data/synthetic",
    )
    root = Path(base_dir)

    for directory in directories:
        path = root / directory
        path.mkdir(parents=True, exist_ok=True)
        logger.info("Created directory: %s", directory)

        # Create .gitkeep file
        gitkeep = path / ".gitkeep"
        gitkeep.touch(exist_ok=True)
        logger.info("Created .gitkeep in %s", directory)


def main() -> int:
    """Set up the data directories and run the data collection script.

    The collection script is launched in a child process using the same
    interpreter that runs this script. Its path is relative to the
    current working directory.

    Returns:
        The exit code of the data collection subprocess (0 on success).
    """
    logger.info("Setting up data directories...")
    setup_data_directories()

    logger.info("Running data collection script via subprocess...")
    result = subprocess.run(
        [sys.executable, 'src/data_collection/data_collection.py']
    )

    if result.returncode != 0:
        logger.error(
            "Data collection script failed with exit code %s",
            result.returncode,
        )
    else:
        logger.info("Data collection completed successfully.")

    # Hand the child's status back so the caller can propagate it.
    return result.returncode


if __name__ == "__main__":
    # Exit with the collection script's status so shells/CI see failures.
    sys.exit(main())