# File size: 1,387 Bytes
# 32519eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import sys
from pathlib import Path
import logging
import subprocess

# Make modules that live in the "src" folder next to this script importable.
sys.path.append(
    os.path.join(os.path.dirname(__file__), 'src')
)

# Configure logging: INFO level and above, each record prefixed with a
# timestamp and its level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

def setup_data_directories():
    """Ensure the raw, processed and synthetic data folders exist.

    The folders are given as relative paths, so they are created relative
    to the current working directory. Missing parents are created as well,
    and folders that already exist are left untouched. An empty
    ``.gitkeep`` placeholder file is touched inside each folder.
    """
    for folder in ("data/raw", "data/processed", "data/synthetic"):
        target = Path(folder)
        target.mkdir(parents=True, exist_ok=True)
        logger.info(f"Created directory: {folder}")

        # Drop an empty placeholder file into the folder.
        (target / ".gitkeep").touch(exist_ok=True)
        logger.info(f"Created .gitkeep in {folder}")

def main() -> int:
    """Set up the data directories, then run the data collection script.

    The collection script is launched in a child process using the same
    interpreter that runs this script (``sys.executable``). Its path is
    relative, so it is resolved against the current working directory.

    Returns:
        The exit code of the data collection subprocess: 0 on success,
        non-zero on failure (negative if the child was killed by a signal
        on POSIX).
    """
    logger.info("Setting up data directories...")
    setup_data_directories()

    logger.info("Running data collection script via subprocess...")
    result = subprocess.run([sys.executable, 'src/data_collection/data_collection.py'])
    if result.returncode != 0:
        logger.error(f"Data collection script failed with exit code {result.returncode}")
    else:
        logger.info("Data collection completed successfully.")

    # Hand the child's status back so the caller can react to a failure
    # instead of it only appearing in the log.
    return result.returncode


if __name__ == "__main__":
    # Propagate a failed collection run to the shell / CI as a non-zero
    # exit status rather than always exiting with 0.
    sys.exit(main())