File size: 1,026 Bytes
c49b21b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python3
"""
Simple merge step 1: Copy latest features to merged features.
This creates the initial merged_features.parquet file for the pipeline.
"""

import shutil
from pathlib import Path

def main() -> None:
    """Copy the latest features parquet file to the merged features location.

    Copies ``data/advisorai-data/features/latest_features.parquet`` to
    ``data/merged/features/merged_features.parquet`` (both resolved against
    the current working directory), creating the destination directory when
    it is missing, then reads the copy back and prints its row and column
    counts.

    Raises:
        FileNotFoundError: If the source file does not exist. The check runs
            before anything is written, so a failed run leaves no
            destination directory behind.
    """
    source_path = Path("data/advisorai-data/features/latest_features.parquet")
    dest_path = Path("data/merged/features/merged_features.parquet")

    # Validate the input before touching the filesystem: creating the
    # destination tree first would leave empty directories behind whenever
    # the source is missing.
    if not source_path.exists():
        raise FileNotFoundError(f"Source file not found: {source_path}")

    dest_path.parent.mkdir(parents=True, exist_ok=True)

    # copy2 copies the contents and also tries to preserve file metadata.
    shutil.copy2(source_path, dest_path)

    # pandas is only needed for the summary line, so it is imported here;
    # the missing-source failure above never depends on it being installed.
    import pandas as pd

    df = pd.read_parquet(dest_path)

    print(f"OK  wrote {dest_path} -> {len(df)} rows x {len(df.columns)} cols")

# Run the copy step only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()