#!/usr/bin/env python3
"""
Simple merge step 1: copy the latest features to the merged features file.

This creates the initial merged_features.parquet file for the pipeline.
"""

import shutil
from pathlib import Path
from typing import Union

# Default locations used when the script is run without arguments.
DEFAULT_SOURCE_PATH = Path("data/advisorai-data/features/latest_features.parquet")
DEFAULT_DEST_PATH = Path("data/merged/features/merged_features.parquet")


def main(
    source_path: Union[str, Path] = DEFAULT_SOURCE_PATH,
    dest_path: Union[str, Path] = DEFAULT_DEST_PATH,
) -> None:
    """Copy the latest features parquet file to the merged features location.

    After copying, the destination file is read back with pandas and a
    one-line summary (row and column counts) is printed.

    Args:
        source_path: Parquet file to copy from. Defaults to
            ``DEFAULT_SOURCE_PATH``.
        dest_path: Where the copy is written. Missing parent directories are
            created. Defaults to ``DEFAULT_DEST_PATH``.

    Raises:
        FileNotFoundError: If ``source_path`` is not an existing regular file.
    """
    source_path = Path(source_path)
    dest_path = Path(dest_path)

    # Validate the source *before* touching the filesystem, so a failed run
    # does not leave empty destination directories behind.
    if not source_path.is_file():
        raise FileNotFoundError(f"Source file not found: {source_path}")

    # Create the destination directory if it doesn't exist.
    dest_path.parent.mkdir(parents=True, exist_ok=True)

    # copy2 also copies file metadata (e.g. modification time).
    shutil.copy2(source_path, dest_path)

    # Imported here rather than at module level so that a missing source
    # fails fast without paying for the pandas import.
    import pandas as pd

    # Read the copy back and report basic info.
    df = pd.read_parquet(dest_path)
    print(f"OK wrote {dest_path} -> {len(df)} rows x {len(df.columns)} cols")


if __name__ == "__main__":
    main()