Maaroufabousaleh
f
c49b21b
raw
history blame
1.03 kB
#!/usr/bin/env python3
"""
Simple merge step 1: Copy latest features to merged features.
This creates the initial merged_features.parquet file for the pipeline.
"""
import shutil
from pathlib import Path
def main() -> None:
    """Copy the latest features file to the merged features location.

    Copies ``data/advisorai-data/features/latest_features.parquet`` to
    ``data/merged/features/merged_features.parquet`` (both resolved relative
    to the current working directory), creating the destination directory
    if it does not exist, then prints the row and column count of the copy.

    Raises:
        FileNotFoundError: If the source path is not an existing regular file.
    """
    source_path = Path("data/advisorai-data/features/latest_features.parquet")
    dest_path = Path("data/merged/features/merged_features.parquet")

    # Validate the source before touching the filesystem so a failed run does
    # not leave an empty destination directory behind. ``is_file`` also
    # rejects a directory sitting at the source path.
    if not source_path.is_file():
        raise FileNotFoundError(f"Source file not found: {source_path}")

    dest_path.parent.mkdir(parents=True, exist_ok=True)

    # copy2 copies the file contents and attempts to preserve its metadata.
    shutil.copy2(source_path, dest_path)

    # pandas is imported here rather than at module import time, so the
    # missing-source check above does not depend on it being installed.
    import pandas as pd

    # Read the copy back to report its shape.
    df = pd.read_parquet(dest_path)
    print(f"OK wrote {dest_path} -> {len(df)} rows x {len(df.columns)} cols")
# Run the copy step only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()