File size: 1,026 Bytes
c49b21b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
#!/usr/bin/env python3
"""
Simple merge step 1: Copy latest features to merged features.
This creates the initial merged_features.parquet file for the pipeline.
"""
import shutil
from pathlib import Path
def main() -> None:
    """Copy the latest features file to the merged-features location.

    Copies ``data/advisorai-data/features/latest_features.parquet`` to
    ``data/merged/features/merged_features.parquet`` (both resolved against
    the current working directory), then reads the copy back with pandas
    and prints its row and column counts.

    Raises:
        FileNotFoundError: If the source file does not exist. The check
            runs before anything is created, so a failed run leaves no
            empty destination directory behind.
    """
    source_path = Path("data/advisorai-data/features/latest_features.parquet")
    dest_path = Path("data/merged/features/merged_features.parquet")

    # Validate the input first so that a missing source causes no
    # filesystem side effects.
    if not source_path.exists():
        raise FileNotFoundError(f"Source file not found: {source_path}")

    # Create the destination directory only once we know the copy will run.
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(source_path, dest_path)

    # pandas is imported here rather than at module level, so the
    # missing-source path above fails without needing to load it.
    import pandas as pd

    df = pd.read_parquet(dest_path)
    print(f"OK wrote {dest_path} -> {len(df)} rows x {len(df.columns)} cols")
# Run the copy step only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|