|
|
|
""" |
|
Simple merge step 1: Copy latest features to merged features. |
|
This creates the initial merged_features.parquet file for the pipeline. |
|
""" |
|
|
|
import shutil |
|
from pathlib import Path |
|
|
|
def main(
    source_path="data/advisorai-data/features/latest_features.parquet",
    dest_path="data/merged/features/merged_features.parquet",
):
    """Copy the latest features file to the merged features location.

    Args:
        source_path: Parquet file to copy (``str`` or ``Path``). Defaults to
            the latest-features path used by this step.
        dest_path: Where the copy is written (``str`` or ``Path``). Missing
            parent directories are created. Defaults to the merged-features
            path used by this step.

    Raises:
        FileNotFoundError: If ``source_path`` does not exist. This is raised
            before anything is created on disk.
    """
    source_path = Path(source_path)
    dest_path = Path(dest_path)

    # Validate the input first so a missing source does not leave an empty
    # destination directory behind.
    if not source_path.exists():
        raise FileNotFoundError(f"Source file not found: {source_path}")

    dest_path.parent.mkdir(parents=True, exist_ok=True)

    # copy2 copies the file contents and also attempts to preserve metadata.
    shutil.copy2(source_path, dest_path)

    # pandas is imported locally; it is only needed for the summary line.
    import pandas as pd

    # Read the copy back to report its shape.
    df = pd.read_parquet(dest_path)

    print(f"OK wrote {dest_path} -> {len(df)} rows x {len(df.columns)} cols")
|
|
|
# Entry point: perform the copy only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|