#!/usr/bin/env python3
"""Print a verification report for the ``symbol`` column of the crypto feature table."""

from typing import Optional

import pandas as pd

# Default location of the feature table read by ``main``.
FEATURES_PATH = 'data/merged/features/crypto_features.parquet'

# Columns shown for rows whose symbol is still null; any that are absent
# from the frame are skipped instead of raising ``KeyError``.
_NULL_ROW_COLUMNS = ('symbol', 'cg_id', 'symbols.binance', 'symbols.bybit')


def _percent(part: int, whole: int) -> float:
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is 0."""
    return (part / whole) * 100 if whole else 0.0


def main(features_path: str = FEATURES_PATH,
         df: Optional[pd.DataFrame] = None) -> None:
    """Print row/column totals, null-symbol statistics and success metrics.

    Args:
        features_path: Parquet file to load when ``df`` is not supplied.
        df: Already-loaded feature table. When given, ``features_path`` is
            not read. Must contain ``symbol`` and ``cg_id`` columns.

    Raises:
        KeyError: If the ``symbol`` or ``cg_id`` column is missing.
    """
    print("=== FINAL CRYPTO SYMBOL VERIFICATION ===")

    # Load crypto features
    if df is None:
        df = pd.read_parquet(features_path)

    total_rows = len(df)
    print(f"Total rows: {total_rows}")
    print(f"Total columns: {len(df.columns)}")

    # Symbol analysis: build the mask once and reuse it for every section.
    symbols = df['symbol']
    has_symbol = symbols.notna()
    # Plain ints keep the arithmetic below out of NumPy scalar semantics
    # (where dividing by zero yields nan plus a RuntimeWarning).
    null_symbols = int((~has_symbol).sum())
    populated = total_rows - null_symbols

    print(f"Null symbols: {null_symbols} ({_percent(null_symbols, total_rows):.1f}%)")
    print(f"Unique symbols: {symbols.nunique()}")

    print("\nTop 10 symbols by count:")
    print(symbols.value_counts().head(10))

    print("\nSample of successfully extracted symbols:")
    sample = df.loc[has_symbol, ['symbol', 'cg_id']].head(10)
    # itertuples yields plain tuples, avoiding the per-row Series (and
    # dtype upcasting) that iterrows performs.
    for symbol, cg_id in sample.itertuples(index=False, name=None):
        print(f" {symbol} -> {cg_id}")

    if null_symbols > 0:
        print("\nRows with remaining null symbols:")
        shown = [col for col in _NULL_ROW_COLUMNS if col in df.columns]
        print(df.loc[~has_symbol, shown].to_string(index=False))

    print("\n=== SUCCESS METRICS ===")
    print(f"✅ Symbol extraction success rate: {_percent(populated, total_rows):.1f}%")
    print(f"✅ Total symbols populated: {populated}")
    print("✅ Pipeline integration: Complete")


if __name__ == "__main__":
    main()