File size: 1,445 Bytes
c49b21b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
#!/usr/bin/env python3
import pandas as pd
def main(df=None, path='data/merged/features/crypto_features.parquet'):
    """Print a verification report for the ``symbol`` column of the crypto features.

    The report covers row/column totals, the number and share of null
    symbols, the most frequent symbols, a sample of symbol -> ``cg_id``
    pairs, any rows whose symbol is still null, and summary success metrics.

    Args:
        df: Optional, already-loaded features DataFrame. When ``None`` (the
            default) the frame is read from ``path``.
        path: Parquet file to read when ``df`` is not supplied.

    Returns:
        None. Everything is written to stdout.

    Raises:
        KeyError: If the frame lacks ``symbol`` or ``cg_id``, or - when null
            symbols remain - ``symbols.binance`` / ``symbols.bybit``.
    """
    print("=== FINAL CRYPTO SYMBOL VERIFICATION ===")

    # Load crypto features
    if df is None:
        df = pd.read_parquet(path)

    total_rows = len(df)
    print(f"Total rows: {total_rows}")
    print(f"Total columns: {len(df.columns)}")

    # Symbol analysis
    null_mask = df['symbol'].isnull()
    null_symbols = int(null_mask.sum())
    populated = total_rows - null_symbols

    # Guard the ratios: an empty frame would otherwise divide by zero and the
    # report would show "nan%".
    if total_rows:
        null_percentage = (null_symbols / total_rows) * 100
        success_rate = (populated / total_rows) * 100
    else:
        null_percentage = 0.0
        success_rate = 0.0

    print(f"Null symbols: {null_symbols} ({null_percentage:.1f}%)")
    print(f"Unique symbols: {df['symbol'].nunique()}")

    print("\nTop 10 symbols by count:")
    print(df['symbol'].value_counts().head(10))

    print("\nSample of successfully extracted symbols:")
    sample = df.loc[~null_mask, ['symbol', 'cg_id']].head(10)
    # itertuples keeps each column's own dtype (iterrows upcasts per row).
    for symbol, cg_id in sample.itertuples(index=False, name=None):
        print(f" {symbol} -> {cg_id}")

    if null_symbols > 0:
        print("\nRows with remaining null symbols:")
        null_rows = df.loc[
            null_mask,
            ['symbol', 'cg_id', 'symbols.binance', 'symbols.bybit'],
        ]
        print(null_rows.to_string(index=False))

    print("\n=== SUCCESS METRICS ===")
    print(f"✅ Symbol extraction success rate: {success_rate:.1f}%")
    print(f"✅ Total symbols populated: {populated}")
    print("✅ Pipeline integration: Complete")
# Run the verification report only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()
|