File size: 1,445 Bytes
c49b21b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env python3

import pandas as pd

def main(
    path: str = 'data/merged/features/crypto_features.parquet',
    *,
    df: "pd.DataFrame | None" = None,
) -> None:
    """Print a verification report for the ``symbol`` column of the crypto features.

    The report goes to stdout and covers: row/column totals, the number and
    percentage of null symbols, the number of unique symbols, the ten most
    frequent symbols, a sample of ``symbol -> cg_id`` pairs, any rows whose
    symbol is still null, and summary success metrics.

    Args:
        path: Parquet file to read when ``df`` is not supplied. Defaults to
            the location the script has always used.
        df: Optional already-loaded feature table. When given, ``path`` is
            ignored and nothing is read from disk.

    Raises:
        KeyError: If the table has rows but lacks the ``symbol`` or ``cg_id``
            column.
    """
    print("=== FINAL CRYPTO SYMBOL VERIFICATION ===")

    # Load crypto features unless the caller already has them in memory.
    if df is None:
        df = pd.read_parquet(path)

    total_rows = len(df)
    print(f"Total rows: {total_rows}")
    print(f"Total columns: {len(df.columns)}")

    # Every percentage below divides by total_rows; an empty table would
    # otherwise be reported as "nan%".
    if total_rows == 0:
        print("No rows found - nothing to verify.")
        return

    # Symbol analysis
    symbols = df['symbol']
    null_mask = symbols.isnull()
    null_symbols = int(null_mask.sum())
    populated = total_rows - null_symbols
    null_percentage = null_symbols / total_rows * 100

    print(f"Null symbols: {null_symbols} ({null_percentage:.1f}%)")
    print(f"Unique symbols: {symbols.nunique()}")

    print("\nTop 10 symbols by count:")
    print(symbols.value_counts().head(10))

    print("\nSample of successfully extracted symbols:")
    sample = df.loc[~null_mask, ['symbol', 'cg_id']].head(10)
    for symbol, cg_id in zip(sample['symbol'], sample['cg_id']):
        print(f"  {symbol} -> {cg_id}")

    if null_symbols > 0:
        print("\nRows with remaining null symbols:")
        # Show whichever diagnostic columns exist rather than failing with a
        # KeyError when an exchange-specific column is absent.
        wanted = ['symbol', 'cg_id', 'symbols.binance', 'symbols.bybit']
        present = [column for column in wanted if column in df.columns]
        print(df.loc[null_mask, present].to_string(index=False))

    print("\n=== SUCCESS METRICS ===")
    success_rate = populated / total_rows * 100
    print(f"✅ Symbol extraction success rate: {success_rate:.1f}%")
    print(f"✅ Total symbols populated: {populated}")
    print("✅ Pipeline integration: Complete")

# Run the verification report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()