|
|
|
|
|
import pandas as pd |
|
|
|
def main(path='data/merged/features/crypto_features.parquet'):
    """Print a verification report for the ``symbol`` column of a feature file.

    Loads the parquet file at *path* and prints, in order: the row and
    column counts, how many ``symbol`` values are null, the number of
    distinct symbols, the ten most frequent symbols, up to ten
    ``symbol -> cg_id`` pairs, every row whose symbol is still null, and a
    closing block of success metrics.

    Args:
        path: Location of the parquet file to inspect. Defaults to the
            merged crypto feature file this script was written for.

    Raises:
        KeyError: If the frame has no ``symbol`` or ``cg_id`` column, or if
            null symbols exist and ``symbols.binance`` / ``symbols.bybit``
            are missing.

    Errors raised by ``pd.read_parquet`` propagate unchanged.
    """
    # Written as an escape so that re-encoding the source file cannot
    # corrupt the check-mark and split the string literals that use it.
    check = "\u2705"

    print("=== FINAL CRYPTO SYMBOL VERIFICATION ===")

    df = pd.read_parquet(path)
    total_rows = len(df)

    print(f"Total rows: {total_rows}")
    print(f"Total columns: {len(df.columns)}")

    symbols = df['symbol']
    # Computed once and reused for the count, the sample and the null listing.
    is_null = symbols.isnull()
    null_symbols = int(is_null.sum())
    populated = total_rows - null_symbols

    # An empty frame has no meaningful ratio; report 0.0 instead of dividing
    # by zero.
    if total_rows:
        null_percentage = (null_symbols / total_rows) * 100
        success_rate = (populated / total_rows) * 100
    else:
        null_percentage = success_rate = 0.0

    print(f"Null symbols: {null_symbols} ({null_percentage:.1f}%)")
    print(f"Unique symbols: {symbols.nunique()}")

    print("\nTop 10 symbols by count:")
    print(symbols.value_counts().head(10))

    print("\nSample of successfully extracted symbols:")
    sample = df.loc[~is_null, ['symbol', 'cg_id']].head(10)
    for symbol, cg_id in sample.itertuples(index=False, name=None):
        print(f" {symbol} -> {cg_id}")

    if null_symbols > 0:
        print("\nRows with remaining null symbols:")
        null_rows = df.loc[
            is_null,
            ['symbol', 'cg_id', 'symbols.binance', 'symbols.bybit'],
        ]
        print(null_rows.to_string(index=False))

    print("\n=== SUCCESS METRICS ===")
    print(f"{check} Symbol extraction success rate: {success_rate:.1f}%")
    print(f"{check} Total symbols populated: {populated}")
    print(f"{check} Pipeline integration: Complete")
|
|
|
# Script entry point: produce the report only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|