|
|
|
""" |
|
Test script for null filling during merge operations.
|
""" |
|
|
|
import pandas as pd |
|
import numpy as np |
|
import os |
|
import sys |
|
from pathlib import Path |
|
|
|
|
|
sys.path.append(str(Path(__file__).parent)) |
|
|
|
from merge_temp import fill_nulls_from_temp |
|
|
|
def create_test_data():
    """Build the two sample frames used by the null-filling test.

    Returns:
        tuple: ``(df_merged, df_temp)``. ``df_merged`` holds five rows with
        NaN gaps in ``price``, ``volume``, ``rsi`` and ``macd``. ``df_temp``
        holds six fully populated rows: the same five symbol/timestamp
        pairs plus one GOOGL row, and an additional ``new_feature`` column.
    """
    # The two interval timestamps shared by both frames.
    first_ts, second_ts = 1640995200000, 1640995260000
    missing = np.nan

    df_merged = pd.DataFrame(dict(
        symbol=['AAPL', 'AAPL', 'BTC', 'BTC', 'ETH'],
        interval_timestamp=[first_ts, second_ts, first_ts, second_ts, first_ts],
        price=[150.0, missing, 50000.0, missing, 4000.0],
        volume=[1000000, 1200000, missing, 800000, missing],
        rsi=[65.0, missing, 70.0, 45.0, missing],
        macd=[1.5, 1.8, missing, -0.5, 2.1],
    ))

    df_temp = pd.DataFrame(dict(
        symbol=['AAPL', 'AAPL', 'BTC', 'BTC', 'ETH', 'GOOGL'],
        interval_timestamp=[first_ts, second_ts, first_ts, second_ts, first_ts, first_ts],
        price=[149.5, 152.3, 49950.0, 51200.0, 3980.0, 2850.0],
        volume=[950000, 1150000, 2000000, 780000, 500000, 400000],
        rsi=[64.0, 67.0, 69.5, 44.0, 55.0, 60.0],
        macd=[1.4, 1.9, 15.2, -0.6, 2.0, 0.8],
        new_feature=[100, 200, 300, 400, 500, 600],
    ))

    return df_merged, df_temp
|
|
|
def test_null_filling():
    """Run ``fill_nulls_from_temp`` on the sample frames and verify the result.

    Prints a before/after report (shapes, null counts per column and full
    previews), then checks the outcome against the expectations stated in
    the report:

    * the AAPL price at 1640995260000 equals 152.3,
    * the BTC volume at 1640995200000 equals 2000000,
    * no ``new_feature`` column was added to the merged frame,
    * the reported fill count matches the observed drop in null count.

    NOTE(review): this assumes ``fill_nulls_from_temp`` modifies its first
    argument in place and returns the number of cells it filled -- confirm
    against ``merge_temp``.

    Returns:
        bool: ``True`` when at least one null was filled and every check
        above passed, ``False`` otherwise.
    """
    banner = "=" * 60
    print(banner)
    print("TESTING NULL FILLING DURING MERGE")
    print(banner)

    df_merged, df_temp = create_test_data()
    original_nulls = df_merged.isnull().sum().sum()

    print("BEFORE NULL FILLING:")
    print(f"Merged data shape: {df_merged.shape}")
    print(f"Temp data shape: {df_temp.shape}")
    print(f"Nulls in merged data: {original_nulls}")
    print("\nNull values by column in merged data:")
    for col, null_count in df_merged.isnull().sum().items():
        if null_count > 0:
            print(f" {col}: {null_count} nulls")

    print("\nMerged data preview:")
    print(df_merged.to_string())
    print("\nTemp data preview:")
    print(df_temp.to_string())

    # Fill a copy so the untouched original stays available for comparison.
    df_merged_copy = df_merged.copy()
    nulls_filled = fill_nulls_from_temp(df_merged_copy, df_temp)
    remaining_nulls = df_merged_copy.isnull().sum().sum()

    print("\nAFTER NULL FILLING:")
    print(f"Nulls filled: {nulls_filled}")
    print(f"Remaining nulls: {remaining_nulls}")
    print("\nRemaining null values by column:")
    for col, null_count in df_merged_copy.isnull().sum().items():
        if null_count > 0:
            print(f" {col}: {null_count} nulls")

    print("\nFilled data preview:")
    print(df_merged_copy.to_string())

    print("\nVERIFICATION:")

    def cell(symbol, timestamp, column):
        """Return ``column`` of the first filled row matching the key."""
        key = (
            (df_merged_copy['symbol'] == symbol)
            & (df_merged_copy['interval_timestamp'] == timestamp)
        )
        return df_merged_copy[key][column].iloc[0]

    # Each check is recorded so the return value reflects it; previously
    # these expectations were only printed and a wrong value still "passed".
    checks = {}

    aapl_price = cell('AAPL', 1640995260000, 'price')
    print(f"AAPL price at 1640995260000: {aapl_price} (should be 152.3)")
    # isclose() is False for NaN, so an unfilled cell counts as a failure.
    checks["AAPL price"] = bool(np.isclose(aapl_price, 152.3))

    btc_volume = cell('BTC', 1640995200000, 'volume')
    print(f"BTC volume at 1640995200000: {btc_volume} (should be 2000000)")
    checks["BTC volume"] = bool(np.isclose(btc_volume, 2000000))

    has_new_feature = 'new_feature' in df_merged_copy.columns
    print(f"New feature added: {has_new_feature} (should be False)")
    checks["no new_feature column"] = not has_new_feature

    # The count reported by the function must equal the observed drop.
    filled_nulls = original_nulls - remaining_nulls
    counts_match = bool(filled_nulls == nulls_filled)
    checks["null count consistency"] = counts_match
    if counts_match:
        print(f"✅ Null counting is consistent: {filled_nulls} nulls filled")
    else:
        print(f"❌ Null counting mismatch: reported {nulls_filled}, actual {filled_nulls}")

    failed = [name for name, passed in checks.items() if not passed]
    if failed:
        print(f"❌ Verification checks failed: {', '.join(failed)}")

    if nulls_filled > 0:
        fill_rate = (nulls_filled / original_nulls) * 100
        print(f"✅ Fill rate: {fill_rate:.1f}% ({nulls_filled}/{original_nulls})")
        return not failed

    print("❌ No nulls were filled")
    return False
|
|
|
def test_edge_cases():
    """Call ``fill_nulls_from_temp`` on degenerate inputs and print the results.

    Four scenarios are run in order: an empty merged frame, an empty temp
    frame, frames with no matching symbol/timestamp pair, and frames that
    share no value column. Each is expected to report 0 filled nulls; the
    reported counts are printed for inspection only and are not asserted.

    Returns:
        None
    """
    print("\n" + "=" * 60)
    print("TESTING EDGE CASES")
    print("=" * 60)

    df_empty = pd.DataFrame()
    df_test = pd.DataFrame({'symbol': ['A'], 'interval_timestamp': [123], 'value': [1]})
    df_with_nulls = pd.DataFrame({'symbol': ['A'], 'interval_timestamp': [123], 'value': [np.nan]})

    df_merged_nomatch = pd.DataFrame({
        'symbol': ['A'],
        'interval_timestamp': [111],
        'value': [np.nan],
    })
    df_temp_nomatch = pd.DataFrame({
        'symbol': ['B'],
        'interval_timestamp': [222],
        'value': [100],
    })

    df_merged_nocols = pd.DataFrame({
        'symbol': ['A'],
        'interval_timestamp': [123],
        'col1': [np.nan],
    })
    df_temp_nocols = pd.DataFrame({
        'symbol': ['A'],
        'interval_timestamp': [123],
        'col2': [100],
    })

    # (title, merged frame, temp frame) -- executed in the listed order.
    scenarios = [
        ("Test 1: Empty merged dataframe", df_empty, df_test),
        ("Test 2: Empty temp dataframe", df_with_nulls, df_empty),
        ("Test 3: No matching symbol+timestamp combinations",
         df_merged_nomatch, df_temp_nomatch),
        ("Test 4: No common columns", df_merged_nocols, df_temp_nocols),
    ]

    for title, merged_frame, temp_frame in scenarios:
        print(title)
        nulls_filled = fill_nulls_from_temp(merged_frame, temp_frame)
        print(f"Nulls filled: {nulls_filled} (should be 0)")

    print("✅ All edge case tests completed")
|
|
|
def main():
    """Run both test routines and print an overall summary.

    Returns:
        int: 0 when ``test_null_filling`` reports success, 1 otherwise.
        ``test_edge_cases`` is run for its printed output only; its result
        does not influence the returned status.
    """
    success = test_null_filling()
    test_edge_cases()

    separator = "=" * 60
    print("\n" + separator)
    print("TEST SUMMARY")
    print(separator)

    if success:
        print("🎉 Null filling functionality is working correctly!")
        return 0

    print("❌ Null filling functionality has issues")
    return 1
|
|
|
if __name__ == "__main__":
    # Hand main()'s status code to the interpreter as the process exit code.
    sys.exit(main())
|
|