abdev-leaderboard / test /test_validation.py
pquintero's picture
precommit
8f9985e
raw
history blame
4.67 kB
import pytest
import pandas as pd
import gradio as gr
from validation import validate_csv_file, validate_csv_can_be_read, validate_dataframe
from constants import REQUIRED_COLUMNS, MINIMAL_NUMBER_OF_ROWS
class TestValidateCsvCanBeRead:
    """Exercise validate_csv_can_be_read on well-formed and broken CSV text."""

    def test_valid_csv_can_be_read(self, valid_csv_content):
        """Valid CSV text is parsed into a DataFrame of the expected shape."""
        parsed = validate_csv_can_be_read(valid_csv_content)

        assert isinstance(parsed, pd.DataFrame)
        assert len(parsed) == MINIMAL_NUMBER_OF_ROWS
        assert list(parsed.columns) == list(REQUIRED_COLUMNS)

    def test_empty_csv_raises_error(self):
        """An empty string is rejected with a gr.Error describing the problem."""
        with pytest.raises(gr.Error) as raised:
            validate_csv_can_be_read("")

        message = str(raised.value)
        assert "empty or contains no valid data" in message

    def test_invalid_csv_format_raises_error(self):
        """Structurally malformed CSV text is rejected with a gr.Error."""
        # A quote that is opened but never closed is the malformed part.
        broken_csv = 'column1,column2\nvalue1,"unclosed quote\nvalue4,value5'

        with pytest.raises(gr.Error) as raised:
            validate_csv_can_be_read(broken_csv)

        message = str(raised.value)
        assert "Invalid CSV format" in message

    def test_csv_with_quoted_fields_can_be_read(self):
        """CSV rows whose sequence fields are double-quoted still parse."""
        header_line = "antibody_id,vh_protein_sequence,vl_protein_sequence,SEC %Monomer,HIC,PR_CHO,AC-SINS_pH6.0,AC-SINS_pH7.4,Tm\n"
        quoted_row = 'AB001,"EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMHWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDYGDGYYFDYWGQGTLVTVSS","DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASTLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPFTFGQGTKVEIK",95.2,0.85,0.92,0.78,0.81,72.5'
        # Repeat the same row so the row count equals MINIMAL_NUMBER_OF_ROWS.
        data_rows = [quoted_row] * MINIMAL_NUMBER_OF_ROWS
        csv_text = header_line + "\n".join(data_rows)

        parsed = validate_csv_can_be_read(csv_text)

        assert isinstance(parsed, pd.DataFrame)
        assert len(parsed) == MINIMAL_NUMBER_OF_ROWS
class TestValidateDataframe:
    """Test cases for the validate_dataframe function.

    Each test starts from the ``valid_input_dataframe`` fixture and derives
    the variant under test from it, leaving the fixture object itself
    unmodified.
    """

    def test_valid_dataframe_passes(self, valid_input_dataframe):
        """Test that a valid DataFrame passes validation without raising."""
        validate_dataframe(valid_input_dataframe)

    def test_missing_columns_raises_error(self, valid_input_dataframe):
        """Test that a DataFrame lacking a required column raises an error."""
        missing_column = REQUIRED_COLUMNS[0]
        # drop() without inplace returns a new frame, so the fixture is untouched.
        df = valid_input_dataframe.drop(columns=[missing_column])

        with pytest.raises(gr.Error) as exc_info:
            validate_dataframe(df)

        assert f"Missing required columns: {missing_column}" in str(exc_info.value)

    def test_empty_dataframe_raises_error(self, valid_input_dataframe):
        """Test that a DataFrame with columns but zero rows raises an error."""
        empty_df = valid_input_dataframe.head(0)

        with pytest.raises(gr.Error) as exc_info:
            validate_dataframe(empty_df)

        assert "CSV file is empty" in str(exc_info.value)

    def test_insufficient_rows_raises_error(self, valid_input_dataframe):
        """Test that a DataFrame one row short of the minimum raises an error."""
        df = valid_input_dataframe.head(MINIMAL_NUMBER_OF_ROWS - 1)

        with pytest.raises(gr.Error) as exc_info:
            validate_dataframe(df)

        assert f"CSV should have at least {MINIMAL_NUMBER_OF_ROWS} rows" in str(
            exc_info.value
        )

    def test_missing_values_raises_error(self, valid_input_dataframe):
        """Test that a required column filled with missing values raises an error."""
        bad_column = REQUIRED_COLUMNS[0]
        df = valid_input_dataframe.copy()
        # Blank out every entry so the reported missing count equals len(df).
        df[bad_column] = [None] * len(df)

        with pytest.raises(gr.Error) as exc_info:
            validate_dataframe(df)

        assert f"contains {len(df)} missing values" in str(exc_info.value)

    def test_csv_with_extra_columns_passes(self, valid_input_dataframe):
        """Test that a DataFrame with extra, non-required columns passes validation."""
        df = valid_input_dataframe.copy()
        # Two distinct extra columns. Previously both assignments targeted
        # "extra_column_1", so the second silently overwrote the first and
        # only a single extra column was ever exercised.
        df["extra_column_1"] = ["extra1"] * len(df)
        df["extra_column_2"] = ["extra2"] * len(df)

        validate_dataframe(df)
class TestValidateCsvFile:
    """Checks for validate_csv_file, the combined validation entry point."""

    def test_valid_csv_passes(self, valid_csv_content):
        """Valid CSV content with every required column is accepted.

        The test passes when the call completes without raising; the return
        value is not inspected.
        """
        validate_csv_file(valid_csv_content)