|  | import pytest | 
					
						
						|  | import pandas as pd | 
					
						
						|  | import gradio as gr | 
					
						
						|  | from validation import validate_csv_file, validate_csv_can_be_read, validate_dataframe | 
					
						
						|  | from constants import REQUIRED_COLUMNS, ASSAY_LIST | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						class TestValidateCsvCanBeRead:
						    """Tests for ``validate_csv_can_be_read``: readable and unreadable CSV text."""

						    def test_valid_csv_can_be_read(self, valid_csv_content):
						        """The ``valid_csv_content`` fixture is returned as a DataFrame."""
						        parsed = validate_csv_can_be_read(valid_csv_content)
						        assert isinstance(parsed, pd.DataFrame)

						    def test_empty_csv_raises_error(self):
						        """An empty string is rejected with a ``gr.Error``."""
						        with pytest.raises(gr.Error) as raised:
						            validate_csv_can_be_read("")

						        message = str(raised.value)
						        assert "empty or contains no valid data" in message

						    def test_invalid_csv_format_raises_error(self):
						        """A quoted field that is never closed is reported as invalid CSV."""
						        # The opening quote on the second line has no matching closing quote.
						        broken_text = 'column1,column2\nvalue1,"unclosed quote\nvalue4,value5'

						        with pytest.raises(gr.Error) as raised:
						            validate_csv_can_be_read(broken_text)

						        message = str(raised.value)
						        assert "Invalid CSV format" in message

						    def test_csv_with_quoted_fields_can_be_read(self):
						        """Rows whose sequence fields are double-quoted are still readable."""
						        header_line = "antibody_name,vh_protein_sequence,vl_protein_sequence,SEC %Monomer,HIC,PR_CHO,AC-SINS_pH6.0,AC-SINS_pH7.4,Tm\n"
						        quoted_row = 'test_antibody,"EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMHWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDYGDGYYFDYWGQGTLVTVSS","DIQMTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIYAASTLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTPFTFGQGTKVEIK",95.2,0.85,0.92,0.78,0.81,72.5'
						        # Header followed by the same data row ten times, newline-separated
						        # (no trailing newline after the last row).
						        csv_text = header_line + "\n".join(quoted_row for _ in range(10))

						        parsed = validate_csv_can_be_read(csv_text)
						        assert isinstance(parsed, pd.DataFrame)
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						class TestValidateDataframe:
						    """Tests for ``validate_dataframe``.

						    Every test starts from the ``valid_input_dataframe`` fixture (defined
						    outside this file's visible portion) and, except for the passing cases,
						    breaks it in exactly one way before asserting on the ``gr.Error`` message.
						    """

						    def test_valid_dataframe_passes(self, valid_input_dataframe):
						        """The unmodified fixture frame validates without raising."""
						        validate_dataframe(valid_input_dataframe)

						    def test_missing_columns_raises_error(self, valid_input_dataframe):
						        """Removing a required column is reported with that column's name."""
						        missing_column = REQUIRED_COLUMNS[0]
						        # drop() returns a new frame, so the fixture itself is not mutated.
						        df = valid_input_dataframe.drop(columns=[missing_column])

						        with pytest.raises(gr.Error) as exc_info:
						            validate_dataframe(df)

						        assert f"Missing required columns: {missing_column}" in str(exc_info.value)

						    def test_at_least_one_assay_column_raises_error(self, valid_input_dataframe):
						        """A frame with every ``ASSAY_LIST`` column removed is rejected."""
						        # errors="ignore": assay names absent from the fixture must not
						        # raise KeyError while dropping.
						        df = valid_input_dataframe.drop(columns=ASSAY_LIST, errors="ignore")

						        with pytest.raises(gr.Error) as exc_info:
						            validate_dataframe(df)

						        assert "CSV should include at least one of the following assay columns" in str(
						            exc_info.value
						        )

						    def test_empty_dataframe_raises_error(self, valid_input_dataframe):
						        """A frame with the right columns but zero rows is rejected."""
						        empty_df = valid_input_dataframe.head(0)

						        with pytest.raises(gr.Error) as exc_info:
						            validate_dataframe(empty_df)

						        assert "CSV file is empty" in str(exc_info.value)

						    def test_missing_antibodies_raises_error(self, valid_input_dataframe):
						        """A frame truncated to its first 50 rows is reported as incomplete."""
						        # NOTE(review): presumably the fixture holds more than 50 antibodies,
						        # otherwise nothing is actually removed here - confirm against the fixture.
						        df = valid_input_dataframe.head(50)

						        with pytest.raises(gr.Error) as exc_info:
						            validate_dataframe(df)

						        assert "Missing predictions for" in str(exc_info.value)

						    def test_missing_values_raises_error(self, valid_input_dataframe):
						        """A required column filled with ``None`` reports the missing count."""
						        bad_column = REQUIRED_COLUMNS[0]
						        df = valid_input_dataframe.copy()
						        df[bad_column] = [None] * len(df)

						        with pytest.raises(gr.Error) as exc_info:
						            validate_dataframe(df)

						        assert f"contains {len(df)} missing values" in str(exc_info.value)

						    def test_csv_with_extra_columns_passes(self, valid_input_dataframe):
						        """Columns beyond the expected ones do not make validation fail."""
						        df = valid_input_dataframe.copy()
						        # Two distinct names are required: assigning to the same name twice
						        # only overwrites the first column and leaves a single extra column.
						        df["extra_column_1"] = ["extra1"] * len(df)
						        df["extra_column_2"] = ["extra2"] * len(df)

						        validate_dataframe(df)

						    def test_duplicate_antibody_names_raises_error(self, valid_input_dataframe):
						        """Repeating one row is reported as exactly one duplicate."""
						        # concat() builds a new frame; the first row now appears twice.
						        df = pd.concat(
						            [valid_input_dataframe, valid_input_dataframe.head(1)],
						            ignore_index=True,
						        )

						        with pytest.raises(gr.Error) as exc_info:
						            validate_dataframe(df)

						        assert "CSV should have only one row per antibody. Found 1 duplicates." in str(
						            exc_info.value
						        )

						    def test_unrecognized_antibody_names_raises_error(self, valid_input_dataframe):
						        """Overwriting one antibody name with an arbitrary one is reported by name."""
						        bogus_name = "unrecognized_antibody"
						        # Copy first: the .loc assignment below mutates the frame in place.
						        df = valid_input_dataframe.copy()
						        df.loc[0, "antibody_name"] = bogus_name

						        with pytest.raises(gr.Error) as exc_info:
						            validate_dataframe(df)

						        assert f"Found unrecognized antibody names: {bogus_name}" in str(
						            exc_info.value
						        )
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						class TestValidateCsvFile:
						    """Tests for ``validate_csv_file``."""

						    def test_valid_csv_passes(self, valid_csv_content):
						        """Passes when ``validate_csv_file`` raises nothing for the fixture content."""
						        validate_csv_file(valid_csv_content)
					
						
						|  |  |