File size: 2,725 Bytes
eb50e2e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import pandas as pd
import io
import gradio as gr
from constants import REQUIRED_COLUMNS, MINIMAL_NUMBER_OF_ROWS
def validate_csv_can_be_read(file_content: str) -> pd.DataFrame:
    """
    Validate that the CSV file can be read and parsed.

    Parameters
    ----------
    file_content: str
        The content of the uploaded CSV file.

    Returns
    -------
    pd.DataFrame
        The parsed DataFrame if successful.

    Raises
    ------
    gr.Error
        If the content is empty, is not parseable as CSV, or triggers a
        decoding error. The original exception is attached as the cause
        so the full traceback stays available for debugging.
    """
    try:
        # Wrap the text in a file-like object so pandas can parse it
        # without touching the filesystem.
        return pd.read_csv(io.StringIO(file_content))
    except pd.errors.EmptyDataError as e:
        raise gr.Error(
            "❌ CSV file is empty or contains no valid data"
        ) from e
    except pd.errors.ParserError as e:
        raise gr.Error(
            "❌ Invalid CSV format<br><br>"
            f"Error: {e}"
        ) from e
    except UnicodeDecodeError as e:
        # NOTE(review): the input is already a decoded str, so this branch
        # is presumably a defensive guard -- confirm whether it can fire.
        raise gr.Error(
            "❌ File encoding error<br><br>"
            "Your file appears to have an unsupported encoding.<br>"
            "Please save your CSV file with UTF-8 encoding and try again."
        ) from e
def validate_dataframe(df: pd.DataFrame) -> None:
    """
    Validate the DataFrame content and structure.

    The checks run in this order and stop at the first failure:
    required columns present, DataFrame not empty, no missing values in
    required columns, minimum number of rows reached. On success a
    summary line is printed to stdout.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame to validate.

    Raises
    ------
    gr.Error
        If any of the validation checks fails.
    """
    # Required columns should be present. Walk REQUIRED_COLUMNS (rather
    # than using a set difference) so the error message lists the missing
    # names in a stable, predictable order.
    present_columns = set(df.columns)
    missing_columns = [
        col for col in REQUIRED_COLUMNS if col not in present_columns
    ]
    if missing_columns:
        raise gr.Error(
            f"❌ Missing required columns: {', '.join(missing_columns)}"
        )

    # Data should not be empty
    if df.empty:
        raise gr.Error(
            "❌ CSV file is empty"
        )

    # Check for missing values in required columns
    for col in REQUIRED_COLUMNS:
        missing_count = df[col].isnull().sum()
        if missing_count > 0:
            raise gr.Error(
                f"❌ Column '{col}' contains {missing_count} missing values"
            )

    # Check for reasonable number of rows
    if len(df) < MINIMAL_NUMBER_OF_ROWS:
        raise gr.Error(
            f"❌ CSV should have at least {MINIMAL_NUMBER_OF_ROWS} rows"
        )

    # Column labels are not guaranteed to be strings, so convert them
    # before joining to avoid a TypeError in the success message.
    column_names = ", ".join(str(col) for col in df.columns)
    print(
        f"✅ CSV validation passed! "
        f"Found {len(df)} rows with columns: {column_names}"
    )
def validate_csv_file(file_content: str) -> None:
    """
    Parse the uploaded CSV text and validate the resulting DataFrame.

    Parameters
    ----------
    file_content: str
        The content of the uploaded CSV file.

    Raises
    ------
    gr.Error
        If the content cannot be parsed or the parsed data fails
        validation.
    """
    # Parsing happens first; its result feeds straight into the
    # structural/content checks.
    validate_dataframe(validate_csv_can_be_read(file_content))
|