|
|
import pandas as pd |
|
|
import io |
|
|
import gradio as gr |
|
|
from constants import REQUIRED_COLUMNS, MINIMAL_NUMBER_OF_ROWS |
|
|
|
|
|
def validate_csv_can_be_read(file_content: str) -> pd.DataFrame:
    """
    Validate that the CSV file can be read and parsed.

    Parameters
    ----------
    file_content: str
        The content of the uploaded CSV file.

    Returns
    -------
    pd.DataFrame
        The parsed DataFrame if successful.

    Raises
    ------
    gr.Error
        If the content is empty, cannot be parsed as CSV, or triggers an
        encoding error. The underlying exception is attached as the cause
        so the full traceback stays available in logs.
    """
    try:
        return pd.read_csv(io.StringIO(file_content))
    except pd.errors.EmptyDataError as exc:
        raise gr.Error(
            "❌ CSV file is empty or contains no valid data"
        ) from exc
    except pd.errors.ParserError as exc:
        # The message uses <br> because it is rendered as HTML in the UI.
        raise gr.Error(
            "❌ Invalid CSV format<br><br>"
            f"Error: {exc}"
        ) from exc
    except UnicodeDecodeError as exc:
        # NOTE(review): file_content is already a str wrapped in StringIO,
        # so no byte decoding appears to happen here and this handler may
        # be unreachable. Kept for parity with the original -- confirm.
        raise gr.Error(
            "❌ File encoding error<br><br>"
            "Your file appears to have an unsupported encoding.<br>"
            "Please save your CSV file with UTF-8 encoding and try again."
        ) from exc
|
|
|
|
|
def validate_dataframe(df: pd.DataFrame) -> None:
    """
    Validate the DataFrame content and structure.

    The checks run in this order and stop at the first failure:

    1. every entry of ``REQUIRED_COLUMNS`` is present in ``df``;
    2. ``df`` is not empty;
    3. no required column contains missing (null) values;
    4. ``df`` has at least ``MINIMAL_NUMBER_OF_ROWS`` rows.

    On success a confirmation line is printed to stdout.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame to validate.

    Raises
    ------
    gr.Error
        If any of the checks above fails.
    """
    # Iterate REQUIRED_COLUMNS (de-duplicated, in iteration order) instead of
    # joining a raw set difference, so the error message lists the missing
    # columns in a reproducible order rather than an arbitrary set order.
    present_columns = set(df.columns)
    missing_columns = [
        col for col in dict.fromkeys(REQUIRED_COLUMNS)
        if col not in present_columns
    ]
    if missing_columns:
        # map(str, ...) keeps the join from failing on non-string labels.
        raise gr.Error(
            f"❌ Missing required columns: {', '.join(map(str, missing_columns))}"
        )

    if df.empty:
        raise gr.Error(
            "❌ CSV file is empty"
        )

    # Report the first required column that has any null entries.
    for col in REQUIRED_COLUMNS:
        missing_count = df[col].isnull().sum()
        if missing_count > 0:
            raise gr.Error(
                f"❌ Column '{col}' contains {missing_count} missing values"
            )

    if len(df) < MINIMAL_NUMBER_OF_ROWS:
        raise gr.Error(
            f"❌ CSV should have at least {MINIMAL_NUMBER_OF_ROWS} rows"
        )

    print(
        f"✅ CSV validation passed! Found {len(df)} rows "
        f"with columns: {', '.join(map(str, df.columns))}"
    )
|
|
|
|
|
def validate_csv_file(file_content: str) -> None:
    """
    Run the complete validation pipeline on an uploaded CSV file.

    The content is first parsed with ``validate_csv_can_be_read``; the
    resulting DataFrame is then passed to ``validate_dataframe``.

    Parameters
    ----------
    file_content: str
        The content of the uploaded CSV file.

    Raises
    ------
    gr.Error: If validation fails (propagated from either step)
    """
    # Parsing and content validation are chained: the second step only runs
    # when the first one returned a DataFrame without raising.
    validate_dataframe(validate_csv_can_be_read(file_content))
|
|
|