# shrijayan
# Remove file handler from logging setup for console-only logging
# 1a2035c
import logging
import re
from io import BytesIO
def setup_logging():
    """Configure console-only logging and return this module's logger.

    The root logger is configured through ``logging.basicConfig`` at INFO
    level with a single ``StreamHandler`` (no file handler). Note that
    ``basicConfig`` leaves an already-configured root logger untouched.

    Returns:
        The ``logging.Logger`` registered under this module's ``__name__``.
    """
    console_handler = logging.StreamHandler()  # Only console logging
    settings = {
        "level": logging.INFO,
        "format": '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        "handlers": [console_handler],
    }
    logging.basicConfig(**settings)

    module_logger = logging.getLogger(__name__)
    return module_logger
# Conversion factor applied by meters_to_miles (miles per one meter).
_MILES_PER_METER = 0.000621371


def meters_to_miles(meters):
    """Return the distance ``meters`` expressed in miles.

    The input is multiplied by a fixed miles-per-meter factor, so any value
    supporting multiplication by a float is accepted.
    """
    miles = meters * _MILES_PER_METER
    return miles
def validate_excel_file(file_stream: BytesIO) -> tuple[bool, str]:
    """Check whether a stream starts with an Excel file signature.

    Only the first four bytes are inspected: the ZIP local-file header
    (used by .xlsx) or the Compound File header (used by .xls). The stream
    is rewound to position 0 afterwards so it can be parsed by the caller.

    Args:
        file_stream: Readable, seekable binary stream holding the upload.

    Returns:
        ``(True, message)`` when a known signature is found, otherwise
        ``(False, message)`` describing the rejection. Any exception raised
        while inspecting the stream is reported as a ``(False, ...)`` result
        instead of propagating.
    """
    zip_signature = b'\x50\x4B\x03\x04'  # ZIP archive (xlsx)
    compound_signature = b'\xD0\xCF\x11\xE0'  # Compound File (xls)

    try:
        magic = file_stream.read(4)
        # Rewind so later readers see the file from its beginning.
        file_stream.seek(0)

        if magic == zip_signature or magic == compound_signature:
            return True, "Valid Excel file"
        return False, "Invalid file type: Not an Excel file"
    except Exception as exc:
        return False, f"Validation error: {str(exc)}"
# Upper-case street-type abbreviations and the full words they expand to.
_STREET_ABBREVIATIONS = {
    "ST": "STREET",
    "AVE": "AVENUE",
    "RD": "ROAD",
    "BLVD": "BOULEVARD",
    "DR": "DRIVE",
}

# Matches an abbreviation only when it is a whole word, plus an optional
# trailing period. The word boundaries keep words that merely end in an
# abbreviation ("WEST", "HARVARD", "1ST", "3RD") from being rewritten.
_ABBREVIATION_PATTERN = re.compile(
    r"\b(" + "|".join(_STREET_ABBREVIATIONS) + r")\b\.?"
)


def clean_address(address):
    """Clean and standardize an address string.

    Whitespace runs are collapsed to single spaces (leading and trailing
    whitespace is dropped), then upper-case street-type abbreviations
    (ST, AVE, RD, BLVD, DR, with or without a trailing period) are expanded
    to their full words. Matching is case-sensitive, so mixed-case input
    such as "St." is left untouched.

    Args:
        address: The raw address; any non-string value is treated as missing.

    Returns:
        The cleaned address, or an empty string if ``address`` is not a str.
    """
    if not isinstance(address, str):
        return ""

    # Remove extra whitespace
    cleaned = " ".join(address.split())

    # Expand whole-word abbreviations, including ones at the very end of the
    # string or directly before punctuation.
    return _ABBREVIATION_PATTERN.sub(
        lambda match: _STREET_ABBREVIATIONS[match.group(1)], cleaned
    )
def handle_empty_values(df, required_columns):
    """Return a copy of ``df`` with missing values in required columns blanked.

    For each name in ``required_columns`` that exists in the frame, missing
    values are replaced with an empty string. Names that are not present are
    skipped, other columns are left as they are, and the input frame itself
    is not modified.

    Args:
        df: The source DataFrame.
        required_columns: Iterable of column names to fill.

    Returns:
        A new DataFrame with the fills applied.
    """
    # Work on a copy so the caller's frame stays untouched.
    result = df.copy()

    for name in required_columns:
        if name not in result.columns:
            continue
        filled = result[name].fillna("")
        result[name] = filled

    return result