Spaces:
Configuration error
Configuration error
"""
helper.py

Utility functions for text processing and data cleaning.
"""
import re | |
def clean_text_whitespace(text: str) -> str:
    """
    Clean up text by normalizing whitespace and newlines.

    Runs of horizontal whitespace (spaces, tabs, and any other whitespace
    character except the newline) become a single space, runs of blank or
    whitespace-only lines become a single newline, and leading/trailing
    whitespace is removed from the result. Windows line endings (CRLF) are
    treated as plain newlines.

    Args:
        text (str): Input text string to clean

    Returns:
        str: Cleaned text with normalized whitespace and newlines. Falsy
            values and non-string inputs are returned unchanged.
    """
    if not text or not isinstance(text, str):
        return text

    # Convert CRLF to LF first; otherwise the '\r' is matched as horizontal
    # whitespace below and leaves a stray space in front of every newline.
    text = text.replace('\r\n', '\n')

    # Collapse each run of non-newline whitespace into a single space.
    text = re.sub(r'[^\S\n]+', ' ', text)

    # Collapse consecutive newlines into one. After the previous step a
    # whitespace-only line is exactly one space, so ' ?' lets such lines
    # count as blank instead of blocking the collapse.
    text = re.sub(r'\n(?: ?\n)+', '\n', text)

    # Strip leading and trailing whitespace from the whole text.
    return text.strip()