resumate / functions /helper.py
gperdrizet's picture
Added helper function to clean whitespaces and newlines in text
f80cf2d verified
raw
history blame
822 Bytes
"""
helper.py
Utility functions for text processing and data cleaning.
"""
import re
def clean_text_whitespace(text: str) -> str:
    """
    Clean up text by normalizing whitespace and newlines.

    The following normalizations are applied, in order:

    1. Windows (``\\r\\n``) and bare carriage-return (``\\r``) line endings
       are converted to ``\\n``.
    2. Every run of non-newline whitespace (spaces, tabs, etc.) is
       collapsed to a single space.
    3. Every line break, together with any spaces around it and any blank
       or whitespace-only lines that follow, is collapsed to a single
       ``\\n``. Interior lines therefore carry no leading or trailing
       spaces.
    4. Leading and trailing whitespace is stripped from the whole text.

    Args:
        text (str): Input text string to clean

    Returns:
        str: Cleaned text with normalized whitespace and newlines. Falsy
            values (``None``, ``""``) and non-string inputs are returned
            unchanged.
    """
    # Pass through anything that cannot be cleaned rather than raising.
    if not text or not isinstance(text, str):
        return text

    # Unify line endings first. Otherwise '\r' would be treated as ordinary
    # horizontal whitespace below, turning "\r\n\r\n" into " \n \n" and
    # defeating the newline collapsing step.
    text = re.sub(r'\r\n?', '\n', text)

    # Collapse runs of whitespace other than newlines into a single space.
    # After this step the only whitespace characters left are ' ' and '\n'.
    text = re.sub(r'[^\S\n]+', ' ', text)

    # Collapse each line break plus its surrounding spaces and any
    # blank/whitespace-only lines into exactly one newline.
    text = re.sub(r' ?\n[ \n]*', '\n', text)

    # Strip leading and trailing whitespace from the whole text.
    return text.strip()