Spaces:
Configuration error
Configuration error
File size: 822 Bytes
f80cf2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
"""
helper.py
Utility functions for text processing and data cleaning.
"""
import re
def clean_text_whitespace(text: str) -> str:
    """
    Clean up text by normalizing whitespace and newlines.

    Line endings are unified to ``\\n``, runs of horizontal whitespace are
    squeezed to a single space, blank lines (including lines that contain
    only whitespace) are removed, and the result is stripped at both ends.

    Args:
        text (str): Input text string to clean

    Returns:
        str: Cleaned text with normalized whitespace and newlines. Empty
        or non-string input is returned unchanged.
    """
    if not text or not isinstance(text, str):
        return text

    # Unify Windows (CRLF) and old-Mac (CR) line endings first; otherwise the
    # carriage return is treated as ordinary whitespace below and turns into a
    # stray space that also prevents blank lines from being collapsed.
    text = re.sub(r'\r\n?', '\n', text)

    # Replace runs of whitespace other than newlines (spaces, tabs, ...) with
    # a single space.
    text = re.sub(r'[^\S\n]+', ' ', text)

    # Collapse consecutive newlines into one. After the previous step a
    # whitespace-only line is exactly one space, so allowing an optional space
    # between newlines removes those lines as well.
    text = re.sub(r'\n(?: ?\n)+', '\n', text)

    # Strip leading and trailing whitespace from the whole text.
    return text.strip()
|