Spaces:
Build error
Build error
| import os | |
| import re | |
| import math | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import matplotlib.ticker as mtick | |
| import seaborn as sns | |
| import nltk | |
| import evaluate | |
# Load the "meteor" metric once, at import time, and bind it to a module-level name.
# NOTE(review): evaluate.load may need to fetch metric resources on first use —
# confirm behaviour for offline environments.
meteor = evaluate.load("meteor")

# Trace which file this module was loaded from.
print(f"loading: {__file__}")

# final version
# Matches any run of five or more consecutive whitespace characters.
pattern_excessive_whitespaces = re.compile(r"\s{5,}")
# Matches a chunk of at least five characters (group 1; "." also matches
# newlines because of re.DOTALL) that is followed, with optional whitespace in
# between, by one or more verbatim repeats of itself (back-reference \1).
pattern_text_repetitions = re.compile(r"(.{5}.*)\s*((\1)\s*)+", re.M | re.DOTALL)
def del_excessive_whitespaces(text, debug=False):
    """Remove every match of ``pattern_excessive_whitespaces`` from *text*.

    Non-string input is passed through untouched with a count of 0.

    Args:
        text: Value to clean; only ``str`` instances are processed.
        debug: When true, print progress messages to stdout.

    Returns:
        A ``(text, count)`` tuple: the cleaned text (or the original object
        when it is not a string) and the number of characters removed.
    """
    if not isinstance(text, str):
        return text, 0

    if debug:
        print("----detect excessive whitespaces----")

    cleaned = pattern_excessive_whitespaces.sub("", text)
    # The length difference is exactly the number of characters stripped.
    removed = len(text) - len(cleaned)

    if debug and removed:
        print(f"removed excessive whitespaces: {removed}")
    return cleaned, removed
# final version for repetition detection
def detect_text_repetitions(text, debug=False):
    """Count the characters of *text* covered by repeated-text matches.

    Each match of ``pattern_text_repetitions`` contributes the full length of
    its span (the first occurrence, its repeats and the whitespace between
    them) to the total.

    Args:
        text: Value to scan; anything that is not a ``str`` yields 0.
        debug: When true, print every match and its capture groups.

    Returns:
        The summed length of all match spans.
    """
    if not isinstance(text, str):
        return 0

    if debug:
        print("----detect text repetitions----")

    total = 0
    for found in pattern_text_repetitions.finditer(text):
        if debug:
            print(found)
            # Capture groups are numbered from 1; group 0 is the whole match.
            for idx in range(1, len(found.groups()) + 1):
                print(
                    "Group {groupNum} found at {start}-{end}: `{group}`".format(
                        groupNum=idx,
                        start=found.start(idx),
                        end=found.end(idx),
                        group=found.group(idx),
                    )
                )
        total += found.end() - found.start()
    return total
def detect_repetitions(text, debug=False):
    """Measure excessive whitespace and repeated text in *text*.

    The excessive whitespace is removed first; repetition detection then runs
    on the cleaned text.

    Args:
        text: Value to analyse, forwarded to the two detection helpers.
        debug: When true, the helpers print their diagnostics and the final
            result tuple is printed as well.

    Returns:
        A tuple ``(whitespace_count, repetition_count, total)`` where
        ``total`` is the sum of the first two entries.
    """
    cleaned, whitespace_count = del_excessive_whitespaces(text, debug=debug)
    repetition_count = detect_text_repetitions(cleaned, debug=debug)

    summary = (
        whitespace_count,
        repetition_count,
        whitespace_count + repetition_count,
    )
    if debug:
        print(summary)
    return summary
def detect_scores(text, debug=False):
    """Return the repetition counts for *text* as a pandas Series.

    Args:
        text: Value to analyse, forwarded to ``detect_repetitions``.
        debug: Forwarded to ``detect_repetitions``.

    Returns:
        A three-element ``pd.Series`` holding, in order, the excessive
        whitespace count, the text repetition count and their total.
    """
    counts = detect_repetitions(text, debug=debug)
    return pd.Series(list(counts))