lyric-buddy / src /profanity_filter.py
nschenone's picture
Added dedicated generation function and profanity filter
d9b077a
raw
history blame
1.82 kB
import string
import requests
class ProfanityFilter:
    """Mask banned words in text using a remotely hosted word list.

    The word list is downloaded once, when the filter is constructed, and
    stored in ``self.banned_list`` (one lower-cased entry per list item).
    """

    # Source of the banned-word list (one entry per line).
    BANNED_LIST_URL = "https://raw.githubusercontent.com/snguyenthanh/better_profanity/master/better_profanity/profanity_wordlist.txt"

    # Seconds to wait for the word-list download before giving up, so that
    # constructing the filter cannot hang forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    # Translation table that deletes every ASCII punctuation character;
    # built once here instead of once per word.
    _STRIP_PUNCTUATION = str.maketrans("", "", string.punctuation)

    def __init__(self):
        """Download the banned-word list.

        Raises:
            requests.RequestException: if the download fails, times out, or
                the server answers with an HTTP error status.
        """
        response = requests.get(self.BANNED_LIST_URL, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly rather than treating an error page as the word list.
        response.raise_for_status()
        # splitlines() copes with "\r\n" as well as "\n"; blank lines are
        # dropped, and entries are lower-cased because censor() compares
        # against lower-cased words.
        self.banned_list = [
            entry.strip().lower()
            for entry in response.text.splitlines()
            if entry.strip()
        ]

    @staticmethod
    def _mask_word(word, censor_char, keep_first_letter):
        """Return ``word`` with its non-punctuation characters masked."""
        # Repeating censor_char and indexing by position keeps the output
        # for multi-character censor strings the same as before.
        mask = censor_char * len(word)
        keep_next = keep_first_letter
        pieces = []
        for index, char in enumerate(word):
            if char in string.punctuation:
                # Punctuation is always passed through untouched.
                pieces.append(char)
            elif keep_next:
                # First non-punctuation character: keep it for context,
                # even when the word opens with a quote or bracket.
                pieces.append(char)
                keep_next = False
            else:
                # Slicing (not indexing) yields "" for an empty censor_char
                # instead of raising IndexError.
                pieces.append(mask[index:index + 1])
        return "".join(pieces)

    def censor(self, text="", censor_char="*", keep_first_letter=True):
        """Return ``text`` with every banned word masked.

        A word is a whitespace-separated token. It is considered banned when,
        after removing ASCII punctuation and lower-casing, it appears in
        ``self.banned_list``. Punctuation inside a banned word is kept.

        Note: lines are split on ``"\\n"`` and re-joined with ``"\\n"``;
        within each line, words are re-joined with single spaces, so runs of
        whitespace collapse and leading/trailing whitespace is dropped.

        Args:
            text: The text to censor.
            censor_char: Replacement for each masked character.
            keep_first_letter: If true, the first non-punctuation character
                of a banned word is left visible.

        Returns:
            The censored text.
        """
        # Build the lookup set once per call: O(1) membership tests while
        # still honouring any changes callers made to ``banned_list``.
        banned = set(self.banned_list)

        censored_lines = []
        for line in text.split("\n"):
            words = line.split()
            for index, word in enumerate(words):
                bare_word = word.translate(self._STRIP_PUNCTUATION).lower()
                # Skip tokens made only of punctuation (bare_word == "").
                if bare_word and bare_word in banned:
                    words[index] = self._mask_word(
                        word, censor_char, keep_first_letter
                    )
            censored_lines.append(" ".join(words))

        return "\n".join(censored_lines)