lyric-buddy / src /profanity_filter.py
nschenone's picture
Changed censor to function instead of class
04d336a
raw
history blame contribute delete
No virus
1.62 kB
import string
import requests
# Remote word list used by censor(): the response body is downloaded and
# each line of it is treated as one banned entry.
BANNED_LIST_URL = "https://raw.githubusercontent.com/snguyenthanh/better_profanity/master/better_profanity/profanity_wordlist.txt"
# Process-wide cache of the banned-word set; filled on the first successful
# download so the list is not fetched again on every censor() call.
_BANNED_WORDS = None


def _load_banned_words():
    """Return the banned words as a set, downloading the list on first use."""
    global _BANNED_WORDS
    if _BANNED_WORDS is None:
        # A timeout keeps a stalled connection from hanging the caller forever.
        response = requests.get(BANNED_LIST_URL, timeout=10)
        # Fail loudly: otherwise an HTTP error page would be parsed as the
        # word list and every text would pass through uncensored.
        response.raise_for_status()
        words = {line.strip().lower() for line in response.text.splitlines()}
        # Blank lines must not count as entries (a punctuation-only token
        # normalises to the empty string).
        words.discard("")
        _BANNED_WORDS = words
    return _BANNED_WORDS


def _mask_word(word, censor_char, keep_first_letter):
    """Mask the non-punctuation characters of *word*, keeping its length."""
    # Indexing into the repeated mask keeps the output the same length as the
    # input word.
    mask = censor_char * len(word)
    keep_next = keep_first_letter
    masked = []
    for index, char in enumerate(word):
        if char in string.punctuation:
            # Punctuation is always left in place.
            masked.append(char)
        elif keep_next:
            # Keep the first *letter*, not merely the first character, so a
            # leading quote or bracket does not use up the allowance.
            masked.append(char)
            keep_next = False
        else:
            masked.append(mask[index])
    return "".join(masked)


def censor(text: str = "", censor_char: str = "*", keep_first_letter: bool = True) -> str:
    """Return *text* with every banned word masked by *censor_char*.

    The text is split into lines on newlines and each line into
    whitespace-separated words. A word is banned when, lower-cased and with
    ASCII punctuation removed, it appears in the list at ``BANNED_LIST_URL``.
    Punctuation inside a censored word is kept in place. Words in a line are
    re-joined with single spaces, so runs of whitespace are normalised.

    Args:
        text: The text to filter. Empty input is returned as ``""`` without
            touching the network.
        censor_char: Replacement for each masked character (a single
            character is expected).
        keep_first_letter: When true, the first non-punctuation character of
            a censored word is left visible for context.

    Returns:
        The filtered text, with the original line breaks preserved.

    Raises:
        requests.RequestException: If the banned word list cannot be
            downloaded (connection error, timeout, or HTTP error status).
    """
    if not text:
        return ""

    banned_words = _load_banned_words()
    # Build the punctuation-stripping table once rather than once per word.
    strip_punctuation = str.maketrans("", "", string.punctuation)

    censored_lines = []
    for line in text.split("\n"):
        words = line.split()
        for position, word in enumerate(words):
            if word.translate(strip_punctuation).lower() in banned_words:
                words[position] = _mask_word(word, censor_char, keep_first_letter)
        censored_lines.append(" ".join(words))
    return "\n".join(censored_lines)