File size: 1,818 Bytes
d9b077a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import string

import requests


class ProfanityFilter:
    """Censor profane words in text using a downloaded banned-word list.

    The word list is fetched over HTTP when the filter is constructed and
    stored in ``self.banned_list`` (a list of strings, one per entry).
    """

    # Location of the banned-word list; the response is read one entry per line.
    BANNED_LIST_URL = "https://raw.githubusercontent.com/snguyenthanh/better_profanity/master/better_profanity/profanity_wordlist.txt"

    # Seconds to wait for the download. Without an explicit timeout the
    # request could block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Download the banned-word list into ``self.banned_list``.

        Raises:
            requests.RequestException: If the download fails, times out, or
                the server answers with an HTTP error status.
        """
        response = requests.get(self.BANNED_LIST_URL, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly instead of treating an HTTP error page as the word list.
        response.raise_for_status()

        # splitlines() + strip() copes with "\r\n" line endings, and blank
        # lines are dropped so the empty string is never a banned "word".
        self.banned_list = [
            entry.strip() for entry in response.text.splitlines() if entry.strip()
        ]

    def censor(self, text="", censor_char="*", keep_first_letter=True):
        """Return *text* with every banned word masked by *censor_char*.

        A word is matched after removing ASCII punctuation and lowercasing
        it; punctuation in the original word is left in place in the output.
        Lines are split on ``"\\n"`` and words on whitespace, and the words of
        each line are re-joined with single spaces, so runs of whitespace
        inside a line are collapsed.

        Args:
            text: The text to censor.
            censor_char: Replacement for each censored character. Intended
                to be a single character and must be non-empty.
            keep_first_letter: If true, the first non-punctuation character
                of a censored word is left readable for context.

        Returns:
            The censored text.
        """
        # Built per call so changes made to ``self.banned_list`` after
        # construction are honoured, while lookups in the loop stay O(1).
        banned_words = {entry for entry in self.banned_list if entry}
        strip_punctuation = str.maketrans("", "", string.punctuation)

        censored_lines = []
        for line in text.split("\n"):
            words = line.split()
            for position, word in enumerate(words):
                normalized = word.translate(strip_punctuation).lower()
                if normalized in banned_words:
                    words[position] = self._mask_word(
                        word, censor_char, keep_first_letter
                    )
            # Joined for every line, including lines without any words.
            censored_lines.append(" ".join(words))

        return "\n".join(censored_lines)

    @staticmethod
    def _mask_word(word, censor_char, keep_first_letter):
        """Mask the non-punctuation characters of *word* with *censor_char*."""
        mask = censor_char * len(word)
        # When no letter is to be kept, behave as if it was already kept.
        first_letter_kept = not keep_first_letter

        masked = []
        for index, char in enumerate(word):
            if char in string.punctuation:
                # Punctuation is always preserved as written.
                masked.append(char)
            elif not first_letter_kept:
                # Keep the first real character, skipping leading punctuation
                # such as an opening quote or bracket.
                masked.append(char)
                first_letter_kept = True
            else:
                masked.append(mask[index])
        return "".join(masked)