"""Censor profanity in text using the better_profanity word list."""

import string

import requests

BANNED_LIST_URL = "https://raw.githubusercontent.com/snguyenthanh/better_profanity/master/better_profanity/profanity_wordlist.txt"


def censor(text="", censor_char="*", keep_first_letter=True):
    """Censor profane words in ``text``, replacing their letters with ``censor_char``."""

    # Fetch the banned word list and use a set for fast membership checks
    banned_list = set(requests.get(BANNED_LIST_URL).text.splitlines())

    # Split sentences by newline
    sentence_list = text.split("\n")
    for s, sentence in enumerate(sentence_list):

        # Split words in sentence by space
        word_list = sentence.split()
        for w, word in enumerate(word_list):

            # Process word to match banned list
            processed_word = word.translate(
                str.maketrans("", "", string.punctuation)
            ).lower()

            # Replace if word is profane
            if processed_word in banned_list:
                censored_word = censor_char * len(word)

                # Keep first letter of word for context if desired
                if keep_first_letter:
                    censored_word = word[0] + censored_word[1:]

                # Reinstate the original punctuation in the censored word
                censored_word_punc = ""
                for c, char in enumerate(word):
                    if char in string.punctuation:
                        censored_word_punc += word[c]
                    else:
                        censored_word_punc += censored_word[c]

                # Update word list
                word_list[w] = censored_word_punc

        # Rebuild the sentence from its (possibly censored) words
        sentence_list[s] = " ".join(word_list)

    # Join everything back together
    return "\n".join(sentence_list)
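

# Minimal usage sketch (assumes network access to fetch the word list; the
# sample text below is hypothetical, and which words get masked depends on
# the contents of the fetched list):
if __name__ == "__main__":
    sample_text = "This is fine.\nWell, damn."
    print(censor(sample_text, censor_char="*", keep_first_letter=True))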