Commit
·
4e0f321
1
Parent(s):
e8ff41c
Attempt to set up application
Browse files- README.md +5 -6
- app.py +71 -0
- requirements.txt +9 -0
- src/__init__.py +0 -0
- src/__pycache__/__init__.cpython-39.pyc +0 -0
- src/__pycache__/config.cpython-39.pyc +0 -0
- src/config.py +5 -0
- src/gibberish_detection/GibberishDetector.py +25 -0
- src/gibberish_detection/__init__.py +0 -0
- src/gibberish_detection/__pycache__/GibberishDetector.cpython-39.pyc +0 -0
- src/gibberish_detection/__pycache__/__init__.cpython-39.pyc +0 -0
- src/gibberish_detection/approved_comments.model +3 -0
- src/rule_based_system/BadWordRule.py +47 -0
- src/rule_based_system/HTMLRule.py +27 -0
- src/rule_based_system/LanguageRule.py +19 -0
- src/rule_based_system/PersonalDetailsRule.py +34 -0
- src/rule_based_system/Rule.py +25 -0
- src/rule_based_system/RuleBasedSystem.py +17 -0
- src/rule_based_system/TextLengthRule.py +29 -0
- src/rule_based_system/UrlRule.py +31 -0
- src/rule_based_system/Verdict.py +5 -0
- src/rule_based_system/__init__.py +0 -0
- src/rule_based_system/__pycache__/BadWordRule.cpython-39.pyc +0 -0
- src/rule_based_system/__pycache__/HTMLRule.cpython-39.pyc +0 -0
- src/rule_based_system/__pycache__/PersonalDetailsRule.cpython-39.pyc +0 -0
- src/rule_based_system/__pycache__/Rule.cpython-39.pyc +0 -0
- src/rule_based_system/__pycache__/RuleBasedSystem.cpython-39.pyc +0 -0
- src/rule_based_system/__pycache__/TextLengthRule.cpython-39.pyc +0 -0
- src/rule_based_system/__pycache__/UrlRule.cpython-39.pyc +0 -0
- src/rule_based_system/__pycache__/Verdict.cpython-39.pyc +0 -0
- src/rule_based_system/__pycache__/__init__.cpython-39.pyc +0 -0
- src/rule_based_system/word_lists/ambiguous_bad_words.csv +1467 -0
- src/rule_based_system/word_lists/strictly_bad_words.csv +100 -0
- src/start_up/__init__.py +0 -0
- src/start_up/__pycache__/__init__.cpython-39.pyc +0 -0
- src/start_up/__pycache__/start_up_bad_words_rule.cpython-39.pyc +0 -0
- src/start_up/__pycache__/start_up_gibberish.cpython-39.pyc +0 -0
- src/start_up/__pycache__/start_up_rbs.cpython-39.pyc +0 -0
- src/start_up/start_up_bad_words_rule.py +12 -0
- src/start_up/start_up_gibberish.py +9 -0
- src/start_up/start_up_rbs.py +50 -0
README.md
CHANGED
@@ -1,13 +1,12 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 3.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
license: cc-by-nc-nd-4.0
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Comment Classifier
|
3 |
+
emoji: ☑️
|
4 |
+
colorFrom: pink
|
5 |
+
colorTo: blue
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.28.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from huggingface_hub import from_pretrained_fastai
|
3 |
+
|
4 |
+
from src.start_up.start_up_rbs import create_weak_rbs, create_strong_rbs
|
5 |
+
from src.start_up.start_up_gibberish import create_gibberish_detector
|
6 |
+
|
7 |
+
# Module start-up: everything below runs once, when app.py is loaded.

# Load the classifier through huggingface_hub's fastai helper
learner = from_pretrained_fastai("felixolivier/comment-classifier")  # TODO rename this

# Label shown for each binary outcome (0/False and 1/True hash to the same keys)
verdict_map = {0: 'Inappropriate', 1: 'Allowed'}

# Build the weak and the strong rule based system (RBS)
weak_rbs, strong_rbs = create_weak_rbs(), create_strong_rbs()

# Build the gibberish detector
gibberish_detector = create_gibberish_detector()
|
21 |
+
|
22 |
+
|
23 |
+
# Define function to judge comment
|
24 |
+
def _build_section(allowed, reasons, highlights):
    """Assemble the result dict of one judge; key order matches the original response."""
    return {
        'allowed': allowed,
        'verdict': verdict_map[allowed],  # bool keys resolve to the 0/1 entries
        'highlights': highlights,
        'reasons': reasons,
    }


def predict(input_text):
    """
    Judge a comment with the model, the gibberish detector and both rule based systems.

    :param input_text: the comment text to judge
    :return: dict with the keys 'model', 'gibberish', 'weak_rbs' and 'strong_rbs'; each value
        is a dict holding 'allowed', 'verdict' (label taken from verdict_map), 'highlights'
        and 'reasons'
    """
    # Get model predictions; the first element is interpreted as the 0/1 class
    predictions = learner.predict(input_text)

    # Pass through weak RBS
    allows_weak, reasons_weak, highlights_weak = weak_rbs.allows(input_text)

    # Pass through strong RBS
    allows_strong, reasons_strong, highlights_strong = strong_rbs.allows(input_text)

    # Pass through gibberish detector
    is_gibberish_free = bool(gibberish_detector.predict(input_text))

    model_allows = bool(int(predictions[0]))

    # A reason is only reported by the judges that reject the comment
    model_reasons = [] if model_allows else ['Machine learning model does not approve']
    gibberish_reasons = [] if is_gibberish_free else ['Comment text contains gibberish']

    # Construct json response
    return {
        'model': _build_section(model_allows, model_reasons, []),
        'gibberish': _build_section(is_gibberish_free, gibberish_reasons, []),
        'weak_rbs': _build_section(allows_weak, reasons_weak, highlights_weak),
        'strong_rbs': _build_section(allows_strong, reasons_strong, highlights_strong),
    }
|
67 |
+
|
68 |
+
|
69 |
+
# Wrap predict in a text-in / text-out Gradio interface and launch it without a share link
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
)
iface.launch(share=False)
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
torch
|
3 |
+
fastai
|
4 |
+
fastcore
|
5 |
+
toml
|
6 |
+
gibberish-detector
|
7 |
+
pandas
|
8 |
+
urlextract
|
9 |
+
bs4
|
src/__init__.py
ADDED
File without changes
|
src/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (144 Bytes). View file
|
|
src/__pycache__/config.cpython-39.pyc
ADDED
Binary file (387 Bytes). View file
|
|
src/config.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Paths of the gibberish model and of the two bad word lists
config = dict(
    gibberish_model='src/gibberish_detection/approved_comments.model',
    bad_words_strict='src/rule_based_system/word_lists/strictly_bad_words.csv',
    bad_words_ambiguous='src/rule_based_system/word_lists/ambiguous_bad_words.csv',
)
|
src/gibberish_detection/GibberishDetector.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class GibberishDetector:
    """
    Detects whether the comment is actual text or a random sequence of characters.
    The model has been trained to learn character sequence probabilities from a set of approximately 4 million
    approved comments.
    """

    model = None
    skip_sequences = ['duh', 'ah']  # tokens that are allowed even though the model might see these as gibberish
    _strip_chars = '.,;:!?"\'()[]{}'  # punctuation ignored when comparing a token with skip_sequences

    def __init__(self, model):
        """
        :param model: object exposing is_gibberish(text), used to judge the text
        """
        self.model = model

    def predict(self, text):
        """Return True when the text contains no gibberish."""
        return not self.contains_gibberish(text)

    def contains_gibberish(self, text):
        """
        Return the model's gibberish verdict for the text, ignoring the tokens in skip_sequences.

        Only whole whitespace-separated tokens are skipped. A plain substring test would let
        every text that merely contains "ah" (e.g. "yeah", "blah") bypass the model.
        """
        skipped = set(self.skip_sequences)
        tokens = text.split()

        # Skip over tokens that often are misjudged by the model
        kept = [token for token in tokens
                if token.strip(self._strip_chars).lower() not in skipped]

        # Nothing was skipped: hand the untouched text to the model
        if len(kept) == len(tokens):
            return self.model.is_gibberish(text)

        # The text consisted of allowed tokens only
        if not kept:
            return False

        # Pass the remaining text through model
        return self.model.is_gibberish(' '.join(kept))
|
src/gibberish_detection/__init__.py
ADDED
File without changes
|
src/gibberish_detection/__pycache__/GibberishDetector.cpython-39.pyc
ADDED
Binary file (1.14 kB). View file
|
|
src/gibberish_detection/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (164 Bytes). View file
|
|
src/gibberish_detection/approved_comments.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26c786281ab9642063f9b6c0ad2b07d6122cd00b5c574aec0d7be63d3d1438f6
|
3 |
+
size 29971
|
src/rule_based_system/BadWordRule.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.rule_based_system.Rule import Rule
|
2 |
+
|
3 |
+
from src.rule_based_system.TextLengthRule import TEXT_SIZE_LIMIT
|
4 |
+
from src.rule_based_system.Verdict import Verdict
|
5 |
+
|
6 |
+
|
7 |
+
class BadWordRule(Rule):
    """
    Disallows comments that contain a word from the given bad word list.

    Bad words obtained from corners of the internet you do not want to visit:
    - https://www.ensie.nl/scheldwoordenboek#
    - https://scheldwoorden.goedbegin.nl/
    - https://nl.wiktionary.org/wiki/Categorie:Scheldwoord_in_het_Nederlands
    - https://www.lannoo.be/sites/default/files/books/issuu/9789401453417.pdf
    - https://www.dutchmultimedia.nl/meest-verschrikkelijke-engelse-scheldwoorden/
    - https://www.dutchmultimedia.nl/scheldwoordenboek-1-000-den-nederlandse-scheldwoorden/
    - https://www.henkyspapiamento.com/10-papiaments-scheldwoorden-die-we-liever-niet-horen/
    - https://volkabulaire.nl/tag/scheldwoorden/
    - https://data.world/wordlists/dirty-naughty-obscene-and-otherwise-bad-words-in-dutch
    """

    bad_words = None
    _strip_chars = '.,;:!?"\'()[]{}'  # punctuation ignored around a word when matching

    def __init__(self, bad_words: list, strict: bool):
        """
        :param bad_words: the words that are not allowed in a comment
        :param strict: value reported by is_strict()
        """
        self.bad_words = bad_words
        self.strict = strict

    def get_verdict(self, comment_text: str) -> Verdict:
        """
        Check the first TEXT_SIZE_LIMIT characters of the comment for bad words.

        :return: Verdict that allows the comment when no bad word was found; the bad words
            found are the highlights
        """
        comment_text = comment_text[0:TEXT_SIZE_LIMIT]

        bad_words = self.find_bad_words(comment_text.split())

        return Verdict(not bad_words, bad_words)

    def find_bad_words(self, text: list) -> list:
        """
        Return the words of text that are bad words, in order of appearance.

        A word matches as-is, or after removing surrounding punctuation and case folding,
        so that e.g. a capitalised bad word followed by "!" is still detected.
        """
        # Build the lookup once per call: O(1) membership instead of scanning the list per word
        lookup = set(self.bad_words)
        strip_chars = self._strip_chars

        return [
            word for word in text
            if word in lookup or word.strip(strip_chars).casefold() in lookup
        ]

    def is_strict(self) -> bool:
        return self.strict

    def get_rule_description(self) -> str:
        kind = 'strictly' if self.is_strict() else 'ambiguous'
        return "Comment text contained %s inappropriate words" % kind
|
src/rule_based_system/HTMLRule.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from bs4 import BeautifulSoup
|
2 |
+
|
3 |
+
from src.rule_based_system.Rule import Rule
|
4 |
+
from src.rule_based_system.Verdict import Verdict
|
5 |
+
|
6 |
+
|
7 |
+
class HTMLRule(Rule):
    """Rule that disallows comments in which the HTML parser finds tags."""

    def get_verdict(self, comment_text: str) -> Verdict:
        """Allow the comment only when no tags are found; the tags found are the highlights."""
        found_tags = self.find_html(comment_text)
        return Verdict(not found_tags, found_tags)

    @staticmethod
    def find_html(text: str) -> list:
        """Parse text with BeautifulSoup's "html.parser" and return every tag found as a string."""
        soup = BeautifulSoup(text, "html.parser")
        return list(map(str, soup.find_all()))

    def is_strict(self) -> bool:
        """
        Not strict: this rule occasionally removes appropriate comments when names are
        enclosed in triangular brackets e.g. <name>
        """
        return False

    def get_rule_description(self) -> str:
        return 'HTML used in comment text'
|
src/rule_based_system/LanguageRule.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.rule_based_system.Rule import Rule
|
2 |
+
|
3 |
+
|
4 |
+
class LanguageRule(Rule):
    """Placeholder for a language check; get_verdict is not implemented yet."""

    def get_verdict(self, comment_text: str) -> bool:
        """
        Not implemented, this rule requires further research.

        The library "langdetect" allows for language detection, but returns unstable results
        when applied to jeugdjournaal comments. A simpler approach could be to check the
        percentage of non latin characters, without taking smileys and punctuation into account.
        """
        raise NotImplementedError

    def is_strict(self) -> bool:
        return False


# Module-level instance of the rule
language_rule = LanguageRule()
|
src/rule_based_system/PersonalDetailsRule.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
from src.rule_based_system.Rule import Rule
|
4 |
+
from src.rule_based_system.TextLengthRule import TEXT_SIZE_LIMIT
|
5 |
+
from src.rule_based_system.Verdict import Verdict
|
6 |
+
|
7 |
+
|
8 |
+
class PersonalDetailsRule(Rule):
    """Disallows comments in which one of the given regular expressions finds a match."""

    def __init__(self, regexes: list, strict: bool):
        """
        :param regexes: regular expressions that each describe a kind of personal detail
        :param strict: value reported by is_strict()
        """
        self.regexes = regexes
        self.strict = strict

    def get_verdict(self, comment_text: str) -> Verdict:
        """
        Search the first TEXT_SIZE_LIMIT characters of the comment for personal details.

        :return: Verdict that allows the comment when nothing was found; the matched
            substrings are the highlights
        """
        comment_text = comment_text[0:TEXT_SIZE_LIMIT]

        personal_details = self.find_personal_details(comment_text)

        return Verdict(not personal_details, personal_details)

    def find_personal_details(self, text: str) -> list:
        """
        Return every substring of text matched by one of the regexes.

        The complete match is collected. re.findall would return the group contents (or a
        tuple of them) for a regex with capture groups, which are not the matched substrings.
        """
        details = []
        for regex in self.regexes:
            details.extend(match.group(0) for match in re.finditer(regex, text))

        return details

    def is_strict(self) -> bool:
        return self.strict

    @staticmethod
    def get_rule_description() -> str:
        return 'Personal details were mentioned in text'
|
src/rule_based_system/Rule.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import ABC
|
2 |
+
|
3 |
+
from src.rule_based_system.Verdict import Verdict
|
4 |
+
|
5 |
+
|
6 |
+
class Rule(ABC):
    """Interface shared by the rules; the methods here have no implementation and return None."""

    def get_verdict(self, comment_text: str) -> Verdict:
        """
        Test one specific rule against the comment text and return a verdict.

        The verdict tells whether the comment is allowed according to this rule and holds a
        list of substrings of the comment that may explain why it was marked as inappropriate.
        """

    def is_strict(self) -> bool:
        """
        Returns True if rule can be used directly. False if results may be ambiguous.
        """

    @staticmethod
    def get_rule_description() -> str:
        """Return a description of the rule."""
|
src/rule_based_system/RuleBasedSystem.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class RuleBasedSystem:
    """Runs a comment through a list of rules and combines their verdicts."""

    rules = []

    def __init__(self, rules: list):
        """
        :param rules: objects offering get_verdict(comment_text) and get_rule_description()
        """
        self.rules = rules

    def allows(self, comment_text: str) -> tuple[bool, list, list]:
        """
        Apply every rule to the comment text.

        :return: a triple (allows, reasons, highlights): allows is False as soon as one rule
            disallows the comment, reasons holds the description of every disallowing rule
            and highlights holds the highlights of their verdicts
        """
        allows, reasons, highlights = True, [], []

        for rule in self.rules:
            verdict = rule.get_verdict(comment_text)
            if not verdict.allowed:
                allows = False
                reasons.append(rule.get_rule_description())
                highlights.extend(verdict.highlights)

        return allows, reasons, highlights
|
src/rule_based_system/TextLengthRule.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.rule_based_system.Rule import Rule
|
2 |
+
from src.rule_based_system.Verdict import Verdict
|
3 |
+
|
4 |
+
# Largest number of characters a comment may have
TEXT_SIZE_LIMIT = 500


class TextLengthRule(Rule):
    """Rule that disallows empty comments and comments longer than TEXT_SIZE_LIMIT."""

    def get_verdict(self, comment_text: str) -> Verdict:
        """Return a verdict without highlights that rejects empty and too long texts."""
        if self.is_empty(comment_text) or self.is_too_long(comment_text, TEXT_SIZE_LIMIT):
            return Verdict(False, [])

        return Verdict(True, [])

    @staticmethod
    def is_empty(text):
        """Return True when text has no characters at all."""
        return not len(text)

    @staticmethod
    def is_too_long(text, limit):
        """Return True when text has more than limit characters."""
        return limit < len(text)

    def is_strict(self) -> bool:
        return True

    @staticmethod
    def get_rule_description() -> str:
        return 'Inappropriate text length'
|
src/rule_based_system/UrlRule.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.rule_based_system.Rule import Rule
|
2 |
+
from src.rule_based_system.Verdict import Verdict
|
3 |
+
|
4 |
+
|
5 |
+
class UrlRule(Rule):
    """Rule that disallows comments in which a url is found."""

    url_extractor = None

    def __init__(self, url_extractor):
        """
        :param url_extractor: object offering find_urls(text)
        """
        self.url_extractor = url_extractor

    def get_verdict(self, comment_text: str) -> Verdict:
        """Allow the comment only when no url is found; the urls found are the highlights."""
        found = self.find_urls(comment_text)
        return Verdict(not found, found)

    def find_urls(self, text: str) -> list:
        """Return the urls reported by the extractor, or ['http'] when only 'http' occurs in text."""
        urls = self.url_extractor.find_urls(text)

        if urls or 'http' not in text:
            return urls

        # url_extractor does not find url with spaces, so a bare 'http' counts as a url too
        return ['http']

    def is_strict(self) -> bool:
        return True

    @staticmethod
    def get_rule_description() -> str:
        return 'Url was mentioned in text'
|
src/rule_based_system/Verdict.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class Verdict:
    """Outcome of a single rule: whether the comment is allowed, plus explanatory highlights."""

    def __init__(self, allowed: bool, highlights: list = None):
        """
        :param allowed: True when the rule allows the comment
        :param highlights: substrings that explain the verdict; a new empty list when omitted
        """
        self.allowed = allowed
        # A `[]` default would be created once and shared by every Verdict built without highlights
        self.highlights = [] if highlights is None else highlights
|
src/rule_based_system/__init__.py
ADDED
File without changes
|
src/rule_based_system/__pycache__/BadWordRule.cpython-39.pyc
ADDED
Binary file (2.38 kB). View file
|
|
src/rule_based_system/__pycache__/HTMLRule.cpython-39.pyc
ADDED
Binary file (1.51 kB). View file
|
|
src/rule_based_system/__pycache__/PersonalDetailsRule.cpython-39.pyc
ADDED
Binary file (1.64 kB). View file
|
|
src/rule_based_system/__pycache__/Rule.cpython-39.pyc
ADDED
Binary file (1.3 kB). View file
|
|
src/rule_based_system/__pycache__/RuleBasedSystem.cpython-39.pyc
ADDED
Binary file (862 Bytes). View file
|
|
src/rule_based_system/__pycache__/TextLengthRule.cpython-39.pyc
ADDED
Binary file (1.37 kB). View file
|
|
src/rule_based_system/__pycache__/UrlRule.cpython-39.pyc
ADDED
Binary file (1.36 kB). View file
|
|
src/rule_based_system/__pycache__/Verdict.cpython-39.pyc
ADDED
Binary file (498 Bytes). View file
|
|
src/rule_based_system/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (162 Bytes). View file
|
|
src/rule_based_system/word_lists/ambiguous_bad_words.csv
ADDED
@@ -0,0 +1,1467 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
parasiet
|
2 |
+
varkenskop
|
3 |
+
halvezool
|
4 |
+
pannekoek
|
5 |
+
makak
|
6 |
+
dameskapper
|
7 |
+
pennelikker
|
8 |
+
papomslag
|
9 |
+
lafaard
|
10 |
+
takketrol
|
11 |
+
lolo
|
12 |
+
flaps
|
13 |
+
steenezel
|
14 |
+
aalskakker
|
15 |
+
aarselaar
|
16 |
+
ass
|
17 |
+
gamefreak
|
18 |
+
afzeiken
|
19 |
+
zwijn
|
20 |
+
kechba
|
21 |
+
yoghurtventje
|
22 |
+
homo
|
23 |
+
tollie
|
24 |
+
knor
|
25 |
+
koolgalmug
|
26 |
+
dike
|
27 |
+
zandloper
|
28 |
+
cunnie
|
29 |
+
anita
|
30 |
+
vot
|
31 |
+
klootoog
|
32 |
+
masque
|
33 |
+
railnicht
|
34 |
+
zalfpot
|
35 |
+
zeiksnor
|
36 |
+
kapsoneslijer
|
37 |
+
kaffer
|
38 |
+
zeurkous
|
39 |
+
pasjakroet
|
40 |
+
piemel
|
41 |
+
boerenhufter
|
42 |
+
baarmoederslijmvlieg
|
43 |
+
bedrijfspoedel
|
44 |
+
fatsich
|
45 |
+
kouloh
|
46 |
+
wankjob
|
47 |
+
kaketoe
|
48 |
+
kelerelijer
|
49 |
+
lijpo
|
50 |
+
achterbuurtfossiel
|
51 |
+
camslet
|
52 |
+
gangster
|
53 |
+
kloefkapper
|
54 |
+
fuckwad
|
55 |
+
kankernicht
|
56 |
+
schaamluis
|
57 |
+
geitenbreier
|
58 |
+
muilezelin
|
59 |
+
flappie
|
60 |
+
choad
|
61 |
+
perenplukker
|
62 |
+
uiersnuiver
|
63 |
+
cockshit
|
64 |
+
aarslander
|
65 |
+
parkschijter
|
66 |
+
kuthoer
|
67 |
+
onderkruipsel
|
68 |
+
apenkont
|
69 |
+
zaadbal
|
70 |
+
aarsvis
|
71 |
+
douche-fag
|
72 |
+
olifantenlul
|
73 |
+
prod
|
74 |
+
aapmens
|
75 |
+
optyfen
|
76 |
+
makaak
|
77 |
+
piggelmee
|
78 |
+
sodemieter
|
79 |
+
naadschurftsmurfigebeftekkel
|
80 |
+
prostituee
|
81 |
+
tampony
|
82 |
+
dumbfuck
|
83 |
+
mofo
|
84 |
+
droplul
|
85 |
+
apenhaar
|
86 |
+
dakhaas
|
87 |
+
parkinghomo
|
88 |
+
mafkaas
|
89 |
+
twats
|
90 |
+
zeiker
|
91 |
+
rapenschijter
|
92 |
+
zaadzwerver
|
93 |
+
dickjuice
|
94 |
+
paardenplasserwasser
|
95 |
+
karhengst
|
96 |
+
inktfis
|
97 |
+
mafketel
|
98 |
+
assbanger
|
99 |
+
okkenootje
|
100 |
+
anita
|
101 |
+
neetoor
|
102 |
+
eierzak
|
103 |
+
jerkass
|
104 |
+
geitenneuker
|
105 |
+
papegaai
|
106 |
+
snikkel
|
107 |
+
aarsmaad
|
108 |
+
afberen
|
109 |
+
takkentrut
|
110 |
+
dakduif
|
111 |
+
badaap
|
112 |
+
muts
|
113 |
+
autist
|
114 |
+
puto
|
115 |
+
raaskallen
|
116 |
+
schaamliplikker
|
117 |
+
baarvader
|
118 |
+
faggotcock
|
119 |
+
poontang
|
120 |
+
cuntlicker
|
121 |
+
koffieboon
|
122 |
+
padoog
|
123 |
+
vuilak
|
124 |
+
drooggeiler
|
125 |
+
fag
|
126 |
+
beurt
|
127 |
+
kakkie
|
128 |
+
afrossen
|
129 |
+
pekelteef
|
130 |
+
olalul
|
131 |
+
loser
|
132 |
+
kloothannes
|
133 |
+
augurk
|
134 |
+
fuckhole
|
135 |
+
avocadoneuker
|
136 |
+
kooch
|
137 |
+
panooch
|
138 |
+
piemelhoofdje
|
139 |
+
lapzwans
|
140 |
+
fielt
|
141 |
+
graftak
|
142 |
+
idioot
|
143 |
+
kurwa
|
144 |
+
whore
|
145 |
+
fuckbag
|
146 |
+
paardenlul
|
147 |
+
schadewijf
|
148 |
+
gladjakker
|
149 |
+
bep
|
150 |
+
cockfucker
|
151 |
+
broekhoesten
|
152 |
+
kriel
|
153 |
+
cockmuncher
|
154 |
+
gladiool
|
155 |
+
schijtlijster
|
156 |
+
lomperd
|
157 |
+
loeder
|
158 |
+
pekelhoer
|
159 |
+
sanka
|
160 |
+
sekreet
|
161 |
+
dweil
|
162 |
+
oesterkut
|
163 |
+
pothoer
|
164 |
+
skeet
|
165 |
+
uit-de-baarmoeder-gerukte-tien-tepelige-homofiel
|
166 |
+
fuckin
|
167 |
+
clit
|
168 |
+
piss
|
169 |
+
aarsklodder
|
170 |
+
natkut
|
171 |
+
leipo
|
172 |
+
boemelen
|
173 |
+
facadeklasher
|
174 |
+
harpij
|
175 |
+
jam-bek
|
176 |
+
paardebeffer
|
177 |
+
aardgeest
|
178 |
+
reetkever
|
179 |
+
aap
|
180 |
+
rugridder
|
181 |
+
cock
|
182 |
+
eiermix-imitatie
|
183 |
+
haai
|
184 |
+
muffdiver
|
185 |
+
paardenanus
|
186 |
+
campinghoer
|
187 |
+
kechies
|
188 |
+
vetzak
|
189 |
+
assgoblin
|
190 |
+
bedonderen
|
191 |
+
knob
|
192 |
+
jetslet
|
193 |
+
woeshoem
|
194 |
+
homodumbshit
|
195 |
+
zalf
|
196 |
+
boekenwurm
|
197 |
+
asscracker
|
198 |
+
minger
|
199 |
+
ini-mini-scheefgepoepte-pornokabouter
|
200 |
+
halvegare
|
201 |
+
zeikstraal
|
202 |
+
gook
|
203 |
+
ruski
|
204 |
+
yogoteef
|
205 |
+
zaadvarken
|
206 |
+
sodomiet
|
207 |
+
retenlikker
|
208 |
+
aambeienlikker
|
209 |
+
kakbal
|
210 |
+
kutstreek
|
211 |
+
caveman
|
212 |
+
oester
|
213 |
+
tit
|
214 |
+
achterpoter
|
215 |
+
bellend
|
216 |
+
heeb
|
217 |
+
assbag
|
218 |
+
smeerpijp
|
219 |
+
janettescheter
|
220 |
+
ragkut
|
221 |
+
zakrat
|
222 |
+
bouwdoos
|
223 |
+
gaylord
|
224 |
+
pielemans
|
225 |
+
peul
|
226 |
+
zemmer
|
227 |
+
bloodclaat
|
228 |
+
liefdesgrot
|
229 |
+
ass-pirate
|
230 |
+
dookie
|
231 |
+
penislijder
|
232 |
+
quasimodo
|
233 |
+
slettenbak
|
234 |
+
muff
|
235 |
+
penisfucker
|
236 |
+
anustoerist
|
237 |
+
fucknutt
|
238 |
+
neukpaal
|
239 |
+
pijphomo
|
240 |
+
bumblefuck
|
241 |
+
asshole
|
242 |
+
pruim
|
243 |
+
heihaas
|
244 |
+
vaginavochtabsorbeerder
|
245 |
+
schatje
|
246 |
+
peckerhead
|
247 |
+
cunt
|
248 |
+
piggel
|
249 |
+
luibuis
|
250 |
+
anussap-verzuring
|
251 |
+
zenuwlijer
|
252 |
+
mothafuckin'
|
253 |
+
paal
|
254 |
+
hell
|
255 |
+
shitter
|
256 |
+
billelikkertje
|
257 |
+
mafkees
|
258 |
+
flamoes
|
259 |
+
parkiet
|
260 |
+
uienreet
|
261 |
+
achterlader
|
262 |
+
bitchtits
|
263 |
+
honkey
|
264 |
+
splooge
|
265 |
+
slag
|
266 |
+
adderengebroedsels
|
267 |
+
janksnor
|
268 |
+
fucktard
|
269 |
+
fucktart
|
270 |
+
rotmof
|
271 |
+
insekt
|
272 |
+
kringspiermusketier
|
273 |
+
cockburger
|
274 |
+
shitbag
|
275 |
+
naarling
|
276 |
+
luibak
|
277 |
+
cooter
|
278 |
+
sod-off
|
279 |
+
racechinees
|
280 |
+
bruinwerker
|
281 |
+
racesjinees
|
282 |
+
gaybob
|
283 |
+
superkut
|
284 |
+
hondenlul
|
285 |
+
klapkut
|
286 |
+
foefkop
|
287 |
+
lijer
|
288 |
+
pigmentvreter
|
289 |
+
stinker
|
290 |
+
paarsedruiveneikel
|
291 |
+
fuckup
|
292 |
+
baanduivel
|
293 |
+
lauwbikker
|
294 |
+
gay
|
295 |
+
dicksucking
|
296 |
+
hakker
|
297 |
+
vag
|
298 |
+
kontneuker
|
299 |
+
nul
|
300 |
+
kruimelbuik
|
301 |
+
assface
|
302 |
+
oen
|
303 |
+
junglebunny
|
304 |
+
aaprikaan
|
305 |
+
anuspiloot
|
306 |
+
landloper
|
307 |
+
punani
|
308 |
+
pijpbekkieg
|
309 |
+
lameass
|
310 |
+
penis
|
311 |
+
tollielikker
|
312 |
+
dònchin
|
313 |
+
pampa
|
314 |
+
huisdealer
|
315 |
+
pierlala
|
316 |
+
zwijnjak
|
317 |
+
pepermuntvreter
|
318 |
+
gadoodengeefmijbrood
|
319 |
+
viespeuk
|
320 |
+
anuspiraat
|
321 |
+
pijpert
|
322 |
+
pijpmonk
|
323 |
+
egocentrischebullebak
|
324 |
+
ezel
|
325 |
+
kenau
|
326 |
+
vergallen
|
327 |
+
assmunch
|
328 |
+
zeikhannes
|
329 |
+
queer
|
330 |
+
arsehole
|
331 |
+
yid
|
332 |
+
raas
|
333 |
+
infanterist
|
334 |
+
dickmilk
|
335 |
+
tamponkut
|
336 |
+
ettertje
|
337 |
+
mcfagget
|
338 |
+
labrat
|
339 |
+
picorniekop
|
340 |
+
darmgas-wildebras
|
341 |
+
wabipi
|
342 |
+
cumtart
|
343 |
+
acrobaat
|
344 |
+
centenschraper
|
345 |
+
aarsbaard
|
346 |
+
autokraat
|
347 |
+
suckass
|
348 |
+
nigga
|
349 |
+
choco
|
350 |
+
tang
|
351 |
+
pijppiloot
|
352 |
+
beaner
|
353 |
+
morsebel
|
354 |
+
takketut
|
355 |
+
dirkdoos
|
356 |
+
bastard
|
357 |
+
faggit
|
358 |
+
modderduivel
|
359 |
+
asbestmuis
|
360 |
+
damhoer
|
361 |
+
cuntass
|
362 |
+
oksel
|
363 |
+
passoa-slet
|
364 |
+
kakkerlak
|
365 |
+
zeekbos
|
366 |
+
mick
|
367 |
+
edammer
|
368 |
+
darmkikker
|
369 |
+
pezewever
|
370 |
+
zibi
|
371 |
+
plucheplakker
|
372 |
+
nachtuil
|
373 |
+
xylofoonneuker
|
374 |
+
lampenhoer
|
375 |
+
anusreflector
|
376 |
+
fuckhead
|
377 |
+
billig
|
378 |
+
wippen
|
379 |
+
poonani
|
380 |
+
politiemuts
|
381 |
+
schuurmeid
|
382 |
+
naadhopper
|
383 |
+
jap
|
384 |
+
palenlaaier
|
385 |
+
dwaas
|
386 |
+
spuiten
|
387 |
+
renob
|
388 |
+
pokkenwijf
|
389 |
+
lampelul
|
390 |
+
naaiaap
|
391 |
+
dreuzel
|
392 |
+
aardpiraat
|
393 |
+
doshi
|
394 |
+
arrefakker
|
395 |
+
schaamhaarverzamelaar
|
396 |
+
piemelbrie
|
397 |
+
randdebiel
|
398 |
+
lesbo
|
399 |
+
chocoladesnol
|
400 |
+
kechje
|
401 |
+
testicle
|
402 |
+
nitwit
|
403 |
+
reetroeier
|
404 |
+
kadaver
|
405 |
+
shitbrains
|
406 |
+
asslicker
|
407 |
+
crapuul
|
408 |
+
vampier
|
409 |
+
bitchboy
|
410 |
+
baarmoederscheefgepoept-kuttekind
|
411 |
+
goddamn
|
412 |
+
zaadvragende-budcontainer
|
413 |
+
asswipe
|
414 |
+
gayfuck
|
415 |
+
olifantebabyface
|
416 |
+
standje
|
417 |
+
fucking
|
418 |
+
dumbass
|
419 |
+
kankerhond
|
420 |
+
slapper
|
421 |
+
maf
|
422 |
+
poot
|
423 |
+
rammelhoofd
|
424 |
+
maangezicht
|
425 |
+
reutelen
|
426 |
+
kakboer
|
427 |
+
opzouten
|
428 |
+
paardenpijper
|
429 |
+
cumbubble
|
430 |
+
douchebag
|
431 |
+
temeier
|
432 |
+
boerenkaffer
|
433 |
+
cockmonkey
|
434 |
+
teef
|
435 |
+
slempen
|
436 |
+
zaadhamsteraar
|
437 |
+
bokkenrijder
|
438 |
+
aambeienschoffelaar
|
439 |
+
pushi
|
440 |
+
deggo
|
441 |
+
brilsmurf
|
442 |
+
anuspuntverknetteraar
|
443 |
+
asscock
|
444 |
+
piemelgymmer
|
445 |
+
dickface
|
446 |
+
fuckbutt
|
447 |
+
penisspuiter
|
448 |
+
appelzwijn
|
449 |
+
pensklepper
|
450 |
+
schlemiel
|
451 |
+
badmuts
|
452 |
+
slijmerd
|
453 |
+
asshat
|
454 |
+
aarsridder
|
455 |
+
schelm
|
456 |
+
hoerenjong
|
457 |
+
schurftkop
|
458 |
+
keilef
|
459 |
+
punanny
|
460 |
+
moederneuker
|
461 |
+
zakkendroller
|
462 |
+
bermslet
|
463 |
+
smeg
|
464 |
+
lauwwaterdrinker
|
465 |
+
zaadspuitende
|
466 |
+
ragnicht
|
467 |
+
zaadcontainer
|
468 |
+
tietvlieg
|
469 |
+
engerd
|
470 |
+
zaadslikker
|
471 |
+
achterlijke
|
472 |
+
shittiest
|
473 |
+
meretrix
|
474 |
+
papzak
|
475 |
+
aarsbeffer
|
476 |
+
ectoplasma
|
477 |
+
paardenpenislikker
|
478 |
+
kaashoer
|
479 |
+
nageboorte
|
480 |
+
reetveger
|
481 |
+
queerhole
|
482 |
+
pokkenlijder
|
483 |
+
makako
|
484 |
+
scrote
|
485 |
+
darmcoureur
|
486 |
+
gajeskop
|
487 |
+
apekop
|
488 |
+
ossenkop
|
489 |
+
hondenneuker
|
490 |
+
nasivreter
|
491 |
+
lawaaipapegaai
|
492 |
+
bezem
|
493 |
+
flapkut
|
494 |
+
asshopper
|
495 |
+
djuka
|
496 |
+
shitass
|
497 |
+
jp
|
498 |
+
uil
|
499 |
+
zaadschieter
|
500 |
+
knijpkont
|
501 |
+
kreng
|
502 |
+
zandhaas
|
503 |
+
zenuwenlijer
|
504 |
+
lesbian
|
505 |
+
ballen
|
506 |
+
asshead
|
507 |
+
hol
|
508 |
+
aarsketser
|
509 |
+
schurk
|
510 |
+
inktviskut
|
511 |
+
bonk
|
512 |
+
indoending
|
513 |
+
wanker
|
514 |
+
goddam
|
515 |
+
snitch
|
516 |
+
huppelkut
|
517 |
+
coronalijer
|
518 |
+
kike
|
519 |
+
naaien
|
520 |
+
buttfucka
|
521 |
+
zaadvrager
|
522 |
+
dòlò
|
523 |
+
kuttenkop
|
524 |
+
hamerhomo
|
525 |
+
vaginalehangflapper
|
526 |
+
haaibaai
|
527 |
+
munging
|
528 |
+
dickhead
|
529 |
+
klafte
|
530 |
+
netenvreter
|
531 |
+
kaashaas
|
532 |
+
rampenbek
|
533 |
+
prick
|
534 |
+
befborstel
|
535 |
+
labbekak
|
536 |
+
vjayjay
|
537 |
+
manwijf
|
538 |
+
dumbshit
|
539 |
+
apengaper
|
540 |
+
aardworst
|
541 |
+
dickwad
|
542 |
+
boeler
|
543 |
+
peniskokerlikker
|
544 |
+
klapluis
|
545 |
+
clitfuck
|
546 |
+
chink
|
547 |
+
kippenneuker
|
548 |
+
klier
|
549 |
+
mafkikker
|
550 |
+
konthond
|
551 |
+
asslick
|
552 |
+
spintmijt
|
553 |
+
bugger
|
554 |
+
tonto
|
555 |
+
palingstekker
|
556 |
+
gaytard
|
557 |
+
afrotten
|
558 |
+
aalscholver
|
559 |
+
phenomeen
|
560 |
+
ouwehoeren
|
561 |
+
twatlips
|
562 |
+
opsodemieteren
|
563 |
+
cocknugget
|
564 |
+
kaapstander
|
565 |
+
schaamhaaretendebaffer
|
566 |
+
schijtlaars
|
567 |
+
bille-likkers
|
568 |
+
nutsack
|
569 |
+
piaprisma
|
570 |
+
zakbreuk
|
571 |
+
kloothommel
|
572 |
+
janlul
|
573 |
+
jizz
|
574 |
+
fuckersucker
|
575 |
+
triootje
|
576 |
+
aarsatleet
|
577 |
+
shitface
|
578 |
+
kluns
|
579 |
+
bollox
|
580 |
+
buffelen
|
581 |
+
kontol
|
582 |
+
krielkip
|
583 |
+
familycocktail
|
584 |
+
babaloekoe
|
585 |
+
piemelwijf
|
586 |
+
ito
|
587 |
+
roetmop
|
588 |
+
assbite
|
589 |
+
fucked
|
590 |
+
bhenchod
|
591 |
+
flikken
|
592 |
+
walvisganger
|
593 |
+
fabricagefout
|
594 |
+
botterik
|
595 |
+
droogkloot
|
596 |
+
aso-big
|
597 |
+
cocksmith
|
598 |
+
wipkip
|
599 |
+
pindachinees
|
600 |
+
rekel
|
601 |
+
darmstad
|
602 |
+
sukkel
|
603 |
+
populist
|
604 |
+
verdomme
|
605 |
+
beheime
|
606 |
+
pijpen
|
607 |
+
shitty
|
608 |
+
nazi
|
609 |
+
kutvent
|
610 |
+
hamel
|
611 |
+
potver
|
612 |
+
wuftje
|
613 |
+
achterbaksestoephoer
|
614 |
+
papist
|
615 |
+
rampdebiel
|
616 |
+
dickfucker
|
617 |
+
fuckwit
|
618 |
+
pleurislijder
|
619 |
+
kakbroek
|
620 |
+
sandnigger
|
621 |
+
penispijper
|
622 |
+
perebek
|
623 |
+
smeerkanis
|
624 |
+
apekloot
|
625 |
+
lantarenslet
|
626 |
+
lomperik
|
627 |
+
clitface
|
628 |
+
kapoen
|
629 |
+
pestboer
|
630 |
+
boerenkinkel
|
631 |
+
uit-de-baarmoeder-geruktedeurknop
|
632 |
+
watervlo
|
633 |
+
tittyfuck
|
634 |
+
anuspapegaai
|
635 |
+
johny
|
636 |
+
lezzie
|
637 |
+
miljaar
|
638 |
+
vlerk
|
639 |
+
ass-hat
|
640 |
+
palen
|
641 |
+
proleet
|
642 |
+
zeurpiet
|
643 |
+
yvesmoeder
|
644 |
+
hersenlijer
|
645 |
+
balls
|
646 |
+
tyfuslijder
|
647 |
+
whorebag
|
648 |
+
pedocyclomasturbant
|
649 |
+
zaadsnuiver
|
650 |
+
fascist
|
651 |
+
arroganteklitzuigende-wrattenhoofd
|
652 |
+
shitcanned
|
653 |
+
afrukken
|
654 |
+
grobbejanus
|
655 |
+
zakkenvuller
|
656 |
+
kettingzeugg
|
657 |
+
fagbag
|
658 |
+
baarmoeder
|
659 |
+
daaps
|
660 |
+
naadje
|
661 |
+
dicksucker
|
662 |
+
remsporen
|
663 |
+
pollock
|
664 |
+
schaap
|
665 |
+
zaksel
|
666 |
+
toeter
|
667 |
+
kakmadam
|
668 |
+
holtor
|
669 |
+
dronkenlap
|
670 |
+
maaghond
|
671 |
+
lamzak
|
672 |
+
zeikbeer
|
673 |
+
aarsbanaan
|
674 |
+
stinkbok
|
675 |
+
djoeka
|
676 |
+
fucker
|
677 |
+
zaadheks
|
678 |
+
paki
|
679 |
+
aftrekken
|
680 |
+
sambalburger
|
681 |
+
satan
|
682 |
+
konjo
|
683 |
+
faggot
|
684 |
+
zeiken
|
685 |
+
raamneger
|
686 |
+
kraut
|
687 |
+
shitting
|
688 |
+
maaghoer
|
689 |
+
paardenpenis
|
690 |
+
palingkop
|
691 |
+
bekken
|
692 |
+
biljartbal
|
693 |
+
breezerslet
|
694 |
+
fellatio
|
695 |
+
cunnilingus
|
696 |
+
appelflap
|
697 |
+
cow
|
698 |
+
aartschurk
|
699 |
+
autocraat
|
700 |
+
pietlut
|
701 |
+
stomkop
|
702 |
+
pussies
|
703 |
+
shitbagger
|
704 |
+
gash
|
705 |
+
dago
|
706 |
+
fuckoff
|
707 |
+
boner
|
708 |
+
motherfucking
|
709 |
+
matennaaier
|
710 |
+
puistenkop
|
711 |
+
tangelteef
|
712 |
+
clunge
|
713 |
+
klootviool
|
714 |
+
quasimodom
|
715 |
+
assclown
|
716 |
+
naaimachine
|
717 |
+
trien
|
718 |
+
pantoffelneuker
|
719 |
+
wratzwijn
|
720 |
+
cumjockey
|
721 |
+
axwound
|
722 |
+
inktlul
|
723 |
+
éénoog
|
724 |
+
engerling
|
725 |
+
hoerenkind
|
726 |
+
piemelpatser
|
727 |
+
shiz
|
728 |
+
rapalje
|
729 |
+
geit
|
730 |
+
specht
|
731 |
+
darmtoerist
|
732 |
+
hapsnurker
|
733 |
+
mispunt
|
734 |
+
polesmoker
|
735 |
+
kinkel
|
736 |
+
hoerenjager
|
737 |
+
cuntrag
|
738 |
+
spleetoog
|
739 |
+
aambeienbeffer
|
740 |
+
baby
|
741 |
+
carnavals-inca
|
742 |
+
kaaskop
|
743 |
+
wafelijzer
|
744 |
+
zaadnaad
|
745 |
+
lekkerpieper
|
746 |
+
sabberkut
|
747 |
+
chesticle
|
748 |
+
pielesnuiver
|
749 |
+
okselharenbillereetzweetzuur
|
750 |
+
schietmot
|
751 |
+
nonce
|
752 |
+
assshole
|
753 |
+
fuckbutter
|
754 |
+
bampot
|
755 |
+
lafbek
|
756 |
+
zaadhoer
|
757 |
+
assnigger
|
758 |
+
pinda
|
759 |
+
piek
|
760 |
+
assfuck
|
761 |
+
gesodemieter
|
762 |
+
vlooienzak
|
763 |
+
latexnicht
|
764 |
+
nigaboo
|
765 |
+
tits
|
766 |
+
tard
|
767 |
+
zeikerd
|
768 |
+
guido
|
769 |
+
egoist
|
770 |
+
noppeshoer
|
771 |
+
cockknoker
|
772 |
+
gluiperd
|
773 |
+
nageboortegezwe
|
774 |
+
jandoedel
|
775 |
+
cocksmoker
|
776 |
+
bolle
|
777 |
+
wannabee
|
778 |
+
taboetstabiske
|
779 |
+
aardworm
|
780 |
+
wank
|
781 |
+
aarsneger
|
782 |
+
kankerlijer
|
783 |
+
ganzeboard
|
784 |
+
uit-de-baarmoeder-gerukte-tien-tepelige-kut-trut
|
785 |
+
passoatrut
|
786 |
+
cocksniffer
|
787 |
+
carnavalskaper
|
788 |
+
campusnerd
|
789 |
+
babypoedersnuiver
|
790 |
+
totebel
|
791 |
+
vleesroos
|
792 |
+
penishoofd
|
793 |
+
schobbejak
|
794 |
+
wandelende-reklame-voor-kachelglans
|
795 |
+
paardelullo
|
796 |
+
aarsgarnaal
|
797 |
+
poon
|
798 |
+
carpetmuncher
|
799 |
+
anus
|
800 |
+
lelijkerd
|
801 |
+
coon
|
802 |
+
git
|
803 |
+
assshit
|
804 |
+
fanny
|
805 |
+
kakteef
|
806 |
+
badgast
|
807 |
+
flauwerd
|
808 |
+
lamagezicht
|
809 |
+
hockeytut
|
810 |
+
cockmongler
|
811 |
+
oelewap
|
812 |
+
parel
|
813 |
+
dickweed
|
814 |
+
patagonier
|
815 |
+
kankerlijder
|
816 |
+
buttfucker
|
817 |
+
barslet
|
818 |
+
watje
|
819 |
+
dickweasel
|
820 |
+
lèrchi
|
821 |
+
reet
|
822 |
+
dòndòn
|
823 |
+
mothafucka
|
824 |
+
cazzo
|
825 |
+
besodemieteren
|
826 |
+
geteisem
|
827 |
+
nanoneuker
|
828 |
+
klojo
|
829 |
+
olifantenschimmelslurf
|
830 |
+
flessentrekker
|
831 |
+
strontzak
|
832 |
+
karonje
|
833 |
+
poep
|
834 |
+
zakcel
|
835 |
+
zwakbegaafde
|
836 |
+
fuckass
|
837 |
+
hoempert
|
838 |
+
zaadstaafteler
|
839 |
+
kontos
|
840 |
+
coochie
|
841 |
+
kontkruiper
|
842 |
+
armoedverspreider
|
843 |
+
shiznit
|
844 |
+
aarsworm
|
845 |
+
pandapikkenpijper
|
846 |
+
deuggleuf
|
847 |
+
tapijtnek
|
848 |
+
waailap
|
849 |
+
cockjockey
|
850 |
+
piezer
|
851 |
+
cumslut
|
852 |
+
fietspompenlullegezicht
|
853 |
+
azteek
|
854 |
+
kechie
|
855 |
+
soepkip
|
856 |
+
barg
|
857 |
+
aarsvijand
|
858 |
+
schijtluis
|
859 |
+
cockhead
|
860 |
+
sapcentrifuge
|
861 |
+
apenkut
|
862 |
+
prij
|
863 |
+
smeerlap
|
864 |
+
heks
|
865 |
+
dipshit
|
866 |
+
mikrodebiel
|
867 |
+
baarmoeder-discokip
|
868 |
+
heaumeau
|
869 |
+
truthola
|
870 |
+
mietje
|
871 |
+
asfaltridder
|
872 |
+
cumdumpster
|
873 |
+
aarslikker
|
874 |
+
klotenklapper
|
875 |
+
afzuigen
|
876 |
+
eeltneus
|
877 |
+
shag
|
878 |
+
heikneuter
|
879 |
+
sal
|
880 |
+
edeldarm
|
881 |
+
kakker
|
882 |
+
paardenpikkenpoetser
|
883 |
+
variétéhoer
|
884 |
+
cockface
|
885 |
+
koeskoesvreter
|
886 |
+
galsmoel
|
887 |
+
paardekut
|
888 |
+
galkankerdebiel
|
889 |
+
pleurislaaier
|
890 |
+
dickhole
|
891 |
+
imbeciel
|
892 |
+
lamstraal
|
893 |
+
assmuncher
|
894 |
+
goddamnit
|
895 |
+
leuter
|
896 |
+
rabauter
|
897 |
+
jigaboo
|
898 |
+
gannef
|
899 |
+
sakkers
|
900 |
+
jetonslikker
|
901 |
+
viruswappie
|
902 |
+
fok
|
903 |
+
eend
|
904 |
+
anuspenetreerder
|
905 |
+
kamerolifant
|
906 |
+
macaronivreter
|
907 |
+
gans
|
908 |
+
sacoche
|
909 |
+
bint
|
910 |
+
boerenkarhengst
|
911 |
+
baardaap
|
912 |
+
gooch
|
913 |
+
spast
|
914 |
+
wasbakkenpisser
|
915 |
+
zaadje
|
916 |
+
shithouse
|
917 |
+
secreet
|
918 |
+
lullo
|
919 |
+
badjufbeffer
|
920 |
+
linkmiegel
|
921 |
+
zeur
|
922 |
+
ass-jabber
|
923 |
+
pukkelbek
|
924 |
+
sambalvreter
|
925 |
+
wanhopigedarmrukker
|
926 |
+
del
|
927 |
+
incel
|
928 |
+
maan-rovers
|
929 |
+
klapperaap
|
930 |
+
kamelenneuker
|
931 |
+
zanikert
|
932 |
+
anuspieper
|
933 |
+
fuckbrain
|
934 |
+
flikker
|
935 |
+
pijpenkoplikker
|
936 |
+
beest
|
937 |
+
charlatan
|
938 |
+
wetback
|
939 |
+
bullshit
|
940 |
+
cockwaffle
|
941 |
+
krijslijster
|
942 |
+
aarsaap
|
943 |
+
langstaart
|
944 |
+
kaalkop
|
945 |
+
pijphoer
|
946 |
+
zatmuil
|
947 |
+
zeiklijster
|
948 |
+
doos
|
949 |
+
penisbanger
|
950 |
+
anussabbelaar
|
951 |
+
shit
|
952 |
+
tampontrekker
|
953 |
+
schijthoofd
|
954 |
+
reetridder
|
955 |
+
coochy
|
956 |
+
griet
|
957 |
+
fleer
|
958 |
+
afwerkplaats
|
959 |
+
aardvarken
|
960 |
+
balhaar
|
961 |
+
gambaslet
|
962 |
+
spook
|
963 |
+
pik
|
964 |
+
pummel
|
965 |
+
assjacker
|
966 |
+
daaglaker
|
967 |
+
uilskuiken
|
968 |
+
luiskop
|
969 |
+
jeannette
|
970 |
+
waterhoofd
|
971 |
+
dagblinde
|
972 |
+
zoubi
|
973 |
+
zemel
|
974 |
+
fuckboy
|
975 |
+
shitcunt
|
976 |
+
pijpdeuramateur
|
977 |
+
hangjas
|
978 |
+
naakt
|
979 |
+
aansteller
|
980 |
+
snoever
|
981 |
+
twatwaffle
|
982 |
+
vaginavarken
|
983 |
+
patser
|
984 |
+
douche
|
985 |
+
cretin
|
986 |
+
kyke
|
987 |
+
darmneuker
|
988 |
+
hond
|
989 |
+
pierewipper
|
990 |
+
kwakzalver
|
991 |
+
giechelkut
|
992 |
+
feltch
|
993 |
+
taig
|
994 |
+
nondeju
|
995 |
+
krentekakker
|
996 |
+
piesen
|
997 |
+
takenboeker
|
998 |
+
vetklep
|
999 |
+
wrattenzwijn
|
1000 |
+
nietsnut
|
1001 |
+
pijpelullebakkes
|
1002 |
+
lamlul
|
1003 |
+
galgenbrok
|
1004 |
+
jagoff
|
1005 |
+
mierenneuker
|
1006 |
+
kaasrandknabbelaar
|
1007 |
+
pijpmond
|
1008 |
+
graftyfuskankerpleuris
|
1009 |
+
teringlijder
|
1010 |
+
aarsmade
|
1011 |
+
hode
|
1012 |
+
godver
|
1013 |
+
kantoorpik
|
1014 |
+
piemelboxer
|
1015 |
+
mosselhoer
|
1016 |
+
kakmaker
|
1017 |
+
kontengezicht
|
1018 |
+
zwartjoekel
|
1019 |
+
cracker
|
1020 |
+
apache
|
1021 |
+
okselschimmelhoer
|
1022 |
+
pias
|
1023 |
+
piel
|
1024 |
+
klere
|
1025 |
+
capsonestrut
|
1026 |
+
zeekbakkes
|
1027 |
+
shithole
|
1028 |
+
publiciteitsgeil
|
1029 |
+
debiel
|
1030 |
+
judas
|
1031 |
+
mispruim
|
1032 |
+
kanker
|
1033 |
+
hangbuikvarkenkonteneuker
|
1034 |
+
zakkenwasser
|
1035 |
+
lardass
|
1036 |
+
godverdomme
|
1037 |
+
kloot
|
1038 |
+
flapdrol
|
1039 |
+
maajen
|
1040 |
+
assbandit
|
1041 |
+
asssucker
|
1042 |
+
baccilledrager
|
1043 |
+
shitdick
|
1044 |
+
fucknut
|
1045 |
+
asperge
|
1046 |
+
gat
|
1047 |
+
anusuitscheiding
|
1048 |
+
beaver
|
1049 |
+
cuntface
|
1050 |
+
shitstain
|
1051 |
+
darmafval
|
1052 |
+
zapoteek
|
1053 |
+
draaikont
|
1054 |
+
apenbatser
|
1055 |
+
baffer
|
1056 |
+
hamsterpampe
|
1057 |
+
chinc
|
1058 |
+
ruigpoot
|
1059 |
+
aardappelluis
|
1060 |
+
spic
|
1061 |
+
schijterd
|
1062 |
+
cocksucker
|
1063 |
+
pantyneuker
|
1064 |
+
titfuck
|
1065 |
+
dickwod
|
1066 |
+
motherfucker
|
1067 |
+
pedopenis
|
1068 |
+
niglet
|
1069 |
+
waterdrinker
|
1070 |
+
tering
|
1071 |
+
quikteef
|
1072 |
+
humping
|
1073 |
+
brotherfucker
|
1074 |
+
pampaman
|
1075 |
+
snatch
|
1076 |
+
zaadstengel
|
1077 |
+
shitspitter
|
1078 |
+
penispuffer
|
1079 |
+
oelewapper
|
1080 |
+
galbak
|
1081 |
+
olijfneuker
|
1082 |
+
butt-pirate
|
1083 |
+
hondsvot
|
1084 |
+
aarsfreter
|
1085 |
+
pekelharing
|
1086 |
+
naaidoos
|
1087 |
+
zaadvreter
|
1088 |
+
pendeu
|
1089 |
+
aarshaarluis
|
1090 |
+
farmer
|
1091 |
+
vandaal
|
1092 |
+
tongzoeng
|
1093 |
+
dickmonger
|
1094 |
+
okselhaas
|
1095 |
+
uitgekotstekamelenkut
|
1096 |
+
hondenkut
|
1097 |
+
ouwehoedendoos
|
1098 |
+
cunthole
|
1099 |
+
pazop
|
1100 |
+
clusterfuck
|
1101 |
+
hoerenzoon
|
1102 |
+
camperhoer
|
1103 |
+
palurk
|
1104 |
+
quartrozo
|
1105 |
+
chode
|
1106 |
+
aardpeer
|
1107 |
+
pussy
|
1108 |
+
klaplul
|
1109 |
+
munter
|
1110 |
+
pagadder
|
1111 |
+
penozeg
|
1112 |
+
pindapoepchinees
|
1113 |
+
crap
|
1114 |
+
aars
|
1115 |
+
fatass
|
1116 |
+
asswad
|
1117 |
+
bengel
|
1118 |
+
paashaasschaamhaarverzamelaar
|
1119 |
+
patin
|
1120 |
+
carnavals-mussolini
|
1121 |
+
polak
|
1122 |
+
flamer
|
1123 |
+
neger
|
1124 |
+
hamsterpijper
|
1125 |
+
gringo
|
1126 |
+
etterbak
|
1127 |
+
autodidact
|
1128 |
+
slutbag
|
1129 |
+
zaadslet
|
1130 |
+
flathoer
|
1131 |
+
pissed
|
1132 |
+
arse
|
1133 |
+
takkehoer
|
1134 |
+
keutelkut
|
1135 |
+
lambak
|
1136 |
+
rapey
|
1137 |
+
aarskind
|
1138 |
+
haas
|
1139 |
+
porchmonkey
|
1140 |
+
jambekslikker
|
1141 |
+
dumshit
|
1142 |
+
satéboer
|
1143 |
+
campingslet
|
1144 |
+
bosneuker
|
1145 |
+
hamsteraar
|
1146 |
+
aardappelhoer
|
1147 |
+
bukkake
|
1148 |
+
merde
|
1149 |
+
kokosmakroon
|
1150 |
+
pijpslet
|
1151 |
+
nicht
|
1152 |
+
covidioot
|
1153 |
+
tyfus
|
1154 |
+
gedoogzone
|
1155 |
+
tokkie
|
1156 |
+
nigger
|
1157 |
+
belazeren
|
1158 |
+
dickbag
|
1159 |
+
pijpgeit
|
1160 |
+
dickslap
|
1161 |
+
bitchass
|
1162 |
+
kontelikker
|
1163 |
+
bloody
|
1164 |
+
pijpbek
|
1165 |
+
stootje
|
1166 |
+
taart
|
1167 |
+
kaasknobbel
|
1168 |
+
bangerd
|
1169 |
+
pierlalla
|
1170 |
+
zandvlo
|
1171 |
+
uit-de-baarmoeder-gerukte-mislukte-kuttekop
|
1172 |
+
palmboomneuker
|
1173 |
+
cum
|
1174 |
+
cuntslut
|
1175 |
+
moffenhoer
|
1176 |
+
schaamlipgezwel
|
1177 |
+
troelala
|
1178 |
+
oppleuren
|
1179 |
+
doochbag
|
1180 |
+
kutlul
|
1181 |
+
braadvarken
|
1182 |
+
klooien
|
1183 |
+
hockeytrut
|
1184 |
+
snotolf
|
1185 |
+
sodeflikker
|
1186 |
+
asspirate
|
1187 |
+
kroeskop
|
1188 |
+
zattekop
|
1189 |
+
cockmongruel
|
1190 |
+
lampekap
|
1191 |
+
wasknijper
|
1192 |
+
zaagselkop
|
1193 |
+
shitfaced
|
1194 |
+
douchewaffle
|
1195 |
+
lavabosmoel
|
1196 |
+
paardenneuker
|
1197 |
+
aarshaarverzamelaar
|
1198 |
+
aambeihaarwasser
|
1199 |
+
vullis
|
1200 |
+
smiecht
|
1201 |
+
asielzoeker
|
1202 |
+
gratenbaal
|
1203 |
+
rimjob
|
1204 |
+
boerenheikneuter
|
1205 |
+
gabber
|
1206 |
+
gluiper
|
1207 |
+
apenjong
|
1208 |
+
apenwaffel
|
1209 |
+
ossenlul
|
1210 |
+
pedo
|
1211 |
+
pecker
|
1212 |
+
qwarf
|
1213 |
+
parkeerkut
|
1214 |
+
wop
|
1215 |
+
zuiplap
|
1216 |
+
greppeldel
|
1217 |
+
tart
|
1218 |
+
sodeju
|
1219 |
+
kruiper
|
1220 |
+
ramenlikker
|
1221 |
+
huzarenhoop
|
1222 |
+
eikellikker
|
1223 |
+
aarslog
|
1224 |
+
klotenbijter
|
1225 |
+
galgengebroed
|
1226 |
+
pakul
|
1227 |
+
dicktickler
|
1228 |
+
satraap
|
1229 |
+
tankreet
|
1230 |
+
palingvel
|
1231 |
+
anusridder
|
1232 |
+
baggerduiker
|
1233 |
+
matje
|
1234 |
+
lap
|
1235 |
+
snotaap
|
1236 |
+
babok
|
1237 |
+
ebverzuiper
|
1238 |
+
hangbuikzwijn
|
1239 |
+
uit-de-baarmoeder-gerukte-tien-tepelige-koeie-stront
|
1240 |
+
truus
|
1241 |
+
etter
|
1242 |
+
assfucker
|
1243 |
+
gaywad
|
1244 |
+
kankerhoer
|
1245 |
+
boomer
|
1246 |
+
aarshaarknabbelaar
|
1247 |
+
bloedlijer
|
1248 |
+
damn
|
1249 |
+
eendekweker
|
1250 |
+
lantaarnpaalhoer
|
1251 |
+
pissflaps
|
1252 |
+
piemelaar
|
1253 |
+
zemmel
|
1254 |
+
cumguzzler
|
1255 |
+
fagtard
|
1256 |
+
varken
|
1257 |
+
dumass
|
1258 |
+
batti
|
1259 |
+
dick-sneeze
|
1260 |
+
cocksmoke
|
1261 |
+
gayfuckist
|
1262 |
+
paplap
|
1263 |
+
aardbanaan
|
1264 |
+
raketpiraat
|
1265 |
+
stinkerd
|
1266 |
+
vagina
|
1267 |
+
yoghurtpisser
|
1268 |
+
miegels
|
1269 |
+
geilneef
|
1270 |
+
baarmoederbuikwandschimmel
|
1271 |
+
makreel
|
1272 |
+
cockass
|
1273 |
+
kaasnicht
|
1274 |
+
schimmelkut
|
1275 |
+
negro
|
1276 |
+
cockmaster
|
1277 |
+
ho
|
1278 |
+
kootch
|
1279 |
+
shithead
|
1280 |
+
fucks
|
1281 |
+
penisworstelaar
|
1282 |
+
twat
|
1283 |
+
sloerie
|
1284 |
+
jenevertrut
|
1285 |
+
patjepeeër
|
1286 |
+
abortuskind
|
1287 |
+
dickbeaters
|
1288 |
+
schlong
|
1289 |
+
augurkenneuker
|
1290 |
+
schaapskop
|
1291 |
+
ploert
|
1292 |
+
klooi
|
1293 |
+
welzijnsmafia
|
1294 |
+
asbak
|
1295 |
+
assmonkey
|
1296 |
+
drol
|
1297 |
+
druiloor
|
1298 |
+
cijferneuker
|
1299 |
+
gluipsnor
|
1300 |
+
shitbreath
|
1301 |
+
kwakkieg
|
1302 |
+
fuckwitt
|
1303 |
+
bollocks
|
1304 |
+
skank
|
1305 |
+
fuckface
|
1306 |
+
peniskoker
|
1307 |
+
cockbite
|
1308 |
+
cocknose
|
1309 |
+
gratenkut
|
1310 |
+
campesneurdo
|
1311 |
+
pijpzuiger
|
1312 |
+
schaamlikkerkip
|
1313 |
+
plurk
|
1314 |
+
schapenneuker
|
1315 |
+
addergebroed
|
1316 |
+
zeurzak
|
1317 |
+
moffrika
|
1318 |
+
kanen
|
1319 |
+
rukken
|
1320 |
+
piemelpiraat
|
1321 |
+
aso
|
1322 |
+
mariku
|
1323 |
+
zak
|
1324 |
+
cretino
|
1325 |
+
satehoer
|
1326 |
+
kont
|
1327 |
+
pommel
|
1328 |
+
pedopyrofilatomaan
|
1329 |
+
taalnazi
|
1330 |
+
eikel
|
1331 |
+
absjaar
|
1332 |
+
ginger
|
1333 |
+
internetverslaafde-neuker
|
1334 |
+
ponylul
|
1335 |
+
pot
|
1336 |
+
eendenkont
|
1337 |
+
koe
|
1338 |
+
poepen
|
1339 |
+
patjakker
|
1340 |
+
kleumer
|
1341 |
+
koño
|
1342 |
+
zuigen
|
1343 |
+
zadelruiker
|
1344 |
+
offroadie
|
1345 |
+
apenrukker
|
1346 |
+
schweinhund
|
1347 |
+
niggers
|
1348 |
+
klaarkomen
|
1349 |
+
hammehapper
|
1350 |
+
augurkenturk
|
1351 |
+
zwartjan
|
1352 |
+
skullfuck
|
1353 |
+
bruisend
|
1354 |
+
sikbille
|
1355 |
+
kebber
|
1356 |
+
anusvreter
|
1357 |
+
handvat
|
1358 |
+
quist
|
1359 |
+
dickfuck
|
1360 |
+
paardebrei
|
1361 |
+
pedomeester
|
1362 |
+
spaghettivreter
|
1363 |
+
mapangpang
|
1364 |
+
chocoladehoer
|
1365 |
+
gadverdamme
|
1366 |
+
babbalipviskikker
|
1367 |
+
klote
|
1368 |
+
oelepoeper
|
1369 |
+
minge
|
1370 |
+
aarstulp
|
1371 |
+
ransaap
|
1372 |
+
slut
|
1373 |
+
bitchy
|
1374 |
+
tampontoerist
|
1375 |
+
graftakketeef
|
1376 |
+
penislikker
|
1377 |
+
queef
|
1378 |
+
dyke
|
1379 |
+
oezo
|
1380 |
+
paardelid
|
1381 |
+
tiefus
|
1382 |
+
krimper
|
1383 |
+
klerebeer
|
1384 |
+
etterbuil
|
1385 |
+
jood
|
1386 |
+
spick
|
1387 |
+
handjob
|
1388 |
+
malaka
|
1389 |
+
paardenpikkerslikker
|
1390 |
+
aapjesviller
|
1391 |
+
papagaaiekuttenbefkonijn
|
1392 |
+
aarsbaars
|
1393 |
+
pakhuisrat
|
1394 |
+
bitches
|
1395 |
+
flamingo
|
1396 |
+
aarshaarfohner
|
1397 |
+
armoedzaaier
|
1398 |
+
schaampik
|
1399 |
+
mafklapper
|
1400 |
+
piemelteef
|
1401 |
+
piemelpunt
|
1402 |
+
gayass
|
1403 |
+
flatslet
|
1404 |
+
maffer
|
1405 |
+
belhamel
|
1406 |
+
vajayjay
|
1407 |
+
capsoneslijer
|
1408 |
+
pestbek
|
1409 |
+
eikelkluiver
|
1410 |
+
tifus
|
1411 |
+
whoreface
|
1412 |
+
potverdorie
|
1413 |
+
ouwehoer
|
1414 |
+
yoyolul
|
1415 |
+
bulderbast
|
1416 |
+
potverdikkeme
|
1417 |
+
jackass
|
1418 |
+
broodaap
|
1419 |
+
raceplee
|
1420 |
+
poonany
|
1421 |
+
schuinsmarcheerder
|
1422 |
+
pestlijder
|
1423 |
+
dildo
|
1424 |
+
aasgier
|
1425 |
+
kak
|
1426 |
+
zeekmem
|
1427 |
+
apenflikkerneuker
|
1428 |
+
fileklever
|
1429 |
+
peukenneuker
|
1430 |
+
kwal
|
1431 |
+
cholera
|
1432 |
+
kontonbonker
|
1433 |
+
darmfloraflirter
|
1434 |
+
nakko
|
1435 |
+
vingeren
|
1436 |
+
bleekscheet
|
1437 |
+
papabaf
|
1438 |
+
dicks
|
1439 |
+
vaselinevriend
|
1440 |
+
dick
|
1441 |
+
klootjesvolk
|
1442 |
+
varkenshol
|
1443 |
+
peniszuiger
|
1444 |
+
cabaretlul
|
1445 |
+
adder
|
1446 |
+
pedomaan
|
1447 |
+
eendeneikel
|
1448 |
+
klerelijer
|
1449 |
+
va-j-j
|
1450 |
+
brugpieper
|
1451 |
+
blowjob
|
1452 |
+
invertebraat
|
1453 |
+
unclefucker
|
1454 |
+
zuurpruim
|
1455 |
+
punta
|
1456 |
+
sloef
|
1457 |
+
gaydo
|
1458 |
+
smous
|
1459 |
+
fudgepacker
|
1460 |
+
bosneger
|
1461 |
+
asses
|
1462 |
+
kakhuis
|
1463 |
+
parelridder
|
1464 |
+
vaking
|
1465 |
+
downie
|
1466 |
+
dikke
|
1467 |
+
dikzak
|
src/rule_based_system/word_lists/strictly_bad_words.csv
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
klootzak
|
2 |
+
hoer
|
3 |
+
bitch
|
4 |
+
slet
|
5 |
+
trut
|
6 |
+
kutwijf
|
7 |
+
mongool
|
8 |
+
aflebberen
|
9 |
+
beffen
|
10 |
+
bef
|
11 |
+
bokkelul
|
12 |
+
botergeil
|
13 |
+
boerelul
|
14 |
+
dombo
|
15 |
+
boerenpummel
|
16 |
+
godverdamme
|
17 |
+
hoer
|
18 |
+
hoerenbuurt
|
19 |
+
hoerenloper
|
20 |
+
hoerig
|
21 |
+
hufter
|
22 |
+
klootzak
|
23 |
+
kloten
|
24 |
+
kontneuken
|
25 |
+
kut
|
26 |
+
kuttelikkertje
|
27 |
+
lul
|
28 |
+
lul-de-behanger
|
29 |
+
lulhannes
|
30 |
+
lummel
|
31 |
+
mof
|
32 |
+
neuken
|
33 |
+
neukstier
|
34 |
+
oetlul
|
35 |
+
opgeilen
|
36 |
+
opkankeren
|
37 |
+
oprotten
|
38 |
+
paardelul
|
39 |
+
portiekslet
|
40 |
+
rothoer
|
41 |
+
rotzak
|
42 |
+
rukhond
|
43 |
+
schijt
|
44 |
+
schijten
|
45 |
+
schoft
|
46 |
+
slet
|
47 |
+
sletterig
|
48 |
+
slik mijn zaad
|
49 |
+
snol
|
50 |
+
standje-69
|
51 |
+
stoephoer
|
52 |
+
stront
|
53 |
+
sufferd
|
54 |
+
teringlijer
|
55 |
+
trottoir prostituée
|
56 |
+
trottoirteef
|
57 |
+
verkloten
|
58 |
+
verneuken
|
59 |
+
wijf
|
60 |
+
fuck
|
61 |
+
fuk
|
62 |
+
fack
|
63 |
+
fak
|
64 |
+
fock
|
65 |
+
afgebefte del
|
66 |
+
prickteaser
|
67 |
+
aarshaar
|
68 |
+
paardekloot
|
69 |
+
thundercunt
|
70 |
+
aarsbeer
|
71 |
+
feck
|
72 |
+
quikhoer
|
73 |
+
galgenaas
|
74 |
+
bafaap
|
75 |
+
fagfucker
|
76 |
+
pussylicking
|
77 |
+
apeneuker
|
78 |
+
internetslet
|
79 |
+
takkenteef
|
80 |
+
augurklul
|
81 |
+
queerbait
|
82 |
+
aarsslijm
|
83 |
+
paashaasneuker
|
84 |
+
pijpendemonkyboy
|
85 |
+
fuckstick
|
86 |
+
piemelpuist
|
87 |
+
penispukkelpuist
|
88 |
+
schaamluisbedrieger
|
89 |
+
eikelbijter
|
90 |
+
kech
|
91 |
+
kahba
|
92 |
+
dikoe
|
93 |
+
conjo
|
94 |
+
kaulo
|
95 |
+
kowed
|
96 |
+
poeta
|
97 |
+
puta
|
98 |
+
tabon
|
99 |
+
uchi
|
100 |
+
ucci
|
src/start_up/__init__.py
ADDED
File without changes
|
src/start_up/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (153 Bytes). View file
|
|
src/start_up/__pycache__/start_up_bad_words_rule.cpython-39.pyc
ADDED
Binary file (497 Bytes). View file
|
|
src/start_up/__pycache__/start_up_gibberish.cpython-39.pyc
ADDED
Binary file (530 Bytes). View file
|
|
src/start_up/__pycache__/start_up_rbs.cpython-39.pyc
ADDED
Binary file (1.8 kB). View file
|
|
src/start_up/start_up_bad_words_rule.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
from src.rule_based_system.BadWordRule import BadWordRule
|
4 |
+
|
5 |
+
|
6 |
+
def create_bad_word_rule(path, is_strict):
    """Build a ``BadWordRule`` from a headerless, single-column CSV word list.

    Args:
        path: Location of the CSV file; each row holds exactly one entry
            (a word or a short phrase).
        is_strict: Flag forwarded unchanged to ``BadWordRule``.

    Returns:
        A ``BadWordRule`` constructed from the entries read from ``path``.
    """
    # Read every entry as literal text. With pandas' defaults, entries such as
    # "nan", "null", "NA" or "None" are converted to float NaN and purely
    # numeric entries to numbers, neither of which is usable as a word.
    df = pd.read_csv(path, header=None, dtype=str, keep_default_na=False)
    # Assigning a single name also asserts that the file has exactly one column.
    df.columns = ['word']

    bad_words = df['word'].values

    return BadWordRule(bad_words, is_strict)
|
src/start_up/start_up_gibberish.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.gibberish_detection.GibberishDetector import GibberishDetector
|
2 |
+
from gibberish_detector import detector
|
3 |
+
from src.config import config
|
4 |
+
|
5 |
+
|
6 |
+
def create_gibberish_detector():
    """Return a ``GibberishDetector`` wrapping the configured model.

    The model is obtained by handing ``config['gibberish_model']`` to
    ``detector.create_from_model``.
    """
    return GibberishDetector(
        detector.create_from_model(config['gibberish_model'])
    )
|
src/start_up/start_up_rbs.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
from urlextract import URLExtract
|
4 |
+
|
5 |
+
from src.start_up.start_up_bad_words_rule import create_bad_word_rule
|
6 |
+
from src.config import config
|
7 |
+
from src.rule_based_system.HTMLRule import HTMLRule
|
8 |
+
from src.rule_based_system.PersonalDetailsRule import PersonalDetailsRule
|
9 |
+
from src.rule_based_system.RuleBasedSystem import RuleBasedSystem
|
10 |
+
from src.rule_based_system.TextLengthRule import TextLengthRule
|
11 |
+
from src.rule_based_system.UrlRule import UrlRule
|
12 |
+
|
13 |
+
|
14 |
+
def create_strong_rbs() -> RuleBasedSystem:
    """Assemble the "strong" rule-based system.

    The system is built from, in order: a text-length rule, a URL rule, a
    personal-details rule matching e-mail addresses, and a bad-word rule
    loaded from ``config['bad_words_strict']``.
    """
    email_pattern = r'[\w.+-]+@[\w-]+\.[\w.-]+'

    rules = [
        # todo: check if this make sense to add here, 500 was our own chosen max length
        TextLengthRule(),
        UrlRule(URLExtract()),
        PersonalDetailsRule([email_pattern], True),
        create_bad_word_rule(config['bad_words_strict'], True),
    ]
    return RuleBasedSystem(rules)
|
29 |
+
|
30 |
+
|
31 |
+
def create_weak_rbs() -> RuleBasedSystem:
    """Assemble the "weak" rule-based system.

    The system is built from, in order: a personal-details rule carrying the
    phone-number patterns, an HTML rule, and a bad-word rule loaded from
    ``config['bad_words_ambiguous']``.
    """
    # Anchored pattern; passed on as a plain (uncompiled) string.
    anchored_number = r"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)"

    # Digit-group patterns; each one is compiled with re.S.
    digit_group_patterns = (
        r".*?(\(?\d{3}\D{0,3}\d{2}\D{0,3}\d{2}).*?",  # home, local
        r".*?(\(?\d{3}\D{0,3}\d{3}\D{0,3}\d{2}\D{0,3}\d{2}).*?",  # home
        r".*?(\(?\d{2}\D{0,3}\d{3}\D{0,3}\d{3}\D{0,3}\d{2}).*?",  # mobile
        r".*?(\(?\d{3}\D{0,3}\d{3}\D{0,3}\d{3}\D{0,3}\d{2}).*?",  # mobile, international
    )

    phone_patterns = [anchored_number]
    phone_patterns.extend(re.compile(p, re.S) for p in digit_group_patterns)

    # rule systems
    return RuleBasedSystem([
        PersonalDetailsRule(phone_patterns, False),
        HTMLRule(),
        create_bad_word_rule(config['bad_words_ambiguous'], False),
    ])
|