nosdigitalmedia committed on
Commit
4e0f321
·
1 Parent(s): e8ff41c

Attempt to set up application

Browse files
Files changed (41) hide show
  1. README.md +5 -6
  2. app.py +71 -0
  3. requirements.txt +9 -0
  4. src/__init__.py +0 -0
  5. src/__pycache__/__init__.cpython-39.pyc +0 -0
  6. src/__pycache__/config.cpython-39.pyc +0 -0
  7. src/config.py +5 -0
  8. src/gibberish_detection/GibberishDetector.py +25 -0
  9. src/gibberish_detection/__init__.py +0 -0
  10. src/gibberish_detection/__pycache__/GibberishDetector.cpython-39.pyc +0 -0
  11. src/gibberish_detection/__pycache__/__init__.cpython-39.pyc +0 -0
  12. src/gibberish_detection/approved_comments.model +3 -0
  13. src/rule_based_system/BadWordRule.py +47 -0
  14. src/rule_based_system/HTMLRule.py +27 -0
  15. src/rule_based_system/LanguageRule.py +19 -0
  16. src/rule_based_system/PersonalDetailsRule.py +34 -0
  17. src/rule_based_system/Rule.py +25 -0
  18. src/rule_based_system/RuleBasedSystem.py +17 -0
  19. src/rule_based_system/TextLengthRule.py +29 -0
  20. src/rule_based_system/UrlRule.py +31 -0
  21. src/rule_based_system/Verdict.py +5 -0
  22. src/rule_based_system/__init__.py +0 -0
  23. src/rule_based_system/__pycache__/BadWordRule.cpython-39.pyc +0 -0
  24. src/rule_based_system/__pycache__/HTMLRule.cpython-39.pyc +0 -0
  25. src/rule_based_system/__pycache__/PersonalDetailsRule.cpython-39.pyc +0 -0
  26. src/rule_based_system/__pycache__/Rule.cpython-39.pyc +0 -0
  27. src/rule_based_system/__pycache__/RuleBasedSystem.cpython-39.pyc +0 -0
  28. src/rule_based_system/__pycache__/TextLengthRule.cpython-39.pyc +0 -0
  29. src/rule_based_system/__pycache__/UrlRule.cpython-39.pyc +0 -0
  30. src/rule_based_system/__pycache__/Verdict.cpython-39.pyc +0 -0
  31. src/rule_based_system/__pycache__/__init__.cpython-39.pyc +0 -0
  32. src/rule_based_system/word_lists/ambiguous_bad_words.csv +1467 -0
  33. src/rule_based_system/word_lists/strictly_bad_words.csv +100 -0
  34. src/start_up/__init__.py +0 -0
  35. src/start_up/__pycache__/__init__.cpython-39.pyc +0 -0
  36. src/start_up/__pycache__/start_up_bad_words_rule.cpython-39.pyc +0 -0
  37. src/start_up/__pycache__/start_up_gibberish.cpython-39.pyc +0 -0
  38. src/start_up/__pycache__/start_up_rbs.cpython-39.pyc +0 -0
  39. src/start_up/start_up_bad_words_rule.py +12 -0
  40. src/start_up/start_up_gibberish.py +9 -0
  41. src/start_up/start_up_rbs.py +50 -0
README.md CHANGED
@@ -1,13 +1,12 @@
1
  ---
2
- title: Dutch Youth Comment Classifier
3
- emoji: 📉
4
- colorFrom: blue
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 3.33.1
8
  app_file: app.py
9
  pinned: false
10
- license: cc-by-nc-nd-4.0
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Comment Classifier
3
+ emoji: ☑️
4
+ colorFrom: pink
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 3.28.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from huggingface_hub import from_pretrained_fastai
3
+
4
+ from src.start_up.start_up_rbs import create_weak_rbs, create_strong_rbs
5
+ from src.start_up.start_up_gibberish import create_gibberish_detector
6
+
7
+ # Start up modules
8
+ # Initiate model
9
# Load the pretrained learner through huggingface_hub's fastai integration.
learner = from_pretrained_fastai("felixolivier/comment-classifier")  # TODO rename this

# Maps a binary decision (0 / 1) to the verdict label used in the response.
verdict_map = {
    0: 'Inappropriate',
    1: 'Allowed'
}

# Initiate rule based systems (RBS)
weak_rbs = create_weak_rbs()
strong_rbs = create_strong_rbs()

# Initiate gibberish detector
gibberish_detector = create_gibberish_detector()


def predict(input_text):
    """
    Judge a single comment with every available component.

    The comment is passed through the machine learning model, the weak and
    strong rule based systems and the gibberish detector.

    :param input_text: the comment text to judge
    :return: dict with one entry per component ('model', 'gibberish',
        'weak_rbs', 'strong_rbs'); each entry holds the keys 'allowed',
        'verdict', 'highlights' and 'reasons'
    """

    # Get model predictions; the first element is converted to the 0/1 decision
    predictions = learner.predict(input_text)
    model_decision = int(predictions[0])

    # Pass through weak RBS
    allows_weak, reasons_weak, highlights_weak = weak_rbs.allows(input_text)

    # Pass through strong RBS
    allows_strong, reasons_strong, highlights_strong = strong_rbs.allows(input_text)

    # Pass through gibberish detector
    is_gibberish_free = gibberish_detector.predict(input_text)

    # A reason is only reported by a component that rejects the comment
    model_reasons = [] if model_decision else ['Machine learning model does not approve']
    gibberish_reasons = [] if is_gibberish_free else ['Comment text contains gibberish']

    # Construct JSON response
    return {
        'model': {
            'allowed': bool(model_decision),
            'verdict': verdict_map[model_decision],
            'highlights': [],
            'reasons': model_reasons
        },
        'gibberish': {
            'allowed': bool(is_gibberish_free),
            'verdict': verdict_map[int(is_gibberish_free)],
            'highlights': [],
            'reasons': gibberish_reasons
        },
        'weak_rbs': {
            'allowed': allows_weak,
            'verdict': verdict_map[int(allows_weak)],
            'highlights': highlights_weak,
            'reasons': reasons_weak
        },
        'strong_rbs': {
            'allowed': allows_strong,
            'verdict': verdict_map[int(allows_strong)],
            'highlights': highlights_strong,
            'reasons': reasons_strong
        },
    }


# Set up app
iface = gr.Interface(fn=predict, inputs="text", outputs="text")
iface.launch(share=False)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ fastai
4
+ fastcore
5
+ toml
6
+ gibberish-detector
7
+ pandas
8
+ urlextract
9
+ bs4
src/__init__.py ADDED
File without changes
src/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (144 Bytes). View file
 
src/__pycache__/config.cpython-39.pyc ADDED
Binary file (387 Bytes). View file
 
src/config.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
from pathlib import Path

# Repository root. This module lives at <root>/src/config.py, so the root is
# two levels up. Anchoring here keeps the paths valid regardless of the
# working directory the application is started from.
_REPO_ROOT = Path(__file__).resolve().parent.parent

# Locations of the model and word-list files used by the application.
config = {
    'gibberish_model': str(_REPO_ROOT / 'src' / 'gibberish_detection' / 'approved_comments.model'),
    'bad_words_strict': str(_REPO_ROOT / 'src' / 'rule_based_system' / 'word_lists' / 'strictly_bad_words.csv'),
    'bad_words_ambiguous': str(_REPO_ROOT / 'src' / 'rule_based_system' / 'word_lists' / 'ambiguous_bad_words.csv'),
}
src/gibberish_detection/GibberishDetector.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class GibberishDetector:
    """
    Detects whether the comment is actual text or a random sequence of characters.
    The model has been trained to learn character sequence probabilities from a set of approximately 4 million
    approved comments.
    """

    model = None
    skip_sequences = ['duh', 'ah']  # tokens that are allowed even though the model might see these as gibberish

    def __init__(self, model):
        """
        :param model: object exposing ``is_gibberish(text)``
        """
        self.model = model

    def predict(self, text):
        """Return True when the text is free of gibberish, False otherwise."""
        return not self.contains_gibberish(text)

    def contains_gibberish(self, text):
        """
        Return whether the model judges the text to be gibberish.

        Whitespace-separated tokens listed in ``skip_sequences`` are left out
        of the judgement. They are compared case-insensitively and with
        surrounding punctuation ignored. A text consisting only of such
        tokens is never gibberish.
        """
        import string

        skip_tokens = {skip.lower() for skip in self.skip_sequences}

        # Skip over tokens that often are misjudged by the model. Matching is
        # done per token: a substring test would let any text that merely
        # contains e.g. "ah" bypass the model altogether.
        tokens = text.split()
        kept = [
            token for token in tokens
            if token.strip(string.punctuation).lower() not in skip_tokens
        ]

        if len(kept) == len(tokens):
            # Nothing was skipped: hand the text to the model untouched
            return self.model.is_gibberish(text)

        if not kept:
            # Only skip tokens were present
            return False

        # Pass the remaining text through model
        return self.model.is_gibberish(' '.join(kept))
src/gibberish_detection/__init__.py ADDED
File without changes
src/gibberish_detection/__pycache__/GibberishDetector.cpython-39.pyc ADDED
Binary file (1.14 kB). View file
 
src/gibberish_detection/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (164 Bytes). View file
 
src/gibberish_detection/approved_comments.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26c786281ab9642063f9b6c0ad2b07d6122cd00b5c574aec0d7be63d3d1438f6
3
+ size 29971
src/rule_based_system/BadWordRule.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.rule_based_system.Rule import Rule
2
+
3
+ from src.rule_based_system.TextLengthRule import TEXT_SIZE_LIMIT
4
+ from src.rule_based_system.Verdict import Verdict
5
+
6
+
7
class BadWordRule(Rule):
    """
    Rejects comments that contain a word from a configured bad-word list.

    Bad words obtained from corners of the internet you do not want to visit:
    - https://www.ensie.nl/scheldwoordenboek#
    - https://scheldwoorden.goedbegin.nl/
    - https://nl.wiktionary.org/wiki/Categorie:Scheldwoord_in_het_Nederlands
    - https://www.lannoo.be/sites/default/files/books/issuu/9789401453417.pdf
    - https://www.dutchmultimedia.nl/meest-verschrikkelijke-engelse-scheldwoorden/
    - https://www.dutchmultimedia.nl/scheldwoordenboek-1-000-den-nederlandse-scheldwoorden/
    - https://www.henkyspapiamento.com/10-papiaments-scheldwoorden-die-we-liever-niet-horen/
    - https://volkabulaire.nl/tag/scheldwoorden/
    - https://data.world/wordlists/dirty-naughty-obscene-and-otherwise-bad-words-in-dutch
    """

    bad_words = None

    def __init__(self, bad_words: list, strict: bool):
        """
        :param bad_words: the words that are not allowed in a comment
        :param strict: value reported by ``is_strict``
        """
        self.bad_words = bad_words
        self.strict = strict

    def get_verdict(self, comment_text: str) -> Verdict:
        """
        Check the first TEXT_SIZE_LIMIT characters of the comment for bad words.

        :return: a Verdict that allows the comment when no bad word was found;
            the detected words are its highlights
        """
        comment_text = comment_text[0:TEXT_SIZE_LIMIT]

        bad_words = self.find_bad_words(comment_text.split())

        return Verdict(len(bad_words) == 0, bad_words)

    def find_bad_words(self, text: list) -> list:
        """
        Return the tokens of ``text`` that are bad words, in original order.

        Matching ignores case and punctuation around a token, so "Kut!" is
        detected when "kut" is listed. The tokens are returned exactly as they
        appear in ``text`` so they can be located in the comment.
        """
        import string

        # Set lookup instead of scanning the word list once per token
        lookup = {word.lower() for word in self.bad_words if isinstance(word, str)}

        detected_bad_words = []
        for word in text:
            lowered = word.lower()
            stripped = lowered.strip(string.punctuation)
            # The unstripped form is tested too: some listed words carry
            # punctuation themselves (e.g. a trailing apostrophe).
            if lowered in lookup or (stripped and stripped in lookup):
                detected_bad_words.append(word)

        return detected_bad_words

    def is_strict(self) -> bool:
        """Return the strictness this rule was configured with."""
        return self.strict

    def get_rule_description(self) -> str:
        """Return the reason reported when this rule rejects a comment."""
        return "Comment text contained %s inappropriate words" % \
               ('strictly' if self.is_strict() else 'ambiguous')
src/rule_based_system/HTMLRule.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from bs4 import BeautifulSoup
2
+
3
+ from src.rule_based_system.Rule import Rule
4
+ from src.rule_based_system.Verdict import Verdict
5
+
6
+
7
class HTMLRule(Rule):
    """Rejects comments in which the HTML parser finds at least one tag."""

    def get_verdict(self, comment_text: str) -> Verdict:
        """Allow the comment only when no HTML tag is found; the tags are the highlights."""
        tags = self.find_html(comment_text)
        no_tags_found = len(tags) == 0

        return Verdict(no_tags_found, tags)

    @staticmethod
    def find_html(text: str) -> list:
        """Return the string form of every tag BeautifulSoup's html.parser finds in ``text``."""
        soup = BeautifulSoup(text, "html.parser")
        found = []
        for tag in soup.find_all():
            found.append(str(tag))

        return found

    def is_strict(self) -> bool:
        """
        This rule occasionally removes appropriate comments when names are enclosed in triangular brackets e.g. <name>
        """
        return False

    def get_rule_description(self) -> str:
        """Return the reason reported when this rule rejects a comment."""
        return 'HTML used in comment text'
src/rule_based_system/LanguageRule.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.rule_based_system.Rule import Rule
2
+
3
+
4
class LanguageRule(Rule):
    """Placeholder for a rule that judges the language of a comment; not implemented yet."""

    def get_verdict(self, comment_text: str) -> "Verdict":
        """
        This rule requires further research. The library "langdetect" allows for language detection,
        but when applied to jeugdjournaal comments returns unstable results.
        A simpler approach could be by checking the percentage of non latin characters,
        but not taking into account smileys and punctuation

        :raises NotImplementedError: always, the rule has not been implemented
        """
        raise NotImplementedError('LanguageRule has not been implemented yet')

    def is_strict(self) -> bool:
        """Return False: this rule reports itself as non-strict."""
        return False


language_rule = LanguageRule()
src/rule_based_system/PersonalDetailsRule.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from src.rule_based_system.Rule import Rule
4
+ from src.rule_based_system.TextLengthRule import TEXT_SIZE_LIMIT
5
+ from src.rule_based_system.Verdict import Verdict
6
+
7
+
8
class PersonalDetailsRule(Rule):
    """Rejects comments in which one of the configured regular expressions matches."""

    def __init__(self, regexes: list, strict: bool):
        """
        :param regexes: regular expressions describing personal details
        :param strict: value reported by ``is_strict``
        """
        self.regexes = regexes
        self.strict = strict

    def get_verdict(self, comment_text: str) -> Verdict:
        """
        Search the first TEXT_SIZE_LIMIT characters of the comment for personal details.

        :return: a Verdict that allows the comment when nothing matched;
            the matched texts are its highlights
        """
        comment_text = comment_text[0:TEXT_SIZE_LIMIT]

        personal_details = self.find_personal_details(comment_text)

        return Verdict(len(personal_details) == 0, personal_details)

    def find_personal_details(self, text: str) -> list:
        """
        Return the text of every match of every configured regex, grouped per regex.

        The complete match is returned even when a pattern contains capture
        groups; ``re.findall`` would return the groups (or tuples of groups)
        in that case, which are not usable as highlights.
        """
        details = []
        for regex in self.regexes:
            details.extend(match.group(0) for match in re.finditer(regex, text))

        return details

    def is_strict(self) -> bool:
        """Return the strictness this rule was configured with."""
        return self.strict

    @staticmethod
    def get_rule_description() -> str:
        """Return the reason reported when this rule rejects a comment."""
        return 'Personal details were mentioned in text'
src/rule_based_system/Rule.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC
2
+
3
+ from src.rule_based_system.Verdict import Verdict
4
+
5
+
6
from abc import abstractmethod


class Rule(ABC):
    """
    Base class of all rules.

    ``get_verdict`` and ``is_strict`` are abstract, so a subclass that forgets
    to implement either cannot be instantiated.
    """

    @abstractmethod
    def get_verdict(self, comment_text: str) -> "Verdict":
        """
        Takes the comment text as input, tests a specific rule and returns a verdict,
        which contains whether the comment is allowed according to the specific rule and
        contains a list of substrings in the comment that may explain why a comment was
        marked as inappropriate.
        """

    @abstractmethod
    def is_strict(self) -> bool:
        """
        Returns True if rule can be used directly. False if results may be ambiguous.
        """

    @staticmethod
    def get_rule_description() -> str:
        """
        Returns a description of the rule, to be overridden by subclasses.

        :raises NotImplementedError: when the subclass does not provide a description
        """
        raise NotImplementedError('This rule does not provide a description')
src/rule_based_system/RuleBasedSystem.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class RuleBasedSystem:
    """Judges a comment by applying a fixed collection of rules to it."""

    def __init__(self, rules: list):
        """
        :param rules: objects exposing ``get_verdict(comment_text)`` and
            ``get_rule_description()``
        """
        # Copy, so later changes to the caller's list do not alter this system
        self.rules = list(rules)

    def allows(self, comment_text: str) -> "tuple[bool, list, list]":
        """
        Apply every rule to the comment.

        :return: a tuple ``(allows, reasons, highlights)``: whether all rules
            allow the comment, the descriptions of the rules that reject it,
            and the highlights of those rules' verdicts
        """
        allows, reasons, highlights = True, [], []

        for rule in self.rules:
            verdict = rule.get_verdict(comment_text)
            if verdict.allowed:
                continue

            allows = False
            reasons.append(rule.get_rule_description())
            highlights.extend(verdict.highlights or [])

        return allows, reasons, highlights
src/rule_based_system/TextLengthRule.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.rule_based_system.Rule import Rule
2
+ from src.rule_based_system.Verdict import Verdict
3
+
4
# Maximum number of characters a comment may have
TEXT_SIZE_LIMIT = 500


class TextLengthRule(Rule):
    """Rejects comments that are empty or longer than TEXT_SIZE_LIMIT characters."""

    def get_verdict(self, comment_text: str) -> Verdict:
        """Allow the comment when it is neither empty nor too long; there are no highlights."""
        if self.is_empty(comment_text):
            return Verdict(False, [])

        if self.is_too_long(comment_text, TEXT_SIZE_LIMIT):
            return Verdict(False, [])

        return Verdict(True, [])

    @staticmethod
    def is_empty(text):
        """Return True when ``text`` has length zero."""
        size = len(text)
        return size == 0

    @staticmethod
    def is_too_long(text, limit):
        """Return True when ``text`` is longer than ``limit``."""
        size = len(text)
        return size > limit

    def is_strict(self) -> bool:
        """Return True: this rule reports itself as strict."""
        return True

    @staticmethod
    def get_rule_description() -> str:
        """Return the reason reported when this rule rejects a comment."""
        return 'Inappropriate text length'
src/rule_based_system/UrlRule.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from src.rule_based_system.Rule import Rule
2
+ from src.rule_based_system.Verdict import Verdict
3
+
4
+
5
class UrlRule(Rule):
    """Rejects comments in which a URL is found."""

    url_extractor = None

    def __init__(self, url_extractor):
        """
        :param url_extractor: object exposing ``find_urls(text)``
        """
        self.url_extractor = url_extractor

    def get_verdict(self, comment_text: str) -> Verdict:
        """Allow the comment only when no URL is found; the URLs are the highlights."""
        urls = self.find_urls(comment_text)

        return Verdict(len(urls) == 0, urls)

    def find_urls(self, text: str) -> list:
        """
        Return the URLs the extractor finds in ``text``.

        When the extractor finds nothing but the text contains "http" in any
        capitalisation, that occurrence is returned as written in the text.
        """
        import re

        urls = self.url_extractor.find_urls(text)

        # url_extractor does not find url with spaces. Add extra check for urls like http:// goatse info
        # URI schemes are case-insensitive, so "HTTP" must be caught as well.
        if len(urls) == 0:
            scheme = re.search('http', text, flags=re.IGNORECASE)
            if scheme is not None:
                urls = [scheme.group(0)]

        return urls

    def is_strict(self) -> bool:
        """Return True: this rule reports itself as strict."""
        return True

    @staticmethod
    def get_rule_description() -> str:
        """Return the reason reported when this rule rejects a comment."""
        return 'Url was mentioned in text'
src/rule_based_system/Verdict.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
from typing import Optional


class Verdict:
    """Outcome of a single rule: whether the comment is allowed, plus explanatory highlights."""

    def __init__(self, allowed: bool, highlights: Optional[list] = None):
        """
        :param allowed: whether the rule allows the comment
        :param highlights: substrings of the comment that explain the verdict;
            defaults to a new empty list
        """
        self.allowed = allowed
        # A fresh list per instance: a mutable default argument would be
        # shared between every Verdict created without highlights.
        self.highlights = [] if highlights is None else highlights
src/rule_based_system/__init__.py ADDED
File without changes
src/rule_based_system/__pycache__/BadWordRule.cpython-39.pyc ADDED
Binary file (2.38 kB). View file
 
src/rule_based_system/__pycache__/HTMLRule.cpython-39.pyc ADDED
Binary file (1.51 kB). View file
 
src/rule_based_system/__pycache__/PersonalDetailsRule.cpython-39.pyc ADDED
Binary file (1.64 kB). View file
 
src/rule_based_system/__pycache__/Rule.cpython-39.pyc ADDED
Binary file (1.3 kB). View file
 
src/rule_based_system/__pycache__/RuleBasedSystem.cpython-39.pyc ADDED
Binary file (862 Bytes). View file
 
src/rule_based_system/__pycache__/TextLengthRule.cpython-39.pyc ADDED
Binary file (1.37 kB). View file
 
src/rule_based_system/__pycache__/UrlRule.cpython-39.pyc ADDED
Binary file (1.36 kB). View file
 
src/rule_based_system/__pycache__/Verdict.cpython-39.pyc ADDED
Binary file (498 Bytes). View file
 
src/rule_based_system/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (162 Bytes). View file
 
src/rule_based_system/word_lists/ambiguous_bad_words.csv ADDED
@@ -0,0 +1,1467 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ parasiet
2
+ varkenskop
3
+ halvezool
4
+ pannekoek
5
+ makak
6
+ dameskapper
7
+ pennelikker
8
+ papomslag
9
+ lafaard
10
+ takketrol
11
+ lolo
12
+ flaps
13
+ steenezel
14
+ aalskakker
15
+ aarselaar
16
+ ass
17
+ gamefreak
18
+ afzeiken
19
+ zwijn
20
+ kechba
21
+ yoghurtventje
22
+ homo
23
+ tollie
24
+ knor
25
+ koolgalmug
26
+ dike
27
+ zandloper
28
+ cunnie
29
+ anita
30
+ vot
31
+ klootoog
32
+ masque
33
+ railnicht
34
+ zalfpot
35
+ zeiksnor
36
+ kapsoneslijer
37
+ kaffer
38
+ zeurkous
39
+ pasjakroet
40
+ piemel
41
+ boerenhufter
42
+ baarmoederslijmvlieg
43
+ bedrijfspoedel
44
+ fatsich
45
+ kouloh
46
+ wankjob
47
+ kaketoe
48
+ kelerelijer
49
+ lijpo
50
+ achterbuurtfossiel
51
+ camslet
52
+ gangster
53
+ kloefkapper
54
+ fuckwad
55
+ kankernicht
56
+ schaamluis
57
+ geitenbreier
58
+ muilezelin
59
+ flappie
60
+ choad
61
+ perenplukker
62
+ uiersnuiver
63
+ cockshit
64
+ aarslander
65
+ parkschijter
66
+ kuthoer
67
+ onderkruipsel
68
+ apenkont
69
+ zaadbal
70
+ aarsvis
71
+ douche-fag
72
+ olifantenlul
73
+ prod
74
+ aapmens
75
+ optyfen
76
+ makaak
77
+ piggelmee
78
+ sodemieter
79
+ naadschurftsmurfigebeftekkel
80
+ prostituee
81
+ tampony
82
+ dumbfuck
83
+ mofo
84
+ droplul
85
+ apenhaar
86
+ dakhaas
87
+ parkinghomo
88
+ mafkaas
89
+ twats
90
+ zeiker
91
+ rapenschijter
92
+ zaadzwerver
93
+ dickjuice
94
+ paardenplasserwasser
95
+ karhengst
96
+ inktfis
97
+ mafketel
98
+ assbanger
99
+ okkenootje
100
+ anita
101
+ neetoor
102
+ eierzak
103
+ jerkass
104
+ geitenneuker
105
+ papegaai
106
+ snikkel
107
+ aarsmaad
108
+ afberen
109
+ takkentrut
110
+ dakduif
111
+ badaap
112
+ muts
113
+ autist
114
+ puto
115
+ raaskallen
116
+ schaamliplikker
117
+ baarvader
118
+ faggotcock
119
+ poontang
120
+ cuntlicker
121
+ koffieboon
122
+ padoog
123
+ vuilak
124
+ drooggeiler
125
+ fag
126
+ beurt
127
+ kakkie
128
+ afrossen
129
+ pekelteef
130
+ olalul
131
+ loser
132
+ kloothannes
133
+ augurk
134
+ fuckhole
135
+ avocadoneuker
136
+ kooch
137
+ panooch
138
+ piemelhoofdje
139
+ lapzwans
140
+ fielt
141
+ graftak
142
+ idioot
143
+ kurwa
144
+ whore
145
+ fuckbag
146
+ paardenlul
147
+ schadewijf
148
+ gladjakker
149
+ bep
150
+ cockfucker
151
+ broekhoesten
152
+ kriel
153
+ cockmuncher
154
+ gladiool
155
+ schijtlijster
156
+ lomperd
157
+ loeder
158
+ pekelhoer
159
+ sanka
160
+ sekreet
161
+ dweil
162
+ oesterkut
163
+ pothoer
164
+ skeet
165
+ uit-de-baarmoeder-gerukte-tien-tepelige-homofiel
166
+ fuckin
167
+ clit
168
+ piss
169
+ aarsklodder
170
+ natkut
171
+ leipo
172
+ boemelen
173
+ facadeklasher
174
+ harpij
175
+ jam-bek
176
+ paardebeffer
177
+ aardgeest
178
+ reetkever
179
+ aap
180
+ rugridder
181
+ cock
182
+ eiermix-imitatie
183
+ haai
184
+ muffdiver
185
+ paardenanus
186
+ campinghoer
187
+ kechies
188
+ vetzak
189
+ assgoblin
190
+ bedonderen
191
+ knob
192
+ jetslet
193
+ woeshoem
194
+ homodumbshit
195
+ zalf
196
+ boekenwurm
197
+ asscracker
198
+ minger
199
+ ini-mini-scheefgepoepte-pornokabouter
200
+ halvegare
201
+ zeikstraal
202
+ gook
203
+ ruski
204
+ yogoteef
205
+ zaadvarken
206
+ sodomiet
207
+ retenlikker
208
+ aambeienlikker
209
+ kakbal
210
+ kutstreek
211
+ caveman
212
+ oester
213
+ tit
214
+ achterpoter
215
+ bellend
216
+ heeb
217
+ assbag
218
+ smeerpijp
219
+ janettescheter
220
+ ragkut
221
+ zakrat
222
+ bouwdoos
223
+ gaylord
224
+ pielemans
225
+ peul
226
+ zemmer
227
+ bloodclaat
228
+ liefdesgrot
229
+ ass-pirate
230
+ dookie
231
+ penislijder
232
+ quasimodo
233
+ slettenbak
234
+ muff
235
+ penisfucker
236
+ anustoerist
237
+ fucknutt
238
+ neukpaal
239
+ pijphomo
240
+ bumblefuck
241
+ asshole
242
+ pruim
243
+ heihaas
244
+ vaginavochtabsorbeerder
245
+ schatje
246
+ peckerhead
247
+ cunt
248
+ piggel
249
+ luibuis
250
+ anussap-verzuring
251
+ zenuwlijer
252
+ mothafuckin'
253
+ paal
254
+ hell
255
+ shitter
256
+ billelikkertje
257
+ mafkees
258
+ flamoes
259
+ parkiet
260
+ uienreet
261
+ achterlader
262
+ bitchtits
263
+ honkey
264
+ splooge
265
+ slag
266
+ adderengebroedsels
267
+ janksnor
268
+ fucktard
269
+ fucktart
270
+ rotmof
271
+ insekt
272
+ kringspiermusketier
273
+ cockburger
274
+ shitbag
275
+ naarling
276
+ luibak
277
+ cooter
278
+ sod-off
279
+ racechinees
280
+ bruinwerker
281
+ racesjinees
282
+ gaybob
283
+ superkut
284
+ hondenlul
285
+ klapkut
286
+ foefkop
287
+ lijer
288
+ pigmentvreter
289
+ stinker
290
+ paarsedruiveneikel
291
+ fuckup
292
+ baanduivel
293
+ lauwbikker
294
+ gay
295
+ dicksucking
296
+ hakker
297
+ vag
298
+ kontneuker
299
+ nul
300
+ kruimelbuik
301
+ assface
302
+ oen
303
+ junglebunny
304
+ aaprikaan
305
+ anuspiloot
306
+ landloper
307
+ punani
308
+ pijpbekkieg
309
+ lameass
310
+ penis
311
+ tollielikker
312
+ dònchin
313
+ pampa
314
+ huisdealer
315
+ pierlala
316
+ zwijnjak
317
+ pepermuntvreter
318
+ gadoodengeefmijbrood
319
+ viespeuk
320
+ anuspiraat
321
+ pijpert
322
+ pijpmonk
323
+ egocentrischebullebak
324
+ ezel
325
+ kenau
326
+ vergallen
327
+ assmunch
328
+ zeikhannes
329
+ queer
330
+ arsehole
331
+ yid
332
+ raas
333
+ infanterist
334
+ dickmilk
335
+ tamponkut
336
+ ettertje
337
+ mcfagget
338
+ labrat
339
+ picorniekop
340
+ darmgas-wildebras
341
+ wabipi
342
+ cumtart
343
+ acrobaat
344
+ centenschraper
345
+ aarsbaard
346
+ autokraat
347
+ suckass
348
+ nigga
349
+ choco
350
+ tang
351
+ pijppiloot
352
+ beaner
353
+ morsebel
354
+ takketut
355
+ dirkdoos
356
+ bastard
357
+ faggit
358
+ modderduivel
359
+ asbestmuis
360
+ damhoer
361
+ cuntass
362
+ oksel
363
+ passoa-slet
364
+ kakkerlak
365
+ zeekbos
366
+ mick
367
+ edammer
368
+ darmkikker
369
+ pezewever
370
+ zibi
371
+ plucheplakker
372
+ nachtuil
373
+ xylofoonneuker
374
+ lampenhoer
375
+ anusreflector
376
+ fuckhead
377
+ billig
378
+ wippen
379
+ poonani
380
+ politiemuts
381
+ schuurmeid
382
+ naadhopper
383
+ jap
384
+ palenlaaier
385
+ dwaas
386
+ spuiten
387
+ renob
388
+ pokkenwijf
389
+ lampelul
390
+ naaiaap
391
+ dreuzel
392
+ aardpiraat
393
+ doshi
394
+ arrefakker
395
+ schaamhaarverzamelaar
396
+ piemelbrie
397
+ randdebiel
398
+ lesbo
399
+ chocoladesnol
400
+ kechje
401
+ testicle
402
+ nitwit
403
+ reetroeier
404
+ kadaver
405
+ shitbrains
406
+ asslicker
407
+ crapuul
408
+ vampier
409
+ bitchboy
410
+ baarmoederscheefgepoept-kuttekind
411
+ goddamn
412
+ zaadvragende-budcontainer
413
+ asswipe
414
+ gayfuck
415
+ olifantebabyface
416
+ standje
417
+ fucking
418
+ dumbass
419
+ kankerhond
420
+ slapper
421
+ maf
422
+ poot
423
+ rammelhoofd
424
+ maangezicht
425
+ reutelen
426
+ kakboer
427
+ opzouten
428
+ paardenpijper
429
+ cumbubble
430
+ douchebag
431
+ temeier
432
+ boerenkaffer
433
+ cockmonkey
434
+ teef
435
+ slempen
436
+ zaadhamsteraar
437
+ bokkenrijder
438
+ aambeienschoffelaar
439
+ pushi
440
+ deggo
441
+ brilsmurf
442
+ anuspuntverknetteraar
443
+ asscock
444
+ piemelgymmer
445
+ dickface
446
+ fuckbutt
447
+ penisspuiter
448
+ appelzwijn
449
+ pensklepper
450
+ schlemiel
451
+ badmuts
452
+ slijmerd
453
+ asshat
454
+ aarsridder
455
+ schelm
456
+ hoerenjong
457
+ schurftkop
458
+ keilef
459
+ punanny
460
+ moederneuker
461
+ zakkendroller
462
+ bermslet
463
+ smeg
464
+ lauwwaterdrinker
465
+ zaadspuitende
466
+ ragnicht
467
+ zaadcontainer
468
+ tietvlieg
469
+ engerd
470
+ zaadslikker
471
+ achterlijke
472
+ shittiest
473
+ meretrix
474
+ papzak
475
+ aarsbeffer
476
+ ectoplasma
477
+ paardenpenislikker
478
+ kaashoer
479
+ nageboorte
480
+ reetveger
481
+ queerhole
482
+ pokkenlijder
483
+ makako
484
+ scrote
485
+ darmcoureur
486
+ gajeskop
487
+ apekop
488
+ ossenkop
489
+ hondenneuker
490
+ nasivreter
491
+ lawaaipapegaai
492
+ bezem
493
+ flapkut
494
+ asshopper
495
+ djuka
496
+ shitass
497
+ jp
498
+ uil
499
+ zaadschieter
500
+ knijpkont
501
+ kreng
502
+ zandhaas
503
+ zenuwenlijer
504
+ lesbian
505
+ ballen
506
+ asshead
507
+ hol
508
+ aarsketser
509
+ schurk
510
+ inktviskut
511
+ bonk
512
+ indoending
513
+ wanker
514
+ goddam
515
+ snitch
516
+ huppelkut
517
+ coronalijer
518
+ kike
519
+ naaien
520
+ buttfucka
521
+ zaadvrager
522
+ dòlò
523
+ kuttenkop
524
+ hamerhomo
525
+ vaginalehangflapper
526
+ haaibaai
527
+ munging
528
+ dickhead
529
+ klafte
530
+ netenvreter
531
+ kaashaas
532
+ rampenbek
533
+ prick
534
+ befborstel
535
+ labbekak
536
+ vjayjay
537
+ manwijf
538
+ dumbshit
539
+ apengaper
540
+ aardworst
541
+ dickwad
542
+ boeler
543
+ peniskokerlikker
544
+ klapluis
545
+ clitfuck
546
+ chink
547
+ kippenneuker
548
+ klier
549
+ mafkikker
550
+ konthond
551
+ asslick
552
+ spintmijt
553
+ bugger
554
+ tonto
555
+ palingstekker
556
+ gaytard
557
+ afrotten
558
+ aalscholver
559
+ phenomeen
560
+ ouwehoeren
561
+ twatlips
562
+ opsodemieteren
563
+ cocknugget
564
+ kaapstander
565
+ schaamhaaretendebaffer
566
+ schijtlaars
567
+ bille-likkers
568
+ nutsack
569
+ piaprisma
570
+ zakbreuk
571
+ kloothommel
572
+ janlul
573
+ jizz
574
+ fuckersucker
575
+ triootje
576
+ aarsatleet
577
+ shitface
578
+ kluns
579
+ bollox
580
+ buffelen
581
+ kontol
582
+ krielkip
583
+ familycocktail
584
+ babaloekoe
585
+ piemelwijf
586
+ ito
587
+ roetmop
588
+ assbite
589
+ fucked
590
+ bhenchod
591
+ flikken
592
+ walvisganger
593
+ fabricagefout
594
+ botterik
595
+ droogkloot
596
+ aso-big
597
+ cocksmith
598
+ wipkip
599
+ pindachinees
600
+ rekel
601
+ darmstad
602
+ sukkel
603
+ populist
604
+ verdomme
605
+ beheime
606
+ pijpen
607
+ shitty
608
+ nazi
609
+ kutvent
610
+ hamel
611
+ potver
612
+ wuftje
613
+ achterbaksestoephoer
614
+ papist
615
+ rampdebiel
616
+ dickfucker
617
+ fuckwit
618
+ pleurislijder
619
+ kakbroek
620
+ sandnigger
621
+ penispijper
622
+ perebek
623
+ smeerkanis
624
+ apekloot
625
+ lantarenslet
626
+ lomperik
627
+ clitface
628
+ kapoen
629
+ pestboer
630
+ boerenkinkel
631
+ uit-de-baarmoeder-geruktedeurknop
632
+ watervlo
633
+ tittyfuck
634
+ anuspapegaai
635
+ johny
636
+ lezzie
637
+ miljaar
638
+ vlerk
639
+ ass-hat
640
+ palen
641
+ proleet
642
+ zeurpiet
643
+ yvesmoeder
644
+ hersenlijer
645
+ balls
646
+ tyfuslijder
647
+ whorebag
648
+ pedocyclomasturbant
649
+ zaadsnuiver
650
+ fascist
651
+ arroganteklitzuigende-wrattenhoofd
652
+ shitcanned
653
+ afrukken
654
+ grobbejanus
655
+ zakkenvuller
656
+ kettingzeugg
657
+ fagbag
658
+ baarmoeder
659
+ daaps
660
+ naadje
661
+ dicksucker
662
+ remsporen
663
+ pollock
664
+ schaap
665
+ zaksel
666
+ toeter
667
+ kakmadam
668
+ holtor
669
+ dronkenlap
670
+ maaghond
671
+ lamzak
672
+ zeikbeer
673
+ aarsbanaan
674
+ stinkbok
675
+ djoeka
676
+ fucker
677
+ zaadheks
678
+ paki
679
+ aftrekken
680
+ sambalburger
681
+ satan
682
+ konjo
683
+ faggot
684
+ zeiken
685
+ raamneger
686
+ kraut
687
+ shitting
688
+ maaghoer
689
+ paardenpenis
690
+ palingkop
691
+ bekken
692
+ biljartbal
693
+ breezerslet
694
+ fellatio
695
+ cunnilingus
696
+ appelflap
697
+ cow
698
+ aartschurk
699
+ autocraat
700
+ pietlut
701
+ stomkop
702
+ pussies
703
+ shitbagger
704
+ gash
705
+ dago
706
+ fuckoff
707
+ boner
708
+ motherfucking
709
+ matennaaier
710
+ puistenkop
711
+ tangelteef
712
+ clunge
713
+ klootviool
714
+ quasimodom
715
+ assclown
716
+ naaimachine
717
+ trien
718
+ pantoffelneuker
719
+ wratzwijn
720
+ cumjockey
721
+ axwound
722
+ inktlul
723
+ éénoog
724
+ engerling
725
+ hoerenkind
726
+ piemelpatser
727
+ shiz
728
+ rapalje
729
+ geit
730
+ specht
731
+ darmtoerist
732
+ hapsnurker
733
+ mispunt
734
+ polesmoker
735
+ kinkel
736
+ hoerenjager
737
+ cuntrag
738
+ spleetoog
739
+ aambeienbeffer
740
+ baby
741
+ carnavals-inca
742
+ kaaskop
743
+ wafelijzer
744
+ zaadnaad
745
+ lekkerpieper
746
+ sabberkut
747
+ chesticle
748
+ pielesnuiver
749
+ okselharenbillereetzweetzuur
750
+ schietmot
751
+ nonce
752
+ assshole
753
+ fuckbutter
754
+ bampot
755
+ lafbek
756
+ zaadhoer
757
+ assnigger
758
+ pinda
759
+ piek
760
+ assfuck
761
+ gesodemieter
762
+ vlooienzak
763
+ latexnicht
764
+ nigaboo
765
+ tits
766
+ tard
767
+ zeikerd
768
+ guido
769
+ egoist
770
+ noppeshoer
771
+ cockknoker
772
+ gluiperd
773
+ nageboortegezwe
774
+ jandoedel
775
+ cocksmoker
776
+ bolle
777
+ wannabee
778
+ taboetstabiske
779
+ aardworm
780
+ wank
781
+ aarsneger
782
+ kankerlijer
783
+ ganzeboard
784
+ uit-de-baarmoeder-gerukte-tien-tepelige-kut-trut
785
+ passoatrut
786
+ cocksniffer
787
+ carnavalskaper
788
+ campusnerd
789
+ babypoedersnuiver
790
+ totebel
791
+ vleesroos
792
+ penishoofd
793
+ schobbejak
794
+ wandelende-reklame-voor-kachelglans
795
+ paardelullo
796
+ aarsgarnaal
797
+ poon
798
+ carpetmuncher
799
+ anus
800
+ lelijkerd
801
+ coon
802
+ git
803
+ assshit
804
+ fanny
805
+ kakteef
806
+ badgast
807
+ flauwerd
808
+ lamagezicht
809
+ hockeytut
810
+ cockmongler
811
+ oelewap
812
+ parel
813
+ dickweed
814
+ patagonier
815
+ kankerlijder
816
+ buttfucker
817
+ barslet
818
+ watje
819
+ dickweasel
820
+ lèrchi
821
+ reet
822
+ dòndòn
823
+ mothafucka
824
+ cazzo
825
+ besodemieteren
826
+ geteisem
827
+ nanoneuker
828
+ klojo
829
+ olifantenschimmelslurf
830
+ flessentrekker
831
+ strontzak
832
+ karonje
833
+ poep
834
+ zakcel
835
+ zwakbegaafde
836
+ fuckass
837
+ hoempert
838
+ zaadstaafteler
839
+ kontos
840
+ coochie
841
+ kontkruiper
842
+ armoedverspreider
843
+ shiznit
844
+ aarsworm
845
+ pandapikkenpijper
846
+ deuggleuf
847
+ tapijtnek
848
+ waailap
849
+ cockjockey
850
+ piezer
851
+ cumslut
852
+ fietspompenlullegezicht
853
+ azteek
854
+ kechie
855
+ soepkip
856
+ barg
857
+ aarsvijand
858
+ schijtluis
859
+ cockhead
860
+ sapcentrifuge
861
+ apenkut
862
+ prij
863
+ smeerlap
864
+ heks
865
+ dipshit
866
+ mikrodebiel
867
+ baarmoeder-discokip
868
+ heaumeau
869
+ truthola
870
+ mietje
871
+ asfaltridder
872
+ cumdumpster
873
+ aarslikker
874
+ klotenklapper
875
+ afzuigen
876
+ eeltneus
877
+ shag
878
+ heikneuter
879
+ sal
880
+ edeldarm
881
+ kakker
882
+ paardenpikkenpoetser
883
+ variétéhoer
884
+ cockface
885
+ koeskoesvreter
886
+ galsmoel
887
+ paardekut
888
+ galkankerdebiel
889
+ pleurislaaier
890
+ dickhole
891
+ imbeciel
892
+ lamstraal
893
+ assmuncher
894
+ goddamnit
895
+ leuter
896
+ rabauter
897
+ jigaboo
898
+ gannef
899
+ sakkers
900
+ jetonslikker
901
+ viruswappie
902
+ fok
903
+ eend
904
+ anuspenetreerder
905
+ kamerolifant
906
+ macaronivreter
907
+ gans
908
+ sacoche
909
+ bint
910
+ boerenkarhengst
911
+ baardaap
912
+ gooch
913
+ spast
914
+ wasbakkenpisser
915
+ zaadje
916
+ shithouse
917
+ secreet
918
+ lullo
919
+ badjufbeffer
920
+ linkmiegel
921
+ zeur
922
+ ass-jabber
923
+ pukkelbek
924
+ sambalvreter
925
+ wanhopigedarmrukker
926
+ del
927
+ incel
928
+ maan-rovers
929
+ klapperaap
930
+ kamelenneuker
931
+ zanikert
932
+ anuspieper
933
+ fuckbrain
934
+ flikker
935
+ pijpenkoplikker
936
+ beest
937
+ charlatan
938
+ wetback
939
+ bullshit
940
+ cockwaffle
941
+ krijslijster
942
+ aarsaap
943
+ langstaart
944
+ kaalkop
945
+ pijphoer
946
+ zatmuil
947
+ zeiklijster
948
+ doos
949
+ penisbanger
950
+ anussabbelaar
951
+ shit
952
+ tampontrekker
953
+ schijthoofd
954
+ reetridder
955
+ coochy
956
+ griet
957
+ fleer
958
+ afwerkplaats
959
+ aardvarken
960
+ balhaar
961
+ gambaslet
962
+ spook
963
+ pik
964
+ pummel
965
+ assjacker
966
+ daaglaker
967
+ uilskuiken
968
+ luiskop
969
+ jeannette
970
+ waterhoofd
971
+ dagblinde
972
+ zoubi
973
+ zemel
974
+ fuckboy
975
+ shitcunt
976
+ pijpdeuramateur
977
+ hangjas
978
+ naakt
979
+ aansteller
980
+ snoever
981
+ twatwaffle
982
+ vaginavarken
983
+ patser
984
+ douche
985
+ cretin
986
+ kyke
987
+ darmneuker
988
+ hond
989
+ pierewipper
990
+ kwakzalver
991
+ giechelkut
992
+ feltch
993
+ taig
994
+ nondeju
995
+ krentekakker
996
+ piesen
997
+ takenboeker
998
+ vetklep
999
+ wrattenzwijn
1000
+ nietsnut
1001
+ pijpelullebakkes
1002
+ lamlul
1003
+ galgenbrok
1004
+ jagoff
1005
+ mierenneuker
1006
+ kaasrandknabbelaar
1007
+ pijpmond
1008
+ graftyfuskankerpleuris
1009
+ teringlijder
1010
+ aarsmade
1011
+ hode
1012
+ godver
1013
+ kantoorpik
1014
+ piemelboxer
1015
+ mosselhoer
1016
+ kakmaker
1017
+ kontengezicht
1018
+ zwartjoekel
1019
+ cracker
1020
+ apache
1021
+ okselschimmelhoer
1022
+ pias
1023
+ piel
1024
+ klere
1025
+ capsonestrut
1026
+ zeekbakkes
1027
+ shithole
1028
+ publiciteitsgeil
1029
+ debiel
1030
+ judas
1031
+ mispruim
1032
+ kanker
1033
+ hangbuikvarkenkonteneuker
1034
+ zakkenwasser
1035
+ lardass
1036
+ godverdomme
1037
+ kloot
1038
+ flapdrol
1039
+ maajen
1040
+ assbandit
1041
+ asssucker
1042
+ baccilledrager
1043
+ shitdick
1044
+ fucknut
1045
+ asperge
1046
+ gat
1047
+ anusuitscheiding
1048
+ beaver
1049
+ cuntface
1050
+ shitstain
1051
+ darmafval
1052
+ zapoteek
1053
+ draaikont
1054
+ apenbatser
1055
+ baffer
1056
+ hamsterpampe
1057
+ chinc
1058
+ ruigpoot
1059
+ aardappelluis
1060
+ spic
1061
+ schijterd
1062
+ cocksucker
1063
+ pantyneuker
1064
+ titfuck
1065
+ dickwod
1066
+ motherfucker
1067
+ pedopenis
1068
+ niglet
1069
+ waterdrinker
1070
+ tering
1071
+ quikteef
1072
+ humping
1073
+ brotherfucker
1074
+ pampaman
1075
+ snatch
1076
+ zaadstengel
1077
+ shitspitter
1078
+ penispuffer
1079
+ oelewapper
1080
+ galbak
1081
+ olijfneuker
1082
+ butt-pirate
1083
+ hondsvot
1084
+ aarsfreter
1085
+ pekelharing
1086
+ naaidoos
1087
+ zaadvreter
1088
+ pendeu
1089
+ aarshaarluis
1090
+ farmer
1091
+ vandaal
1092
+ tongzoeng
1093
+ dickmonger
1094
+ okselhaas
1095
+ uitgekotstekamelenkut
1096
+ hondenkut
1097
+ ouwehoedendoos
1098
+ cunthole
1099
+ pazop
1100
+ clusterfuck
1101
+ hoerenzoon
1102
+ camperhoer
1103
+ palurk
1104
+ quartrozo
1105
+ chode
1106
+ aardpeer
1107
+ pussy
1108
+ klaplul
1109
+ munter
1110
+ pagadder
1111
+ penozeg
1112
+ pindapoepchinees
1113
+ crap
1114
+ aars
1115
+ fatass
1116
+ asswad
1117
+ bengel
1118
+ paashaasschaamhaarverzamelaar
1119
+ patin
1120
+ carnavals-mussolini
1121
+ polak
1122
+ flamer
1123
+ neger
1124
+ hamsterpijper
1125
+ gringo
1126
+ etterbak
1127
+ autodidact
1128
+ slutbag
1129
+ zaadslet
1130
+ flathoer
1131
+ pissed
1132
+ arse
1133
+ takkehoer
1134
+ keutelkut
1135
+ lambak
1136
+ rapey
1137
+ aarskind
1138
+ haas
1139
+ porchmonkey
1140
+ jambekslikker
1141
+ dumshit
1142
+ satéboer
1143
+ campingslet
1144
+ bosneuker
1145
+ hamsteraar
1146
+ aardappelhoer
1147
+ bukkake
1148
+ merde
1149
+ kokosmakroon
1150
+ pijpslet
1151
+ nicht
1152
+ covidioot
1153
+ tyfus
1154
+ gedoogzone
1155
+ tokkie
1156
+ nigger
1157
+ belazeren
1158
+ dickbag
1159
+ pijpgeit
1160
+ dickslap
1161
+ bitchass
1162
+ kontelikker
1163
+ bloody
1164
+ pijpbek
1165
+ stootje
1166
+ taart
1167
+ kaasknobbel
1168
+ bangerd
1169
+ pierlalla
1170
+ zandvlo
1171
+ uit-de-baarmoeder-gerukte-mislukte-kuttekop
1172
+ palmboomneuker
1173
+ cum
1174
+ cuntslut
1175
+ moffenhoer
1176
+ schaamlipgezwel
1177
+ troelala
1178
+ oppleuren
1179
+ doochbag
1180
+ kutlul
1181
+ braadvarken
1182
+ klooien
1183
+ hockeytrut
1184
+ snotolf
1185
+ sodeflikker
1186
+ asspirate
1187
+ kroeskop
1188
+ zattekop
1189
+ cockmongruel
1190
+ lampekap
1191
+ wasknijper
1192
+ zaagselkop
1193
+ shitfaced
1194
+ douchewaffle
1195
+ lavabosmoel
1196
+ paardenneuker
1197
+ aarshaarverzamelaar
1198
+ aambeihaarwasser
1199
+ vullis
1200
+ smiecht
1201
+ asielzoeker
1202
+ gratenbaal
1203
+ rimjob
1204
+ boerenheikneuter
1205
+ gabber
1206
+ gluiper
1207
+ apenjong
1208
+ apenwaffel
1209
+ ossenlul
1210
+ pedo
1211
+ pecker
1212
+ qwarf
1213
+ parkeerkut
1214
+ wop
1215
+ zuiplap
1216
+ greppeldel
1217
+ tart
1218
+ sodeju
1219
+ kruiper
1220
+ ramenlikker
1221
+ huzarenhoop
1222
+ eikellikker
1223
+ aarslog
1224
+ klotenbijter
1225
+ galgengebroed
1226
+ pakul
1227
+ dicktickler
1228
+ satraap
1229
+ tankreet
1230
+ palingvel
1231
+ anusridder
1232
+ baggerduiker
1233
+ matje
1234
+ lap
1235
+ snotaap
1236
+ babok
1237
+ ebverzuiper
1238
+ hangbuikzwijn
1239
+ uit-de-baarmoeder-gerukte-tien-tepelige-koeie-stront
1240
+ truus
1241
+ etter
1242
+ assfucker
1243
+ gaywad
1244
+ kankerhoer
1245
+ boomer
1246
+ aarshaarknabbelaar
1247
+ bloedlijer
1248
+ damn
1249
+ eendekweker
1250
+ lantaarnpaalhoer
1251
+ pissflaps
1252
+ piemelaar
1253
+ zemmel
1254
+ cumguzzler
1255
+ fagtard
1256
+ varken
1257
+ dumass
1258
+ batti
1259
+ dick-sneeze
1260
+ cocksmoke
1261
+ gayfuckist
1262
+ paplap
1263
+ aardbanaan
1264
+ raketpiraat
1265
+ stinkerd
1266
+ vagina
1267
+ yoghurtpisser
1268
+ miegels
1269
+ geilneef
1270
+ baarmoederbuikwandschimmel
1271
+ makreel
1272
+ cockass
1273
+ kaasnicht
1274
+ schimmelkut
1275
+ negro
1276
+ cockmaster
1277
+ ho
1278
+ kootch
1279
+ shithead
1280
+ fucks
1281
+ penisworstelaar
1282
+ twat
1283
+ sloerie
1284
+ jenevertrut
1285
+ patjepeeër
1286
+ abortuskind
1287
+ dickbeaters
1288
+ schlong
1289
+ augurkenneuker
1290
+ schaapskop
1291
+ ploert
1292
+ klooi
1293
+ welzijnsmafia
1294
+ asbak
1295
+ assmonkey
1296
+ drol
1297
+ druiloor
1298
+ cijferneuker
1299
+ gluipsnor
1300
+ shitbreath
1301
+ kwakkieg
1302
+ fuckwitt
1303
+ bollocks
1304
+ skank
1305
+ fuckface
1306
+ peniskoker
1307
+ cockbite
1308
+ cocknose
1309
+ gratenkut
1310
+ campesneurdo
1311
+ pijpzuiger
1312
+ schaamlikkerkip
1313
+ plurk
1314
+ schapenneuker
1315
+ addergebroed
1316
+ zeurzak
1317
+ moffrika
1318
+ kanen
1319
+ rukken
1320
+ piemelpiraat
1321
+ aso
1322
+ mariku
1323
+ zak
1324
+ cretino
1325
+ satehoer
1326
+ kont
1327
+ pommel
1328
+ pedopyrofilatomaan
1329
+ taalnazi
1330
+ eikel
1331
+ absjaar
1332
+ ginger
1333
+ internetverslaafde-neuker
1334
+ ponylul
1335
+ pot
1336
+ eendenkont
1337
+ koe
1338
+ poepen
1339
+ patjakker
1340
+ kleumer
1341
+ koño
1342
+ zuigen
1343
+ zadelruiker
1344
+ offroadie
1345
+ apenrukker
1346
+ schweinhund
1347
+ niggers
1348
+ klaarkomen
1349
+ hammehapper
1350
+ augurkenturk
1351
+ zwartjan
1352
+ skullfuck
1353
+ bruisend
1354
+ sikbille
1355
+ kebber
1356
+ anusvreter
1357
+ handvat
1358
+ quist
1359
+ dickfuck
1360
+ paardebrei
1361
+ pedomeester
1362
+ spaghettivreter
1363
+ mapangpang
1364
+ chocoladehoer
1365
+ gadverdamme
1366
+ babbalipviskikker
1367
+ klote
1368
+ oelepoeper
1369
+ minge
1370
+ aarstulp
1371
+ ransaap
1372
+ slut
1373
+ bitchy
1374
+ tampontoerist
1375
+ graftakketeef
1376
+ penislikker
1377
+ queef
1378
+ dyke
1379
+ oezo
1380
+ paardelid
1381
+ tiefus
1382
+ krimper
1383
+ klerebeer
1384
+ etterbuil
1385
+ jood
1386
+ spick
1387
+ handjob
1388
+ malaka
1389
+ paardenpikkerslikker
1390
+ aapjesviller
1391
+ papagaaiekuttenbefkonijn
1392
+ aarsbaars
1393
+ pakhuisrat
1394
+ bitches
1395
+ flamingo
1396
+ aarshaarfohner
1397
+ armoedzaaier
1398
+ schaampik
1399
+ mafklapper
1400
+ piemelteef
1401
+ piemelpunt
1402
+ gayass
1403
+ flatslet
1404
+ maffer
1405
+ belhamel
1406
+ vajayjay
1407
+ capsoneslijer
1408
+ pestbek
1409
+ eikelkluiver
1410
+ tifus
1411
+ whoreface
1412
+ potverdorie
1413
+ ouwehoer
1414
+ yoyolul
1415
+ bulderbast
1416
+ potverdikkeme
1417
+ jackass
1418
+ broodaap
1419
+ raceplee
1420
+ poonany
1421
+ schuinsmarcheerder
1422
+ pestlijder
1423
+ dildo
1424
+ aasgier
1425
+ kak
1426
+ zeekmem
1427
+ apenflikkerneuker
1428
+ fileklever
1429
+ peukenneuker
1430
+ kwal
1431
+ cholera
1432
+ kontonbonker
1433
+ darmfloraflirter
1434
+ nakko
1435
+ vingeren
1436
+ bleekscheet
1437
+ papabaf
1438
+ dicks
1439
+ vaselinevriend
1440
+ dick
1441
+ klootjesvolk
1442
+ varkenshol
1443
+ peniszuiger
1444
+ cabaretlul
1445
+ adder
1446
+ pedomaan
1447
+ eendeneikel
1448
+ klerelijer
1449
+ va-j-j
1450
+ brugpieper
1451
+ blowjob
1452
+ invertebraat
1453
+ unclefucker
1454
+ zuurpruim
1455
+ punta
1456
+ sloef
1457
+ gaydo
1458
+ smous
1459
+ fudgepacker
1460
+ bosneger
1461
+ asses
1462
+ kakhuis
1463
+ parelridder
1464
+ vaking
1465
+ downie
1466
+ dikke
1467
+ dikzak
src/rule_based_system/word_lists/strictly_bad_words.csv ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ klootzak
2
+ hoer
3
+ bitch
4
+ slet
5
+ trut
6
+ kutwijf
7
+ mongool
8
+ aflebberen
9
+ beffen
10
+ bef
11
+ bokkelul
12
+ botergeil
13
+ boerelul
14
+ dombo
15
+ boerenpummel
16
+ godverdamme
17
+ hoer
18
+ hoerenbuurt
19
+ hoerenloper
20
+ hoerig
21
+ hufter
22
+ klootzak
23
+ kloten
24
+ kontneuken
25
+ kut
26
+ kuttelikkertje
27
+ lul
28
+ lul-de-behanger
29
+ lulhannes
30
+ lummel
31
+ mof
32
+ neuken
33
+ neukstier
34
+ oetlul
35
+ opgeilen
36
+ opkankeren
37
+ oprotten
38
+ paardelul
39
+ portiekslet
40
+ rothoer
41
+ rotzak
42
+ rukhond
43
+ schijt
44
+ schijten
45
+ schoft
46
+ slet
47
+ sletterig
48
+ slik mijn zaad
49
+ snol
50
+ standje-69
51
+ stoephoer
52
+ stront
53
+ sufferd
54
+ teringlijer
55
+ trottoir prostituée
56
+ trottoirteef
57
+ verkloten
58
+ verneuken
59
+ wijf
60
+ fuck
61
+ fuk
62
+ fack
63
+ fak
64
+ fock
65
+ afgebefte del
66
+ prickteaser
67
+ aarshaar
68
+ paardekloot
69
+ thundercunt
70
+ aarsbeer
71
+ feck
72
+ quikhoer
73
+ galgenaas
74
+ bafaap
75
+ fagfucker
76
+ pussylicking
77
+ apeneuker
78
+ internetslet
79
+ takkenteef
80
+ augurklul
81
+ queerbait
82
+ aarsslijm
83
+ paashaasneuker
84
+ pijpendemonkyboy
85
+ fuckstick
86
+ piemelpuist
87
+ penispukkelpuist
88
+ schaamluisbedrieger
89
+ eikelbijter
90
+ kech
91
+ kahba
92
+ dikoe
93
+ conjo
94
+ kaulo
95
+ kowed
96
+ poeta
97
+ puta
98
+ tabon
99
+ uchi
100
+ ucci
src/start_up/__init__.py ADDED
File without changes
src/start_up/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (153 Bytes). View file
 
src/start_up/__pycache__/start_up_bad_words_rule.cpython-39.pyc ADDED
Binary file (497 Bytes). View file
 
src/start_up/__pycache__/start_up_gibberish.cpython-39.pyc ADDED
Binary file (530 Bytes). View file
 
src/start_up/__pycache__/start_up_rbs.cpython-39.pyc ADDED
Binary file (1.8 kB). View file
 
src/start_up/start_up_bad_words_rule.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ from src.rule_based_system.BadWordRule import BadWordRule
4
+
5
+
6
def create_bad_word_rule(path, is_strict):
    """Build a BadWordRule from a one-column, header-less CSV word list.

    Args:
        path: Location of the CSV file; each row holds one word or phrase.
        is_strict: Forwarded unchanged to ``BadWordRule``.

    Returns:
        A ``BadWordRule`` constructed from the loaded words and ``is_strict``.
    """
    # Read every cell verbatim as text. With pandas' defaults, entries such as
    # "nan", "null" or "NA" are converted to float NaN and all-digit entries to
    # numbers, so the rule would receive non-string values for those rows.
    df = pd.read_csv(path, header=None, dtype=str, keep_default_na=False)
    df.columns = ['word']

    bad_words = df['word'].values

    return BadWordRule(bad_words, is_strict)
src/start_up/start_up_gibberish.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from src.gibberish_detection.GibberishDetector import GibberishDetector
2
+ from gibberish_detector import detector
3
+ from src.config import config
4
+
5
+
6
def create_gibberish_detector():
    """Create a GibberishDetector around the model referenced by the config.

    The model is loaded with ``detector.create_from_model`` from the
    ``'gibberish_model'`` entry of ``config`` and handed to
    ``GibberishDetector``.
    """
    return GibberishDetector(detector.create_from_model(config['gibberish_model']))
src/start_up/start_up_rbs.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from urlextract import URLExtract
4
+
5
+ from src.start_up.start_up_bad_words_rule import create_bad_word_rule
6
+ from src.config import config
7
+ from src.rule_based_system.HTMLRule import HTMLRule
8
+ from src.rule_based_system.PersonalDetailsRule import PersonalDetailsRule
9
+ from src.rule_based_system.RuleBasedSystem import RuleBasedSystem
10
+ from src.rule_based_system.TextLengthRule import TextLengthRule
11
+ from src.rule_based_system.UrlRule import UrlRule
12
+
13
+
14
def create_strong_rbs() -> RuleBasedSystem:
    """Assemble the "strong" rule-based system.

    The rules are, in order: text length, URL, e-mail address pattern and the
    strict bad-word list taken from ``config['bad_words_strict']``.
    """
    email_patterns = [r'[\w.+-]+@[\w-]+\.[\w.-]+']

    rules = [
        # TODO: check whether it makes sense to add this rule here;
        # 500 was our own chosen max length.
        TextLengthRule(),
        UrlRule(URLExtract()),
        PersonalDetailsRule(email_patterns, True),
        create_bad_word_rule(config['bad_words_strict'], True),
    ]
    return RuleBasedSystem(rules)
29
+
30
+
31
def create_weak_rbs() -> RuleBasedSystem:
    """Assemble the "weak" rule-based system.

    The rules are, in order: phone-number patterns, HTML and the ambiguous
    bad-word list taken from ``config['bad_words_ambiguous']``.
    """
    # Kept as a plain pattern string (not compiled), as in the original list.
    anchored_phone_pattern = (
        r"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)"
    )

    # Digit-group patterns, each compiled with re.S. In order: home (local),
    # home, mobile, mobile (international).
    digit_group_sources = (
        r".*?(\(?\d{3}\D{0,3}\d{2}\D{0,3}\d{2}).*?",
        r".*?(\(?\d{3}\D{0,3}\d{3}\D{0,3}\d{2}\D{0,3}\d{2}).*?",
        r".*?(\(?\d{2}\D{0,3}\d{3}\D{0,3}\d{3}\D{0,3}\d{2}).*?",
        r".*?(\(?\d{3}\D{0,3}\d{3}\D{0,3}\d{3}\D{0,3}\d{2}).*?",
    )
    compiled_patterns = [re.compile(source, re.S) for source in digit_group_sources]

    all_phone_patterns = [anchored_phone_pattern, *compiled_patterns]

    # rule systems
    rules = [
        PersonalDetailsRule(all_phone_patterns, False),
        HTMLRule(),
        create_bad_word_rule(config['bad_words_ambiguous'], False),
    ]
    return RuleBasedSystem(rules)