Ordenador committed
Commit a3ae041 · 1 Parent(s): 8318be4

feat: Add code and module for Hate Speech classification with BERT

Files changed (6)
  1. .gitignore +130 -0
  2. Makefile +24 -0
  3. app.py +40 -0
  4. classifier_model.h5 +3 -0
  5. requirements.in +3 -0
  6. requirements.txt +431 -0
.gitignore ADDED
@@ -0,0 +1,130 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+ flagged/
Makefile ADDED
@@ -0,0 +1,24 @@
+ SHELL=/bin/sh
+ export PATH := ./venv/bin:$(PATH)
+ .PHONY: help
+ help: ## This help.
+ 	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-20s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
+
+ .DEFAULT_GOAL := help
+
+ venv:
+ 	touch requirements.txt ;\
+ 	test -d venv || virtualenv --python=$$PYTHON3 venv
+
+ pip-compile: venv
+ 	python -m pip install --upgrade pip;\
+ 	pip install pip-tools;\
+ 	touch requirements.in ;\
+ 	pip-compile --output-file requirements.txt requirements.in;\
+ 	pip install -r requirements.txt
+
+ autopep8:
+ 	autopep8 -i *.py
+
+ clean:
+ 	rm -fr venv
app.py ADDED
@@ -0,0 +1,40 @@
+ import gradio as gr
+ from official.nlp.optimization import AdamWeightDecay, WarmUp
+ import tensorflow as tf
+ import tensorflow_hub as hub
+ import tensorflow_text as text  # imported for its side effect: registers the TF ops the hub preprocessing layer needs
+ import numpy as np
+ np.set_printoptions(suppress=True)
+ # Labels from https://www.kaggle.com/datasets/mrmorj/hate-speech-and-offensive-language-dataset
+ labels = [
+     "hate speech",
+     "offensive language",
+     "neither"
+ ]
+
+ # Load the fine-tuned BERT classifier. The model was saved with the
+ # AdamWeightDecay optimizer and WarmUp schedule, so both must be in scope.
+ with tf.keras.utils.custom_object_scope({'AdamWeightDecay': AdamWeightDecay(), 'WarmUp': WarmUp}):
+     classifier_model = tf.keras.models.load_model('classifier_model.h5',
+                                                   custom_objects={'KerasLayer': hub.KerasLayer})
+
+
+ def run_model(text):
+     # Predict on a single sentence; map each class score to its label.
+     prediction = classifier_model.predict([text])[0]
+     confidences = {labels[i]: float(prediction[i]) for i in range(len(labels))}
+     return confidences
+
+
+ examples = [
+     ["This is wonderful!"],
+ ]
+
+ hate_speech = gr.Interface(
+     fn=run_model,
+     inputs=gr.Textbox(lines=5,
+                       placeholder="Enter a positive or negative sentence here...",
+                       label="Input Text"),
+     outputs=gr.outputs.Label(),
+     examples=examples
+ )
+
+ hate_speech.launch()
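
For a quick sanity check outside the Gradio UI, run_model can be called directly. A minimal sketch (hypothetical, not part of this commit) assuming the definitions in app.py are already in scope and classifier_model.h5 has been fetched via Git LFS:

    confidences = run_model("This is wonderful!")
    print(confidences)
    # A dict mapping each label to its score; values below are illustrative only:
    # {'hate speech': 0.01, 'offensive language': 0.04, 'neither': 0.95}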
classifier_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d99fc350bebdf0b50e75ea22ae405312d2e583192530565059ba42efe7353f6
+ size 348612864
requirements.in ADDED
@@ -0,0 +1,3 @@
+ gradio
+ tensorflow-text
+ tf-models-official
requirements.txt ADDED
@@ -0,0 +1,431 @@
+ #
+ # This file is autogenerated by pip-compile with Python 3.10
+ # by the following command:
+ #
+ #    pip-compile --output-file=requirements.txt requirements.in
+ #
+ absl-py==1.4.0
+     # via
+     #   tensorboard
+     #   tensorflow
+     #   tensorflow-datasets
+     #   tensorflow-metadata
+     #   tf-slim
+ aiofiles==23.1.0
+     # via gradio
+ aiohttp==3.8.4
+     # via gradio
+ aiosignal==1.3.1
+     # via aiohttp
+ altair==4.2.2
+     # via gradio
+ anyio==3.6.2
+     # via
+     #   httpcore
+     #   starlette
+ astunparse==1.6.3
+     # via tensorflow
+ async-timeout==4.0.2
+     # via aiohttp
+ attrs==22.2.0
+     # via
+     #   aiohttp
+     #   jsonschema
+ cachetools==5.3.0
+     # via google-auth
+ certifi==2022.12.7
+     # via
+     #   httpcore
+     #   httpx
+     #   kaggle
+     #   requests
+ charset-normalizer==3.1.0
+     # via
+     #   aiohttp
+     #   requests
+ click==8.1.3
+     # via
+     #   tensorflow-datasets
+     #   uvicorn
+ colorama==0.4.6
+     # via sacrebleu
+ contourpy==1.0.7
+     # via matplotlib
+ cycler==0.11.0
+     # via matplotlib
+ cython==0.29.33
+     # via tf-models-official
+ dm-tree==0.1.8
+     # via
+     #   tensorflow-datasets
+     #   tensorflow-model-optimization
+ entrypoints==0.4
+     # via altair
+ etils[enp,epath]==1.1.0
+     # via tensorflow-datasets
+ fastapi==0.94.1
+     # via gradio
+ ffmpy==0.3.0
+     # via gradio
+ filelock==3.9.0
+     # via huggingface-hub
+ flatbuffers==23.3.3
+     # via tensorflow
+ fonttools==4.39.0
+     # via matplotlib
+ frozenlist==1.3.3
+     # via
+     #   aiohttp
+     #   aiosignal
+ fsspec==2023.3.0
+     # via gradio
+ gast==0.4.0
+     # via tensorflow
+ gin-config==0.5.0
+     # via tf-models-official
+ google-api-core==2.11.0
+     # via google-api-python-client
+ google-api-python-client==2.81.0
+     # via tf-models-official
+ google-auth==2.16.2
+     # via
+     #   google-api-core
+     #   google-api-python-client
+     #   google-auth-httplib2
+     #   google-auth-oauthlib
+     #   tensorboard
+ google-auth-httplib2==0.1.0
+     # via google-api-python-client
+ google-auth-oauthlib==0.4.6
+     # via tensorboard
+ google-pasta==0.2.0
+     # via tensorflow
+ googleapis-common-protos==1.58.0
+     # via
+     #   google-api-core
+     #   tensorflow-metadata
+ gradio==3.21.0
+     # via -r requirements.in
+ grpcio==1.51.3
+     # via
+     #   tensorboard
+     #   tensorflow
+ h11==0.14.0
+     # via
+     #   httpcore
+     #   uvicorn
+ h5py==3.8.0
+     # via tensorflow
+ httpcore==0.16.3
+     # via httpx
+ httplib2==0.21.0
+     # via
+     #   google-api-python-client
+     #   google-auth-httplib2
+     #   oauth2client
+ httpx==0.23.3
+     # via gradio
+ huggingface-hub==0.13.2
+     # via gradio
+ idna==3.4
+     # via
+     #   anyio
+     #   requests
+     #   rfc3986
+     #   yarl
+ immutabledict==2.2.3
+     # via tf-models-official
+ importlib-resources==5.12.0
+     # via etils
+ jinja2==3.1.2
+     # via
+     #   altair
+     #   gradio
+ joblib==1.2.0
+     # via scikit-learn
+ jsonschema==4.17.3
+     # via altair
+ kaggle==1.5.13
+     # via tf-models-official
+ keras==2.11.0
+     # via tensorflow
+ kiwisolver==1.4.4
+     # via matplotlib
+ libclang==15.0.6.1
+     # via tensorflow
+ linkify-it-py==2.0.0
+     # via markdown-it-py
+ lxml==4.9.2
+     # via sacrebleu
+ markdown==3.4.1
+     # via tensorboard
+ markdown-it-py[linkify]==2.2.0
+     # via
+     #   gradio
+     #   mdit-py-plugins
+ markupsafe==2.1.2
+     # via
+     #   gradio
+     #   jinja2
+     #   werkzeug
+ matplotlib==3.7.1
+     # via
+     #   gradio
+     #   pycocotools
+     #   tf-models-official
+ mdit-py-plugins==0.3.3
+     # via gradio
+ mdurl==0.1.2
+     # via markdown-it-py
+ multidict==6.0.4
+     # via
+     #   aiohttp
+     #   yarl
+ numpy==1.24.2
+     # via
+     #   altair
+     #   contourpy
+     #   etils
+     #   gradio
+     #   h5py
+     #   matplotlib
+     #   opencv-python-headless
+     #   opt-einsum
+     #   pandas
+     #   pycocotools
+     #   sacrebleu
+     #   scikit-learn
+     #   scipy
+     #   seqeval
+     #   tensorboard
+     #   tensorflow
+     #   tensorflow-datasets
+     #   tensorflow-hub
+     #   tensorflow-model-optimization
+     #   tf-models-official
+ oauth2client==4.1.3
+     # via tf-models-official
+ oauthlib==3.2.2
+     # via requests-oauthlib
+ opencv-python-headless==4.7.0.72
+     # via tf-models-official
+ opt-einsum==3.3.0
+     # via tensorflow
+ orjson==3.8.7
+     # via gradio
+ packaging==23.0
+     # via
+     #   huggingface-hub
+     #   matplotlib
+     #   tensorflow
+     #   tensorflow-addons
+ pandas==1.5.3
+     # via
+     #   altair
+     #   gradio
+     #   tf-models-official
+ pillow==9.4.0
+     # via
+     #   gradio
+     #   matplotlib
+     #   tf-models-official
+ portalocker==2.7.0
+     # via sacrebleu
+ promise==2.3
+     # via tensorflow-datasets
+ protobuf==3.19.6
+     # via
+     #   google-api-core
+     #   googleapis-common-protos
+     #   tensorboard
+     #   tensorflow
+     #   tensorflow-datasets
+     #   tensorflow-hub
+     #   tensorflow-metadata
+ psutil==5.9.4
+     # via
+     #   tensorflow-datasets
+     #   tf-models-official
+ py-cpuinfo==9.0.0
+     # via tf-models-official
+ pyasn1==0.4.8
+     # via
+     #   oauth2client
+     #   pyasn1-modules
+     #   rsa
+ pyasn1-modules==0.2.8
+     # via
+     #   google-auth
+     #   oauth2client
+ pycocotools==2.0.6
+     # via tf-models-official
+ pydantic==1.10.6
+     # via
+     #   fastapi
+     #   gradio
+ pydub==0.25.1
+     # via gradio
+ pyparsing==3.0.9
+     # via
+     #   httplib2
+     #   matplotlib
+ pyrsistent==0.19.3
+     # via jsonschema
+ python-dateutil==2.8.2
+     # via
+     #   kaggle
+     #   matplotlib
+     #   pandas
+ python-multipart==0.0.6
+     # via gradio
+ python-slugify==8.0.1
+     # via kaggle
+ pytz==2022.7.1
+     # via pandas
+ pyyaml==5.4.1
+     # via
+     #   gradio
+     #   huggingface-hub
+     #   tf-models-official
+ regex==2022.10.31
+     # via sacrebleu
+ requests==2.28.2
+     # via
+     #   google-api-core
+     #   gradio
+     #   huggingface-hub
+     #   kaggle
+     #   requests-oauthlib
+     #   tensorboard
+     #   tensorflow-datasets
+ requests-oauthlib==1.3.1
+     # via google-auth-oauthlib
+ rfc3986[idna2008]==1.5.0
+     # via httpx
+ rsa==4.9
+     # via
+     #   google-auth
+     #   oauth2client
+ sacrebleu==2.3.1
+     # via tf-models-official
+ scikit-learn==1.2.2
+     # via seqeval
+ scipy==1.10.1
+     # via
+     #   scikit-learn
+     #   tf-models-official
+ sentencepiece==0.1.97
+     # via tf-models-official
+ seqeval==1.2.2
+     # via tf-models-official
+ six==1.16.0
+     # via
+     #   astunparse
+     #   google-auth
+     #   google-auth-httplib2
+     #   google-pasta
+     #   kaggle
+     #   oauth2client
+     #   promise
+     #   python-dateutil
+     #   tensorflow
+     #   tensorflow-model-optimization
+     #   tf-models-official
+ sniffio==1.3.0
+     # via
+     #   anyio
+     #   httpcore
+     #   httpx
+ starlette==0.26.1
+     # via fastapi
+ tabulate==0.9.0
+     # via sacrebleu
+ tensorboard==2.11.2
+     # via tensorflow
+ tensorboard-data-server==0.6.1
+     # via tensorboard
+ tensorboard-plugin-wit==1.8.1
+     # via tensorboard
+ tensorflow==2.11.0
+     # via
+     #   tensorflow-text
+     #   tf-models-official
+ tensorflow-addons==0.19.0
+     # via tf-models-official
+ tensorflow-datasets==4.8.3
+     # via tf-models-official
+ tensorflow-estimator==2.11.0
+     # via tensorflow
+ tensorflow-hub==0.12.0
+     # via
+     #   tensorflow-text
+     #   tf-models-official
+ tensorflow-io-gcs-filesystem==0.31.0
+     # via tensorflow
+ tensorflow-metadata==1.12.0
+     # via tensorflow-datasets
+ tensorflow-model-optimization==0.7.3
+     # via tf-models-official
+ tensorflow-text==2.11.0
+     # via
+     #   -r requirements.in
+     #   tf-models-official
+ termcolor==2.2.0
+     # via
+     #   tensorflow
+     #   tensorflow-datasets
+ text-unidecode==1.3
+     # via python-slugify
+ tf-models-official==2.11.3
+     # via -r requirements.in
+ tf-slim==1.1.0
+     # via tf-models-official
+ threadpoolctl==3.1.0
+     # via scikit-learn
+ toml==0.10.2
+     # via tensorflow-datasets
+ toolz==0.12.0
+     # via altair
+ tqdm==4.65.0
+     # via
+     #   huggingface-hub
+     #   kaggle
+     #   tensorflow-datasets
+ typeguard==2.13.3
+     # via tensorflow-addons
+ typing-extensions==4.5.0
+     # via
+     #   etils
+     #   gradio
+     #   huggingface-hub
+     #   pydantic
+     #   tensorflow
+ uc-micro-py==1.0.1
+     # via linkify-it-py
+ uritemplate==4.1.1
+     # via google-api-python-client
+ urllib3==1.26.15
+     # via
+     #   kaggle
+     #   requests
+ uvicorn==0.21.0
+     # via gradio
+ websockets==10.4
+     # via gradio
+ werkzeug==2.2.3
+     # via tensorboard
+ wheel==0.38.4
+     # via
+     #   astunparse
+     #   tensorboard
+ wrapt==1.15.0
+     # via
+     #   tensorflow
+     #   tensorflow-datasets
+ yarl==1.8.2
+     # via aiohttp
+ zipp==3.15.0
+     # via etils
+
+ # The following packages are considered to be unsafe in a requirements file:
+ # setuptools