Ordenador committed
Commit a3ae041 · 1 Parent(s): 8318be4

feat: Add code and module for Hate Speech classification with BERT

Files changed (6)
  1. .gitignore +130 -0
  2. Makefile +24 -0
  3. app.py +40 -0
  4. classifier_model.h5 +3 -0
  5. requirements.in +3 -0
  6. requirements.txt +431 -0
.gitignore ADDED
@@ -0,0 +1,130 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+ flagged/
Makefile ADDED
@@ -0,0 +1,24 @@
+ SHELL=/bin/sh
+ export PATH := ./venv/bin:$(PATH)
+ .PHONY: help
+ help: ## This help.
+ 	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " \033[36m%-20s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
+
+ .DEFAULT_GOAL := help
+
+ venv:
+ 	touch requirements.txt ;\
+ 	test -d venv || virtualenv --python=$$PYTHON3 venv
+
+ pip-compile: venv
+ 	python -m pip install --upgrade pip;\
+ 	pip install pip-tools;\
+ 	touch requirements.in ;\
+ 	pip-compile --output-file requirements.txt requirements.in;\
+ 	pip install -r requirements.txt
+
+ autopep8:
+ 	autopep8 -i *.py
+
+ clean:
+ 	rm -fr venv
app.py ADDED
@@ -0,0 +1,40 @@
+ import gradio as gr
+ from official.nlp.optimization import AdamWeightDecay, WarmUp
+ import tensorflow as tf
+ import tensorflow_hub as hub
+ import tensorflow_text as text  # imported for its side effect: registers the TF ops the hub preprocessing layer needs
+ import numpy as np
+ np.set_printoptions(suppress=True)
+ # Labels from https://www.kaggle.com/datasets/mrmorj/hate-speech-and-offensive-language-dataset
+ labels = [
+     "hate speech",
+     "offensive language",
+     "neither"
+ ]
+
+ # Load the fine-tuned BERT classifier. The model was saved with the
+ # AdamWeightDecay optimizer and WarmUp schedule, so both must be in scope.
+ with tf.keras.utils.custom_object_scope({'AdamWeightDecay': AdamWeightDecay(), 'WarmUp': WarmUp}):
+     classifier_model = tf.keras.models.load_model('classifier_model.h5',
+                                                   custom_objects={'KerasLayer': hub.KerasLayer})
+
+
+ def run_model(text):
+     # Predict on a single sentence; map each class score to its label.
+     prediction = classifier_model.predict([text])[0]
+     confidences = {labels[i]: float(prediction[i]) for i in range(len(labels))}
+     return confidences
+
+
+ examples = [
+     ["This is wonderful!"],
+ ]
+
+ hate_speech = gr.Interface(
+     fn=run_model,
+     inputs=gr.Textbox(lines=5,
+                       placeholder="Enter a positive or negative sentence here...",
+                       label="Input Text"),
+     outputs=gr.outputs.Label(),
+     examples=examples
+ )
+
+ hate_speech.launch()
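
For a quick sanity check outside the Gradio UI, run_model can be called directly. A minimal sketch (hypothetical, not part of this commit) assuming the definitions in app.py are already in scope and classifier_model.h5 has been fetched via Git LFS:

    confidences = run_model("This is wonderful!")
    print(confidences)
    # A dict mapping each label to its score; values below are illustrative only:
    # {'hate speech': 0.01, 'offensive language': 0.04, 'neither': 0.95}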
classifier_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d99fc350bebdf0b50e75ea22ae405312d2e583192530565059ba42efe7353f6
+ size 348612864
requirements.in ADDED
@@ -0,0 +1,3 @@
+ gradio
+ tensorflow-text
+ tf-models-official
requirements.txt ADDED
@@ -0,0 +1,431 @@
+ #
+ # This file is autogenerated by pip-compile with Python 3.10
+ # by the following command:
+ #
+ #    pip-compile --output-file=requirements.txt requirements.in
+ #
+ absl-py==1.4.0
+     # via
+     #   tensorboard
+     #   tensorflow
+     #   tensorflow-datasets
+     #   tensorflow-metadata
+     #   tf-slim
+ aiofiles==23.1.0
+     # via gradio
+ aiohttp==3.8.4
+     # via gradio
+ aiosignal==1.3.1
+     # via aiohttp
+ altair==4.2.2
+     # via gradio
+ anyio==3.6.2
+     # via
+     #   httpcore
+     #   starlette
+ astunparse==1.6.3
+     # via tensorflow
+ async-timeout==4.0.2
+     # via aiohttp
+ attrs==22.2.0
+     # via
+     #   aiohttp
+     #   jsonschema
+ cachetools==5.3.0
+     # via google-auth
+ certifi==2022.12.7
+     # via
+     #   httpcore
+     #   httpx
+     #   kaggle
+     #   requests
+ charset-normalizer==3.1.0
+     # via
+     #   aiohttp
+     #   requests
+ click==8.1.3
+     # via
+     #   tensorflow-datasets
+     #   uvicorn
+ colorama==0.4.6
+     # via sacrebleu
+ contourpy==1.0.7
+     # via matplotlib
+ cycler==0.11.0
+     # via matplotlib
+ cython==0.29.33
+     # via tf-models-official
+ dm-tree==0.1.8
+     # via
+     #   tensorflow-datasets
+     #   tensorflow-model-optimization
+ entrypoints==0.4
+     # via altair
+ etils[enp,epath]==1.1.0
+     # via tensorflow-datasets
+ fastapi==0.94.1
+     # via gradio
+ ffmpy==0.3.0
+     # via gradio
+ filelock==3.9.0
+     # via huggingface-hub
+ flatbuffers==23.3.3
+     # via tensorflow
+ fonttools==4.39.0
+     # via matplotlib
+ frozenlist==1.3.3
+     # via
+     #   aiohttp
+     #   aiosignal
+ fsspec==2023.3.0
+     # via gradio
+ gast==0.4.0
+     # via tensorflow
+ gin-config==0.5.0
+     # via tf-models-official
+ google-api-core==2.11.0
+     # via google-api-python-client
+ google-api-python-client==2.81.0
+     # via tf-models-official
+ google-auth==2.16.2
+     # via
+     #   google-api-core
+     #   google-api-python-client
+     #   google-auth-httplib2
+     #   google-auth-oauthlib
+     #   tensorboard
+ google-auth-httplib2==0.1.0
+     # via google-api-python-client
+ google-auth-oauthlib==0.4.6
+     # via tensorboard
+ google-pasta==0.2.0
+     # via tensorflow
+ googleapis-common-protos==1.58.0
+     # via
+     #   google-api-core
+     #   tensorflow-metadata
+ gradio==3.21.0
+     # via -r requirements.in
+ grpcio==1.51.3
+     # via
+     #   tensorboard
+     #   tensorflow
+ h11==0.14.0
+     # via
+     #   httpcore
+     #   uvicorn
+ h5py==3.8.0
+     # via tensorflow
+ httpcore==0.16.3
+     # via httpx
+ httplib2==0.21.0
+     # via
+     #   google-api-python-client
+     #   google-auth-httplib2
+     #   oauth2client
+ httpx==0.23.3
+     # via gradio
+ huggingface-hub==0.13.2
+     # via gradio
+ idna==3.4
+     # via
+     #   anyio
+     #   requests
+     #   rfc3986
+     #   yarl
+ immutabledict==2.2.3
+     # via tf-models-official
+ importlib-resources==5.12.0
+     # via etils
+ jinja2==3.1.2
+     # via
+     #   altair
+     #   gradio
+ joblib==1.2.0
+     # via scikit-learn
+ jsonschema==4.17.3
+     # via altair
+ kaggle==1.5.13
+     # via tf-models-official
+ keras==2.11.0
+     # via tensorflow
+ kiwisolver==1.4.4
+     # via matplotlib
+ libclang==15.0.6.1
+     # via tensorflow
+ linkify-it-py==2.0.0
+     # via markdown-it-py
+ lxml==4.9.2
+     # via sacrebleu
+ markdown==3.4.1
+     # via tensorboard
+ markdown-it-py[linkify]==2.2.0
+     # via
+     #   gradio
+     #   mdit-py-plugins
+ markupsafe==2.1.2
+     # via
+     #   gradio
+     #   jinja2
+     #   werkzeug
+ matplotlib==3.7.1
+     # via
+     #   gradio
+     #   pycocotools
+     #   tf-models-official
+ mdit-py-plugins==0.3.3
+     # via gradio
+ mdurl==0.1.2
+     # via markdown-it-py
+ multidict==6.0.4
+     # via
+     #   aiohttp
+     #   yarl
+ numpy==1.24.2
+     # via
+     #   altair
+     #   contourpy
+     #   etils
+     #   gradio
+     #   h5py
+     #   matplotlib
+     #   opencv-python-headless
+     #   opt-einsum
+     #   pandas
+     #   pycocotools
+     #   sacrebleu
+     #   scikit-learn
+     #   scipy
+     #   seqeval
+     #   tensorboard
+     #   tensorflow
+     #   tensorflow-datasets
+     #   tensorflow-hub
+     #   tensorflow-model-optimization
+     #   tf-models-official
+ oauth2client==4.1.3
+     # via tf-models-official
+ oauthlib==3.2.2
+     # via requests-oauthlib
+ opencv-python-headless==4.7.0.72
+     # via tf-models-official
+ opt-einsum==3.3.0
+     # via tensorflow
+ orjson==3.8.7
+     # via gradio
+ packaging==23.0
+     # via
+     #   huggingface-hub
+     #   matplotlib
+     #   tensorflow
+     #   tensorflow-addons
+ pandas==1.5.3
+     # via
+     #   altair
+     #   gradio
+     #   tf-models-official
+ pillow==9.4.0
+     # via
+     #   gradio
+     #   matplotlib
+     #   tf-models-official
+ portalocker==2.7.0
+     # via sacrebleu
+ promise==2.3
+     # via tensorflow-datasets
+ protobuf==3.19.6
+     # via
+     #   google-api-core
+     #   googleapis-common-protos
+     #   tensorboard
+     #   tensorflow
+     #   tensorflow-datasets
+     #   tensorflow-hub
+     #   tensorflow-metadata
+ psutil==5.9.4
+     # via
+     #   tensorflow-datasets
+     #   tf-models-official
+ py-cpuinfo==9.0.0
+     # via tf-models-official
+ pyasn1==0.4.8
+     # via
+     #   oauth2client
+     #   pyasn1-modules
+     #   rsa
+ pyasn1-modules==0.2.8
+     # via
+     #   google-auth
+     #   oauth2client
+ pycocotools==2.0.6
+     # via tf-models-official
+ pydantic==1.10.6
+     # via
+     #   fastapi
+     #   gradio
+ pydub==0.25.1
+     # via gradio
+ pyparsing==3.0.9
+     # via
+     #   httplib2
+     #   matplotlib
+ pyrsistent==0.19.3
+     # via jsonschema
+ python-dateutil==2.8.2
+     # via
+     #   kaggle
+     #   matplotlib
+     #   pandas
+ python-multipart==0.0.6
+     # via gradio
+ python-slugify==8.0.1
+     # via kaggle
+ pytz==2022.7.1
+     # via pandas
+ pyyaml==5.4.1
+     # via
+     #   gradio
+     #   huggingface-hub
+     #   tf-models-official
+ regex==2022.10.31
+     # via sacrebleu
+ requests==2.28.2
+     # via
+     #   google-api-core
+     #   gradio
+     #   huggingface-hub
+     #   kaggle
+     #   requests-oauthlib
+     #   tensorboard
+     #   tensorflow-datasets
+ requests-oauthlib==1.3.1
+     # via google-auth-oauthlib
+ rfc3986[idna2008]==1.5.0
+     # via httpx
+ rsa==4.9
+     # via
+     #   google-auth
+     #   oauth2client
+ sacrebleu==2.3.1
+     # via tf-models-official
+ scikit-learn==1.2.2
+     # via seqeval
+ scipy==1.10.1
+     # via
+     #   scikit-learn
+     #   tf-models-official
+ sentencepiece==0.1.97
+     # via tf-models-official
+ seqeval==1.2.2
+     # via tf-models-official
+ six==1.16.0
+     # via
+     #   astunparse
+     #   google-auth
+     #   google-auth-httplib2
+     #   google-pasta
+     #   kaggle
+     #   oauth2client
+     #   promise
+     #   python-dateutil
+     #   tensorflow
+     #   tensorflow-model-optimization
+     #   tf-models-official
+ sniffio==1.3.0
+     # via
+     #   anyio
+     #   httpcore
+     #   httpx
+ starlette==0.26.1
+     # via fastapi
+ tabulate==0.9.0
+     # via sacrebleu
+ tensorboard==2.11.2
+     # via tensorflow
+ tensorboard-data-server==0.6.1
+     # via tensorboard
+ tensorboard-plugin-wit==1.8.1
+     # via tensorboard
+ tensorflow==2.11.0
+     # via
+     #   tensorflow-text
+     #   tf-models-official
+ tensorflow-addons==0.19.0
+     # via tf-models-official
+ tensorflow-datasets==4.8.3
+     # via tf-models-official
+ tensorflow-estimator==2.11.0
+     # via tensorflow
+ tensorflow-hub==0.12.0
+     # via
+     #   tensorflow-text
+     #   tf-models-official
+ tensorflow-io-gcs-filesystem==0.31.0
+     # via tensorflow
+ tensorflow-metadata==1.12.0
+     # via tensorflow-datasets
+ tensorflow-model-optimization==0.7.3
+     # via tf-models-official
+ tensorflow-text==2.11.0
+     # via
+     #   -r requirements.in
+     #   tf-models-official
+ termcolor==2.2.0
+     # via
+     #   tensorflow
+     #   tensorflow-datasets
+ text-unidecode==1.3
+     # via python-slugify
+ tf-models-official==2.11.3
+     # via -r requirements.in
+ tf-slim==1.1.0
+     # via tf-models-official
+ threadpoolctl==3.1.0
+     # via scikit-learn
+ toml==0.10.2
+     # via tensorflow-datasets
+ toolz==0.12.0
+     # via altair
+ tqdm==4.65.0
+     # via
+     #   huggingface-hub
+     #   kaggle
+     #   tensorflow-datasets
+ typeguard==2.13.3
+     # via tensorflow-addons
+ typing-extensions==4.5.0
+     # via
+     #   etils
+     #   gradio
+     #   huggingface-hub
+     #   pydantic
+     #   tensorflow
+ uc-micro-py==1.0.1
+     # via linkify-it-py
+ uritemplate==4.1.1
+     # via google-api-python-client
+ urllib3==1.26.15
+     # via
+     #   kaggle
+     #   requests
+ uvicorn==0.21.0
+     # via gradio
+ websockets==10.4
+     # via gradio
+ werkzeug==2.2.3
+     # via tensorboard
+ wheel==0.38.4
+     # via
+     #   astunparse
+     #   tensorboard
+ wrapt==1.15.0
+     # via
+     #   tensorflow
+     #   tensorflow-datasets
+ yarl==1.8.2
+     # via aiohttp
+ zipp==3.15.0
+     # via etils
+
+ # The following packages are considered to be unsafe in a requirements file:
+ # setuptools