GitHub Action
committed on
Commit
•
8b414b0
1
Parent(s):
1579a76
refs/heads/ci-cd/hugging-face
Browse files — This view is limited to 50 files because it contains too many changes.
See raw diff
- .env.example +3 -0
- .flake8 +33 -0
- .github/workflows/cd.yaml +48 -0
- .github/workflows/ci.yaml +41 -0
- .github/workflows/docs.yaml +46 -0
- .github/workflows/kaggle.yaml +32 -0
- .github/workflows/snyk.yaml +30 -0
- .gitignore +118 -0
- .pre-commit-config.yaml +24 -0
- Dockerfile +7 -0
- LICENSE +21 -0
- Makefile +23 -0
- codecov.yml +5 -0
- data/.gitkeep +0 -0
- data/weights/.gitignore +0 -0
- data/weights/config.yaml +9 -0
- data/weights/cv_fold_0/weights.ckpt +3 -0
- data/weights/cv_fold_1/weights.ckpt +3 -0
- data/weights/cv_fold_2/weights.ckpt +3 -0
- data/weights/cv_fold_3/weights.ckpt +3 -0
- data/weights/cv_fold_4/weights.ckpt +3 -0
- data/weights/cv_results.csv +7 -0
- data/weights/dataset-metadata.json +9 -0
- data/weights/submission.csv +4 -0
- data/word_frequencies/unigram_freq.csv +0 -0
- demo/__init__.py +0 -0
- demo/app.py +93 -0
- demo/utils.py +29 -0
- docs/Makefile +12 -0
- docs/images/demo.jpeg +0 -0
- docs/images/logo.png +0 -0
- docs/make.bat +35 -0
- docs/source/_templates/layout.html +31 -0
- docs/source/_templates/notused_packages.rst_t +52 -0
- docs/source/conf.py +78 -0
- docs/source/contribute.rst +12 -0
- docs/source/index.rst +14 -0
- docs/source/usage.rst +26 -0
- mypy.ini +7 -0
- poetry.lock +0 -0
- pyproject.toml +50 -0
- src/__init__.py +0 -0
- src/config/conf/config.yaml +11 -0
- src/config/conf/experiment/sanity_bert_finetuning_predictor.yaml +19 -0
- src/config/conf/experiment/sanity_bert_with_handcrafted_feature_predictor.yaml +13 -0
- src/config/conf/experiment/sanity_constant_predictor.yaml +9 -0
- src/config/conf/experiment/sanity_many_bert_with_handcrafted_feature_predictor.yaml +15 -0
- src/config/conf/predictor/bert_finetuning_predictor.yaml +16 -0
- src/config/conf/predictor/bert_with_handcrafted_feature_predictor.yaml +4 -0
- src/config/conf/predictor/constant_predictor.yaml +1 -0
.env.example
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
DATA_PATH = ""
|
2 |
+
BOT_TOKEN = ""
|
3 |
+
CHAT_ID =
|
.flake8
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[flake8]
|
2 |
+
max-complexity = 10
|
3 |
+
max_line_length = 138
|
4 |
+
exclude =
|
5 |
+
# no need to traverse our git directory
|
6 |
+
.git,
|
7 |
+
# there's no value in checking cache directories
|
8 |
+
__pycache__,
|
9 |
+
# this directory is mostly auto-generated
|
10 |
+
docs/source,
|
11 |
+
extend-ignore =
|
12 |
+
# H101: use TODO(NAME)
|
13 |
+
H101,
|
14 |
+
# H202: assertRaises Exception too broad
|
15 |
+
H202,
|
16 |
+
# H233: python 3.x incompatible use of print operator
|
17 |
+
H233,
|
18 |
+
# H301: one import per line
|
19 |
+
H301,
|
20 |
+
# H306: imports not in alphabetical order (time, os)
|
21 |
+
H306,
|
22 |
+
# H401: docstring should not start with a space
|
23 |
+
H401,
|
24 |
+
# H403: multi line docstrings should end on a new line
|
25 |
+
H403,
|
26 |
+
# H404: multi line docstring should start without a leading new line
|
27 |
+
H404,
|
28 |
+
# H405: multi line docstring summary not separated with an empty line
|
29 |
+
H405,
|
30 |
+
# H501: do not use self.__dict__ for string formatting
|
31 |
+
H501,
|
32 |
+
# E203: colons should not have any space before them
|
33 |
+
E203,
|
.github/workflows/cd.yaml
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: continuous-deployment
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches:
|
6 |
+
- ci-cd/hugging-face
|
7 |
+
release:
|
8 |
+
types: [published]
|
9 |
+
|
10 |
+
jobs:
|
11 |
+
continuous-deployment:
|
12 |
+
runs-on: ubuntu-22.04
|
13 |
+
steps:
|
14 |
+
- uses: actions/checkout@v2
|
15 |
+
- name: Set up Python 3.7.13
|
16 |
+
uses: actions/setup-python@v2
|
17 |
+
with:
|
18 |
+
python-version: 3.7.13
|
19 |
+
- name: Install dependencies
|
20 |
+
run: |
|
21 |
+
curl -sSL https://install.python-poetry.org | python3 -
|
22 |
+
poetry --no-root install
|
23 |
+
- name: Clone Hugging Face repo
|
24 |
+
run: |
|
25 |
+
cd ..
|
26 |
+
git clone https://huggingface.co/spaces/Booguy/linguask
|
27 |
+
- name: Move files to repo
|
28 |
+
run: |
|
29 |
+
cd ..
|
30 |
+
mv ./linguask/.git/ .
|
31 |
+
mv ./linguask/README.md .
|
32 |
+
rm -rf ./linguask/*
|
33 |
+
mv .git ./linguask
|
34 |
+
mv README.md ./linguask
|
35 |
+
cd ./automatic-essay-evaluator
|
36 |
+
rm -rf ./.git
|
37 |
+
rm README.md
|
38 |
+
cd ..
|
39 |
+
cp -a ./automatic-essay-evaluator/. ./linguask
|
40 |
+
- name: Commit all files
|
41 |
+
run: |
|
42 |
+
cd ..
|
43 |
+
cd linguask
|
44 |
+
git add .
|
45 |
+
git config --local user.email "[email protected]"
|
46 |
+
git config --local user.name "GitHub Action"
|
47 |
+
git commit -m ${{ github.ref }}
|
48 |
+
git push https://Booguy:${{ secrets.HF_TOKEN }}@huggingface.co/spaces/Booguy/linguask.git
|
.github/workflows/ci.yaml
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: continuous-integration
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches:
|
6 |
+
- main
|
7 |
+
pull_request:
|
8 |
+
branches:
|
9 |
+
- main
|
10 |
+
|
11 |
+
jobs:
|
12 |
+
continuous-integration:
|
13 |
+
runs-on: ubuntu-22.04
|
14 |
+
steps:
|
15 |
+
- uses: actions/checkout@v2
|
16 |
+
- name: Set up Python 3.7.13
|
17 |
+
uses: actions/setup-python@v2
|
18 |
+
with:
|
19 |
+
python-version: 3.7.13
|
20 |
+
- name: Install dependencies
|
21 |
+
run: |
|
22 |
+
curl -sSL https://install.python-poetry.org | python3 -
|
23 |
+
poetry --no-root install
|
24 |
+
- uses: jamescurtin/isort-action@master
|
25 |
+
- name: Lint with flake8
|
26 |
+
run: |
|
27 |
+
poetry run flake8
|
28 |
+
- name: Lint with static type checker mypy
|
29 |
+
run: |
|
30 |
+
poetry run mypy .
|
31 |
+
- name: Test with pytest
|
32 |
+
run: |
|
33 |
+
poetry run pytest
|
34 |
+
- name: Coverage report
|
35 |
+
run: |
|
36 |
+
poetry run coverage run -m pytest
|
37 |
+
poetry run coverage xml
|
38 |
+
- name: Upload Coverage Report to Codecov
|
39 |
+
uses: codecov/codecov-action@v3
|
40 |
+
with:
|
41 |
+
token: ${{ secrets.CODECOV_TOKEN }}
|
.github/workflows/docs.yaml
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: sphinx-documentation
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches:
|
6 |
+
- main
|
7 |
+
|
8 |
+
jobs:
|
9 |
+
build_job:
|
10 |
+
runs-on: ubuntu-22.04
|
11 |
+
env:
|
12 |
+
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
|
13 |
+
|
14 |
+
steps:
|
15 |
+
- name: Checkout
|
16 |
+
uses: actions/[email protected]
|
17 |
+
- name: Set up Python 3.7.13
|
18 |
+
uses: actions/[email protected]
|
19 |
+
with:
|
20 |
+
python-version: 3.7.13
|
21 |
+
- name: Install dependencies
|
22 |
+
run: |
|
23 |
+
curl -sSL https://install.python-poetry.org | python3 -
|
24 |
+
poetry lock
|
25 |
+
poetry --no-root install
|
26 |
+
- name: Make the sphinx docs
|
27 |
+
run: |
|
28 |
+
poetry run make -C docs clean
|
29 |
+
poetry run make -C docs html
|
30 |
+
- name: Commit generated files
|
31 |
+
run: |
|
32 |
+
cd docs/build/html
|
33 |
+
git init
|
34 |
+
touch .nojekyll
|
35 |
+
git add -A
|
36 |
+
git config --local user.email "[email protected]"
|
37 |
+
git config --local user.name "GitHub Action"
|
38 |
+
git config --global --add safe.directory '*'
|
39 |
+
git commit -m ${{ github.ref }}
|
40 |
+
- name: Push to destination branch
|
41 |
+
uses: ad-m/github-push-action@master
|
42 |
+
with:
|
43 |
+
github_token: ${{ secrets.GITHUB_TOKEN }}
|
44 |
+
branch: gh-pages
|
45 |
+
force: true
|
46 |
+
directory: ./docs/build/html
|
.github/workflows/kaggle.yaml
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: kaggle-submission
|
2 |
+
|
3 |
+
on:
|
4 |
+
pull_request:
|
5 |
+
branches:
|
6 |
+
- main
|
7 |
+
types: [closed]
|
8 |
+
|
9 |
+
|
10 |
+
jobs:
|
11 |
+
kaggle-api:
|
12 |
+
if: ${{ github.event.pull_request.merged }}
|
13 |
+
runs-on: ubuntu-22.04
|
14 |
+
steps:
|
15 |
+
- uses: actions/checkout@v2
|
16 |
+
- name: Set up Python 3.7.13
|
17 |
+
uses: actions/setup-python@v2
|
18 |
+
with:
|
19 |
+
python-version: 3.7.13
|
20 |
+
- name: Install dependencies
|
21 |
+
run: |
|
22 |
+
curl -sSL https://install.python-poetry.org | python3 -
|
23 |
+
poetry lock
|
24 |
+
poetry --no-root install
|
25 |
+
- name: Authenticate to Kaggle
|
26 |
+
run: |
|
27 |
+
mkdir ~/.kaggle
|
28 |
+
echo ${{ secrets.KAGGLE }} >> ~/.kaggle/kaggle.json
|
29 |
+
- name: Push kernel
|
30 |
+
run: poetry run kaggle kernels push -p src/
|
31 |
+
- name: Update weights
|
32 |
+
run: poetry run kaggle datasets version -p data/weights --dir-mode zip -m ${{ github.ref }}
|
.github/workflows/snyk.yaml
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: snyk-vulnerability-testing
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
branches:
|
6 |
+
- main
|
7 |
+
pull_request:
|
8 |
+
branches:
|
9 |
+
- main
|
10 |
+
|
11 |
+
jobs:
|
12 |
+
snyk-testing:
|
13 |
+
runs-on: ubuntu-22.04
|
14 |
+
steps:
|
15 |
+
- uses: actions/checkout@v2
|
16 |
+
- name: Set up Python 3.7.13
|
17 |
+
uses: actions/setup-python@v2
|
18 |
+
with:
|
19 |
+
python-version: 3.7.13
|
20 |
+
- name: Install snyk
|
21 |
+
run: |
|
22 |
+
curl https://static.snyk.io/cli/latest/snyk-linux -o snyk
|
23 |
+
chmod +x ./snyk
|
24 |
+
mv ./snyk /usr/local/bin/
|
25 |
+
- name: Authenticate in snyk
|
26 |
+
run: |
|
27 |
+
snyk auth ${{ secrets.SNYK_TOKEN }}
|
28 |
+
- name: Test via snyk
|
29 |
+
run: |
|
30 |
+
snyk test
|
.gitignore
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
*.egg-info/
|
24 |
+
.installed.cfg
|
25 |
+
*.egg
|
26 |
+
MANIFEST
|
27 |
+
|
28 |
+
# PyInstaller
|
29 |
+
# Usually these files are written by a python script from a template
|
30 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
31 |
+
*.manifest
|
32 |
+
*.spec
|
33 |
+
|
34 |
+
# Installer logs
|
35 |
+
pip-log.txt
|
36 |
+
pip-delete-this-directory.txt
|
37 |
+
|
38 |
+
# Unit test / coverage reports
|
39 |
+
htmlcov/
|
40 |
+
.tox/
|
41 |
+
.coverage
|
42 |
+
.coverage.*
|
43 |
+
.cache
|
44 |
+
nosetests.xml
|
45 |
+
coverage.xml
|
46 |
+
*.cover
|
47 |
+
.hypothesis/
|
48 |
+
.pytest_cache/
|
49 |
+
|
50 |
+
# Translations
|
51 |
+
*.mo
|
52 |
+
*.pot
|
53 |
+
|
54 |
+
# Django stuff:
|
55 |
+
*.log
|
56 |
+
local_settings.py
|
57 |
+
db.sqlite3
|
58 |
+
|
59 |
+
# Flask stuff:
|
60 |
+
instance/
|
61 |
+
.webassets-cache
|
62 |
+
|
63 |
+
# Scrapy stuff:
|
64 |
+
.scrapy
|
65 |
+
|
66 |
+
# Sphinx documentation
|
67 |
+
docs/_build/
|
68 |
+
|
69 |
+
# PyBuilder
|
70 |
+
target/
|
71 |
+
|
72 |
+
# Jupyter Notebook
|
73 |
+
.ipynb_checkpoints
|
74 |
+
|
75 |
+
# pyenv
|
76 |
+
.python-version
|
77 |
+
|
78 |
+
# celery beat schedule file
|
79 |
+
celerybeat-schedule
|
80 |
+
|
81 |
+
# SageMath parsed files
|
82 |
+
*.sage.py
|
83 |
+
|
84 |
+
# Environments
|
85 |
+
.env
|
86 |
+
.venv
|
87 |
+
env/
|
88 |
+
venv/
|
89 |
+
ENV/
|
90 |
+
env.bak/
|
91 |
+
venv.bak/
|
92 |
+
|
93 |
+
# Spyder project settings
|
94 |
+
.spyderproject
|
95 |
+
.spyproject
|
96 |
+
|
97 |
+
# Rope project settings
|
98 |
+
.ropeproject
|
99 |
+
|
100 |
+
# mkdocs documentation
|
101 |
+
/site
|
102 |
+
|
103 |
+
# mypy
|
104 |
+
.mypy_cache/
|
105 |
+
|
106 |
+
# MACOS
|
107 |
+
.DS_Store
|
108 |
+
data/raw/*
|
109 |
+
catboost_info/
|
110 |
+
.idea/
|
111 |
+
requirements-dev.txt
|
112 |
+
|
113 |
+
# logging
|
114 |
+
checkpoints/
|
115 |
+
# hydra-outputs
|
116 |
+
outputs/
|
117 |
+
# weights
|
118 |
+
demo/model.ckpt
|
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
repos:
|
2 |
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
3 |
+
rev: v4.2.0
|
4 |
+
hooks:
|
5 |
+
- id: trailing-whitespace
|
6 |
+
- id: end-of-file-fixer
|
7 |
+
- id: check-yaml
|
8 |
+
- id: check-added-large-files
|
9 |
+
args: ['--maxkb=5000']
|
10 |
+
- repo: https://github.com/pre-commit/mirrors-autopep8
|
11 |
+
rev: v1.6.0
|
12 |
+
hooks:
|
13 |
+
- id: autopep8
|
14 |
+
- repo: https://github.com/pycqa/isort
|
15 |
+
rev: 5.10.1
|
16 |
+
hooks:
|
17 |
+
- id: isort
|
18 |
+
name: isort (python)
|
19 |
+
- repo: https://github.com/myint/autoflake
|
20 |
+
rev: v1.4
|
21 |
+
hooks:
|
22 |
+
- id: autoflake
|
23 |
+
entry: autoflake
|
24 |
+
args: [--in-place, --remove-all-unused-imports]
|
Dockerfile
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.8-slim
|
2 |
+
|
3 |
+
RUN curl -sSL https://install.python-poetry.org | python3 -
|
4 |
+
|
5 |
+
RUN poetry --no-root install
|
6 |
+
|
7 |
+
CMD ["poetry", "run", "make", "build"]
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2021 Linguask
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
Makefile
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
build: download_data download_weights run
|
2 |
+
|
3 |
+
download_data:
|
4 |
+
mkdir -p data/raw
|
5 |
+
cd data/raw; \
|
6 |
+
rm *; \
|
7 |
+
kaggle competitions download -c feedback-prize-english-language-learning; \
|
8 |
+
unzip feedback-prize-english-language-learning.zip; \
|
9 |
+
rm feedback-prize-english-language-learning.zip
|
10 |
+
|
11 |
+
test:
|
12 |
+
flake8
|
13 |
+
isort .
|
14 |
+
pytest -p no:cacheprovider
|
15 |
+
|
16 |
+
download_weights:
|
17 |
+
cd ./demo; \
|
18 |
+
kaggle datasets download -d alukaevdanis/feedback-prize-weights; \
|
19 |
+
unzip feedback-prize-weights.zip; \
|
20 |
+
rm feedback-prize-weights.zip
|
21 |
+
|
22 |
+
run:
|
23 |
+
PYTHONPATH=. streamlit run demo/app.py
|
codecov.yml
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
coverage:
|
2 |
+
status:
|
3 |
+
patch:
|
4 |
+
default:
|
5 |
+
target: 75%
|
data/.gitkeep
ADDED
File without changes
|
data/weights/.gitignore
ADDED
File without changes
|
data/weights/config.yaml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
predictor:
|
2 |
+
_target_: src.solutions.constant_predictor.ConstantPredictorSolution
|
3 |
+
validator:
|
4 |
+
_target_: src.cross_validate.CrossValidation
|
5 |
+
saving_dir: checkpoints
|
6 |
+
n_splits: 5
|
7 |
+
timestamp: ${now:%Y-%m-%d}/${now:%H-%M-%S}
|
8 |
+
cwd: ${hydra:runtime.cwd}
|
9 |
+
name: sanity checking of ConstantPredictorSolution
|
data/weights/cv_fold_0/weights.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
|
3 |
+
size 4
|
data/weights/cv_fold_1/weights.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
|
3 |
+
size 4
|
data/weights/cv_fold_2/weights.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
|
3 |
+
size 4
|
data/weights/cv_fold_3/weights.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
|
3 |
+
size 4
|
data/weights/cv_fold_4/weights.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66e4c000f2000eb1cca164f97c441e14d438ca2599c8e79f4de6ecade0de8b32
|
3 |
+
size 4
|
data/weights/cv_results.csv
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,cohesion,syntax,vocabulary,phraseology,grammar,conventions
|
2 |
+
0,0.6608945522512667,0.6302303497468349,0.641527949378647,0.6712010545904032,0.6961855568035836,0.688344859959337
|
3 |
+
1,0.6984637874679632,0.6770818988115173,0.6537810547264135,0.6873910050778178,0.7192102801753979,0.6977768863470648
|
4 |
+
2,0.667332361584851,0.6364362897377401,0.614977179207726,0.6768457760846118,0.6855281534006166,0.6721059014106314
|
5 |
+
3,0.6735313662174706,0.6424358218267724,0.6209266237831667,0.65206733997094,0.6982348955111474,0.6678112510340573
|
6 |
+
4,0.6718680299109107,0.6374401330898904,0.6123724356957945,0.6426845869171515,0.7030259769714595,0.654269863059915
|
7 |
+
overall,0.6744180194864924,0.644724898642551,0.6287170485583495,0.6660379525281849,0.7004369725724409,0.676061752362201
|
data/weights/dataset-metadata.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"title": "Linguask by Vitsyn-Morgunov-Nikulin",
|
3 |
+
"id": "alukaevdanis/weights-linguask",
|
4 |
+
"licenses": [
|
5 |
+
{
|
6 |
+
"name": "CC0-1.0"
|
7 |
+
}
|
8 |
+
]
|
9 |
+
}
|
data/weights/submission.csv
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
text_id,cohesion,syntax,vocabulary,phraseology,grammar,conventions
|
2 |
+
0000C359D63E,3.0,3.0,3.0,3.0,3.0,3.0
|
3 |
+
000BAD50D026,3.0,3.0,3.0,3.0,3.0,3.0
|
4 |
+
00367BB2546B,3.0,3.0,3.0,3.0,3.0,3.0
|
data/word_frequencies/unigram_freq.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
demo/__init__.py
ADDED
File without changes
|
demo/app.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
import seaborn as sns
|
4 |
+
import streamlit as st
|
5 |
+
|
6 |
+
from demo.utils import load_model, process_text
|
7 |
+
|
8 |
+
st.set_page_config(
|
9 |
+
page_title="BERT Keyword Extractor",
|
10 |
+
page_icon="🎈",
|
11 |
+
)
|
12 |
+
|
13 |
+
|
14 |
+
def _max_width_():
    """Emit a <style> snippet that caps the main block container at 1400px.

    The CSS is rendered through ``st.markdown`` with ``unsafe_allow_html=True``
    so that the raw ``<style>`` tag reaches the page.
    """
    width_rule = "max-width: 1400px;"
    css_snippet = f"""
<style>
.reportview-container .main .block-container{{
{width_rule}
}}
</style>
"""
    st.markdown(css_snippet, unsafe_allow_html=True)
|
26 |
+
|
27 |
+
|
28 |
+
st.header("🔑 Automated Essay Evaluator")
|
29 |
+
|
30 |
+
with st.expander("ℹ️ - About this app", expanded=True):
|
31 |
+
st.write(
|
32 |
+
"""
|
33 |
+
- This application demonstrates how automated essay evaluation works: given as an input text with max. \
|
34 |
+
length of 512, it scores it (from 1.0 to 4.0) for different criteria: cohesion, syntax, vocabulary, \
|
35 |
+
phraseology, grammar and conventions.
|
36 |
+
- This solution is based on fine-tuned deberta-v3-large model.
|
37 |
+
"""
|
38 |
+
)
|
39 |
+
|
40 |
+
st.markdown("")
|
41 |
+
|
42 |
+
st.markdown("")
|
43 |
+
st.markdown("## 📌 **Paste document**", unsafe_allow_html=True)
|
44 |
+
# Input form: collects the essay text and exposes the submit button.
# NOTE(review): the diff view this was recovered from carries no indentation;
# the statements after the text area are assumed to live under `with c2:` —
# confirm against the repository.
with st.form(key="my_form"):
    _, c2, _ = st.columns([0.07, 5, 0.07])

    with c2:
        doc = st.text_area(
            "Paste your text below (max 500 words)",
            height=510,
        )

        MAX_WORDS = 500

        # Locate every word so the limit can be enforced in words. The previous
        # `doc[:MAX_WORDS]` slice kept only the first 500 *characters*, which
        # contradicted the "max 500 words" label above.
        word_matches = list(re.finditer(r"\w+", doc))
        res = len(word_matches)
        if res > MAX_WORDS:
            # Cut right after the last allowed word; punctuation and spacing
            # of the retained part stay untouched.
            doc = doc[:word_matches[MAX_WORDS - 1].end()]

        submit_button = st.form_submit_button(label="✨ Assess my text!")
|
59 |
+
|
60 |
+
if not submit_button:
|
61 |
+
st.stop()
|
62 |
+
|
63 |
+
st.markdown("## 🎈 **Check results**")
|
64 |
+
|
65 |
+
st.header("")
|
66 |
+
|
67 |
+
cs, c1, c2, c3, cLast = st.columns([2, 1.5, 1.5, 1.5, 2])
|
68 |
+
|
69 |
+
st.header("")
|
70 |
+
|
71 |
+
model = load_model()
|
72 |
+
df = process_text(doc, model)
|
73 |
+
|
74 |
+
df.index += 1
|
75 |
+
|
76 |
+
# Add styling
|
77 |
+
cmGreen = sns.light_palette("green", as_cmap=True)
|
78 |
+
cmRed = sns.light_palette("red", as_cmap=True)
|
79 |
+
df = df.style.background_gradient(
|
80 |
+
cmap=cmGreen,
|
81 |
+
subset=[
|
82 |
+
"Grade",
|
83 |
+
],
|
84 |
+
)
|
85 |
+
|
86 |
+
|
87 |
+
format_dictionary = {
|
88 |
+
"Relevancy": "{:.1%}",
|
89 |
+
}
|
90 |
+
|
91 |
+
df = df.format(format_dictionary)
|
92 |
+
|
93 |
+
st.table(df)
|
demo/utils.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import streamlit as st
|
3 |
+
import torch
|
4 |
+
|
5 |
+
from src.model_finetuning.config import CONFIG
|
6 |
+
from src.model_finetuning.model import BertLightningModel
|
7 |
+
from src.utils import get_target_columns
|
8 |
+
|
9 |
+
|
10 |
+
@st.cache(allow_output_mutation=True)
def load_model() -> BertLightningModel:
    """Load the demo model from the ``demo/model.ckpt`` checkpoint.

    The checkpoint is restored with ``CONFIG`` and mapped to the CPU. The
    function is wrapped in ``st.cache(allow_output_mutation=True)``.

    Returns:
        The object produced by ``BertLightningModel.load_from_checkpoint``.
    """
    return BertLightningModel.load_from_checkpoint(
        "demo/model.ckpt",
        config=CONFIG,
        map_location='cpu',
    )
|
17 |
+
|
18 |
+
|
19 |
+
@torch.no_grad()
def process_text(_text: str, _model: BertLightningModel) -> pd.DataFrame:
    """Run the model on one text and tabulate the resulting grades.

    Args:
        _text: Text to evaluate; it is passed to the tokenizer as a batch of one.
        _model: Model exposing a ``tokenizer`` attribute and callable on the
            tokenizer output.

    Returns:
        A DataFrame with a ``Criterion`` column taken from
        ``get_target_columns()`` and a ``Grade`` column holding the first row
        of the model output converted to a list.
    """
    encoded = _model.tokenizer([_text], return_tensors='pt')
    grades = _model(encoded)[0].tolist()
    criteria = get_target_columns()
    return pd.DataFrame({'Criterion': criteria, 'Grade': grades})
|
docs/Makefile
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
SPHINXOPTS ?=
|
2 |
+
SPHINXBUILD ?= sphinx-build
|
3 |
+
SOURCEDIR = source
|
4 |
+
BUILDDIR = build
|
5 |
+
|
6 |
+
help:
|
7 |
+
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
8 |
+
|
9 |
+
.PHONY: help Makefile
|
10 |
+
|
11 |
+
%: Makefile
|
12 |
+
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
docs/images/demo.jpeg
ADDED
docs/images/logo.png
ADDED
docs/make.bat
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
@ECHO OFF
|
2 |
+
|
3 |
+
pushd %~dp0
|
4 |
+
|
5 |
+
REM Command file for Sphinx documentation
|
6 |
+
|
7 |
+
if "%SPHINXBUILD%" == "" (
|
8 |
+
set SPHINXBUILD=sphinx-build
|
9 |
+
)
|
10 |
+
set SOURCEDIR=source
|
11 |
+
set BUILDDIR=build
|
12 |
+
|
13 |
+
if "%1" == "" goto help
|
14 |
+
|
15 |
+
%SPHINXBUILD% >NUL 2>NUL
|
16 |
+
if errorlevel 9009 (
|
17 |
+
echo.
|
18 |
+
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
19 |
+
echo.installed, then set the SPHINXBUILD environment variable to point
|
20 |
+
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
21 |
+
echo.may add the Sphinx directory to PATH.
|
22 |
+
echo.
|
23 |
+
echo.If you don't have Sphinx installed, grab it from
|
24 |
+
echo.http://sphinx-doc.org/
|
25 |
+
exit /b 1
|
26 |
+
)
|
27 |
+
|
28 |
+
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
29 |
+
goto end
|
30 |
+
|
31 |
+
:help
|
32 |
+
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
33 |
+
|
34 |
+
:end
|
35 |
+
popd
|
docs/source/_templates/layout.html
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{% extends "!layout.html" %}
|
2 |
+
|
3 |
+
{% block menu %}
|
4 |
+
{{ super() }}
|
5 |
+
<p class="caption">
|
6 |
+
<span class="caption-text">Indices</span>
|
7 |
+
</p>
|
8 |
+
<ul>
|
9 |
+
<li class="toctree-l1"><a href= "{{pathto('genindex.html', 1)}}">Everything</a></li>
|
10 |
+
<li class="toctree-l1"><a href= "{{pathto('py-modindex.html', 1)}}">Module Index</a></li>
|
11 |
+
</ul>
|
12 |
+
|
13 |
+
{% if menu_links %}
|
14 |
+
<p class="caption">
|
15 |
+
<span class="caption-text">External links</span>
|
16 |
+
</p>
|
17 |
+
<ul>
|
18 |
+
{% for text, link in menu_links %}
|
19 |
+
<li class="toctree-l1"><a href="{{ link }}">{{ text }}</a></li>
|
20 |
+
{% endfor %}
|
21 |
+
</ul>
|
22 |
+
{% endif %}
|
23 |
+
{% endblock %}
|
24 |
+
|
25 |
+
{% block htmltitle %}
|
26 |
+
{% if title == '' or title == 'Home' %}
|
27 |
+
<title>{{ docstitle|e }}</title>
|
28 |
+
{% else %}
|
29 |
+
<title>{{ title|striptags|e }}{{ titlesuffix }}</title>
|
30 |
+
{% endif %}
|
31 |
+
{% endblock %}
|
docs/source/_templates/notused_packages.rst_t
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{%- macro automodule(modname, options) -%}
|
2 |
+
.. automodule:: {{ modname }}
|
3 |
+
{%- for option in options %}
|
4 |
+
:{{ option }}:
|
5 |
+
{%- endfor %}
|
6 |
+
{%- endmacro %}
|
7 |
+
|
8 |
+
{%- macro toctree(docnames) -%}
|
9 |
+
.. toctree::
|
10 |
+
:maxdepth: {{ maxdepth }}
|
11 |
+
{% for docname in docnames %}
|
12 |
+
{{ docname }}
|
13 |
+
{%- endfor %}
|
14 |
+
{%- endmacro %}
|
15 |
+
|
16 |
+
{%- if is_namespace %}
|
17 |
+
{{- [pkgname, "namespace"] | join(" ") | e | heading }}
|
18 |
+
{% else %}
|
19 |
+
{{- [pkgname, "package"] | join(" ") | e | heading }}
|
20 |
+
{% endif %}
|
21 |
+
|
22 |
+
{%- if is_namespace %}
|
23 |
+
.. py:module:: {{ pkgname }}
|
24 |
+
{% endif %}
|
25 |
+
|
26 |
+
{%- if modulefirst and not is_namespace %}
|
27 |
+
{{ automodule(pkgname, automodule_options) }}
|
28 |
+
{% endif %}
|
29 |
+
|
30 |
+
{%- if subpackages %}
|
31 |
+
Subpackages
|
32 |
+
-----------
|
33 |
+
|
34 |
+
{{ toctree(subpackages) }}
|
35 |
+
{% endif %}
|
36 |
+
|
37 |
+
{%- if submodules %}
|
38 |
+
Submodules
|
39 |
+
----------
|
40 |
+
{% if separatemodules %}
|
41 |
+
{{ toctree(submodules) }}
|
42 |
+
{% else %}
|
43 |
+
{%- for submodule in submodules %}
|
44 |
+
{% if show_headings %}
|
45 |
+
{{- [submodule, "module"] | join(" ") | e | heading(2) }}
|
46 |
+
{% endif %}
|
47 |
+
{{ automodule(submodule, automodule_options) }}
|
48 |
+
{% endfor %}
|
49 |
+
{%- endif %}
|
50 |
+
{%- endif %}
|
51 |
+
|
52 |
+
# https://github.com/sphinx-doc/sphinx/blob/master/sphinx/templates/apidoc
|
docs/source/conf.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Configuration file for the Sphinx documentation builder.
|
2 |
+
import os
|
3 |
+
import sys
|
4 |
+
|
5 |
+
# Needed so Sphinx can find lumache.py. Change if the .py files are located elsewhere than the root.
|
6 |
+
sys.path.insert(0, os.path.abspath('../..'))
|
7 |
+
sys.path.insert(0, os.path.abspath('../../src'))
|
8 |
+
|
9 |
+
# -- Project information
|
10 |
+
|
11 |
+
project = 'Linguask'
|
12 |
+
copyright = '2022'
|
13 |
+
author = 'Multiple'
|
14 |
+
|
15 |
+
release = '0.1'
|
16 |
+
version = '0.1.0'
|
17 |
+
|
18 |
+
# -- General configuration
|
19 |
+
|
20 |
+
extensions = [
|
21 |
+
'sphinx.ext.duration',
|
22 |
+
'sphinx.ext.doctest',
|
23 |
+
'sphinx.ext.autodoc',
|
24 |
+
'sphinx.ext.autosummary',
|
25 |
+
'sphinx.ext.intersphinx',
|
26 |
+
'autoapi.extension',
|
27 |
+
# 'sphinxcontrib.apidoc',
|
28 |
+
]
|
29 |
+
|
30 |
+
autoapi_type = 'python'
|
31 |
+
autoapi_dirs = ['../../src']
|
32 |
+
|
33 |
+
intersphinx_mapping = {
|
34 |
+
'python': ('https://docs.python.org/3/', None),
|
35 |
+
'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
|
36 |
+
}
|
37 |
+
intersphinx_disabled_domains = ['std']
|
38 |
+
|
39 |
+
templates_path = ['_templates']
|
40 |
+
|
41 |
+
# -- Options for HTML output
|
42 |
+
|
43 |
+
html_theme = 'sphinx_rtd_theme'
|
44 |
+
|
45 |
+
# -- Options for EPUB output
|
46 |
+
epub_show_urls = 'footnote'
|
47 |
+
|
48 |
+
html_context = {
|
49 |
+
"display_github": True, # Integrate GitHub
|
50 |
+
"github_repo": "vitsyn-morgunov-and-nikulin/automatic-essay-evaluator", # Repo name
|
51 |
+
"github_version": "main", # Version
|
52 |
+
"conf_py_path": "docs/source/", # Path in the checkout to the docs root
|
53 |
+
}
|
54 |
+
|
55 |
+
# -- Options for HTML output -------------------------------------------------
|
56 |
+
|
57 |
+
html_short_title = "topobathy"
|
58 |
+
html_show_sourcelink = False
|
59 |
+
html_show_sphinx = True
|
60 |
+
html_show_copyright = True
|
61 |
+
|
62 |
+
# Add any paths that contain custom static files (such as style sheets) here,
|
63 |
+
# relative to this directory. They are copied after the builtin static files,
|
64 |
+
# so a file named "default.css" will overwrite the builtin "default.css".
|
65 |
+
html_static_path = ['_static']
|
66 |
+
repository_url = "https://github.com/Vitsyn-Morgunov-and-Nikulin/automatic-essay-evaluator"
|
67 |
+
html_context = {
|
68 |
+
"menu_links": [
|
69 |
+
(
|
70 |
+
'<i class="fa fa-github fa-fw"></i> Source Code',
|
71 |
+
repository_url,
|
72 |
+
),
|
73 |
+
(
|
74 |
+
'<i class="fa fa-book fa-fw"></i> License',
|
75 |
+
f"{repository_url}/blob/main/LICENSE",
|
76 |
+
),
|
77 |
+
],
|
78 |
+
}
|
docs/source/contribute.rst
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
How to contribute
|
2 |
+
=================
|
3 |
+
|
4 |
+
In our development process we followed the practices described by Uncle Bob in his magnificent "Clean Code". Please consult this book in case of any trouble.
|
5 |
+
|
6 |
+
Make a fork of this repository, and develop your own tool. Make sure it is error-free and the test coverage is at least 60 percent. Update :code:`config` files accordingly, and check their operability.
|
7 |
+
|
8 |
+
While producing your code, use this famous `git workflow <https://nvie.com/posts/a-successful-git-branching-model/>`_. Also note that our branches use prefixes :code:`feature/`, :code:`fix/`, and :code:`ci-cd/`.
|
9 |
+
|
10 |
+
Then send a pull request. In the comment, describe the main features of the tool, the technology stack used, and briefly outline the algorithms. This should be enough for us to accept your code.
|
11 |
+
|
12 |
+
To check the quality of the code, we use :code:`flake8` and :code:`codacy`.
|
docs/source/index.rst
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Welcome to Linguask!
|
2 |
+
===================================
|
3 |
+
|
4 |
+
This page documents our solutions to the `Feedback Prize Competition <https://www.kaggle.com/competitions/feedback-prize-english-language-learning>`_.
|
5 |
+
|
6 |
+
.. note::
|
7 |
+
|
8 |
+
This project is under active development... Meanwhile, check out our current version, which is `available at this link <https://huggingface.co/spaces/Booguy/automatic-essay-evaluator>`_!
|
9 |
+
|
10 |
+
.. toctree::
|
11 |
+
:hidden:
|
12 |
+
|
13 |
+
usage
|
14 |
+
contribute
|
docs/source/usage.rst
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Getting Started
|
2 |
+
===============
|
3 |
+
|
4 |
+
|
5 |
+
.. _prerequisites:
|
6 |
+
|
7 |
+
Prerequisites
|
8 |
+
-------------
|
9 |
+
1. GNU :code:`make` utility (`link <https://www.gnu.org/software/make/>`__)
|
10 |
+
2. Python 3.7.13 (`link <https://www.python.org/downloads/release/python-3713/>`__)
|
11 |
+
3. Package manager :code:`poetry` (`link <https://python-poetry.org>`__)
|
12 |
+
4. At least 2 GB of free space on your hard disk
|
13 |
+
|
14 |
+
.. code-block:: console
|
15 |
+
|
16 |
+
poetry lock
|
17 |
+
poetry install --no-root
|
18 |
+
|
19 |
+
Run application locally
|
20 |
+
-----------------------
|
21 |
+
|
22 |
+
To your delight, it's done via a single command:
|
23 |
+
|
24 |
+
.. code-block:: console
|
25 |
+
|
26 |
+
poetry run make build
|
mypy.ini
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[mypy]
|
2 |
+
disallow_untyped_defs = False
|
3 |
+
ignore_missing_imports = True
|
4 |
+
ignore_errors = True
|
5 |
+
|
6 |
+
[mypy-src.*]
|
7 |
+
ignore_errors = False
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.pytest.ini_options]
|
2 |
+
pythonpath = "."
|
3 |
+
|
4 |
+
[tool.poetry]
|
5 |
+
name = "automatic-essay-evaluator"
|
6 |
+
version = "0.1.0"
|
7 |
+
description = "Automated tool for evaluation of natural language texts."
|
8 |
+
authors = ["Danis Alukaev <[email protected]>"]
|
9 |
+
license = "MIT"
|
10 |
+
readme = "README.md"
|
11 |
+
packages = [{include = "automatic_essay_evaluator"}]
|
12 |
+
|
13 |
+
[tool.poetry.dependencies]
|
14 |
+
python = "3.7.13"
|
15 |
+
flake8 = "^5.0.4"
|
16 |
+
hydra-core = "^1.3.0"
|
17 |
+
python-dotenv = "^0.21.0"
|
18 |
+
coverage = "^6.5.0"
|
19 |
+
streamlit = "^1.16.0"
|
20 |
+
transformers = {extras = ["sentencepiece"], version = "^4.25.1"}
|
21 |
+
sphinx = ">=4.0"
|
22 |
+
sphinx-rtd-theme = "^1.1.1"
|
23 |
+
sphinx-autoapi = "^2.0.0"
|
24 |
+
mypy = "^0.991"
|
25 |
+
types-requests = "^2.28.11.5"
|
26 |
+
pytest-cov = "^4.0.0"
|
27 |
+
|
28 |
+
[tool.poetry.group.dev.dependencies]
|
29 |
+
flake8 = "^5.0.4"
|
30 |
+
hydra-core = "^1.3.0"
|
31 |
+
isort = "^5.11.3"
|
32 |
+
kaggle = "^1.5.12"
|
33 |
+
numpy = "^1.21.6"
|
34 |
+
pre-commit = "^2.20.0"
|
35 |
+
pytest = "^7.2.0"
|
36 |
+
pytorch-lightning = "^1.8.5.post0"
|
37 |
+
tqdm = "^4.64.1"
|
38 |
+
torch = "^1.13.1"
|
39 |
+
transformers = "^4.25.1"
|
40 |
+
pyspellchecker = "^0.7.1"
|
41 |
+
catboost = "^1.1.1"
|
42 |
+
scikit-learn = "^1.0.2"
|
43 |
+
wandb = "^0.13.7"
|
44 |
+
easydict = "^1.10"
|
45 |
+
nltk = "^3.8"
|
46 |
+
seaborn = "^0.12.1"
|
47 |
+
|
48 |
+
[build-system]
|
49 |
+
requires = ["poetry-core"]
|
50 |
+
build-backend = "poetry.core.masonry.api"
|
src/__init__.py
ADDED
File without changes
|
src/config/conf/config.yaml
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
defaults:
|
2 |
+
- predictor: bert_with_handcrafted_feature_predictor
|
3 |
+
- validator: cross_validator
|
4 |
+
- _self_
|
5 |
+
|
6 |
+
timestamp: ${now:%Y-%m-%d}/${now:%H-%M-%S}
|
7 |
+
cwd: ${hydra:runtime.cwd}
|
8 |
+
|
9 |
+
hydra:
|
10 |
+
job:
|
11 |
+
chdir: True
|
src/config/conf/experiment/sanity_bert_finetuning_predictor.yaml
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
name: sanity checking of BertFinetuningPredictor
|
4 |
+
|
5 |
+
defaults:
|
6 |
+
- override /predictor: bert_finetuning_predictor
|
7 |
+
|
8 |
+
validator:
|
9 |
+
saving_dir: checkpoints
|
10 |
+
|
11 |
+
predictor:
|
12 |
+
batch_size: 8
|
13 |
+
num_workers: 8
|
14 |
+
max_length: 64
|
15 |
+
weight_decay: 0.01
|
16 |
+
accelerator: gpu
|
17 |
+
max_epochs: 2
|
18 |
+
train_size: 0.8
|
19 |
+
num_cross_val_splits: 5
|
src/config/conf/experiment/sanity_bert_with_handcrafted_feature_predictor.yaml
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
name: sanity checking of BertWithHandcraftedFeaturePredictor
|
4 |
+
|
5 |
+
defaults:
|
6 |
+
- override /predictor: bert_with_handcrafted_feature_predictor
|
7 |
+
|
8 |
+
validator:
|
9 |
+
saving_dir: checkpoints
|
10 |
+
n_splits: 2
|
11 |
+
|
12 |
+
predictor:
|
13 |
+
catboost_iter: 5
|
src/config/conf/experiment/sanity_constant_predictor.yaml
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
name: sanity checking of ConstantPredictorSolution
|
4 |
+
|
5 |
+
defaults:
|
6 |
+
- override /predictor: constant_predictor
|
7 |
+
|
8 |
+
validator:
|
9 |
+
saving_dir: checkpoints
|
src/config/conf/experiment/sanity_many_bert_with_handcrafted_feature_predictor.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# @package _global_
|
2 |
+
|
3 |
+
name: sanity checking of ManyBertWithHandcraftedFeaturePredictor
|
4 |
+
|
5 |
+
defaults:
|
6 |
+
- override /predictor: many_bert_with_handcrafted_feature_predictor
|
7 |
+
|
8 |
+
validator:
|
9 |
+
saving_dir: checkpoints
|
10 |
+
|
11 |
+
predictor:
|
12 |
+
model_names:
|
13 |
+
- bert-base-uncased
|
14 |
+
- bert-base-cased
|
15 |
+
catboost_iter: 5
|
src/config/conf/predictor/bert_finetuning_predictor.yaml
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_target_: src.solutions.bert_finetune_solution.BertFinetuningPredictor
|
2 |
+
model_name: microsoft/deberta-v3-large
|
3 |
+
num_classes: 6
|
4 |
+
lr: 2e-5
|
5 |
+
batch_size: 8
|
6 |
+
num_workers: 8
|
7 |
+
max_length: 512
|
8 |
+
weight_decay: 0.01
|
9 |
+
accelerator: gpu
|
10 |
+
max_epochs: 5
|
11 |
+
accumulate_grad_batches: 4
|
12 |
+
precision: 16
|
13 |
+
gradient_clip_val: 1000
|
14 |
+
train_size: 0.8
|
15 |
+
num_cross_val_splits: 5
|
16 |
+
num_frozen_layers: 20
|
src/config/conf/predictor/bert_with_handcrafted_feature_predictor.yaml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_target_: src.solutions.bert_featurizer_solution.BertWithHandcraftedFeaturePredictor
|
2 |
+
model_name: bert-base-uncased
|
3 |
+
catboost_iter: 500
|
4 |
+
saving_dir: checkpoints
|
src/config/conf/predictor/constant_predictor.yaml
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
_target_: src.solutions.constant_predictor.ConstantPredictorSolution
|