Spaces:

Yehor
/

grammar-correction-uk

Sleeping

App Files Files Community

Yehor Smoliakov committed on Jan 1

Commit

b2fe391

1 Parent(s): 5c06b16

Init

Browse files

Files changed (7) hide show

.dockerignore +2 -0
.gitignore +5 -0
Dockerfile +61 -0
README.md +26 -3
app.py +188 -0
requirements-dev.txt +1 -0
requirements.txt +4 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .ruff_cache/
2	+ .venv/

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+.idea/
+.venv/
+.ruff_cache/
+flagged/

Dockerfile ADDED Viewed

	@@ -0,0 +1,61 @@

+# CUDA 11.7 / cuDNN 8 development image on Ubuntu 22.04.
+FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
+# Keep apt from asking interactive questions during the build.
+ENV DEBIAN_FRONTEND=noninteractive
+# System packages: VCS/download tools, the libraries pyenv needs to compile
+# Python from source, and ffmpeg for gradio. The apt lists are removed in the
+# same layer so they do not end up in the image.
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+    git \
+    git-lfs \
+    wget \
+    curl \
+    # python build dependencies \
+    build-essential \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libncursesw5-dev \
+    xz-utils \
+    tk-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libffi-dev \
+    liblzma-dev \
+    # gradio dependencies \
+    ffmpeg \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+# Create an unprivileged user (UID 1000) and switch to it; everything below
+# is installed into that user's home directory.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:${PATH}
+WORKDIR ${HOME}/app
+# Install pyenv. -f makes curl fail on an HTTP error instead of piping the
+# error page into bash; -sS hides the progress meter but keeps error messages;
+# -L follows redirects.
+RUN curl -fsSL https://pyenv.run | bash
+ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
+ARG PYTHON_VERSION=3.10.12
+# Build the requested Python and make it the default interpreter. Every pip
+# call uses --no-cache-dir so no download cache is stored in the layer.
+RUN pyenv install ${PYTHON_VERSION} && \
+    pyenv global ${PYTHON_VERSION} && \
+    pyenv rehash && \
+    pip install --no-cache-dir -U pip setuptools wheel && \
+    pip install --no-cache-dir packaging ninja
+# Install the Python dependencies before copying the sources, so this layer
+# is reused when only application code changes.
+COPY --chown=1000 ./requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
+# Fetch the model into the directory app.py loads it from.
+RUN git clone --depth 1 https://huggingface.co/Pravopysnyk/best-unlp ${HOME}/app/best-unlp
+COPY --chown=1000 . ${HOME}/app
+# Runtime configuration for Python and gradio. GRADIO_SERVER_NAME=0.0.0.0
+# makes the server listen on all interfaces so the published port is
+# reachable from outside the container.
+ENV PYTHONPATH=${HOME}/app \
+    PYTHONUNBUFFERED=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_THEME=huggingface \
+    SYSTEM=spaces
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,10 +1,33 @@
 ---
-title: Grammar Correction Uk
-emoji: 🌍
 colorFrom: purple
 colorTo: pink
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Grammar Correction for Ukrainian
+emoji: 📝
 colorFrom: purple
 colorTo: pink
 sdk: docker
 pinned: false
 ---
+## Install
+```shell
+uv venv --python 3.10
+source .venv/bin/activate
+uv pip install -r requirements.txt
+# in development mode
+uv pip install -r requirements-dev.txt
+```
+## Build image
+```shell
+docker build -t grammar-correction-uk .
+```
+## Run
+```shell
+docker run -it --rm -p 8888:7860 grammar-correction-uk
+```

app.py ADDED Viewed

	@@ -0,0 +1,188 @@

+import sys
+import time
+from importlib.metadata import version
+import torch
+import gradio as gr
+from transformers import MBartForConditionalGeneration, AutoTokenizer
+# Config
+# Local directory the model and tokenizer are loaded from; the Dockerfile
+# clones the model repository into this path.
+model_name = "/home/user/app/best-unlp"
+# Passed as concurrency_limit to the "Normalize" click handler.
+concurrency_limit = 5
+# Use the GPU when torch can see one, otherwise fall back to the CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Load the model
+model = MBartForConditionalGeneration.from_pretrained(
+    model_name,
+    low_cpu_mem_usage=True,
+    device_map=device,
+)
+# Evaluation mode: the model is only used for generation in this app.
+model.eval()
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Source and target language codes are identical, i.e. the text is rewritten
+# within one language rather than translated.
+tokenizer.src_lang = "uk_UA"
+tokenizer.tgt_lang = "uk_UA"
+# Sample inputs offered through gr.Examples in the UI.
+examples = [
+    "привіт як справі?",
+    "як твої дела?",
+]
+# Used both as the browser page title and as the top-level heading.
+title = "Grammar Correction for Ukrainian"
+# https://www.tablesgenerator.com/markdown_tables
+authors_table = """
+## Authors
+Follow them on social networks and **contact** if you need any help or have any questions:
+| <img src="https://avatars.githubusercontent.com/u/7875085?v=4" width="100"> **Yehor Smoliakov** |
+|-------------------------------------------------------------------------------------------------|
+| https://t.me/smlkw in Telegram                                                                  |
+| https://x.com/yehor_smoliakov at X                                                              |
+| https://github.com/egorsmkv at GitHub                                                           |
+| https://huggingface.co/Yehor at Hugging Face                                                    |
+| or use [email protected]                                                                       |
+""".strip()
+# Markdown rendered at the top of the page: heading plus a short overview.
+description_head = f"""
+# {title}
+## Overview
+This space uses https://huggingface.co/Pravopysnyk/best-unlp model.
+Paste the text you want to enhance.
+""".strip()
+# Markdown rendered below the examples; currently just the authors table.
+description_foot = f"""
+{authors_table}
+""".strip()
+# Placeholder text of the output textbox.
+normalized_text_value = """
+Normalized text will appear here.
+Choose **an example** below the Normalize button or paste **your text**.
+""".strip()
+# Interpreter version, shown at the bottom of the page.
+tech_env = f"""
+#### Environment
+- Python: {sys.version}
+""".strip()
+# Installed versions of the main libraries, read from package metadata at
+# import time and shown at the bottom of the page.
+tech_libraries = f"""
+#### Libraries
+- torch: {version('torch')}
+- gradio: {version('gradio')}
+- transformers: {version('transformers')}
+""".strip()
+def inference(text, progress=gr.Progress()):
+    if not text:
+        raise gr.Error("Please paste your text.")
+    gr.Info("Starting normalizing", duration=2)
+    progress(0, desc="Normalizing...")
+    results = []
+    sentences = [
+        text,
+    ]
+    for sentence in progress.tqdm(sentences, desc="Normalizing...", unit="sentence"):
+        sentence = sentence.strip()
+        if len(sentence) == 0:
+            continue
+        t0 = time.time()
+        input_text = "<verbalization>:" + sentence
+        encoded_input = tokenizer(
+            input_text,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=1024,
+        ).to(device)
+        output_ids = model.generate(
+            **encoded_input, max_length=1024, num_beams=5, early_stopping=True
+        )
+        normalized_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        if not normalized_text:
+            normalized_text = "-"
+        elapsed_time = round(time.time() - t0, 2)
+        normalized_text = normalized_text.strip()
+        results.append(
+            {
+                "sentence": sentence,
+                "normalized_text": normalized_text,
+                "elapsed_time": elapsed_time,
+            }
+        )
+    gr.Info("Finished!", duration=2)
+    result_texts = []
+    for result in results:
+        result_texts.append(f'> {result["normalized_text"]}')
+        result_texts.append("\n")
+    sum_elapsed_text = sum([result["elapsed_time"] for result in results])
+    result_texts.append(f"Elapsed time: {sum_elapsed_text} seconds")
+    return "\n".join(result_texts)
+demo = gr.Blocks(
+    title=title,
+    analytics_enabled=False,
+    # theme="huggingface",
+    theme=gr.themes.Base(),
+)
+with demo:
+    gr.Markdown(description_head)
+    gr.Markdown("## Usage")
+    with gr.Row():
+        text = gr.Textbox(label="Text", autofocus=True, max_lines=1)
+        normalized_text = gr.Textbox(
+            label="Normalized text",
+            placeholder=normalized_text_value,
+            show_copy_button=True,
+        )
+    gr.Button("Normalize").click(
+        inference,
+        concurrency_limit=concurrency_limit,
+        inputs=text,
+        outputs=normalized_text,
+    )
+    with gr.Row():
+        gr.Examples(label="Choose an example", inputs=text, examples=examples)
+    gr.Markdown(description_foot)
+    gr.Markdown("### Gradio app uses:")
+    gr.Markdown(tech_env)
+    gr.Markdown(tech_libraries)
+if __name__ == "__main__":
+    demo.queue()
+    demo.launch()

requirements-dev.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ruff

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio
+transformers
+accelerate