Yehor committed
Commit 770c54a · 1 Parent(s): b1ab41d
Files changed (5)
  1. .dockerignore +2 -0
  2. .gitignore +2 -0
  3. Dockerfile +0 -63
  4. app.py +386 -67
  5. requirements.txt +5 -0
.dockerignore CHANGED
@@ -1,2 +1,4 @@
 .ruff_cache/
 .venv/
+
+.DS_Store
.gitignore CHANGED
@@ -3,3 +3,5 @@
 .ruff_cache/
 
 flagged/
+
+.DS_Store
Dockerfile DELETED
@@ -1,63 +0,0 @@
-FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get update && \
-    apt-get upgrade -y && \
-    apt-get install -y --no-install-recommends \
-    git \
-    git-lfs \
-    wget \
-    curl \
-    # python build dependencies \
-    build-essential \
-    libssl-dev \
-    zlib1g-dev \
-    libbz2-dev \
-    libreadline-dev \
-    libsqlite3-dev \
-    libncursesw5-dev \
-    xz-utils \
-    tk-dev \
-    libxml2-dev \
-    libxmlsec1-dev \
-    libffi-dev \
-    liblzma-dev \
-    # gradio dependencies \
-    ffmpeg \
-    && apt-get clean \
-    && rm -rf /var/lib/apt/lists/*
-
-
-RUN useradd -m -u 1000 user
-USER user
-ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:${PATH}
-WORKDIR ${HOME}/app
-
-RUN curl https://pyenv.run | bash
-ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
-ARG PYTHON_VERSION=3.10.12
-RUN pyenv install ${PYTHON_VERSION} && \
-    pyenv global ${PYTHON_VERSION} && \
-    pyenv rehash && \
-    pip install --no-cache-dir -U pip setuptools wheel && \
-    pip install packaging ninja
-
-COPY --chown=1000 ./requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
-
-RUN pip install "transformers @ git+https://github.com/huggingface/transformers.git@main"
-
-RUN git clone --depth 1 https://huggingface.co/Yehor/kulyk-en-uk ${HOME}/app/en-uk-translator
-
-COPY --chown=1000 . ${HOME}/app
-ENV PYTHONPATH=${HOME}/app \
-    PYTHONUNBUFFERED=1 \
-    GRADIO_ALLOW_FLAGGING=never \
-    GRADIO_NUM_PORTS=1 \
-    GRADIO_SERVER_NAME=0.0.0.0 \
-    GRADIO_THEME=huggingface \
-    SYSTEM=spaces
-
-CMD ["python", "app.py"]
app.py CHANGED
@@ -3,6 +3,7 @@ import time
 
 from importlib.metadata import version
 from gradio.utils import is_zero_gpu_space
+from gradio.themes import Base
 
 try:
     import spaces
@@ -10,22 +11,34 @@ except ImportError:
     print("ZeroGPU is not available, skipping...")
 
 import torch
+import torchaudio
 import gradio as gr
+import torchaudio.transforms as T
 
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    AutoProcessor,
+    MoonshineForConditionalGeneration,
+)
 
-try:
+from doctr.io import DocumentFile
+from doctr.models import ocr_predictor
+
+use_zero_gpu = is_zero_gpu_space()
+use_cuda = torch.cuda.is_available()
+
+if use_zero_gpu:
     spaces_version = version("spaces")
     print("ZeroGPU is available, changing inference call.")
-except PackageNotFoundError:
+else:
     spaces_version = "N/A"
     print("ZeroGPU is not available, skipping...")
 
-use_zero_gpu = is_zero_gpu_space()
-use_cuda = torch.cuda.is_available()
+print(f"Spaces version: {spaces_version}")
 
 if use_cuda:
-    print("CUDA is available, setting correct inference_device variable.")
+    print("CUDA is available, setting correct `device` variable.")
     device = "cuda"
     torch_dtype = torch.bfloat16
 else:
@@ -33,9 +46,9 @@ else:
     torch_dtype = torch.bfloat16
 
 # Config
-model_name = 'Yehor/kulyk-uk-en'
-# model_name = "/home/user/app/en-uk-translator"
+model_name = "Yehor/kulyk-en-uk"
 concurrency_limit = 5
+current_theme = Base()
 
 # Load the model
 model = AutoModelForCausalLM.from_pretrained(
@@ -45,10 +58,22 @@ model = AutoModelForCausalLM.from_pretrained(
     trust_remote_code=True,
 )
 model.eval()
-
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-examples = [
+# Load ASR
+audio_processor = AutoProcessor.from_pretrained("UsefulSensors/moonshine-base")
+audio_model = MoonshineForConditionalGeneration.from_pretrained(
+    "UsefulSensors/moonshine-base", attn_implementation="sdpa"
+)
+audio_model.to(device)
+audio_model.to(torch_dtype)
+
+# Load OCR
+ocr_model = ocr_predictor(pretrained=True)
+ocr_model.to(device)
+
+# Examples
+examples_text = [
     "WP: F-16 навряд чи суттєво змінять ситуацію на полі бою",
     "Над Україною збито ракету та 7 із 8 «Шахедів»",
     "Олімпійські ігри 2024. Розклад змагань українських спортсменів на 28 липня",
@@ -56,10 +81,26 @@ examples = [
     "За тиждень НБУ продав майже 800 мільйонів доларів на міжбанківському ринку",
     "Париж 2024. День 2: Текстова трансляція",
 ]
+examples_audio = [
+    "example_1.wav",
+    "example_2.wav",
+    "example_3.wav",
+    "example_4.wav",
+    "example_5.wav",
+    "example_6.wav",
+    "example_7.wav",
+]
+examples_image = [
+    "example_1.jpg",
+    "example_2.jpg",
+    "example_3.jpg",
+    "example_4.jpg",
+    "example_5.jpg",
+    "example_6.jpg",
+]
 
 title = "UK-EN Translator"
 
-# https://www.tablesgenerator.com/markdown_tables
 authors_table = """
 ## Authors
 
@@ -77,37 +118,43 @@ Follow them on social networks and **contact** if you need any help or have any
 description_head = f"""
 # {title}
 
-## Overview
-
-Paste the text you want to translate from Ukrainian to English.
+This space translates your text Ukrainian to English. Also, check [EN-UK Translator](https://huggingface.co/spaces/Yehor/en-uk-translator) out.
 """.strip()
 
-description_foot = f"""
-{authors_table}
-""".strip()
 
 translated_text_value = """
-Translated text will appear here.
+Choose an example below the Translate button or type your text.
+""".strip()
 
-Choose **an example** below the Translate button or paste **your text**.
+translated_audio_value = """
+Choose an example below the Translate button or upload your audio.
+""".strip()
+
+translated_image_value = """
+Choose an example below the Translate button or upload your image.
 """.strip()
 
 tech_env = f"""
 #### Environment
 
 - Python: {sys.version}
+
+#### Models
+
+- [kulyk-uk-en](https://huggingface.co/Yehor/kulyk-en-uk)
 """.strip()
 
 tech_libraries = f"""
 #### Libraries
 
-- torch: {version('torch')}
-- gradio: {version('gradio')}
-- transformers: {version('transformers')}
+- torch: {version("torch")}
+- gradio: {version("gradio")}
+- transformers: {version("transformers")}
 """.strip()
 
+
 @spaces.GPU
-def inference(text, progress=gr.Progress()):
+def inference_text(text, progress=gr.Progress()):
     if not text:
         raise gr.Error("Please paste your text.")
 
@@ -115,7 +162,7 @@ def inference(text, progress=gr.Progress()):
 
     results = []
 
-    sentences = text.split('\n')
+    sentences = text.split("\n")
 
     non_empty_sentences = []
     for sentence in sentences:
@@ -123,10 +170,12 @@
         if len(s) != 0:
            non_empty_sentences.append(s)
 
-    for sentence in progress.tqdm(non_empty_sentences, desc="Translating...", unit="sentence"):
+    for sentence in progress.tqdm(
+        non_empty_sentences, desc="Translating...", unit="sentence"
+    ):
         t0 = time.time()
 
-        prompt = "Translate the text to Ukrainian:\n" + sentence
+        prompt = "Translate the text to English:\n" + sentence
 
         input_ids = tokenizer.apply_chat_template(
             [{"role": "user", "content": prompt}],
@@ -138,11 +187,120 @@
         output = model.generate(
             input_ids,
             max_new_tokens=2048,
-
             # Greedy Search
             do_sample=False,
             repetition_penalty=1.05,
+            # Sampling
+            # do_sample=True,
+            # temperature=0.1,
+            # # top_k=1,
+            # min_p=0.9,
+            # repetition_penalty=1.05,
+        )
+
+        prompt_len = input_ids.shape[1]
+        generated_tokens = output[:, prompt_len:]
+        translated_text = tokenizer.batch_decode(
+            generated_tokens, skip_special_tokens=True
+        )[0]
+
+        elapsed_time = round(time.time() - t0, 2)
+
+        translated_text = translated_text.strip()
+        results.append(
+            {
+                "sentence": sentence,
+                "translated_text": translated_text,
+                "elapsed_time": elapsed_time,
+            }
+        )
+
+    gr.Info("Finished!", duration=2)
 
+    result_texts = []
+
+    for result in results:
+        result_texts.append(f"{result['translated_text']}\n")
+
+    sum_elapsed_text = sum([result["elapsed_time"] for result in results])
+    print(f"Elapsed time: {round(sum_elapsed_text, 4)} seconds")
+
+    return "\n".join(result_texts)
+
+
+@spaces.GPU
+def inference_audio(audio, progress=gr.Progress()):
+    if not audio:
+        raise gr.Error("Please paste your audio file.")
+
+    progress(0, desc="Translating...")
+
+    if isinstance(audio, str):
+        audio_array, sr = torchaudio.load(audio)
+        audio_array = audio_array.squeeze()
+    else:
+        audio_array, sr = audio
+
+    r_sr = audio_processor.feature_extractor.sampling_rate
+
+    print("Audio processor SR:", r_sr)
+    print("Audio file SR:", sr)
+
+    if r_sr != sr:
+        print("Resampling...")
+        resampler = T.Resample(orig_freq=sr, new_freq=r_sr)
+        audio_array = resampler(audio_array)
+
+    inputs = audio_processor(audio_array, return_tensors="pt", sampling_rate=r_sr)
+    inputs = inputs.to(device, dtype=torch_dtype)
+
+    # to avoid hallucination loops, we limit the maximum length of the generated text based expected number of tokens per second
+    token_limit_factor = (
+        6.5 / audio_processor.feature_extractor.sampling_rate
+    )  # Maximum of 6.5 tokens per second
+    seq_lens = inputs.attention_mask.sum(dim=-1)
+    max_length = int((seq_lens * token_limit_factor).max().item())
+
+    generated_ids = audio_model.generate(**inputs, max_length=max_length)
+
+    predictions = audio_processor.batch_decode(generated_ids, skip_special_tokens=True)
+
+    print("Predictions:", predictions)
+
+    text = predictions[0]
+
+    print("Text:", text)
+
+    results = []
+
+    sentences = text.split("\n")
+
+    non_empty_sentences = []
+    for sentence in sentences:
+        s = sentence.strip()
+        if len(s) != 0:
+            non_empty_sentences.append(s)
+
+    for sentence in progress.tqdm(
+        non_empty_sentences, desc="Translating...", unit="sentence"
+    ):
+        t0 = time.time()
+
+        prompt = "Translate the text to Ukrainian:\n" + sentence
+
+        input_ids = tokenizer.apply_chat_template(
+            [{"role": "user", "content": prompt}],
+            add_generation_prompt=True,
+            return_tensors="pt",
+            tokenize=True,
+        ).to(model.device)
+
+        output = model.generate(
+            input_ids,
+            max_new_tokens=2048,
+            # Greedy Search
+            do_sample=False,
+            repetition_penalty=1.05,
             # Sampling
             # do_sample=True,
             # temperature=0.1,
@@ -153,7 +311,9 @@
 
         prompt_len = input_ids.shape[1]
         generated_tokens = output[:, prompt_len:]
-        translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+        translated_text = tokenizer.batch_decode(
+            generated_tokens, skip_special_tokens=True
+        )[0]
 
         elapsed_time = round(time.time() - t0, 2)
 
@@ -171,60 +331,219 @@
     result_texts = []
 
     for result in results:
-        result_texts.append(f'> {result["translated_text"]}\n')
+        result_texts.append(f"{result['sentence']}: {result['translated_text']}\n")
 
     sum_elapsed_text = sum([result["elapsed_time"] for result in results])
-    result_texts.append(f"Elapsed time: {round(sum_elapsed_text, 4)} seconds")
+    print(f"Elapsed time: {round(sum_elapsed_text, 4)} seconds")
 
     return "\n".join(result_texts)
 
-"""
-if use_zero_gpu:
-    @spaces.GPU
-    def inference_gpu(text, progress=gr.Progress()):
-        return inference(text, progress)
-
-    inference_func = inference_gpu
-else:
-    inference_func = inference
-"""
-
-demo = gr.Blocks(
-    title=title,
-    analytics_enabled=False,
-    # theme="huggingface",
-    theme=gr.themes.Base(),
-)
 
-with demo:
-    gr.Markdown(description_head)
+@spaces.GPU
+def inference_image(image, progress=gr.Progress()):
+    if not image:
+        raise gr.Error("Please paste your image file.")
 
-    gr.Markdown("## Usage")
+    progress(0, desc="Translating...")
 
-    translated_text = gr.Textbox(
-        label="Translated text",
-        placeholder=translated_text_value,
-        show_copy_button=True,
-    )
+    if isinstance(image, str):
+        doc = DocumentFile.from_images(image)
+    else:
+        raise gr.Error("Please paste your image file.")
+
+    result = ocr_model(doc)
+
+    text = result.render()
+
+    print("Text:", text)
+
+    results = []
 
-    text = gr.Textbox(label="Text", autofocus=True, lines=5)
+    sentences = [text.replace("\n", " ")]
 
-    gr.Button("Translate").click(
-        inference,
-        concurrency_limit=concurrency_limit,
-        inputs=text,
-        outputs=translated_text,
+    for sentence in progress.tqdm(sentences, desc="Translating...", unit="sentence"):
+        t0 = time.time()
+
+        prompt = "Translate the text to Ukrainian:\n" + sentence
+
+        input_ids = tokenizer.apply_chat_template(
+            [{"role": "user", "content": prompt}],
+            add_generation_prompt=True,
+            return_tensors="pt",
+            tokenize=True,
+        ).to(model.device)
+
+        output = model.generate(
+            input_ids,
+            max_new_tokens=2048,
+            # Greedy Search
+            do_sample=False,
+            repetition_penalty=1.05,
+            # Sampling
+            # do_sample=True,
+            # temperature=0.1,
+            # # top_k=1,
+            # min_p=0.9,
+            # repetition_penalty=1.05,
+        )
+
+        prompt_len = input_ids.shape[1]
+        generated_tokens = output[:, prompt_len:]
+        translated_text = tokenizer.batch_decode(
+            generated_tokens, skip_special_tokens=True
+        )[0]
+
+        elapsed_time = round(time.time() - t0, 2)
+
+        translated_text = translated_text.strip()
+        results.append(
+            {
+                "sentence": sentence,
+                "translated_text": translated_text,
+                "elapsed_time": elapsed_time,
+            }
+        )
+
+    gr.Info("Finished!", duration=2)
+
+    result_texts = []
+
+    for result in results:
+        result_texts.append(f"> {result['sentence']}: {result['translated_text']}\n")
+
+    sum_elapsed_text = sum([result["elapsed_time"] for result in results])
+    print(f"Elapsed time: {round(sum_elapsed_text, 4)} seconds")
+
+    return "\n".join(result_texts)
+
+
+def create_app():
+    tab = gr.Blocks(
+        title=title,
+        analytics_enabled=False,
+        theme=current_theme,
     )
 
-    with gr.Row():
-        gr.Examples(label="Choose an example", inputs=text, examples=examples)
+    with tab:
+        gr.Markdown(description_head)
+        gr.Markdown("## Usage")
+
+        translated_text = gr.Textbox(
+            label="Translated text",
+            placeholder=translated_text_value,
+            show_copy_button=True,
+            lines=5,
+        )
+
+        text = gr.Textbox(label="Text", autofocus=True, lines=5)
+
+        gr.Button("Translate").click(
+            inference_text,
+            concurrency_limit=concurrency_limit,
+            inputs=text,
+            outputs=translated_text,
+        )
+
+        with gr.Row():
+            gr.Examples(label="Choose an example", inputs=text, examples=examples_text)
+
+    return tab
+
+
+def create_audio_app():
+    with gr.Blocks(theme=current_theme) as tab:
+        gr.Markdown(description_head)
+        gr.Markdown("## Usage")
+
+        translated_text = gr.Textbox(
+            label="Translated text",
+            placeholder=translated_audio_value,
+            show_copy_button=True,
+            lines=5,
+        )
+
+        audio = gr.Audio(label="Audio file", sources="upload", type="filepath")
+
+        gr.Button("Translate").click(
+            inference_audio,
+            concurrency_limit=concurrency_limit,
+            inputs=audio,
+            outputs=translated_text,
+        )
+
+        with gr.Row():
+            gr.Examples(
+                label="Choose an example", inputs=audio, examples=examples_audio
+            )
+
+    return tab
+
+
+def create_image_app():
+    with gr.Blocks(theme=current_theme) as tab:
+        gr.Markdown(description_head)
+        gr.Markdown("## Usage")
+
+        translated_text = gr.Textbox(
+            label="Translated text",
+            placeholder=translated_image_value,
+            show_copy_button=True,
+            lines=5,
+        )
+
+        image = gr.Image(label="Image file", sources="upload", type="filepath")
+
+        gr.Button("Translate").click(
+            inference_image,
+            concurrency_limit=concurrency_limit,
+            inputs=image,
+            outputs=translated_text,
+        )
+
+        with gr.Row():
+            gr.Examples(
+                label="Choose an example", inputs=image, examples=examples_image
+            )
+
+    return tab
+
+
+def create_env():
+    with gr.Blocks(theme=current_theme) as tab:
+        gr.Markdown(tech_env)
+        gr.Markdown(tech_libraries)
+
+    return tab
 
-    gr.Markdown(description_foot)
 
-    gr.Markdown("### Gradio app uses:")
-    gr.Markdown(tech_env)
-    gr.Markdown(tech_libraries)
+def create_authors():
+    with gr.Blocks(theme=current_theme) as tab:
+        gr.Markdown(authors_table)
+
+    return tab
+
+
+def create_demo():
+    app_tab = create_app()
+    # app_audio_tab = create_audio_app()
+    # app_image_tab = create_image_app()
+    authors_tab = create_authors()
+    env_tab = create_env()
+
+    return gr.TabbedInterface(
+        # [app_tab, app_audio_tab, app_image_tab, authors_tab, env_tab],
+        [app_tab, authors_tab, env_tab],
+        tab_names=[
+            "✍️ Text",
+            # "🔊 Audio",
+            # "👀 Image",
+            "👥 Authors",
+            "📦 Environment, Models, and Libraries",
+        ],
    )
 
 if __name__ == "__main__":
+    demo = create_demo()
     demo.queue()
     demo.launch()
requirements.txt CHANGED
@@ -2,4 +2,9 @@ gradio
 
 transformers @ git+https://github.com/huggingface/transformers.git@main
 
+torch
+torchaudio
+
 accelerate
+
+python-doctr
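
Below is a minimal, self-contained sketch of the translation call that the new inference_text() in app.py performs, so the commit's generation settings can be tried outside Gradio. It assumes the Yehor/kulyk-en-uk checkpoint ships a chat template (as the Space relies on) and that the bfloat16 dtype configured in app.py is passed to from_pretrained; every other value is copied from the diff.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Model name and generation settings are copied from the app.py diff;
    # passing torch_dtype here is an assumption about the elided arguments.
    model_name = "Yehor/kulyk-en-uk"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.bfloat16, trust_remote_code=True
    )
    model.eval()

    prompt = "Translate the text to English:\nПариж 2024. День 2: Текстова трансляція"
    input_ids = tokenizer.apply_chat_template(
        [{"role": "user", "content": prompt}],
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
    ).to(model.device)

    with torch.inference_mode():
        output = model.generate(
            input_ids, max_new_tokens=2048, do_sample=False, repetition_penalty=1.05
        )

    # Drop the prompt tokens and keep only the generated continuation.
    translated = tokenizer.batch_decode(
        output[:, input_ids.shape[1]:], skip_special_tokens=True
    )[0].strip()
    print(translated)

And a worked version of the token budget used by the (currently disabled) audio path in inference_audio(), assuming Moonshine's usual 16 kHz sampling rate:

    # The diff allows at most 6.5 generated tokens per second of audio
    # to avoid hallucination loops.
    sampling_rate = 16_000  # assumed; app.py reads it from the Moonshine processor
    token_limit_factor = 6.5 / sampling_rate

    seq_len = 10 * sampling_rate  # a 10-second clip, measured in samples
    max_length = int(seq_len * token_limit_factor)
    print(max_length)  # 65 -> at most 65 tokens are generated for this clip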