Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,22 +22,6 @@ from transformers import (
|
|
| 22 |
)
|
| 23 |
from transformers.image_utils import load_image
|
| 24 |
|
| 25 |
-
#theme:custom
|
| 26 |
-
#custom_theme = gr.themes.Base(
|
| 27 |
-
# primary_hue="indigo",
|
| 28 |
-
# secondary_hue="violet",
|
| 29 |
-
# neutral_hue="gray"
|
| 30 |
-
#).set(
|
| 31 |
-
# body_background_fill="#f7f5fa",
|
| 32 |
-
# body_text_color="#1f1f1f",
|
| 33 |
-
# input_background_fill="#ffffff",
|
| 34 |
-
# button_primary_background_fill="#8b5cf6",
|
| 35 |
-
# button_primary_text_color="#ffffff",
|
| 36 |
-
# button_secondary_background_fill="#e0d7f5",
|
| 37 |
-
# button_secondary_text_color="#1f1f1f",
|
| 38 |
-
# shadow_spread="sm"
|
| 39 |
-
#)
|
| 40 |
-
|
| 41 |
# Constants for text generation
|
| 42 |
MAX_MAX_NEW_TOKENS = 2048
|
| 43 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
|
@@ -307,9 +291,10 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
|
|
| 307 |
with gr.Column(elem_classes="canvas-output"):
|
| 308 |
gr.Markdown("## Output")
|
| 309 |
output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=2, show_copy_button=True)
|
| 310 |
-
|
| 311 |
with gr.Accordion("(Result.md)", open=False):
|
| 312 |
markdown_output = gr.Markdown(label="Formatted Result (Result.Md)")
|
|
|
|
| 313 |
model_choice = gr.Radio(
|
| 314 |
choices=["olmOCR-7B-0725", "Nanonets-OCR-s", "RolmOCR-7B",
|
| 315 |
"Aya-Vision-8B", "Qwen2-VL-OCR-2B"],
|
|
@@ -322,7 +307,8 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
|
|
| 322 |
gr.Markdown("> [Qwen2-VL-OCR-2B](https://huggingface.co/prithivMLmods/Qwen2-VL-OCR-2B-Instruct): qwen2-vl-ocr-2b-instruct model is a fine-tuned version of qwen2-vl-2b-instruct, tailored for tasks that involve [messy] optical character recognition (ocr), image-to-text conversion, and math problem solving with latex formatting.")
|
| 323 |
gr.Markdown("> [RolmOCR](https://huggingface.co/reducto/RolmOCR): rolmocr, high-quality, openly available approach to parsing pdfs and other complex documents optical character recognition. it is designed to handle a wide range of document types, including scanned documents, handwritten text, and complex layouts.")
|
| 324 |
gr.Markdown("> [Aya-Vision](https://huggingface.co/CohereLabs/aya-vision-8b): cohere labs aya vision 8b is an open weights research release of an 8-billion parameter model with advanced capabilities optimized for a variety of vision-language use cases, including ocr, captioning, visual reasoning, summarization, question answering, code, and more.")
|
| 325 |
-
|
|
|
|
| 326 |
|
| 327 |
image_submit.click(
|
| 328 |
fn=generate_image,
|
|
|
|
| 22 |
)
|
| 23 |
from transformers.image_utils import load_image
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
# Constants for text generation
|
| 26 |
MAX_MAX_NEW_TOKENS = 2048
|
| 27 |
DEFAULT_MAX_NEW_TOKENS = 1024
|
|
|
|
| 291 |
with gr.Column(elem_classes="canvas-output"):
|
| 292 |
gr.Markdown("## Output")
|
| 293 |
output = gr.Textbox(label="Raw Output Stream", interactive=False, lines=2, show_copy_button=True)
|
| 294 |
+
|
| 295 |
with gr.Accordion("(Result.md)", open=False):
|
| 296 |
markdown_output = gr.Markdown(label="Formatted Result (Result.Md)")
|
| 297 |
+
|
| 298 |
model_choice = gr.Radio(
|
| 299 |
choices=["olmOCR-7B-0725", "Nanonets-OCR-s", "RolmOCR-7B",
|
| 300 |
"Aya-Vision-8B", "Qwen2-VL-OCR-2B"],
|
|
|
|
| 307 |
gr.Markdown("> [Qwen2-VL-OCR-2B](https://huggingface.co/prithivMLmods/Qwen2-VL-OCR-2B-Instruct): qwen2-vl-ocr-2b-instruct model is a fine-tuned version of qwen2-vl-2b-instruct, tailored for tasks that involve [messy] optical character recognition (ocr), image-to-text conversion, and math problem solving with latex formatting.")
|
| 308 |
gr.Markdown("> [RolmOCR](https://huggingface.co/reducto/RolmOCR): rolmocr, high-quality, openly available approach to parsing pdfs and other complex documents optical character recognition. it is designed to handle a wide range of document types, including scanned documents, handwritten text, and complex layouts.")
|
| 309 |
gr.Markdown("> [Aya-Vision](https://huggingface.co/CohereLabs/aya-vision-8b): cohere labs aya vision 8b is an open weights research release of an 8-billion parameter model with advanced capabilities optimized for a variety of vision-language use cases, including ocr, captioning, visual reasoning, summarization, question answering, code, and more.")
|
| 310 |
+
|
| 311 |
+
gr.Markdown("> ⚠️ Note: Models in this space may not perform well on video inference tasks.")
|
| 312 |
|
| 313 |
image_submit.click(
|
| 314 |
fn=generate_image,
|