make the pipeline simple
- app.py +10 -8
- models/__pycache__/controlnet_model.cpython-38.pyc +0 -0
- models/__pycache__/image_text_transformation.cpython-38.pyc +0 -0
- models/image_text_transformation.py +2 -1
- models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc +0 -0
- pretrained_models/blip-image-captioning-large +0 -1
- pretrained_models/blip2-opt-2.7b +0 -1
- pretrained_models/clip-vit-large-patch14 +0 -1
- pretrained_models/clipseg-rd64-refined +0 -1
- pretrained_models/oneformer_ade20k_swin_large +0 -1
- pretrained_models/oneformer_coco_swin_large +0 -1
- pretrained_models/stable-diffusion-v1-5 +0 -1
app.py
CHANGED
@@ -49,7 +49,8 @@ def process_image(image_src, options=None, processor=None):
     print(options)
     if options is None:
         options = []
-    processor.args.semantic_segment = "Semantic Segment" in options
+    # processor.args.semantic_segment = "Semantic Segment" in options
+    processor.args.semantic_segment = False
     image_generation_status = "Image Generation" in options
     image_caption, dense_caption, region_semantic, gen_text = processor.image_to_text(image_src)
     if image_generation_status:
@@ -93,7 +94,7 @@ processor = ImageTextTransformation(args)
 
 # Create Gradio input and output components
 image_input = gr.inputs.Image(type='filepath', label="Input Image")
-semantic_segment_checkbox = gr.inputs.Checkbox(label="Semantic Segment", default=False)
+# semantic_segment_checkbox = gr.inputs.Checkbox(label="Semantic Segment", default=False)
 image_generation_checkbox = gr.inputs.Checkbox(label="Image Generation", default=False)
 
 logo_base64 = add_logo()
@@ -101,7 +102,7 @@ logo_base64 = add_logo()
 title_with_logo = f'<img src="data:image/jpeg;base64,{logo_base64}" width="400" style="vertical-align: middle;"> Understanding Image with Text'
 
 examples = [
-    ["examples/
+    ["examples/test_4.jpg"],
 ]
 
 # Create Gradio interface
@@ -110,17 +111,18 @@ interface = gr.Interface(
     inputs=[image_input,
             gr.CheckboxGroup(
                 label="Options",
-                choices=["
+                choices=["Image Generation"],
             ),
             ],
     outputs=gr.outputs.HTML(),
     title=title_with_logo,
-
+    examples=examples,
     description="""
     This code support image to text transformation. Then the generated text can do retrieval, question answering et al to conduct zero-shot.
-    \n
-    \n
-    \n
+    \n Github: https://github.com/showlab/Image2Paragraph
+    \n Twitter: https://twitter.com/awinyimgprocess/status/1646225454599372800?s=46&t=HvOe9T2n35iFuCHP5aIHpQ
+    \n Since GPU is expensive, we use CPU for demo and not include semantic segment anything. Run code local with gpu or google colab we provided for fast speed.
+    \n Ttext2image model is controlnet ( very slow in cpu(~2m)), which used canny edge as reference.
     """
 )
 
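Note on the app.py change: it hard-codes semantic segmentation off and removes the toggle from the UI. If you run the repo locally with a GPU, the toggle can be restored by reverting the two commented lines; a minimal sketch is below (the `options_group` name and the two-entry choices list are illustrative assumptions, and the rest of app.py is taken as unchanged).

# Hypothetical local-GPU variant: re-expose "Semantic Segment" and read the
# checkbox again instead of hard-coding processor.args.semantic_segment = False.
options_group = gr.CheckboxGroup(
    label="Options",
    choices=["Semantic Segment", "Image Generation"],
)

def process_image(image_src, options=None, processor=None):
    if options is None:
        options = []
    processor.args.semantic_segment = "Semantic Segment" in options
    image_generation_status = "Image Generation" in options
    image_caption, dense_caption, region_semantic, gen_text = processor.image_to_text(image_src)
    ...  # HTML assembly of the four outputs stays as in app.py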
models/__pycache__/controlnet_model.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/controlnet_model.cpython-38.pyc and b/models/__pycache__/controlnet_model.cpython-38.pyc differ

models/__pycache__/image_text_transformation.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/image_text_transformation.cpython-38.pyc and b/models/__pycache__/image_text_transformation.cpython-38.pyc differ
models/image_text_transformation.py
CHANGED
@@ -33,7 +33,8 @@ class ImageTextTransformation:
         self.dense_caption_model = DenseCaptioning(device=self.args.dense_caption_device)
         self.gpt_model = ImageToText(openai_key)
         self.controlnet_model = TextToImage(device=self.args.contolnet_device)
-
+        # time-conusimg on CPU, run on local
+        # self.region_semantic_model = RegionSemantic(device=self.args.semantic_segment_device)
         print('\033[1;32m' + "Model initialization finished!".center(50, '-') + '\033[0m')
 
 
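Note on the image_text_transformation.py change: the region-semantic model is no longer constructed because it is too slow on the CPU Space; the added comment shows how to bring it back locally. A hypothetical guarded variant of those two lines inside `__init__` (the getattr() guard is an assumption, not part of this commit; uncommenting the original line unconditionally works the same on a GPU machine):

        # Only build the heavy region-semantic model when segmentation is requested.
        if getattr(self.args, "semantic_segment", False):
            self.region_semantic_model = RegionSemantic(device=self.args.semantic_segment_device)
        else:
            self.region_semantic_model = None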
models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc
CHANGED
Binary files a/models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc and b/models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc differ
pretrained_models/blip-image-captioning-large
DELETED
@@ -1 +0,0 @@
-Subproject commit 293ab01f2dc41c1c214299314f11de635d0937dc

pretrained_models/blip2-opt-2.7b
DELETED
@@ -1 +0,0 @@
-Subproject commit 56e1fe81e7e7c346e95e196ace7b442b3f8ff483

pretrained_models/clip-vit-large-patch14
DELETED
@@ -1 +0,0 @@
-Subproject commit 8d052a0f05efbaefbc9e8786ba291cfdf93e5bff

pretrained_models/clipseg-rd64-refined
DELETED
@@ -1 +0,0 @@
-Subproject commit 583b388deb98a04feb3e1f816dcdb8f3062ee205

pretrained_models/oneformer_ade20k_swin_large
DELETED
@@ -1 +0,0 @@
-Subproject commit 4a5bac8e64f82681a12db2e151a4c2f4ce6092b2

pretrained_models/oneformer_coco_swin_large
DELETED
@@ -1 +0,0 @@
-Subproject commit 3a263017ca5c75adbea145f25f81b118243d4394

pretrained_models/stable-diffusion-v1-5
DELETED
@@ -1 +0,0 @@
-Subproject commit 39593d5650112b4cc580433f6b0435385882d819
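Note on the pretrained_models deletions: the vendored submodule checkpoints are dropped. The commit does not show how the Space obtains weights afterwards, so the snippet below is only an illustrative sketch of loading one of the removed checkpoints from the Hugging Face Hub with the standard transformers API; whether app.py resolves models this way is an assumption.

from transformers import BlipProcessor, BlipForConditionalGeneration

# Illustrative only: pull the captioning checkpoint from the Hub (cached locally)
# instead of reading it from a pretrained_models/ git submodule.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")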