make the pipeline simple
- app.py +10 -8
- models/__pycache__/controlnet_model.cpython-38.pyc +0 -0
- models/__pycache__/image_text_transformation.cpython-38.pyc +0 -0
- models/image_text_transformation.py +2 -1
- models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc +0 -0
- pretrained_models/blip-image-captioning-large +0 -1
- pretrained_models/blip2-opt-2.7b +0 -1
- pretrained_models/clip-vit-large-patch14 +0 -1
- pretrained_models/clipseg-rd64-refined +0 -1
- pretrained_models/oneformer_ade20k_swin_large +0 -1
- pretrained_models/oneformer_coco_swin_large +0 -1
- pretrained_models/stable-diffusion-v1-5 +0 -1
app.py
CHANGED
@@ -49,7 +49,8 @@ def process_image(image_src, options=None, processor=None):
     print(options)
     if options is None:
         options = []
-    processor.args.semantic_segment = "Semantic Segment" in options
+    # processor.args.semantic_segment = "Semantic Segment" in options
+    processor.args.semantic_segment = False
     image_generation_status = "Image Generation" in options
     image_caption, dense_caption, region_semantic, gen_text = processor.image_to_text(image_src)
     if image_generation_status:
@@ -93,7 +94,7 @@ processor = ImageTextTransformation(args)
 
 # Create Gradio input and output components
 image_input = gr.inputs.Image(type='filepath', label="Input Image")
-semantic_segment_checkbox = gr.inputs.Checkbox(label="Semantic Segment", default=False)
+# semantic_segment_checkbox = gr.inputs.Checkbox(label="Semantic Segment", default=False)
 image_generation_checkbox = gr.inputs.Checkbox(label="Image Generation", default=False)
 
 logo_base64 = add_logo()
@@ -101,7 +102,7 @@ logo_base64 = add_logo()
 title_with_logo = f'<img src="data:image/jpeg;base64,{logo_base64}" width="400" style="vertical-align: middle;"> Understanding Image with Text'
 
 examples = [
-    ["examples/
+    ["examples/test_4.jpg"],
 ]
 
 # Create Gradio interface
@@ -110,17 +111,18 @@ interface = gr.Interface(
     inputs=[image_input,
             gr.CheckboxGroup(
                 label="Options",
-                choices=["
+                choices=["Image Generation"],
             ),
             ],
     outputs=gr.outputs.HTML(),
     title=title_with_logo,
-
+    examples=examples,
     description="""
     This code support image to text transformation. Then the generated text can do retrieval, question answering et al to conduct zero-shot.
-    \n
-    \n
-    \n
+    \n Github: https://github.com/showlab/Image2Paragraph
+    \n Twitter: https://twitter.com/awinyimgprocess/status/1646225454599372800?s=46&t=HvOe9T2n35iFuCHP5aIHpQ
+    \n Since GPU is expensive, we use CPU for demo and not include semantic segment anything. Run code local with gpu or google colab we provided for fast speed.
+    \n Ttext2image model is controlnet ( very slow in cpu(~2m)), which used canny edge as reference.
     """
 )
 
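Note on the app.py change: it hard-codes semantic segmentation off and removes the toggle from the UI. If you run the repo locally with a GPU, the toggle can be restored by reverting the two commented lines; a minimal sketch is below (the `options_group` name and the two-entry choices list are illustrative assumptions, and the rest of app.py is taken as unchanged).

# Hypothetical local-GPU variant: re-expose "Semantic Segment" and read the
# checkbox again instead of hard-coding processor.args.semantic_segment = False.
options_group = gr.CheckboxGroup(
    label="Options",
    choices=["Semantic Segment", "Image Generation"],
)

def process_image(image_src, options=None, processor=None):
    if options is None:
        options = []
    processor.args.semantic_segment = "Semantic Segment" in options
    image_generation_status = "Image Generation" in options
    image_caption, dense_caption, region_semantic, gen_text = processor.image_to_text(image_src)
    ...  # HTML assembly of the four outputs stays as in app.py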
models/__pycache__/controlnet_model.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/controlnet_model.cpython-38.pyc and b/models/__pycache__/controlnet_model.cpython-38.pyc differ

models/__pycache__/image_text_transformation.cpython-38.pyc
CHANGED
Binary files a/models/__pycache__/image_text_transformation.cpython-38.pyc and b/models/__pycache__/image_text_transformation.cpython-38.pyc differ
models/image_text_transformation.py
CHANGED
@@ -33,7 +33,8 @@ class ImageTextTransformation:
         self.dense_caption_model = DenseCaptioning(device=self.args.dense_caption_device)
         self.gpt_model = ImageToText(openai_key)
         self.controlnet_model = TextToImage(device=self.args.contolnet_device)
-
+        # time-conusimg on CPU, run on local
+        # self.region_semantic_model = RegionSemantic(device=self.args.semantic_segment_device)
         print('\033[1;32m' + "Model initialization finished!".center(50, '-') + '\033[0m')
 
 
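Note on the image_text_transformation.py change: the region-semantic model is no longer constructed because it is too slow on the CPU Space; the added comment shows how to bring it back locally. A hypothetical guarded variant of those two lines inside `__init__` (the getattr() guard is an assumption, not part of this commit; uncommenting the original line unconditionally works the same on a GPU machine):

        # Only build the heavy region-semantic model when segmentation is requested.
        if getattr(self.args, "semantic_segment", False):
            self.region_semantic_model = RegionSemantic(device=self.args.semantic_segment_device)
        else:
            self.region_semantic_model = None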
models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc
CHANGED
Binary files a/models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc and b/models/segment_models/__pycache__/semantic_segment_anything_model.cpython-38.pyc differ
pretrained_models/blip-image-captioning-large
DELETED
@@ -1 +0,0 @@
-Subproject commit 293ab01f2dc41c1c214299314f11de635d0937dc

pretrained_models/blip2-opt-2.7b
DELETED
@@ -1 +0,0 @@
-Subproject commit 56e1fe81e7e7c346e95e196ace7b442b3f8ff483

pretrained_models/clip-vit-large-patch14
DELETED
@@ -1 +0,0 @@
-Subproject commit 8d052a0f05efbaefbc9e8786ba291cfdf93e5bff

pretrained_models/clipseg-rd64-refined
DELETED
@@ -1 +0,0 @@
-Subproject commit 583b388deb98a04feb3e1f816dcdb8f3062ee205

pretrained_models/oneformer_ade20k_swin_large
DELETED
@@ -1 +0,0 @@
-Subproject commit 4a5bac8e64f82681a12db2e151a4c2f4ce6092b2

pretrained_models/oneformer_coco_swin_large
DELETED
@@ -1 +0,0 @@
-Subproject commit 3a263017ca5c75adbea145f25f81b118243d4394

pretrained_models/stable-diffusion-v1-5
DELETED
@@ -1 +0,0 @@
-Subproject commit 39593d5650112b4cc580433f6b0435385882d819
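Note on the pretrained_models deletions: the vendored submodule checkpoints are dropped. The commit does not show how the Space obtains weights afterwards, so the snippet below is only an illustrative sketch of loading one of the removed checkpoints from the Hugging Face Hub with the standard transformers API; whether app.py resolves models this way is an assumption.

from transformers import BlipProcessor, BlipForConditionalGeneration

# Illustrative only: pull the captioning checkpoint from the Hub (cached locally)
# instead of reading it from a pretrained_models/ git submodule.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")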