Spaces:

Upyaya
/

Fashion-Image-Captioning-using-BLIP-2

Paused

App Files Files

Upyaya committed on Jun 27, 2023

Commit

3d3369a

1 Parent(s): f29da3c

Clean previous upload to try new sample

Browse files

Files changed (1) hide show

app.py +49 -45

app.py CHANGED Viewed

@@ -58,70 +58,74 @@ st.caption("So, for this project have downloaded the pre-trained model [ybelkada
 st.caption("For more detail: [Github link](https://github.com/SmithaUpadhyaya/fashion_image_caption)")    #write
 #Select a few sample images for each category of clothes
-st.caption("Select image:")
-option = st.selectbox('From sample', ('None', 'dress', 'earrings', 'sweater', 'sunglasses', 'shoe', 'hat', 'heels', 'socks', 'tee', 'bracelet'), index = 0)
-st.text("Or")
-file_name = st.file_uploader(label = "Upload an image", accept_multiple_files = False)
-btn_click = st.button('Generate')
-st.caption("Application deployed on CPU basic with 16GB RAM")
-if btn_click:
-    if file_name is not None:
-        image = Image.open(file_name)
-    elif option is not 'None':
-        file_name = os.path.join(sample_img_path, map_sampleid_name[option])
-        image = Image.open(file_name)
-    if image is not None:
-        image_col, caption_text = st.columns(2)
-        image_col.header("Image")
-        caption_text.header("Generated Caption")
-        image_col.image(image.resize((252,252)), use_column_width = True)
-        if 'init_model_required' not in st.session_state:
-            with st.spinner('Initializing model...'):
-                init_model_required = True
-                processor, model, init_model_required = init_model(init_model_required)
-                #Save the initialized model in session state
-                if 'init_model_required' not in st.session_state:
-                    st.session_state.init_model_required = init_model_required
-                    st.session_state.processor = processor
-                    st.session_state.model = model
-        else:
-            processor = st.session_state.processor
-            model = st.session_state.model
-        with st.spinner('Generating Caption...'):
-            #Preprocess the image
-            #Inference on GPU. Running this on GPU produced errors like: "slow_conv2d_cpu" not implemented for 'Half'" , " Input type (float) and bias type (struct c10::Half)"
-            #inputs = processor(images = image, return_tensors = "pt").to('cuda', torch.float16)
-            #Inference on CPU
-            inputs = processor(images = image, return_tensors = "pt")
-            pixel_values = inputs.pixel_values
-            #Predict the caption for the image
-            generated_ids = model.generate(pixel_values = pixel_values, max_length = 25)
-            generated_caption = processor.batch_decode(generated_ids, skip_special_tokens = True)[0]
-            #Output the predicted text
-            caption_text.text(generated_caption)
-    #Reset the variables
-    option = 'None'
-    image = None
-    file_name = None
 #if __name__ == "__main__":
 #   main()

 st.caption("For more detail: [Github link](https://github.com/SmithaUpadhyaya/fashion_image_caption)")    #write
 #Select a few sample images for each category of clothes
+#Input form: choose a bundled sample image or upload one, then generate a caption for it.
+#clear_on_submit = True asks Streamlit to reset the form widgets after each submission.
+with st.form("app", clear_on_submit = True):
+    st.caption("Select image:")
+    option = st.selectbox('From sample', ('None', 'dress', 'earrings', 'sweater', 'sunglasses', 'shoe', 'hat', 'heels', 'socks', 'tee', 'bracelet'), index = 0)
+    st.text("Or")
+    file_name = st.file_uploader(label = "Upload an image", accept_multiple_files = False)
+    btn_click = st.form_submit_button('Generate')
+    st.caption("Application deployed on CPU basic with 16GB RAM")
+    if btn_click:
+        #An uploaded file takes priority over the sample selection
+        image = None
+        if file_name is not None:
+            image = Image.open(file_name)
+        elif option != 'None':
+            #Compare by value: an identity test ("is not") against a string literal is
+            #implementation-dependent and raises a SyntaxWarning on Python 3.8+
+            sample_path = os.path.join(sample_img_path, map_sampleid_name[option])
+            image = Image.open(sample_path)
+        if image is not None:
+            image_col, caption_text = st.columns(2)
+            image_col.header("Image")
+            caption_text.header("Generated Caption")
+            image_col.image(image.resize((252,252)), use_column_width = True)
+            if 'init_model_required' not in st.session_state:
+                #First request of this session: initialize the model once and keep it in session state
+                with st.spinner('Initializing model...'):
+                    processor, model, init_model_required = init_model(True)
+                    st.session_state.init_model_required = init_model_required
+                    st.session_state.processor = processor
+                    st.session_state.model = model
+            else:
+                #Reuse the processor and model stored by an earlier run of this session
+                processor = st.session_state.processor
+                model = st.session_state.model
+            with st.spinner('Generating Caption...'):
+                #Preprocess the image
+                #Inference on GPU. Running this on GPU produced errors like: "slow_conv2d_cpu" not implemented for 'Half'" , " Input type (float) and bias type (struct c10::Half)"
+                #inputs = processor(images = image, return_tensors = "pt").to('cuda', torch.float16)
+                #Inference on CPU
+                inputs = processor(images = image, return_tensors = "pt")
+                pixel_values = inputs.pixel_values
+                #Predict the caption for the image
+                generated_ids = model.generate(pixel_values = pixel_values, max_length = 25)
+                generated_caption = processor.batch_decode(generated_ids, skip_special_tokens = True)[0]
+                #Output the predicted text
+                caption_text.text(generated_caption)
 #if __name__ == "__main__":
 #   main()