# Import libraries
from pathlib import Path
import pickle

import torch
import gradio as gr
from PIL import Image
from sentence_transformers import SentenceTransformer, util
# Run on GPU when available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

IMAGES_DIR = Path("photos/")

# Load the CLIP model (ViT-B/32 backbone) onto the selected device
model = SentenceTransformer('clip-ViT-B-32', device=device)
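# CLIP embeds text and images in a shared vector space, so a text query can be
# ranked directly against pre-computed image embeddings by cosine similarity.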
# Load pre-computed CLIP embeddings for the 25k-image Unsplash dataset
emb_filename = 'unsplash-25k-photos-embeddings.pkl'
with open(emb_filename, 'rb') as emb:
    img_names, img_emb = pickle.load(emb)
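# Note: the pickle is expected to hold a (filenames, embeddings) pair. If the
# file were missing, it could be regenerated along these lines (a minimal
# sketch, assuming IMAGES_DIR contains only JPEG images CLIP can open):
#
#   image_paths = sorted(IMAGES_DIR.glob("*.jpg"))
#   img_names = [p.name for p in image_paths]
#   img_emb = model.encode([Image.open(p) for p in image_paths],
#                          batch_size=128, convert_to_tensor=True,
#                          show_progress_bar=True)
#   with open(emb_filename, "wb") as out:
#       pickle.dump((img_names, img_emb), out)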
def display_matches(similarity, topk):
    # Pick the indices of the topk highest similarity scores and load the
    # corresponding images from disk
    best_matched_images = []
    top_k_indices = torch.topk(similarity, topk, dim=0).indices
    for matched_image in top_k_indices:
        img = Image.open(IMAGES_DIR / img_names[int(matched_image)])
        best_matched_images.append(img)
    return best_matched_images
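# Example: with similarity of shape (25000, 1), torch.topk(..., dim=0) returns
# the row indices of the topk best-scoring photos, which index into img_names.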
def image_search(Option, topk, search_text, search_image):
    # The Top-K dropdown uses type="index", so it arrives as a 0-based index
    # ("1" -> 0); add 1 to recover the requested number of images
    topk = topk + 1
    if Option == "Text-To-Image":
        # Encode the query text as a tensor and compute cosine similarities
        # against the pre-computed Unsplash image embeddings
        text_emb = model.encode([search_text], convert_to_tensor=True)
        similarity = util.cos_sim(img_emb, text_emb)
        # Use the computed similarities to find the topk best matches
        return display_matches(similarity, topk)
    elif Option == "Image-To-Image":
        # Encode the uploaded image (Gradio passes it as a numpy array) and
        # compute cosine similarities against the Unsplash image embeddings
        image_emb = model.encode([Image.fromarray(search_image)], convert_to_tensor=True)
        similarity = util.cos_sim(img_emb, image_emb)
        # Use the computed similarities to find the topk best matches
        return display_matches(similarity, topk)
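# A hypothetical smoke test outside the UI (values illustrative only; note that
# topk is treated as a 0-based dropdown index, so 2 here yields 3 images):
#
#   matches = image_search("Text-To-Image", 2, "two dogs playing in the snow", None)
#   matches[0].show()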
gr.Interface(fn=image_search, title="Search Image",
             description="Enter text or an image to search for the most relevant images...",
             article="""
Instructions:
1. Select the search mode: `Text-To-Image` or `Image-To-Image`.
2. Select the number of most relevant images you want to see.
3. Enter the text or upload the image accordingly.
4. The matching images appear on the right. To run another search, clear the current input, then repeat steps 1-3.
""",
             theme="huggingface",
             inputs=[gr.inputs.Dropdown(["Text-To-Image", "Image-To-Image"], label="Select Search Mode"),
                     gr.inputs.Dropdown(["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"], type="index", default="1", label="Select Top K Images"),
                     gr.inputs.Textbox(lines=3, label="Input Text", placeholder="Enter the text..."),
                     gr.inputs.Image(optional=True, label="Input Image")
                     ],
             outputs=gr.outputs.Carousel([gr.outputs.Image(type="pil")]),
             enable_queue=True
             ).launch(debug=True, share=True)