visual-deepsearch

Sleeping

App Files Community

manu committed on Jun 22, 2024

Commit

3649694

verified ·

1 Parent(s): d40ecad

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -17

app.py CHANGED Viewed

@@ -1,9 +1,8 @@
 import os
 import gradio as gr
-from pdf2image import convert_from_path
 import torch
 from PIL import Image
 from torch.utils.data import DataLoader
 from tqdm import tqdm
@@ -60,7 +59,7 @@ def search(query: str, ds, images):
     retriever_evaluator = CustomEvaluator(is_multi_vector=True)
     scores = retriever_evaluator.evaluate(qs, ds)
     best_page = int(scores.argmax(axis=1).item())
-    return f"The most relevant page is {best_page}",  images[best_page]
 def index(file, ds):
@@ -84,18 +83,20 @@ def index(file, ds):
     return f"Uploaded and converted {len(images)} pages", ds, images
-COLORS = ['#4285f4', '#db4437', '#f4b400', '#0f9d58', '#e48ef1']
 # Load model
 model_name = "coldoc/colpali-3b-mix-448"
 token = os.environ.get("HF_TOKEN")
-model = ColPali.from_pretrained("google/paligemma-3b-mix-448", torch_dtype=torch.bfloat16, device_map="cuda", token=token).eval()
 model.load_adapter(model_name)
 processor = AutoProcessor.from_pretrained(model_name, token=token)
 device = model.device
 mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
 with gr.Blocks() as demo:
-    gr.Markdown("# PDF to 🤗 Dataset")
     gr.Markdown("## 1️⃣ Upload PDFs")
     file = gr.File(file_types=["pdf"], file_count="multiple")
@@ -103,14 +104,10 @@ with gr.Blocks() as demo:
     convert_button = gr.Button("🔄 Convert and upload")
     message = gr.Textbox("Files not yet uploaded")
     embeds = gr.State(value=[])
-    imgs = gr.State()
     # Define the actions
-    convert_button.click(
-        index,
-        inputs=[file, embeds],
-        outputs=[message, embeds, imgs]
-    )
     gr.Markdown("## 3️⃣ Search")
     query = gr.Textbox(placeholder="Enter your query here")
@@ -118,11 +115,8 @@ with gr.Blocks() as demo:
     message2 = gr.Textbox("Query not yet set")
     output_img = gr.Image()
-    search_button.click(
-        search, inputs=[query, embeds, imgs],
-        outputs=[message2, output_img]
-    )
 if __name__ == "__main__":
-    demo.queue(max_size=10).launch(debug=True)

 import os
 import gradio as gr
 import torch
+from pdf2image import convert_from_path
 from PIL import Image
 from torch.utils.data import DataLoader
 from tqdm import tqdm
     retriever_evaluator = CustomEvaluator(is_multi_vector=True)
     scores = retriever_evaluator.evaluate(qs, ds)
     best_page = int(scores.argmax(axis=1).item())
+    return f"The most relevant page is {best_page}", images[best_page]
 def index(file, ds):
     return f"Uploaded and converted {len(images)} pages", ds, images
+COLORS = ["#4285f4", "#db4437", "#f4b400", "#0f9d58", "#e48ef1"]
 # Load model
 model_name = "coldoc/colpali-3b-mix-448"
 token = os.environ.get("HF_TOKEN")
+model = ColPali.from_pretrained(
+    "google/paligemma-3b-mix-448", torch_dtype=torch.bfloat16, device_map="cuda", token=token
+).eval()
 model.load_adapter(model_name)
 processor = AutoProcessor.from_pretrained(model_name, token=token)
 device = model.device
 mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
 with gr.Blocks() as demo:
+    gr.Markdown("# ColPali: Efficient Document Retrieval with Vision Language Models 📚🔍")
     gr.Markdown("## 1️⃣ Upload PDFs")
     file = gr.File(file_types=["pdf"], file_count="multiple")
     convert_button = gr.Button("🔄 Convert and upload")
     message = gr.Textbox("Files not yet uploaded")
     embeds = gr.State(value=[])
+    imgs = gr.State(value=[])
     # Define the actions
+    convert_button.click(index, inputs=[file, embeds], outputs=[message, embeds, imgs])
     gr.Markdown("## 3️⃣ Search")
     query = gr.Textbox(placeholder="Enter your query here")
     message2 = gr.Textbox("Query not yet set")
     output_img = gr.Image()
+    search_button.click(search, inputs=[query, embeds, imgs], outputs=[message2, output_img])
 if __name__ == "__main__":
+    demo.queue(max_size=10).launch(debug=True)