vision-rag-sourced-docker-image

Paused

App Files Files Community

manu committed on Feb 21

Commit

6efb913

verified ·

1 Parent(s): 10f6f3f

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -3

app.py CHANGED Viewed

@@ -25,9 +25,80 @@ model = ColQwen2.from_pretrained(
         device_map="cuda:0",  # or "mps" if on Apple Silicon
         # attn_implementation="flash_attention_2", # should work on A100
     ).eval()
-processor = ColQwen2Processor.from_pretrained("manu/colqwen2-v1.0-alpha")
 @spaces.GPU
 def search(query: str, ds, images, k):
@@ -50,7 +121,10 @@ def search(query: str, ds, images, k):
     for idx in top_k_indices:
         results.append((images[idx], f"Page {idx}"))
-    return results
 def index(files, ds):
@@ -126,9 +200,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     # Define the actions
     search_button = gr.Button("🔍 Search", variant="primary")
     output_gallery = gr.Gallery(label="Retrieved Documents", height=600, show_label=True)
     convert_button.click(index, inputs=[file, embeds], outputs=[message, embeds, imgs])
-    search_button.click(search, inputs=[query, embeds, imgs, k], outputs=[output_gallery])
 if __name__ == "__main__":
     demo.queue(max_size=10).launch(debug=True)

         device_map="cuda:0",  # or "mps" if on Apple Silicon
         # attn_implementation="flash_attention_2", # should work on A100
     ).eval()
+processor = ColQwen2Processor.from_pretrained("manu/colqwen2-v1.0")
+def encode_image_to_base64(image):
+    """Encodes a PIL image to a base64 string."""
+    buffered = BytesIO()
+    image.save(buffered, format="JPEG")
+    return base64.b64encode(buffered.getvalue()).decode("utf-8")
+def query_gpt4o_mini(query, images):
+    """Calls OpenAI's GPT-4o-mini with the query and image data."""
+    from openai import OpenAI
+    images = [encode_image_to_base64(image) for image in images]
+    client = OpenAI(api_key=os.env.get("OPENAI_KEY"))
+    PROMPT = """
+    You are a smart assistant designed to answer questions about a PDF document.
+    You are given relevant information in the form of PDF pages. Use them to construct a response to the question, and cite your sources.
+    If it is not possible to answer using the provided pages, do not attempt to provide an answer and simply say the answer is not present within the documents.
+    Give detailed and extensive answers, only containing info in the pages you are given.
+    Answer in the same language as the query.
+    Query: {query}
+    PDF pages:
+    """
+    response = client.chat.completions.create(
+    model="gpt-4o-mini",
+    messages=[
+        {
+          "role": "user",
+          "content": [
+            {
+              "type": "text",
+              "text": PROMPT.format(query=query)
+            },
+            {
+              "type": "image_url",
+              "image_url": {
+                "url": f"data:image/jpeg;base64,{base64_images[0]}"
+                },
+            },
+            {
+              "type": "image_url",
+              "image_url": {
+                "url": f"data:image/jpeg;base64,{base64_images[1]}"
+                },
+            },
+            {
+              "type": "image_url",
+              "image_url": {
+                "url": f"data:image/jpeg;base64,{base64_images[2]}"
+                },
+            },
+            {
+              "type": "image_url",
+              "image_url": {
+                "url": f"data:image/jpeg;base64,{base64_images[3]}"
+                },
+            },
+            {
+              "type": "image_url",
+              "image_url": {
+                "url": f"data:image/jpeg;base64,{base64_images[4]}"
+                },
+            },
+          ],
+        }
+      ],
+      max_tokens=500,
+    )
+    return response.choices[0].message.content
 @spaces.GPU
 def search(query: str, ds, images, k):
     for idx in top_k_indices:
         results.append((images[idx], f"Page {idx}"))
+    # Generate response from GPT-4o-mini
+    ai_response = "Activate AI response by forking and adding your GPT-4o key" # query_gpt4o_mini(query, results)
+    return results, ai_response
 def index(files, ds):
     # Define the actions
     search_button = gr.Button("🔍 Search", variant="primary")
     output_gallery = gr.Gallery(label="Retrieved Documents", height=600, show_label=True)
+    output_text = gr.Textbox(label="AI Response", placeholder="Generated response based on retrieved documents")
     convert_button.click(index, inputs=[file, embeds], outputs=[message, embeds, imgs])
+    search_button.click(search, inputs=[query, embeds, imgs, k], outputs=[output_gallery, output_text])
 if __name__ == "__main__":
     demo.queue(max_size=10).launch(debug=True)