vision-rag-sourced-docker-image

Paused

App Files Community

manu committed on Feb 21

Commit

73f30e5

verified ·

1 Parent(s): 42833ce

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -37

app.py CHANGED Viewed

@@ -37,47 +37,53 @@ def encode_image_to_base64(image):
     return base64.b64encode(buffered.getvalue()).decode("utf-8")
-def query_gpt4o_mini(query, images):
     """Calls OpenAI's GPT-4o-mini with the query and image data."""
-    from openai import OpenAI
-    base64_images = [encode_image_to_base64(image[0]) for image in images]
-    client = OpenAI(api_key=os.getenv("OPENAI_KEY"))
-    PROMPT = """
-    You are a smart assistant designed to answer questions about a PDF document.
-    You are given relevant information in the form of PDF pages. Use them to construct a response to the question, and cite your sources.
-    If it is not possible to answer using the provided pages, do not attempt to provide an answer and simply say the answer is not present within the documents.
-    Give detailed and extensive answers, only containing info in the pages you are given.
-    Answer in the same language as the query.
-    Query: {query}
-    PDF pages:
-    """
-    response = client.chat.completions.create(
-    model="gpt-4o-mini",
-    messages=[
-        {
-          "role": "user",
-          "content": [
             {
-              "type": "text",
-              "text": PROMPT.format(query=query)
-            }] + [{
-              "type": "image_url",
-              "image_url": {
-                "url": f"data:image/jpeg;base64,{im}"
-                },
-            } for im in base64_images]
-        }
-      ],
-      max_tokens=500,
-    )
-    return response.choices[0].message.content
 @spaces.GPU
-def search(query: str, ds, images, k):
     k = min(k, len(ds))
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     if device != model.device:
@@ -98,7 +104,7 @@ def search(query: str, ds, images, k):
         results.append((images[idx], f"Page {idx}"))
     # Generate response from GPT-4o-mini
-    ai_response = query_gpt4o_mini(query, results)
     return results, ai_response
@@ -164,6 +170,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             convert_button = gr.Button("🔄 Index documents")
             message = gr.Textbox("Files not yet uploaded", label="Status")
             embeds = gr.State(value=[])
             imgs = gr.State(value=[])
@@ -179,7 +186,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     output_text = gr.Textbox(label="AI Response", placeholder="Generated response based on retrieved documents")
     convert_button.click(index, inputs=[file, embeds], outputs=[message, embeds, imgs])
-    search_button.click(search, inputs=[query, embeds, imgs, k], outputs=[output_gallery, output_text])
 if __name__ == "__main__":
     demo.queue(max_size=10).launch(debug=True)

     return base64.b64encode(buffered.getvalue()).decode("utf-8")
+def query_gpt4o_mini(query, images, api_key):
     """Calls OpenAI's GPT-4o-mini with the query and image data."""
+    if api_key and api_key.startswith("sk"):
+        from openai import OpenAI
+        base64_images = [encode_image_to_base64(image[0]) for image in images]
+        client = OpenAI(api_key=api_key)
+        PROMPT = """
+        You are a smart assistant designed to answer questions about a PDF document.
+        You are given relevant information in the form of PDF pages. Use them to construct a short response to the question, and cite your sources (page numbers, etc).
+        If it is not possible to answer using the provided pages, do not attempt to provide an answer and simply say the answer is not present within the documents.
+        Give detailed and extensive answers, only containing info in the pages you are given.
+        You can answer using information contained in plots and figures if necessary.
+        Answer in the same language as the query.
+        Query: {query}
+        PDF pages:
+        """
+        response = client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[
             {
+              "role": "user",
+              "content": [
+                {
+                  "type": "text",
+                  "text": PROMPT.format(query=query)
+                }] + [{
+                  "type": "image_url",
+                  "image_url": {
+                    "url": f"data:image/jpeg;base64,{im}"
+                    },
+                } for im in base64_images]
+            }
+          ],
+          max_tokens=500,
+        )
+        return response.choices[0].message.content
+    return "Enter your OpenAI API key to get a custom response"
 @spaces.GPU
+def search(query: str, ds, images, k, api_key):
     k = min(k, len(ds))
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     if device != model.device:
         results.append((images[idx], f"Page {idx}"))
     # Generate response from GPT-4o-mini
+    ai_response = query_gpt4o_mini(query, results, api_key)
     return results, ai_response
             convert_button = gr.Button("🔄 Index documents")
             message = gr.Textbox("Files not yet uploaded", label="Status")
+            api_key = gr.Textbox(placeholder="Enter your OpenAI KEY here (optional)", label="API key")
             embeds = gr.State(value=[])
             imgs = gr.State(value=[])
     output_text = gr.Textbox(label="AI Response", placeholder="Generated response based on retrieved documents")
     convert_button.click(index, inputs=[file, embeds], outputs=[message, embeds, imgs])
+    search_button.click(search, inputs=[query, embeds, imgs, k, api_key], outputs=[output_gallery, output_text])
 if __name__ == "__main__":
     demo.queue(max_size=10).launch(debug=True)