Spaces:

davanstrien
/

ColPali-Query-Generator

Running on Zero

davanstrien HF Staff committed on Sep 26, 2024

Commit

9e7d682

1 Parent(s): 3555196

stronger JSON prompt

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,10 +1,10 @@
-import subprocess  # 🥲
-subprocess.run(
-    "pip install flash-attn --no-build-isolation",
-    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-    shell=True,
-)
 import spaces
 import gradio as gr
@@ -74,7 +74,8 @@ If there are no relevant visual elements, replace the third query with another s
 Here is the document image to analyze:
 <image>
-Generate the queries based on this image and provide the response in the specified JSON format."""
     return prompt, GeneralRetrievalQuery
@@ -92,7 +93,7 @@ def generate_response(image):
     inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
     output = model.generate_from_batch(
         inputs,
-        GenerationConfig(max_new_tokens=200, stop_token="<|endoftext|>"),
         tokenizer=processor.tokenizer
     )
     generated_tokens = output[0, inputs['input_ids'].size(1):]

+# import subprocess  # 🥲
+# subprocess.run(
+#     "pip install flash-attn --no-build-isolation",
+#     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+#     shell=True,
+# )
 import spaces
 import gradio as gr
 Here is the document image to analyze:
 <image>
+Generate the queries based on this image and provide the response in the specified JSON format.
+Only return JSON"""
     return prompt, GeneralRetrievalQuery
     inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
     output = model.generate_from_batch(
         inputs,
+        GenerationConfig(max_new_tokens=800, stop_token="<|endoftext|>"),
         tokenizer=processor.tokenizer
     )
     generated_tokens = output[0, inputs['input_ids'].size(1):]