Spaces:
Running
on
Zero
Running
on
Zero
Commit
•
38576ff
1
Parent(s):
9e7d682
return text
Browse files
app.py
CHANGED
@@ -87,7 +87,7 @@ def _prep_data_for_input(image):
|
|
87 |
text=prompt
|
88 |
)
|
89 |
|
90 |
-
@spaces.GPU
|
91 |
def generate_response(image):
|
92 |
inputs = _prep_data_for_input(image)
|
93 |
inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
|
@@ -100,10 +100,10 @@ def generate_response(image):
|
|
100 |
output_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
|
101 |
|
102 |
try:
|
103 |
-
return json.loads(output_text)
|
104 |
except Exception:
|
105 |
gr.Warning("Failed to parse JSON from output")
|
106 |
-
return
|
107 |
|
108 |
title = "ColPali fine-tuning Query Generator"
|
109 |
description = """[ColPali](https://huggingface.co/papers/2407.01449) is a very exciting new approach to multimodal document retrieval which aims to replace existing document retrievers which often rely on an OCR step with an end-to-end multimodal approach.
|
@@ -129,7 +129,7 @@ examples = [
|
|
129 |
demo = gr.Interface(
|
130 |
fn=generate_response,
|
131 |
inputs=gr.Image(type="pil"),
|
132 |
-
outputs=gr.
|
133 |
title=title,
|
134 |
description=description,
|
135 |
examples=examples,
|
|
|
87 |
text=prompt
|
88 |
)
|
89 |
|
90 |
+
@spaces.GPU(duration=120)
|
91 |
def generate_response(image):
|
92 |
inputs = _prep_data_for_input(image)
|
93 |
inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
|
|
|
100 |
output_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
|
101 |
|
102 |
try:
|
103 |
+
return str(json.loads(output_text))
|
104 |
except Exception:
|
105 |
gr.Warning("Failed to parse JSON from output")
|
106 |
+
return output_text
|
107 |
|
108 |
title = "ColPali fine-tuning Query Generator"
|
109 |
description = """[ColPali](https://huggingface.co/papers/2407.01449) is a very exciting new approach to multimodal document retrieval which aims to replace existing document retrievers which often rely on an OCR step with an end-to-end multimodal approach.
|
|
|
129 |
demo = gr.Interface(
|
130 |
fn=generate_response,
|
131 |
inputs=gr.Image(type="pil"),
|
132 |
+
outputs=gr.Text(),
|
133 |
title=title,
|
134 |
description=description,
|
135 |
examples=examples,
|