Spaces:

virendravaishnav
/

po-fetch-detail

Sleeping

virendravaishnav committed on Sep 13, 2024

Commit

7fee682

1 Parent(s): 1229304

Updated with OCR model and Gradio integration

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,11 +1,12 @@
 import gradio as gr
-from transformers import AutoProcessor, AutoModel
 import torch
 repo_id = "OpenGVLab/InternVL2-1B"
-# Load the processor and model directly from the Hub
-processor = AutoProcessor.from_pretrained(repo_id, trust_remote_code=True)
 model = AutoModel.from_pretrained(
     repo_id,
     trust_remote_code=True,
@@ -22,9 +23,9 @@ def analyze_image(image):
         text = "describe this image"
         # Process the image
-        image_inputs = processor.image_processor(images=img, return_tensors="pt").to(device)
         # Process the text
-        text_inputs = processor.tokenizer(text, return_tensors="pt").to(device)
         # Combine the inputs
         inputs = {
@@ -37,7 +38,7 @@ def analyze_image(image):
         outputs = model.generate(**inputs)
         # Decode the outputs
-        generated_text = processor.tokenizer.decode(outputs[0], skip_special_tokens=True)
         return generated_text
     except Exception as e:
         return f"An error occurred: {str(e)}"

 import gradio as gr
+from transformers import AutoImageProcessor, AutoTokenizer, AutoModel
 import torch
 repo_id = "OpenGVLab/InternVL2-1B"
+# Load the image processor, tokenizer, and model directly from the Hub
+image_processor = AutoImageProcessor.from_pretrained(repo_id, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
 model = AutoModel.from_pretrained(
     repo_id,
     trust_remote_code=True,
         text = "describe this image"
         # Process the image
+        image_inputs = image_processor(images=img, return_tensors="pt").to(device)
         # Process the text
+        text_inputs = tokenizer(text, return_tensors="pt").to(device)
         # Combine the inputs
         inputs = {
         outputs = model.generate(**inputs)
         # Decode the outputs
+        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return generated_text
     except Exception as e:
         return f"An error occurred: {str(e)}"