Spaces:

Ketengan-Diffusion-Lab
/

Dolphin-Inference

Build error

App Files Files Community

Ketengan-Diffusion-Lab committed on Sep 14, 2024

Commit

fd950ef

verified ·

1 Parent(s): 82daaef

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -38

app.py CHANGED Viewed

@@ -1,45 +1,63 @@
 import gradio as gr
 import torch
-from transformers import AutoModel, AutoTokenizer
 from PIL import Image
-# Disable gradient computation
-torch.set_grad_enabled(False)
-# Initialize model and tokenizer
-model = AutoModel.from_pretrained('internlm/internlm-xcomposer2d5-7b',
-                                  torch_dtype=torch.bfloat16,
-                                  trust_remote_code=True).cuda().eval()
-tokenizer = AutoTokenizer.from_pretrained('internlm/internlm-xcomposer2d5-7b',
-                                          trust_remote_code=True)
-model.tokenizer = tokenizer
-# Define the function to process input and generate a response
-def analyze_image(query, image_path):
-    image = Image.open(image_path)
-    # Convert image to required format and save temporarily if needed
-    with torch.autocast(device_type='cuda', dtype=torch.float16):
-        response, _ = model.chat(tokenizer, query, [image_path], do_sample=False, num_beams=3, use_meta=True)
-    return response
-# Create Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("## Image Analysis Tool using Hugging Face's `internlm-xcomposer2d5-7b`")
-    with gr.Row():
-        query_input = gr.Textbox(label="Enter your query", placeholder="Analyze the given image in a detailed manner")
     with gr.Row():
-        image_input = gr.Image(label="Upload an Image", type="filepath")
-    with gr.Row():
-        result_output = gr.Textbox(label="Result", placeholder="Model response will appear here", interactive=False)
-    with gr.Row():
-        submit_button = gr.Button("Submit")
-    submit_button.click(fn=analyze_image, inputs=[query_input, image_input], outputs=result_output)
-# Launch the Gradio interface
-demo.launch()

 import gradio as gr
 import torch
+import transformers
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from PIL import Image
+import warnings
+# Reduce console noise: transformers logs errors only, progress bars off, Python warnings ignored
+transformers.logging.set_verbosity_error()
+transformers.logging.disable_progress_bar()
+warnings.filterwarnings('ignore')  # NOTE(review): no category filter, so every warning is hidden process-wide
+# Make CUDA the default device for tensors created without an explicit device
+torch.set_default_device('cuda')  # or 'cpu'
+model_name = 'cognitivecomputations/dolphin-vision-7b'  # repo id shared by both from_pretrained calls below
+# Load the model once at import time; inference() reads `model` and `tokenizer` as module globals
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float16,  # half-precision weights
+    device_map='auto',  # let the loader decide device placement
+    trust_remote_code=True)  # runs Python code shipped in the model repo
+tokenizer = AutoTokenizer.from_pretrained(
+    model_name,
+    trust_remote_code=True)  # same repo as the model, so the chat template matches
+def inference(prompt, image):
+    """Answer `prompt` about `image` using the module-level model and tokenizer.
+
+    Args:
+        prompt: Text from the prompt box. ``None`` is treated as an empty
+            prompt instead of being rendered as the text "None".
+        image: Image from the ``gr.Image(type="pil")`` input, or ``None``
+            when nothing was uploaded.
+
+    Returns:
+        The decoded text of the newly generated tokens (the prompt tokens
+        are sliced off), with surrounding whitespace stripped.
+
+    Raises:
+        gr.Error: If no image was provided; Gradio shows the message in the UI.
+    """
+    if image is None:
+        # Without this guard the None would reach model.process_images and
+        # fail with an opaque error from inside the model code.
+        raise gr.Error("Please upload an image before submitting.")
+    prompt = prompt or ""
+    messages = [
+        {"role": "user", "content": f'<image>\n{prompt}'}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    # Split only at the first '<image>' -- the placeholder inserted above,
+    # assuming the chat template's preamble contains none. An unbounded split
+    # would silently drop everything after a literal '<image>' typed by the
+    # user, because only the first two chunks were ever used.
+    before_image, after_image = text.split('<image>', 1)
+    # -200 stands in for the image between the two text segments
+    # (presumably the image-token index expected by the model's remote code
+    # -- confirm against the model card).
+    input_ids = torch.tensor(
+        tokenizer(before_image).input_ids + [-200] + tokenizer(after_image).input_ids,
+        dtype=torch.long).unsqueeze(0)
+    image_tensor = model.process_images([image], model.config).to(dtype=model.dtype)
+    # generate
+    output_ids = model.generate(
+        input_ids,
+        images=image_tensor,
+        max_new_tokens=2048,
+        use_cache=True)[0]
+    # Decode only what comes after the prompt tokens.
+    return tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
 with gr.Blocks() as demo:  # two-column page: inputs on the left, model output on the right
     with gr.Row():
+        with gr.Column():  # input column
+            prompt_input = gr.Textbox(label="Prompt", placeholder="Describe this image in detail")
+            image_input = gr.Image(label="Image", type="pil")  # type="pil": inference() receives a PIL image
+            submit_button = gr.Button("Submit")
+        with gr.Column():  # output column
+            output_text = gr.Textbox(label="Output")
+    submit_button.click(fn=inference, inputs=[prompt_input, image_input], outputs=output_text)  # run inference on click
+demo.launch()  # start the Gradio app