Update README.md
Browse files
README.md
CHANGED
@@ -25,8 +25,8 @@ quantized_model_path="OPEA/llama-joycaption-alpha-two-hf-llava-int4-sym-inc"
|
|
25 |
|
26 |
# Load JoyCaption INT4 Model
|
27 |
processor = AutoProcessor.from_pretrained(quantized_model_path)
|
28 |
-
|
29 |
-
|
30 |
|
31 |
image_url = "http://images.cocodataset.org/train2017/000000116003.jpg"
|
32 |
content = "Write a descriptive caption for this image in a formal tone."
|
@@ -48,9 +48,9 @@ with torch.no_grad():
|
|
48 |
assert isinstance(prompt, str)
|
49 |
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)
|
50 |
inputs['pixel_values'] = inputs['pixel_values'].to(model.dtype)
|
51 |
-
|
52 |
# Generate the captions
|
53 |
-
generate_ids =
|
54 |
**inputs,
|
55 |
max_new_tokens=50,
|
56 |
do_sample=False,
|
@@ -60,34 +60,41 @@ with torch.no_grad():
|
|
60 |
top_k=None,
|
61 |
top_p=0.9,
|
62 |
)[0]
|
63 |
-
|
64 |
# Trim off the prompt
|
65 |
generate_ids = generate_ids[inputs['input_ids'].shape[1]:]
|
66 |
-
|
67 |
# Decode the caption
|
68 |
caption = processor.tokenizer.decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
|
69 |
caption = caption.strip()
|
70 |
print(caption)
|
71 |
|
72 |
|
73 |
-
##INT4:
|
|
|
74 |
|
75 |
-
|
|
|
|
|
76 |
|
77 |
image_url = "http://images.cocodataset.org/train2017/000000411975.jpg"
|
78 |
-
content = "
|
79 |
|
80 |
-
##INT4:
|
|
|
81 |
|
82 |
-
##BF16:
|
|
|
83 |
|
84 |
|
85 |
image_url = "http://images.cocodataset.org/train2017/000000093025.jpg"
|
86 |
-
content = "
|
87 |
|
88 |
-
##INT4:
|
|
|
89 |
|
90 |
-
##BF16:
|
|
|
91 |
|
92 |
```
|
93 |
|
|
|
25 |
|
26 |
# Load JoyCaption INT4 Model
|
27 |
processor = AutoProcessor.from_pretrained(quantized_model_path)
|
28 |
+
model = LlavaForConditionalGeneration.from_pretrained(quantized_model_path, device_map=0)
|
29 |
+
model.eval()
|
30 |
|
31 |
image_url = "http://images.cocodataset.org/train2017/000000116003.jpg"
|
32 |
content = "Write a descriptive caption for this image in a formal tone."
|
|
|
48 |
assert isinstance(prompt, str)
|
49 |
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)
|
50 |
inputs['pixel_values'] = inputs['pixel_values'].to(model.dtype)
|
51 |
+
|
52 |
# Generate the captions
|
53 |
+
generate_ids = model.generate(
|
54 |
**inputs,
|
55 |
max_new_tokens=50,
|
56 |
do_sample=False,
|
|
|
60 |
top_k=None,
|
61 |
top_p=0.9,
|
62 |
)[0]
|
63 |
+
|
64 |
# Trim off the prompt
|
65 |
generate_ids = generate_ids[inputs['input_ids'].shape[1]:]
|
66 |
+
|
67 |
# Decode the caption
|
68 |
caption = processor.tokenizer.decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
|
69 |
caption = caption.strip()
|
70 |
print(caption)
|
71 |
|
72 |
|
73 |
+
##INT4: This black-and-white photograph captures a moment of triumph on a tennis court. The central figure is a male tennis player, mid-celebration,
|
74 |
+
## with his arms raised high in victory. He is wearing a white athletic shirt and shorts, with a
|
75 |
|
76 |
+
|
77 |
+
##BF16: This black-and-white photograph captures a moment of triumph on a tennis court. The central figure is a male tennis player, mid-celebration,
|
78 |
+
## with his arms raised high in victory. He is wearing a white tennis shirt and shorts, with a
|
79 |
|
80 |
image_url = "http://images.cocodataset.org/train2017/000000411975.jpg"
|
81 |
+
content = "Write a descriptive caption for this image in a formal tone."
|
82 |
|
83 |
+
##INT4: This is a photograph capturing a moment during a baseball game. The image is taken from a high vantage point, likely from the stands,
|
84 |
+
## looking down onto the field. The main focus is on a young girl and a man standing on the grassy
|
85 |
|
86 |
+
##BF16: This is a photograph capturing a moment during a baseball game. The image is taken from a high angle, looking down onto the field.
|
87 |
+
## In the foreground, there is a section of the baseball field with a reddish-brown dirt infield and a well
|
88 |
|
89 |
|
90 |
image_url = "http://images.cocodataset.org/train2017/000000093025.jpg"
|
91 |
+
content = "Write a descriptive caption for this image in a formal tone."
|
92 |
|
93 |
+
##INT4: This is a photograph capturing a serene outdoor scene on a rocky mountainous terrain under a clear blue sky with scattered white clouds.
|
94 |
+
## The central focus is on a man and a sheep. The man, positioned slightly to the right of the center, is sitting
|
95 |
|
96 |
+
##BF16: This photograph captures a serene mountainous landscape under a bright blue sky dotted with fluffy white clouds. In the foreground,
|
97 |
+
## a man and a woman are seated on a rocky outcrop. The man, positioned on the left, is wearing a blue jacket and
|
98 |
|
99 |
```
|
100 |
|