cicdatopea committed on
Commit
914cecc
·
verified ·
1 Parent(s): d0c66a2

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +21 -14
README.md CHANGED
@@ -25,8 +25,8 @@ quantized_model_path="OPEA/llama-joycaption-alpha-two-hf-llava-int4-sym-inc"
25
 
26
  # Load JoyCaption INT4 Model
27
  processor = AutoProcessor.from_pretrained(quantized_model_path)
28
- llava_model = LlavaForConditionalGeneration.from_pretrained(quantized_model_path, device_map=0)
29
- llava_model.eval()
30
 
31
  image_url = "http://images.cocodataset.org/train2017/000000116003.jpg"
32
  content = "Write a descriptive caption for this image in a formal tone."
@@ -48,9 +48,9 @@ with torch.no_grad():
48
  assert isinstance(prompt, str)
49
  inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)
50
  inputs['pixel_values'] = inputs['pixel_values'].to(model.dtype)
51
-
52
  # Generate the captions
53
- generate_ids = llava_model.generate(
54
  **inputs,
55
  max_new_tokens=50,
56
  do_sample=False,
@@ -60,34 +60,41 @@ with torch.no_grad():
60
  top_k=None,
61
  top_p=0.9,
62
  )[0]
63
-
64
  # Trim off the prompt
65
  generate_ids = generate_ids[inputs['input_ids'].shape[1]:]
66
-
67
  # Decode the caption
68
  caption = processor.tokenizer.decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
69
  caption = caption.strip()
70
  print(caption)
71
 
72
 
73
- ##INT4:
 
74
 
75
- ##BF16:
 
 
76
 
77
  image_url = "http://images.cocodataset.org/train2017/000000411975.jpg"
78
- content = "How many people are on the baseball field in the picture?"
79
 
80
- ##INT4:
 
81
 
82
- ##BF16:
 
83
 
84
 
85
  image_url = "http://images.cocodataset.org/train2017/000000093025.jpg"
86
- content = "How many people and animals are there in the image?"
87
 
88
- ##INT4:
 
89
 
90
- ##BF16:
 
91
 
92
  ```
93
 
 
25
 
26
  # Load JoyCaption INT4 Model
27
  processor = AutoProcessor.from_pretrained(quantized_model_path)
28
+ model = LlavaForConditionalGeneration.from_pretrained(quantized_model_path, device_map=0)
29
+ model.eval()
30
 
31
  image_url = "http://images.cocodataset.org/train2017/000000116003.jpg"
32
  content = "Write a descriptive caption for this image in a formal tone."
 
48
  assert isinstance(prompt, str)
49
  inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)
50
  inputs['pixel_values'] = inputs['pixel_values'].to(model.dtype)
51
+
52
  # Generate the captions
53
+ generate_ids = model.generate(
54
  **inputs,
55
  max_new_tokens=50,
56
  do_sample=False,
 
60
  top_k=None,
61
  top_p=0.9,
62
  )[0]
63
+
64
  # Trim off the prompt
65
  generate_ids = generate_ids[inputs['input_ids'].shape[1]:]
66
+
67
  # Decode the caption
68
  caption = processor.tokenizer.decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
69
  caption = caption.strip()
70
  print(caption)
71
 
72
 
73
+ ##INT4: This black-and-white photograph captures a moment of triumph on a tennis court. The central figure is a male tennis player, mid-celebration,
74
+ ## with his arms raised high in victory. He is wearing a white athletic shirt and shorts, with a
75
 
76
+
77
+ ##BF16: This black-and-white photograph captures a moment of triumph on a tennis court. The central figure is a male tennis player, mid-celebration,
78
+ ## with his arms raised high in victory. He is wearing a white tennis shirt and shorts, with a
79
 
80
  image_url = "http://images.cocodataset.org/train2017/000000411975.jpg"
81
+ content = "Write a descriptive caption for this image in a formal tone."
82
 
83
+ ##INT4: This is a photograph capturing a moment during a baseball game. The image is taken from a high vantage point, likely from the stands,
84
+ ## looking down onto the field. The main focus is on a young girl and a man standing on the grassy
85
 
86
+ ##BF16: This is a photograph capturing a moment during a baseball game. The image is taken from a high angle, looking down onto the field.
87
+ ## In the foreground, there is a section of the baseball field with a reddish-brown dirt infield and a well
88
 
89
 
90
  image_url = "http://images.cocodataset.org/train2017/000000093025.jpg"
91
+ content = "Write a descriptive caption for this image in a formal tone."
92
 
93
+ ##INT4: This is a photograph capturing a serene outdoor scene on a rocky mountainous terrain under a clear blue sky with scattered white clouds.
94
+ ## The central focus is on a man and a sheep. The man, positioned slightly to the right of the center, is sitting
95
 
96
+ ##BF16: This photograph captures a serene mountainous landscape under a bright blue sky dotted with fluffy white clouds. In the foreground,
97
+ ## a man and a woman are seated on a rocky outcrop. The man, positioned on the left, is wearing a blue jacket and
98
 
99
  ```
100