2ch committed on
Commit
c774b65
·
verified ·
1 Parent(s): bc0d769

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ datasets:
4
+ - markury/AndroAtlas
5
+ language:
6
+ - en
7
+ base_model:
8
+ - google/paligemma-3b-mix-448
9
+ pipeline_tag: image-text-to-text
10
+ tags:
11
+ - captioning
12
+ ---
13
+
14
+ ```python
15
+ import torch
16
+ from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
17
+ from PIL import Image
18
+
19
+ def download_model(model_id):
20
+ model = PaliGemmaForConditionalGeneration.from_pretrained(model_id)
21
+ processor = PaliGemmaProcessor.from_pretrained(model_id)
22
+ return model, processor
23
+
24
+ def infer(model, processor, image_path, text, max_new_tokens=128):
25
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26
+ model = model.to(device)
27
+ model.eval()
28
+
29
+ image = Image.open(image_path).convert("RGB")
30
+ inputs = processor(text=text, images=image, return_tensors="pt").to(device)
31
+
32
+ with torch.inference_mode():
33
+ generated_ids = model.generate(
34
+ **inputs,
35
+ max_new_tokens=max_new_tokens,
36
+ do_sample=False
37
+ )
38
+
39
+ result = processor.batch_decode(generated_ids, skip_special_tokens=True)
40
+ return result[0][len(text):].lstrip("\n")
41
+
42
+ def main():
43
+ model_id = "prolapse/malensfw-paligemma-fp16"
44
+ model, processor = download_model(model_id)
45
+ image_path = "/path/to/image.png"
46
+ prompt = "describe this photo"
47
+ result = infer(model, processor, image_path, text)
48
+ print(result)
49
+
50
+ if __name__ == "__main__":
51
+ main()
52
+ ```
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image>": 257152
3
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 2,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.42.4"
7
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "SiglipImageProcessor",
12
+ "image_seq_length": 1024,
13
+ "image_std": [
14
+ 0.5,
15
+ 0.5,
16
+ 0.5
17
+ ],
18
+ "processor_class": "PaliGemmaProcessor",
19
+ "resample": 3,
20
+ "rescale_factor": 0.00392156862745098,
21
+ "size": {
22
+ "height": 448,
23
+ "width": 448
24
+ }
25
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<image>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
+ "bos_token": {
12
+ "content": "<bos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "eos_token": {
19
+ "content": "<eos>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": {
26
+ "content": "<pad>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ },
32
+ "unk_token": {
33
+ "content": "<unk>",
34
+ "lstrip": false,
35
+ "normalized": false,
36
+ "rstrip": false,
37
+ "single_word": false
38
+ }
39
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a15d33625f5a34be43c42d23acc1f26f026c9cd9d6f8ab3d9da05dd2a62e643c
3
+ size 17770231
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8986bb4f423f07f8c7f70d0dbe3526fb2316056c17bae71b1ea975e77a168fc6
3
+ size 4264023
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff