bbexx committed on
Commit
ca48de7
·
1 Parent(s): 9b46a92
Files changed (1) hide show
  1. README.md +5 -0
README.md CHANGED
@@ -22,6 +22,7 @@ from PIL import Image
22
  from transformers import AutoModel, CLIPImageProcessor
23
  device = "cuda" if torch.cuda.is_available() else "cpu"
24
 
 
25
  model = AutoModel.from_pretrained(
26
  'jienengchen/ViTamin-B-LTT',
27
  trust_remote_code=True).to(device).eval()
@@ -47,6 +48,10 @@ print("Label probs:", text_probs)
47
 
48
  | image encoder | image size | num patches | text encoder depth/width | seen samples (B) | trainable params Image+Text (M) | MACs Image+Text (G) | ImageNet Acc. | avg. 38 datasets | ImageNet dist. shift. | VTAB | retrieval |
49
  |---------------|------------|-------------|--------------------------|-------------------|---------------------------------|----------------------|---------------|------------------|-----------------------|------|-----------|
 
 
 
 
50
  | ViTamin-L | 224 | 196 | 12/768 | 12.8 | 333.3+123.7 | 72.6+6.6 | 80.8 | 66.7 | 69.8 | 65.3 | 60.3 |
51
  | ViTamin-L | 256 | 256 | 12/768 | 12.8+0.2 | 333.4+123.7 | 94.8+6.6 | 81.2 | 67.0 | 71.1 | 65.3 | 61.2 |
52
  | ViTamin-L | 336 | 441 | 12/768 | 12.8+0.2 | 333.6+123.7 | 163.4+6.6 | 81.6 | 67.0 | 72.1 | 64.4 | 61.6 |
 
22
  from transformers import AutoModel, CLIPImageProcessor
23
  device = "cuda" if torch.cuda.is_available() else "cpu"
24
 
25
+ # obtained 70.72% zero-shot ImageNet score
26
  model = AutoModel.from_pretrained(
27
  'jienengchen/ViTamin-B-LTT',
28
  trust_remote_code=True).to(device).eval()
 
48
 
49
  | image encoder | image size | num patches | text encoder depth/width | seen samples (B) | trainable params Image+Text (M) | MACs Image+Text (G) | ImageNet Acc. | avg. 38 datasets | ImageNet dist. shift. | VTAB | retrieval |
50
  |---------------|------------|-------------|--------------------------|-------------------|---------------------------------|----------------------|---------------|------------------|-----------------------|------|-----------|
51
+ | ViTamin-S | 224 | 196 | 12/384 | 1.28 | 22.0+40.4 | 5.50+1.64 | 62.2 | 53.2 | 51.3 | 51.7 | 50.0 |
52
+ | ViTamin-S-LTT | 224 | 196 | 12/384 | 1.28 | 22.0+40.4 | 5.50+1.64 | 63.4 | 54.6 | 51.6 | 54.9 | 52.9 |
53
+ | ViTamin-B | 224 | 196 | 12/512 | 1.28 | 87.5+63.4 | 21.8+2.9 | 68.9 | 57.7 | 58.3 | 56.4 | 54.1 |
54
+ | ViTamin-B-LTT | 224 | 196 | 12/512 | 1.28 | 87.5+63.4 | 21.8+2.9 | 70.8 | 59.4 | 59.3 | 56.6 | 59.4 |
55
  | ViTamin-L | 224 | 196 | 12/768 | 12.8 | 333.3+123.7 | 72.6+6.6 | 80.8 | 66.7 | 69.8 | 65.3 | 60.3 |
56
  | ViTamin-L | 256 | 256 | 12/768 | 12.8+0.2 | 333.4+123.7 | 94.8+6.6 | 81.2 | 67.0 | 71.1 | 65.3 | 61.2 |
57
  | ViTamin-L | 336 | 441 | 12/768 | 12.8+0.2 | 333.6+123.7 | 163.4+6.6 | 81.6 | 67.0 | 72.1 | 64.4 | 61.6 |