hassonofer committed on
Commit
b781b14
·
verified ·
1 Parent(s): c3e0fca

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +13 -13
README.md CHANGED
@@ -32,17 +32,17 @@ The species list is derived from data available at <https://www.israbirding.com/
32
  import birder
33
  from birder.inference.classification import infer_image
34
 
35
- (net, class_to_idx, signature, rgb_stats) = birder.load_pretrained_model("mvit_v2_t_il-all", inference=True)
36
 
37
  # Get the image size the model was trained on
38
- size = birder.get_size_from_signature(signature)
39
 
40
  # Create an inference transform
41
- transform = birder.classification_transform(size, rgb_stats)
42
 
43
  image = "path/to/image.jpeg" # or a PIL image, must be loaded in RGB format
44
  (out, _) = infer_image(net, image, transform)
45
- # out is a NumPy array with shape of (1, num_classes), representing class probabilities.
46
  ```
47
 
48
  ### Image Embeddings
@@ -51,17 +51,17 @@ image = "path/to/image.jpeg" # or a PIL image, must be loaded in RGB format
51
  import birder
52
  from birder.inference.classification import infer_image
53
 
54
- (net, class_to_idx, signature, rgb_stats) = birder.load_pretrained_model("mvit_v2_t_il-all", inference=True)
55
 
56
  # Get the image size the model was trained on
57
- size = birder.get_size_from_signature(signature)
58
 
59
  # Create an inference transform
60
- transform = birder.classification_transform(size, rgb_stats)
61
 
62
  image = "path/to/image.jpeg" # or a PIL image
63
  (out, embedding) = infer_image(net, image, transform, return_embedding=True)
64
- # embedding is a NumPy array with shape of (1, embedding_size)
65
  ```
66
 
67
  ### Detection Feature Map
@@ -70,13 +70,13 @@ image = "path/to/image.jpeg" # or a PIL image
70
  from PIL import Image
71
  import birder
72
 
73
- (net, class_to_idx, signature, rgb_stats) = birder.load_pretrained_model("mvit_v2_t_il-all", inference=True)
74
 
75
  # Get the image size the model was trained on
76
- size = birder.get_size_from_signature(signature)
77
 
78
  # Create an inference transform
79
- transform = birder.classification_transform(size, rgb_stats)
80
 
81
  image = Image.open("path/to/image.jpeg")
82
  features = net.detection_features(transform(image).unsqueeze(0))
@@ -93,12 +93,12 @@ print([(k, v.size()) for k, v in features.items()])
93
 
94
  ```bibtex
95
  @misc{li2022mvitv2improvedmultiscalevision,
96
- title={MViTv2: Improved Multiscale Vision Transformers for Classification and Detection},
97
  author={Yanghao Li and Chao-Yuan Wu and Haoqi Fan and Karttikeya Mangalam and Bo Xiong and Jitendra Malik and Christoph Feichtenhofer},
98
  year={2022},
99
  eprint={2112.01526},
100
  archivePrefix={arXiv},
101
  primaryClass={cs.CV},
102
- url={https://arxiv.org/abs/2112.01526},
103
  }
104
  ```
 
32
  import birder
33
  from birder.inference.classification import infer_image
34
 
35
+ (net, model_info) = birder.load_pretrained_model("mvit_v2_t_il-all", inference=True)
36
 
37
  # Get the image size the model was trained on
38
+ size = birder.get_size_from_signature(model_info.signature)
39
 
40
  # Create an inference transform
41
+ transform = birder.classification_transform(size, model_info.rgb_stats)
42
 
43
  image = "path/to/image.jpeg" # or a PIL image, must be loaded in RGB format
44
  (out, _) = infer_image(net, image, transform)
45
+ # out is a NumPy array with shape of (1, 550), representing class probabilities.
46
  ```
47
 
48
  ### Image Embeddings
 
51
  import birder
52
  from birder.inference.classification import infer_image
53
 
54
+ (net, model_info) = birder.load_pretrained_model("mvit_v2_t_il-all", inference=True)
55
 
56
  # Get the image size the model was trained on
57
+ size = birder.get_size_from_signature(model_info.signature)
58
 
59
  # Create an inference transform
60
+ transform = birder.classification_transform(size, model_info.rgb_stats)
61
 
62
  image = "path/to/image.jpeg" # or a PIL image
63
  (out, embedding) = infer_image(net, image, transform, return_embedding=True)
64
+ # embedding is a NumPy array with shape of (1, 768)
65
  ```
66
 
67
  ### Detection Feature Map
 
70
  from PIL import Image
71
  import birder
72
 
73
+ (net, model_info) = birder.load_pretrained_model("mvit_v2_t_il-all", inference=True)
74
 
75
  # Get the image size the model was trained on
76
+ size = birder.get_size_from_signature(model_info.signature)
77
 
78
  # Create an inference transform
79
+ transform = birder.classification_transform(size, model_info.rgb_stats)
80
 
81
  image = Image.open("path/to/image.jpeg")
82
  features = net.detection_features(transform(image).unsqueeze(0))
 
93
 
94
  ```bibtex
95
  @misc{li2022mvitv2improvedmultiscalevision,
96
+ title={MViTv2: Improved Multiscale Vision Transformers for Classification and Detection},
97
  author={Yanghao Li and Chao-Yuan Wu and Haoqi Fan and Karttikeya Mangalam and Bo Xiong and Jitendra Malik and Christoph Feichtenhofer},
98
  year={2022},
99
  eprint={2112.01526},
100
  archivePrefix={arXiv},
101
  primaryClass={cs.CV},
102
+ url={https://arxiv.org/abs/2112.01526},
103
  }
104
  ```