Add preprocessor_config.json
Browse files
- README.md +3 -3
- preprocessor_config.json +26 -0
    	
        README.md
    CHANGED
    
    | @@ -15,7 +15,7 @@ license: mit | |
| 15 | 
             
            ---
         | 
| 16 | 
             
            ## mmE5-mllama-11b-instruct
         | 
| 17 |  | 
| 18 | 
            -
            [mmE5: Improving Multimodal Multilingual Embeddings via High-quality Synthetic Data](https://arxiv.org/abs/2502.08468.pdf). Haonan Chen, Liang Wang, Nan Yang, Yutao Zhu, Ziliang Zhao, Furu Wei, Zhicheng Dou, arXiv  | 
| 19 |  | 
| 20 | 
             
            This model is trained based on [Llama-3.2-11B-Vision](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision).
         | 
| 21 |  | 
| @@ -50,7 +50,6 @@ from PIL import Image | |
| 50 | 
             
            import numpy as np
         | 
| 51 | 
             
            model_args = ModelArguments(
         | 
| 52 | 
             
                model_name='intfloat/mmE5-mllama-11b-instruct',
         | 
| 53 | 
            -
                processor_name='meta-llama/Llama-3.2-11B-Vision',
         | 
| 54 | 
             
                pooling='last',
         | 
| 55 | 
             
                normalize=True,
         | 
| 56 | 
             
                model_backbone='mllama')
         | 
| @@ -104,4 +103,5 @@ print(string, '=', model.compute_similarity(qry_output, tgt_output)) | |
| 104 | 
             
              journal={arXiv preprint arXiv:2502.08468},
         | 
| 105 | 
             
              year={2025}
         | 
| 106 | 
             
            }
         | 
| 107 | 
            -
            ```
         | 
|  | 
|  | |
| 15 | 
             
            ---
         | 
| 16 | 
             
            ## mmE5-mllama-11b-instruct
         | 
| 17 |  | 
| 18 | 
            +
            [mmE5: Improving Multimodal Multilingual Embeddings via High-quality Synthetic Data](https://arxiv.org/abs/2502.08468.pdf). Haonan Chen, Liang Wang, Nan Yang, Yutao Zhu, Ziliang Zhao, Furu Wei, Zhicheng Dou, arXiv 2025
         | 
| 19 |  | 
| 20 | 
             
            This model is trained based on [Llama-3.2-11B-Vision](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision).
         | 
| 21 |  | 
|  | |
| 50 | 
             
            import numpy as np
         | 
| 51 | 
             
            model_args = ModelArguments(
         | 
| 52 | 
             
                model_name='intfloat/mmE5-mllama-11b-instruct',
         | 
|  | |
| 53 | 
             
                pooling='last',
         | 
| 54 | 
             
                normalize=True,
         | 
| 55 | 
             
                model_backbone='mllama')
         | 
|  | |
| 103 | 
             
              journal={arXiv preprint arXiv:2502.08468},
         | 
| 104 | 
             
              year={2025}
         | 
| 105 | 
             
            }
         | 
| 106 | 
            +
            ```
         | 
| 107 | 
            +
             | 
    	
        preprocessor_config.json
    ADDED
    
    | @@ -0,0 +1,26 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "do_convert_rgb": true,
         | 
| 3 | 
            +
              "do_normalize": true,
         | 
| 4 | 
            +
              "do_pad": true,
         | 
| 5 | 
            +
              "do_rescale": true,
         | 
| 6 | 
            +
              "do_resize": true,
         | 
| 7 | 
            +
              "image_mean": [
         | 
| 8 | 
            +
                0.48145466,
         | 
| 9 | 
            +
                0.4578275,
         | 
| 10 | 
            +
                0.40821073
         | 
| 11 | 
            +
              ],
         | 
| 12 | 
            +
              "image_processor_type": "MllamaImageProcessor",
         | 
| 13 | 
            +
              "image_std": [
         | 
| 14 | 
            +
                0.26862954,
         | 
| 15 | 
            +
                0.26130258,
         | 
| 16 | 
            +
                0.27577711
         | 
| 17 | 
            +
              ],
         | 
| 18 | 
            +
              "max_image_tiles": 4,
         | 
| 19 | 
            +
              "processor_class": "MllamaProcessor",
         | 
| 20 | 
            +
              "resample": 2,
         | 
| 21 | 
            +
              "rescale_factor": 0.00392156862745098,
         | 
| 22 | 
            +
              "size": {
         | 
| 23 | 
            +
                "height": 448,
         | 
| 24 | 
            +
                "width": 448
         | 
| 25 | 
            +
              }
         | 
| 26 | 
            +
            }
         |