{ "architectures": [ "SiglipForImageClassification" ], "id2label": { "0": "\u0905", "1": "\u0906", "10": "\u0915", "11": "\u0915\u094d\u0937", "12": "\u0916", "13": "\u0917", "14": "\u0918", "15": "\u091a", "16": "\u091b", "17": "\u091c", "18": "\u091c\u094d\u091e", "19": "\u091d", "2": "\u0907", "20": "\u091f", "21": "\u0920", "22": "\u0921", "23": "\u0922", "24": "\u0923", "25": "\u0924", "26": "\u0925", "27": "\u0926", "28": "\u0927", "29": "\u0928", "3": "\u0908", "30": "\u092a", "31": "\u092b", "32": "\u092c", "33": "\u092d", "34": "\u092e", "35": "\u092f", "36": "\u0930", "37": "\u0932", "38": "\u0933", "39": "\u0935", "4": "\u0909", "40": "\u0936", "41": "\u0938", "42": "\u0939", "5": "\u090a", "6": "\u090f", "7": "\u0910", "8": "\u0913", "9": "\u0914" }, "initializer_factor": 1.0, "label2id": { "\u0905": "0", "\u0906": "1", "\u0907": "2", "\u0908": "3", "\u0909": "4", "\u090a": "5", "\u090f": "6", "\u0910": "7", "\u0913": "8", "\u0914": "9", "\u0915": "10", "\u0915\u094d\u0937": "11", "\u0916": "12", "\u0917": "13", "\u0918": "14", "\u091a": "15", "\u091b": "16", "\u091c": "17", "\u091c\u094d\u091e": "18", "\u091d": "19", "\u091f": "20", "\u0920": "21", "\u0921": "22", "\u0922": "23", "\u0923": "24", "\u0924": "25", "\u0925": "26", "\u0926": "27", "\u0927": "28", "\u0928": "29", "\u092a": "30", "\u092b": "31", "\u092c": "32", "\u092d": "33", "\u092e": "34", "\u092f": "35", "\u0930": "36", "\u0932": "37", "\u0933": "38", "\u0935": "39", "\u0936": "40", "\u0938": "41", "\u0939": "42" }, "model_type": "siglip", "problem_type": "single_label_classification", "text_config": { "attention_dropout": 0.0, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 768, "intermediate_size": 3072, "layer_norm_eps": 1e-06, "max_position_embeddings": 64, "model_type": "siglip_text_model", "num_attention_heads": 12, "num_hidden_layers": 12, "projection_size": 768, "torch_dtype": "float32", "vocab_size": 256000 }, "torch_dtype": "float32", "transformers_version": "4.52.0.dev0", "vision_config": { "attention_dropout": 0.0, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 768, "image_size": 224, "intermediate_size": 3072, "layer_norm_eps": 1e-06, "model_type": "siglip_vision_model", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "patch_size": 16, "torch_dtype": "float32" } }