{
  "_name_or_path": "Salesforce/blip-vqa-base",
  "architectures": [
    "ViltForQuestionAnswering"
  ],
  "attention_probs_dropout_prob": 0.0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "at table",
    "1": "skateboard",
    "2": "lg",
    "3": "6",
    "4": "crossing",
    "5": "don't know",
    "6": "solid",
    "7": "picnic table",
    "8": "full",
    "9": "plain",
    "10": "window",
    "11": "8:35",
    "12": "red and yellow",
    "13": "girl",
    "14": "tabby",
    "15": "blue",
    "16": "7:45",
    "17": "down",
    "18": "unknown",
    "19": "hawaii",
    "20": "woods",
    "21": "little girl",
    "22": "roof",
    "23": "black and white",
    "24": "in car",
    "25": "clock tower",
    "26": "gray",
    "27": "curtains",
    "28": "ball",
    "29": "dog",
    "30": "woman",
    "31": "soccer ball",
    "32": "windows",
    "33": "donut",
    "34": "screen",
    "35": "bus",
    "36": "neon",
    "37": "monitor",
    "38": "jeep",
    "39": "snowboard",
    "40": "wine tasting",
    "41": "french",
    "42": "wedding",
    "43": "orange",
    "44": "king",
    "45": "tired",
    "46": "canopy",
    "47": "low",
    "48": "bikes",
    "49": "snowboarding",
    "50": "2000",
    "51": "skateboarding",
    "52": "style",
    "53": "tent",
    "54": "necklace",
    "55": "bike rack",
    "56": "lying down",
    "57": "clock",
    "58": "name tag",
    "59": "hat",
    "60": "backpack",
    "61": "on street",
    "62": "air",
    "63": "leather",
    "64": "2010",
    "65": "can't tell",
    "66": "bicycle",
    "67": "lady",
    "68": "clear",
    "69": "tan",
    "70": "skier",
    "71": "car",
    "72": "hair",
    "73": "curtain",
    "74": "10",
    "75": "exit",
    "76": "natural",
    "77": "camera",
    "78": "forest",
    "79": "station",
    "80": "skiing",
    "81": "tv",
    "82": "fence",
    "83": "smiling",
    "84": "platform",
    "85": "happy",
    "86": "bedroom",
    "87": "blonde",
    "88": "double",
    "89": "train",
    "90": "nothing",
    "91": "street",
    "92": "soccer",
    "93": "table",
    "94": "5",
    "95": "trees",
    "96": "women",
    "97": "giraffes",
    "98": "right",
    "99": "7",
    "100": "shelter",
    "101": "ground",
    "102": "plate",
    "103": "laying down",
    "104": "chopsticks",
    "105": "red",
    "106": "many",
    "107": "shrimp",
    "108": "not there",
    "109": "talking",
    "110": "cloudy",
    "111": "green",
    "112": "bicycles",
    "113": "bricks",
    "114": "sun",
    "115": "2013",
    "116": "brick",
    "117": "human",
    "118": "birthday",
    "119": "snowboarder",
    "120": "park",
    "121": "beagle",
    "122": "yes",
    "123": "walking",
    "124": "rack",
    "125": "purple",
    "126": "cat",
    "127": "giraffe",
    "128": "8",
    "129": "pink",
    "130": "plastic",
    "131": "red and blue",
    "132": "stripes",
    "133": "lanyard",
    "134": "shade",
    "135": "dirt",
    "136": "they aren't",
    "137": "0",
    "138": "ice cream",
    "139": "zoo",
    "140": "wall",
    "141": "cup",
    "142": "queen",
    "143": "cage",
    "144": "africa",
    "145": "beige",
    "146": "white",
    "147": "snow",
    "148": "yellow",
    "149": "white and blue",
    "150": "calico",
    "151": "big ben",
    "152": "wine",
    "153": "sky",
    "154": "security",
    "155": "2",
    "156": "sidewalk",
    "157": "stand",
    "158": "4",
    "159": "smile",
    "160": "gray and black",
    "161": "protection",
    "162": "3",
    "163": "watching",
    "164": "shadow",
    "165": "shadows",
    "166": "fashion",
    "167": "7:35",
    "168": "crown",
    "169": "blue and white",
    "170": "man",
    "171": "door",
    "172": "sleeping",
    "173": "large",
    "174": "net",
    "175": "suv",
    "176": "brown",
    "177": "not sure",
    "178": "arrow",
    "179": "1",
    "180": "black",
    "181": "out",
    "182": "person",
    "183": "desert",
    "184": "boy",
    "185": "tower",
    "186": "9:35",
    "187": "chair",
    "188": "talking on phone",
    "189": "small",
    "190": "resting",
    "191": "church",
    "192": "outside",
    "193": "cross",
    "194": "white and black",
    "195": "no",
    "196": "photographer",
    "197": "on road",
    "198": "doughnut"
  },
  "image_size": 384,
  "image_text_hidden_size": 256,
  "initializer_factor": 1.0,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "0": 137,
    "1": 179,
    "10": 74,
    "2": 155,
    "2000": 50,
    "2010": 64,
    "2013": 115,
    "3": 162,
    "4": 158,
    "5": 94,
    "6": 3,
    "7": 99,
    "7:35": 167,
    "7:45": 16,
    "8": 128,
    "8:35": 11,
    "9:35": 186,
    "africa": 144,
    "air": 62,
    "arrow": 178,
    "at table": 0,
    "backpack": 60,
    "ball": 28,
    "beagle": 121,
    "bedroom": 86,
    "beige": 145,
    "bicycle": 66,
    "bicycles": 112,
    "big ben": 151,
    "bike rack": 55,
    "bikes": 48,
    "birthday": 118,
    "black": 180,
    "black and white": 23,
    "blonde": 87,
    "blue": 15,
    "blue and white": 169,
    "boy": 184,
    "brick": 116,
    "bricks": 113,
    "brown": 176,
    "bus": 35,
    "cage": 143,
    "calico": 150,
    "camera": 77,
    "can't tell": 65,
    "canopy": 46,
    "car": 71,
    "cat": 126,
    "chair": 187,
    "chopsticks": 104,
    "church": 191,
    "clear": 68,
    "clock": 57,
    "clock tower": 25,
    "cloudy": 110,
    "cross": 193,
    "crossing": 4,
    "crown": 168,
    "cup": 141,
    "curtain": 73,
    "curtains": 27,
    "desert": 183,
    "dirt": 135,
    "dog": 29,
    "don't know": 5,
    "donut": 33,
    "door": 171,
    "double": 88,
    "doughnut": 198,
    "down": 17,
    "exit": 75,
    "fashion": 166,
    "fence": 82,
    "forest": 78,
    "french": 41,
    "full": 8,
    "giraffe": 127,
    "giraffes": 97,
    "girl": 13,
    "gray": 26,
    "gray and black": 160,
    "green": 111,
    "ground": 101,
    "hair": 72,
    "happy": 85,
    "hat": 59,
    "hawaii": 19,
    "human": 117,
    "ice cream": 138,
    "in car": 24,
    "jeep": 38,
    "king": 44,
    "lady": 67,
    "lanyard": 133,
    "large": 173,
    "laying down": 103,
    "leather": 63,
    "lg": 2,
    "little girl": 21,
    "low": 47,
    "lying down": 56,
    "man": 170,
    "many": 106,
    "monitor": 37,
    "name tag": 58,
    "natural": 76,
    "necklace": 54,
    "neon": 36,
    "net": 174,
    "no": 195,
    "not sure": 177,
    "not there": 108,
    "nothing": 90,
    "on road": 197,
    "on street": 61,
    "orange": 43,
    "out": 181,
    "outside": 192,
    "park": 120,
    "person": 182,
    "photographer": 196,
    "picnic table": 7,
    "pink": 129,
    "plain": 9,
    "plastic": 130,
    "plate": 102,
    "platform": 84,
    "protection": 161,
    "purple": 125,
    "queen": 142,
    "rack": 124,
    "red": 105,
    "red and blue": 131,
    "red and yellow": 12,
    "resting": 190,
    "right": 98,
    "roof": 22,
    "screen": 34,
    "security": 154,
    "shade": 134,
    "shadow": 164,
    "shadows": 165,
    "shelter": 100,
    "shrimp": 107,
    "sidewalk": 156,
    "skateboard": 1,
    "skateboarding": 51,
    "skier": 70,
    "skiing": 80,
    "sky": 153,
    "sleeping": 172,
    "small": 189,
    "smile": 159,
    "smiling": 83,
    "snow": 147,
    "snowboard": 39,
    "snowboarder": 119,
    "snowboarding": 49,
    "soccer": 92,
    "soccer ball": 31,
    "solid": 6,
    "stand": 157,
    "station": 79,
    "street": 91,
    "stripes": 132,
    "style": 52,
    "sun": 114,
    "suv": 175,
    "tabby": 14,
    "table": 93,
    "talking": 109,
    "talking on phone": 188,
    "tan": 69,
    "tent": 53,
    "they aren't": 136,
    "tired": 45,
    "tower": 185,
    "train": 89,
    "trees": 95,
    "tv": 81,
    "unknown": 18,
    "walking": 123,
    "wall": 140,
    "watching": 163,
    "wedding": 42,
    "white": 146,
    "white and black": 194,
    "white and blue": 149,
    "window": 10,
    "windows": 32,
    "wine": 152,
    "wine tasting": 40,
    "woman": 30,
    "women": 96,
    "woods": 20,
    "yellow": 148,
    "yes": 122,
    "zoo": 139
  },
  "layer_norm_eps": 1e-12,
  "logit_scale_init_value": 2.6592,
  "max_image_length": -1,
  "max_position_embeddings": 40,
  "modality_type_vocab_size": 2,
  "model_type": "vilt",
  "num_attention_heads": 12,
  "num_channels": 3,
  "num_hidden_layers": 12,
  "num_images": -1,
  "patch_size": 32,
  "projection_dim": 512,
  "qkv_bias": true,
  "text_config": {
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "attention_probs_dropout_prob": 0.0,
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "bos_token_id": 30522,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": 2,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.0,
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "is_decoder": true,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-12,
    "length_penalty": 1.0,
    "max_length": 20,
    "max_position_embeddings": 512,
    "min_length": 0,
    "model_type": "blip_text_model",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 12,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 12,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": 0,
    "prefix": null,
    "problem_type": null,
    "projection_dim": 768,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": 102,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "transformers_version": "4.26.0.dev0",
    "typical_p": 1.0,
    "use_bfloat16": false,
    "use_cache": true,
    "vocab_size": 30524
  },
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.42.4",
  "type_vocab_size": 2,
  "vision_config": {
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "attention_dropout": 0.0,
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "bos_token_id": null,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "dropout": 0.0,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": null,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_act": "gelu",
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "image_size": 384,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-05,
    "length_penalty": 1.0,
    "max_length": 20,
    "min_length": 0,
    "model_type": "blip_vision_model",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 12,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_channels": 3,
    "num_hidden_layers": 12,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": null,
    "patch_size": 16,
    "prefix": null,
    "problem_type": null,
    "projection_dim": 512,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "transformers_version": "4.26.0.dev0",
    "typical_p": 1.0,
    "use_bfloat16": false
  },
  "vocab_size": 30522
}
|