riteshkr's picture
Training in progress, step 100
6a4e639 verified
raw
history blame
12.9 kB
{
"_name_or_path": "Salesforce/blip-vqa-base",
"architectures": [
"ViltForQuestionAnswering"
],
"attention_probs_dropout_prob": 0.0,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"id2label": {
"0": "at table",
"1": "skateboard",
"2": "lg",
"3": "6",
"4": "crossing",
"5": "don't know",
"6": "solid",
"7": "picnic table",
"8": "full",
"9": "plain",
"10": "window",
"11": "8:35",
"12": "red and yellow",
"13": "girl",
"14": "tabby",
"15": "blue",
"16": "7:45",
"17": "down",
"18": "unknown",
"19": "hawaii",
"20": "woods",
"21": "little girl",
"22": "roof",
"23": "black and white",
"24": "in car",
"25": "clock tower",
"26": "gray",
"27": "curtains",
"28": "ball",
"29": "dog",
"30": "woman",
"31": "soccer ball",
"32": "windows",
"33": "donut",
"34": "screen",
"35": "bus",
"36": "neon",
"37": "monitor",
"38": "jeep",
"39": "snowboard",
"40": "wine tasting",
"41": "french",
"42": "wedding",
"43": "orange",
"44": "king",
"45": "tired",
"46": "canopy",
"47": "low",
"48": "bikes",
"49": "snowboarding",
"50": "2000",
"51": "skateboarding",
"52": "style",
"53": "tent",
"54": "necklace",
"55": "bike rack",
"56": "lying down",
"57": "clock",
"58": "name tag",
"59": "hat",
"60": "backpack",
"61": "on street",
"62": "air",
"63": "leather",
"64": "2010",
"65": "can't tell",
"66": "bicycle",
"67": "lady",
"68": "clear",
"69": "tan",
"70": "skier",
"71": "car",
"72": "hair",
"73": "curtain",
"74": "10",
"75": "exit",
"76": "natural",
"77": "camera",
"78": "forest",
"79": "station",
"80": "skiing",
"81": "tv",
"82": "fence",
"83": "smiling",
"84": "platform",
"85": "happy",
"86": "bedroom",
"87": "blonde",
"88": "double",
"89": "train",
"90": "nothing",
"91": "street",
"92": "soccer",
"93": "table",
"94": "5",
"95": "trees",
"96": "women",
"97": "giraffes",
"98": "right",
"99": "7",
"100": "shelter",
"101": "ground",
"102": "plate",
"103": "laying down",
"104": "chopsticks",
"105": "red",
"106": "many",
"107": "shrimp",
"108": "not there",
"109": "talking",
"110": "cloudy",
"111": "green",
"112": "bicycles",
"113": "bricks",
"114": "sun",
"115": "2013",
"116": "brick",
"117": "human",
"118": "birthday",
"119": "snowboarder",
"120": "park",
"121": "beagle",
"122": "yes",
"123": "walking",
"124": "rack",
"125": "purple",
"126": "cat",
"127": "giraffe",
"128": "8",
"129": "pink",
"130": "plastic",
"131": "red and blue",
"132": "stripes",
"133": "lanyard",
"134": "shade",
"135": "dirt",
"136": "they aren't",
"137": "0",
"138": "ice cream",
"139": "zoo",
"140": "wall",
"141": "cup",
"142": "queen",
"143": "cage",
"144": "africa",
"145": "beige",
"146": "white",
"147": "snow",
"148": "yellow",
"149": "white and blue",
"150": "calico",
"151": "big ben",
"152": "wine",
"153": "sky",
"154": "security",
"155": "2",
"156": "sidewalk",
"157": "stand",
"158": "4",
"159": "smile",
"160": "gray and black",
"161": "protection",
"162": "3",
"163": "watching",
"164": "shadow",
"165": "shadows",
"166": "fashion",
"167": "7:35",
"168": "crown",
"169": "blue and white",
"170": "man",
"171": "door",
"172": "sleeping",
"173": "large",
"174": "net",
"175": "suv",
"176": "brown",
"177": "not sure",
"178": "arrow",
"179": "1",
"180": "black",
"181": "out",
"182": "person",
"183": "desert",
"184": "boy",
"185": "tower",
"186": "9:35",
"187": "chair",
"188": "talking on phone",
"189": "small",
"190": "resting",
"191": "church",
"192": "outside",
"193": "cross",
"194": "white and black",
"195": "no",
"196": "photographer",
"197": "on road",
"198": "doughnut"
},
"image_size": 384,
"image_text_hidden_size": 256,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"0": 137,
"1": 179,
"10": 74,
"2": 155,
"2000": 50,
"2010": 64,
"2013": 115,
"3": 162,
"4": 158,
"5": 94,
"6": 3,
"7": 99,
"7:35": 167,
"7:45": 16,
"8": 128,
"8:35": 11,
"9:35": 186,
"africa": 144,
"air": 62,
"arrow": 178,
"at table": 0,
"backpack": 60,
"ball": 28,
"beagle": 121,
"bedroom": 86,
"beige": 145,
"bicycle": 66,
"bicycles": 112,
"big ben": 151,
"bike rack": 55,
"bikes": 48,
"birthday": 118,
"black": 180,
"black and white": 23,
"blonde": 87,
"blue": 15,
"blue and white": 169,
"boy": 184,
"brick": 116,
"bricks": 113,
"brown": 176,
"bus": 35,
"cage": 143,
"calico": 150,
"camera": 77,
"can't tell": 65,
"canopy": 46,
"car": 71,
"cat": 126,
"chair": 187,
"chopsticks": 104,
"church": 191,
"clear": 68,
"clock": 57,
"clock tower": 25,
"cloudy": 110,
"cross": 193,
"crossing": 4,
"crown": 168,
"cup": 141,
"curtain": 73,
"curtains": 27,
"desert": 183,
"dirt": 135,
"dog": 29,
"don't know": 5,
"donut": 33,
"door": 171,
"double": 88,
"doughnut": 198,
"down": 17,
"exit": 75,
"fashion": 166,
"fence": 82,
"forest": 78,
"french": 41,
"full": 8,
"giraffe": 127,
"giraffes": 97,
"girl": 13,
"gray": 26,
"gray and black": 160,
"green": 111,
"ground": 101,
"hair": 72,
"happy": 85,
"hat": 59,
"hawaii": 19,
"human": 117,
"ice cream": 138,
"in car": 24,
"jeep": 38,
"king": 44,
"lady": 67,
"lanyard": 133,
"large": 173,
"laying down": 103,
"leather": 63,
"lg": 2,
"little girl": 21,
"low": 47,
"lying down": 56,
"man": 170,
"many": 106,
"monitor": 37,
"name tag": 58,
"natural": 76,
"necklace": 54,
"neon": 36,
"net": 174,
"no": 195,
"not sure": 177,
"not there": 108,
"nothing": 90,
"on road": 197,
"on street": 61,
"orange": 43,
"out": 181,
"outside": 192,
"park": 120,
"person": 182,
"photographer": 196,
"picnic table": 7,
"pink": 129,
"plain": 9,
"plastic": 130,
"plate": 102,
"platform": 84,
"protection": 161,
"purple": 125,
"queen": 142,
"rack": 124,
"red": 105,
"red and blue": 131,
"red and yellow": 12,
"resting": 190,
"right": 98,
"roof": 22,
"screen": 34,
"security": 154,
"shade": 134,
"shadow": 164,
"shadows": 165,
"shelter": 100,
"shrimp": 107,
"sidewalk": 156,
"skateboard": 1,
"skateboarding": 51,
"skier": 70,
"skiing": 80,
"sky": 153,
"sleeping": 172,
"small": 189,
"smile": 159,
"smiling": 83,
"snow": 147,
"snowboard": 39,
"snowboarder": 119,
"snowboarding": 49,
"soccer": 92,
"soccer ball": 31,
"solid": 6,
"stand": 157,
"station": 79,
"street": 91,
"stripes": 132,
"style": 52,
"sun": 114,
"suv": 175,
"tabby": 14,
"table": 93,
"talking": 109,
"talking on phone": 188,
"tan": 69,
"tent": 53,
"they aren't": 136,
"tired": 45,
"tower": 185,
"train": 89,
"trees": 95,
"tv": 81,
"unknown": 18,
"walking": 123,
"wall": 140,
"watching": 163,
"wedding": 42,
"white": 146,
"white and black": 194,
"white and blue": 149,
"window": 10,
"windows": 32,
"wine": 152,
"wine tasting": 40,
"woman": 30,
"women": 96,
"woods": 20,
"yellow": 148,
"yes": 122,
"zoo": 139
},
"layer_norm_eps": 1e-12,
"logit_scale_init_value": 2.6592,
"max_image_length": -1,
"max_position_embeddings": 40,
"modality_type_vocab_size": 2,
"model_type": "vilt",
"num_attention_heads": 12,
"num_channels": 3,
"num_hidden_layers": 12,
"num_images": -1,
"patch_size": 32,
"projection_dim": 512,
"qkv_bias": true,
"text_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"attention_probs_dropout_prob": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": 30522,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": 2,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 3072,
"is_decoder": true,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"layer_norm_eps": 1e-12,
"length_penalty": 1.0,
"max_length": 20,
"max_position_embeddings": 512,
"min_length": 0,
"model_type": "blip_text_model",
"no_repeat_ngram_size": 0,
"num_attention_heads": 12,
"num_beam_groups": 1,
"num_beams": 1,
"num_hidden_layers": 12,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": 0,
"prefix": null,
"problem_type": null,
"projection_dim": 768,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": 102,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"transformers_version": "4.26.0.dev0",
"typical_p": 1.0,
"use_bfloat16": false,
"use_cache": true,
"vocab_size": 30524
},
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.42.4",
"type_vocab_size": 2,
"vision_config": {
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"attention_dropout": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"dropout": 0.0,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_act": "gelu",
"hidden_size": 768,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"image_size": 384,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"intermediate_size": 3072,
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"layer_norm_eps": 1e-05,
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "blip_vision_model",
"no_repeat_ngram_size": 0,
"num_attention_heads": 12,
"num_beam_groups": 1,
"num_beams": 1,
"num_channels": 3,
"num_hidden_layers": 12,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"patch_size": 16,
"prefix": null,
"problem_type": null,
"projection_dim": 512,
"pruned_heads": {},
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"transformers_version": "4.26.0.dev0",
"typical_p": 1.0,
"use_bfloat16": false
},
"vocab_size": 30522
}