smolvla / config.json
{
  "architectures": [
    "LerobotSmolVLAWrappedModel"
  ],
  "auto_map": {
    "AutoModel": "modeling_lerobot_policy.LerobotSmolVLAWrappedModel"
  },
  "adapt_to_pi_aloha": false,
  "add_image_special_tokens": false,
  "attention_mode": "cross_attn",
  "chunk_size": 50,
  "device": "cuda",
  "empty_cameras": 0,
  "expert_width_multiplier": 0.75,
  "freeze_vision_encoder": true,
  "input_features": {
    "observation.image": {
      "shape": [
        3,
        256,
        256
      ],
      "type": "VISUAL"
    },
    "observation.image2": {
      "shape": [
        3,
        256,
        256
      ],
      "type": "VISUAL"
    },
    "observation.image3": {
      "shape": [
        3,
        256,
        256
      ],
      "type": "VISUAL"
    },
    "observation.state": {
      "shape": [
        6
      ],
      "type": "STATE"
    }
  },
  "load_vlm_weights": true,
  "max_action_dim": 32,
  "max_period": 4,
  "max_state_dim": 32,
  "min_period": 0.004,
  "n_action_steps": 50,
  "n_obs_steps": 1,
  "normalization_mapping": {
    "ACTION": "MEAN_STD",
    "STATE": "MEAN_STD",
    "VISUAL": "IDENTITY"
  },
  "num_expert_layers": 0,
  "num_steps": 10,
  "num_vlm_layers": 16,
  "optimizer_betas": [
    0.9,
    0.95
  ],
  "optimizer_eps": 1e-08,
  "optimizer_grad_clip_norm": 10,
  "optimizer_lr": 0.0001,
  "optimizer_weight_decay": 1e-10,
  "output_features": {
    "action": {
      "shape": [
        6
      ],
      "type": "ACTION"
    }
  },
  "pad_language_to": "max_length",
  "prefix_length": 0,
  "resize_imgs_with_padding": [
    512,
    512
  ],
  "scheduler_decay_lr": 2.5e-06,
  "scheduler_decay_steps": 30000,
  "scheduler_warmup_steps": 1000,
  "self_attn_every_n_layers": 2,
  "tokenizer_max_length": 48,
  "train_expert_only": true,
  "train_state_proj": true,
  "type": "smolvla",
  "use_amp": false,
  "use_cache": true,
  "use_delta_joint_actions_aloha": false,
  "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
}
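
The auto_map entry above routes AutoModel to the custom wrapper class shipped in modeling_lerobot_policy.py, so loading the checkpoint requires trusting the repository's code. Below is a minimal sketch of inspecting this config and loading the model; the repository id "gribok201/smolvla" is an assumption based on this page and may differ, and whether the AutoModel call succeeds depends on the remote code in the repository.

# Sketch only: inspect config.json and (optionally) load the wrapped SmolVLA policy.
# Assumption: the repository id is "gribok201/smolvla"; adjust if different.
import json
from huggingface_hub import hf_hub_download
from transformers import AutoModel

# Download and parse the config file shown above.
cfg_path = hf_hub_download(repo_id="gribok201/smolvla", filename="config.json")
with open(cfg_path) as f:
    cfg = json.load(f)
print(cfg["vlm_model_name"], cfg["chunk_size"], cfg["n_action_steps"])

# auto_map points AutoModel at modeling_lerobot_policy.LerobotSmolVLAWrappedModel,
# so transformers must be allowed to run the repository's custom code.
model = AutoModel.from_pretrained("gribok201/smolvla", trust_remote_code=True)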