{ "n_obs_steps": 1, "normalization_mapping": { "VISUAL": "IDENTITY", "STATE": "MEAN_STD", "ACTION": "MEAN_STD" }, "input_features": { "observation.images.image": { "type": "VISUAL", "shape": [ 3, 256, 256 ] }, "observation.images.wrist_image": { "type": "VISUAL", "shape": [ 3, 256, 256 ] }, "observation.state": { "type": "STATE", "shape": [ 8 ] } }, "output_features": { "action": { "type": "ACTION", "shape": [ 7 ] } }, "device": "cpu", "use_amp": false, "type": "hume", "s1_chunk_size": 8, "s2_chunk_size": 16, "n_action_steps": 16, "max_state_dim": 32, "max_action_dim": 32, "resize_imgs_with_padding": [ 224, 224 ], "empty_cameras": 0, "adapt_to_pi_aloha": false, "use_delta_joint_actions_aloha": false, "tokenizer_max_length": 48, "proj_width": 1024, "num_steps": 10, "use_cache": true, "attention_implementation": "eager", "freeze_vision_encoder": true, "train_expert_only": false, "train_state_proj": true, "optimizer_lr": 5e-05, "optimizer_betas": [ 0.9, 0.95 ], "optimizer_eps": 1e-08, "optimizer_weight_decay": 1e-10, "scheduler_warmup_steps": 1000, "scheduler_decay_steps": 1600000, "scheduler_decay_lr": 2.5e-06, "freeze_s2": true, "s1_his_state_size": 4, "cache_s2_actions": false, "theta2": 1.0, "theta1": 1.0, "noise_slides_eps": 0.0, "noise_slides_alp": 0.0, "s1_proj_width": 512, "freeze_s1_vision_encoder": false, "s1_num_steps": 10, "num_pos": 3, "discount": 0.98, "actor_lr": 1e-05, "critic_lr": 1e-05, "temp_lr": 2e-05, "qf_lr": 0.0003, "next_obs_offset": 1, "vqh_chunk_size": 1, "paligemma_config": { "bos_token_id": 2, "eos_token_id": 1, "hidden_size": 2048, "ignore_index": -100, "image_token_index": 257152, "model_type": "paligemma", "pad_token_id": 0, "projection_dim": 2048, "text_config": { "hidden_activation": "gelu_pytorch_tanh", "hidden_size": 2048, "intermediate_size": 16384, "model_type": "gemma", "num_attention_heads": 8, "num_hidden_layers": 18, "num_image_tokens": 256, "num_key_value_heads": 1, "torch_dtype": "float32", "vocab_size": 257152 }, "torch_dtype": "float32", "transformers_version": "4.48.1", "vision_config": { "hidden_size": 1152, "intermediate_size": 4304, "model_type": "siglip_vision_model", "num_attention_heads": 16, "num_hidden_layers": 27, "num_image_tokens": 256, "patch_size": 14, "projection_dim": 2048, "projector_hidden_act": "gelu_fast", "vision_use_head": false }, "vocab_size": 257152 }, "gemma_expert_config": { "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 2, "eos_token_id": 1, "head_dim": 256, "hidden_act": "gelu_pytorch_tanh", "hidden_activation": "gelu_pytorch_tanh", "hidden_size": 1024, "initializer_range": 0.02, "intermediate_size": 4096, "max_position_embeddings": 8192, "model_type": "gemma", "num_attention_heads": 8, "num_hidden_layers": 18, "num_key_value_heads": 1, "pad_token_id": 0, "rms_norm_eps": 1e-06, "rope_theta": 10000.0, "torch_dtype": "float32", "transformers_version": "4.48.1", "use_cache": true, "vocab_size": 257152 }, "s1_dino_config": { "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": "float32", "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": [ "Dinov2Model" ], "finetuning_task": null, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "tokenizer_class": null, "prefix": null, "bos_token_id": null, "pad_token_id": null, "eos_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "../pretrained/dinov2-small", "_attn_implementation_autoset": false, "transformers_version": "4.52.0.dev0", "model_type": "dinov2", "hidden_size": 384, "num_hidden_layers": 12, "num_attention_heads": 6, "mlp_ratio": 4, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "attention_probs_dropout_prob": 0.0, "initializer_range": 0.02, "layer_norm_eps": 1e-06, "image_size": 518, "patch_size": 14, "num_channels": 3, "qkv_bias": true, "layerscale_value": 1.0, "drop_path_rate": 0.0, "use_swiglu_ffn": false, "stage_names": [ "stem", "stage1", "stage2", "stage3", "stage4", "stage5", "stage6", "stage7", "stage8", "stage9", "stage10", "stage11", "stage12" ], "apply_layernorm": true, "reshape_hidden_states": true, "use_mask_token": true, "out_features": [ "stage12" ], "out_indices": [ 12 ] }, "s1_gemma_expert_config": { "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 2, "eos_token_id": 1, "head_dim": 128, "hidden_act": "gelu_pytorch_tanh", "hidden_activation": "gelu_pytorch_tanh", "hidden_size": 512, "initializer_range": 0.02, "intermediate_size": 2048, "max_position_embeddings": 8192, "model_type": "gemma", "num_attention_heads": 8, "num_hidden_layers": 13, "num_key_value_heads": 1, "pad_token_id": 0, "rms_norm_eps": 1e-06, "rope_theta": 10000.0, "torch_dtype": "float32", "transformers_version": "4.48.1", "use_cache": true, "vocab_size": 257152 } }