{ "batch_size": 32, "buffer_size": 64, "eval_mix": "chris_aubo", "frozen_keys": [ "*hf_model*" ], "lora": false, "lora_config": { "bias": "none", "lora_alpha": 16, "lora_dropout": 0.05, "r": 8 }, "model": { "heads": { "action": { "args": [], "kwargs": { "action_dim": 7, "action_horizon": 4, "dropout_rate": 0.0, "n_diffusion_samples": 1, "readout_key": "readout_action", "token_embedding_size": 768, "use_map": false }, "module": "octo.model.components.action_heads", "name": "DiffusionActionHead" } }, "max_horizon": 10, "observation_tokenizers": { "primary": { "args": [], "kwargs": { "encoder": { "args": [], "kwargs": { "in_features": 6 }, "module": "octo.model.components.vit_encoders", "name": "SmallStem16" }, "obs_stack_keys": [ "image_primary" ], "task_stack_keys": [ "image_primary" ] }, "module": "octo.model.components.tokenizers", "name": "ImageTokenizer" } }, "readouts": { "action": 1 }, "repeat_task_tokens": true, "task_tokenizers": { "language": { "args": [], "kwargs": { "encoder": "t5-base", "finetune_encoder": false }, "module": "octo.model.components.tokenizers", "name": "LanguageTokenizer" } }, "token_embedding_size": 768, "transformer_kwargs": { "add_position_embedding": false, "attention_dropout_rate": 0.0, "dropout_rate": 0.0, "mlp_dim": 3072, "num_attention_heads": 12, "num_layers": 12 }, "use_correct_attention": true }, "normalize_method": "sign", "obs_token_nums": { "primary": 256 }, "output_dir": "ljp_aubo_20250610_test", "pretrained": { "kwargs": { "subpath": "oxe-g2-checkpoint-300000" }, "model": "hf://chuanmew/octo_torch" }, "run_name": "aubo", "sampler_num_samples": 2560000, "seed": 42, "subsample_length": 99999, "text_processor": { "args": [], "kwargs": { "encode_with_model": false, "tokenizer_kwargs": { "max_length": 16, "padding": "max_length", "return_tensors": "np", "truncation": true }, "tokenizer_name": "t5-base" }, "module": "octo.components.text_processing", "name": "HFTokenizer" }, "train_mix": "ljp_aubo_merged_20250519to20250529_20250604", "training_arguments": { "bf16": true, "dataloader_pin_memory": true, "ddp_find_unused_parameters": true, "eval_steps": 1000, "gradient_accumulation_steps": 1, "learning_rate": 3e-05, "logging_nan_inf_filter": false, "logging_steps": 1000, "lr_scheduler_kwargs": { "decay_type": "cosine", "min_lr_ratio": 0.05, "num_decay_steps": 0 }, "lr_scheduler_type": "warmup_stable_decay", "max_grad_norm": 1.0, "max_steps": 5200, "optim": "adamw_torch_fused", "per_device_eval_batch_size": 1, "per_device_train_batch_size": 1, "report_to": "wandb", "save_safetensors": true, "save_steps": 5200, "torch_compile": true, "warmup_steps": 100, "weight_decay": 0.1 }, "training_keys": [ "*action*", "*observation_tokenizers*", "*transformer.transformer*", "*task_projections*", "*obs_projections*", "*readout_embeddings*", "*task_pos_embeddings*", "*obs_pos_embeddings*" ] }