{
  "batch_size": 32,
  "buffer_size": 64,
  "eval_mix": "chris_aubo",
  "frozen_keys": [
    "*hf_model*"
  ],
  "lora": false,
  "lora_config": {
    "bias": "none",
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "r": 8
  },
  "model": {
    "heads": {
      "action": {
        "args": [],
        "kwargs": {
          "action_dim": 7,
          "action_horizon": 4,
          "dropout_rate": 0.0,
          "n_diffusion_samples": 1,
          "readout_key": "readout_action",
          "token_embedding_size": 768,
          "use_map": false
        },
        "module": "octo.model.components.action_heads",
        "name": "DiffusionActionHead"
      }
    },
    "max_horizon": 10,
    "observation_tokenizers": {
      "primary": {
        "args": [],
        "kwargs": {
          "encoder": {
            "args": [],
            "kwargs": {
              "in_features": 6
            },
            "module": "octo.model.components.vit_encoders",
            "name": "SmallStem16"
          },
          "obs_stack_keys": [
            "image_primary"
          ],
          "task_stack_keys": [
            "image_primary"
          ]
        },
        "module": "octo.model.components.tokenizers",
        "name": "ImageTokenizer"
      }
    },
    "readouts": {
      "action": 1
    },
    "repeat_task_tokens": true,
    "task_tokenizers": {
      "language": {
        "args": [],
        "kwargs": {
          "encoder": "t5-base",
          "finetune_encoder": false
        },
        "module": "octo.model.components.tokenizers",
        "name": "LanguageTokenizer"
      }
    },
    "token_embedding_size": 768,
    "transformer_kwargs": {
      "add_position_embedding": false,
      "attention_dropout_rate": 0.0,
      "dropout_rate": 0.0,
      "mlp_dim": 3072,
      "num_attention_heads": 12,
      "num_layers": 12
    },
    "use_correct_attention": true
  },
  "normalize_method": "sign",
  "obs_token_nums": {
    "primary": 256
  },
  "output_dir": "ljp_aubo_20250610_test",
  "pretrained": {
    "kwargs": {
      "subpath": "oxe-g2-checkpoint-300000"
    },
    "model": "hf://chuanmew/octo_torch"
  },
  "run_name": "aubo",
  "sampler_num_samples": 2560000,
  "seed": 42,
  "subsample_length": 99999,
  "text_processor": {
    "args": [],
    "kwargs": {
      "encode_with_model": false,
      "tokenizer_kwargs": {
        "max_length": 16,
        "padding": "max_length",
        "return_tensors": "np",
        "truncation": true
      },
      "tokenizer_name": "t5-base"
    },
    "module": "octo.components.text_processing",
    "name": "HFTokenizer"
  },
  "train_mix": "ljp_aubo_merged_20250519to20250529_20250604",
  "training_arguments": {
    "bf16": true,
    "dataloader_pin_memory": true,
    "ddp_find_unused_parameters": true,
    "eval_steps": 1000,
    "gradient_accumulation_steps": 1,
    "learning_rate": 3e-05,
    "logging_nan_inf_filter": false,
    "logging_steps": 1000,
    "lr_scheduler_kwargs": {
      "decay_type": "cosine",
      "min_lr_ratio": 0.05,
      "num_decay_steps": 0
    },
    "lr_scheduler_type": "warmup_stable_decay",
    "max_grad_norm": 1.0,
    "max_steps": 5200,
    "optim": "adamw_torch_fused",
    "per_device_eval_batch_size": 1,
    "per_device_train_batch_size": 1,
    "report_to": "wandb",
    "save_safetensors": true,
    "save_steps": 5200,
    "torch_compile": true,
    "warmup_steps": 100,
    "weight_decay": 0.1
  },
  "training_keys": [
    "*action*",
    "*observation_tokenizers*",
    "*transformer.transformer*",
    "*task_projections*",
    "*obs_projections*",
    "*readout_embeddings*",
    "*task_pos_embeddings*",
    "*obs_pos_embeddings*"
  ]
}