Hume Model Zoo
Collection
Collection of models from the paper: https://arxiv.org/abs/2505.21432
•
6 items
•
Updated
•
2
A Dual-System Visual-Language-Action model with System-2 thinking trained on Libero-Goal.
s2_candidates_num=5
noise_temp_lower_bound=1.0
noise_temp_upper_bound=1.0
time_temp_lower_bound=0.9
time_temp_upper_bound=1.0
from hume import HumePolicy
import numpy as np
# Load the pretrained policy from the checkpoint path.
hume = HumePolicy.from_pretrained("/path/to/checkpoints")

# Test-time computing settings, handed to init_infer as `infer_cfg`.
infer_cfg = {
    "replan_steps": 8,
    "s2_replan_steps": 16,
    "s2_candidates_num": 5,
    "noise_temp_lower_bound": 1.0,
    "noise_temp_upper_bound": 1.0,
    "time_temp_lower_bound": 0.9,
    "time_temp_upper_bound": 1.0,
    "post_process_action": True,
    "device": "cuda",
}
hume.init_infer(infer_cfg=infer_cfg)

# Build a dummy observation batch: two zero-filled uint8 camera frames,
# a zero state vector, and one task string per batch element.
image_shape = (1, 224, 224, 3)  # (B, H, W, C)
observation = {
    "observation.images.image": np.zeros(image_shape, dtype=np.uint8),
    "observation.images.wrist_image": np.zeros(image_shape, dtype=np.uint8),
    "observation.state": np.zeros((1, 7)),  # (B, state_dim)
    "task": ["Lift the papper"],
}

# Run inference on the observation batch.
action = hume.infer(observation)  # (B, action_dim)
# source ckpts
2025-05-02/08-10-44_libero_goal_ck8-16-1_sh-4_gpu8_lr5e-5_1e-5_1e-5_2e-5_bs16_s1600k/0090000
# original logs
2025-06-12/20-53-09+08-10-44_libero_goal_ck8-16-1_sh-4_gpu8_lr5e-5_1e-5_1e-5_2e-5_bs16_s1600k_0090000_s1-8_s2-16_s2cand-5_ntl-1.0_ntu-1.0_ttl-0.9_ttu-1.0.log
@article{song2025hume,
title={Hume: Introducing System-2 Thinking in Visual-Language-Action Model},
author={Anonymous Authors},
journal={arXiv preprint arXiv:2505.21432},
year={2025}
}
Base model
Hume-vla/Hume-System2