| _wandb: | |
| value: | |
| cli_version: 0.20.1 | |
| m: | |
| - "1": ued/visit_patterns/num_replay_batches | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": step/env-step-all-after | |
| "6": | |
| - 3 | |
| "7": [] | |
| - "1": ppo/max/max_actor_clipfrac | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": step/ppo-update-after | |
| "6": | |
| - 3 | |
| "7": [] | |
| - "1": ppo/std/std_actor_loss | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/avg_benchmark_regret | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/avg_avg_episode_length | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/avg_avg_episode_length | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/rollouts_gif | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/avg_critic_loss | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/max/max_critic_loss | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/std/std_critic_clipfrac | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/std/std_loss | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/lvl_avg_episode_length_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/proxy_corner/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/avg_benchmark_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/proxy_corner/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/proxy_corner/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/solvable_avg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/layout/num_walls_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/lvl_benchmark_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/proxy_corner/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/mouse-cheese_dist_finite_wavg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/eta | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/rollouts_gif | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/shift_proportion | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/proxy_corner/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/layout/num_walls_avg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/layout/prop_walls_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/scoring/avg_scores | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/proxy_corner/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/shift_proportion | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/avg_avg_episode_length | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/lvl_avg_episode_length_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/proxy_corner/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/proxy_corner/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/proxy_corner/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/avg_avg_episode_length | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/avg_benchmark_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/proxy_corner/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/visit_patterns/avg_first_visit_time | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/avg_entropy | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/max/max_loss | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/std/std_advantage | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/rollouts_gif | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/rollouts_gif | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/levels_cheese_is_in_corner_avg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/max/max_actor_approxkl1 | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/lvl_benchmark_regret_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/visit_patterns/avg_last_visit_time | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/cheese-corner_dist_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/solvable_num | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/layout/num_walls_wavg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/layout/prop_walls_wavg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/proxy_corner/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/solvable_wavg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/max/max_actor_approxkl3 | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/proxy_corner/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/avg_benchmark_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/lvl_avg_episode_length_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/lvl_avg_episode_length_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/avg_actor_approxkl1 | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/avg_actor_clipfrac | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/max/max_grad_norm_pre_clip | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/proxy_corner/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/proxy_corner/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/proxy_corner/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/avg_benchmark_regret | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/proxy_corner/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/max/max_entropy | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/proxy_corner/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/proxy_corner/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/shift_proportion | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/avg_avg_episode_length | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/visit_patterns/last_visit_time_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/cheese-corne_dist_avg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/scoring/scores_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/max/max_critic_clipfrac | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/lvl_benchmark_regret_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/lvl_avg_episode_length_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/lvl_benchmark_regret_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/proxy_corner/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/avg_avg_episode_length | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/avg_benchmark_regret | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/layout/prop_walls_avg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/avg_advantage | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/avg_critic_clipfrac | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/proxy_corner/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/proxy_corner/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/visit_patterns/prev_batch_level_ids_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/std/std_actor_approxkl3 | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/proxy_corner/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/proxy_corner/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/mouse-cheese_dist_solvable_avg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/avg_actor_loss | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/avg_grad_norm_pre_clip | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/std/std_critic_loss | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/rollouts_gif | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/std/std_actor_approxkl1 | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/proxy_corner/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/avg_avg_episode_length | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/proxy_corner/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/lvl_benchmark_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/proxy_corner/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/mouse-cheese_dist_finite_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/avg_actor_approxkl3 | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/max/max_advantage | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/avg_avg_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/avg_benchmark_regret | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/cheese-corne_dist_wavg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/avg_loss | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/max/max_actor_loss | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/avg_benchmark_return | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/proxy_corner/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/proxy_corner/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-all/rollouts_gif | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/mouse-cheese_dist_solvable_wavg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/std/std_entropy | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-orig/lvl_benchmark_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/lvl_avg_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-replay/lvl_avg_episode_length_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-train/avg_reward_per_step | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/lvl_benchmark_regret_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/visit_patterns/first_visit_time_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/std/std_actor_clipfrac | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ppo/std/std_grad_norm_pre_clip | |
| "5": 4 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": train-generate/rollouts_gif | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/distances/mouse-cheese_dist_finite_avg | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": ued/layout/levels16_img | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/lvl_avg_episode_length_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-shift/lvl_benchmark_return_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| - "1": eval-batch-tree/lvl_reward_per_step_hist | |
| "5": 2 | |
| "6": | |
| - 1 | |
| - 3 | |
| "7": [] | |
| python_version: 3.11.11 | |
| t: | |
| "1": | |
| - 12 | |
| - 45 | |
| "2": | |
| - 12 | |
| - 45 | |
| "3": | |
| - 2 | |
| - 3 | |
| - 7 | |
| - 13 | |
| - 16 | |
| - 55 | |
| - 61 | |
| "4": 3.11.11 | |
| "5": 0.20.1 | |
| "12": 0.20.1 | |
| "13": linux-x86_64 | |
| chain_mutate: | |
| value: true | |
| checkpointing: | |
| value: true | |
| clipping: | |
| value: false | |
| console_log: | |
| value: true | |
| env_corner_size: | |
| value: 1 | |
| env_layout: | |
| value: tree | |
| env_penalize_time: | |
| value: false | |
| env_size: | |
| value: 13 | |
| env_terminate_after_corner: | |
| value: false | |
| evals_num_env_steps: | |
| value: 512 | |
| evals_num_levels: | |
| value: 256 | |
| gif_grid_width: | |
| value: 16 | |
| img_level_of_detail: | |
| value: 1 | |
| keep_all_checkpoints: | |
| value: true | |
| level_splayer: | |
| value: mouse | |
| log_gifs: | |
| value: true | |
| log_hists: | |
| value: false | |
| log_imgs: | |
| value: true | |
| max_num_checkpoints: | |
| value: 1 | |
| mutate_cheese: | |
| value: true | |
| net_cnn_type: | |
| value: large | |
| net_rnn_type: | |
| value: ff | |
| net_width: | |
| value: 256 | |
| num_cycles_per_big_eval: | |
| value: 1024 | |
| num_cycles_per_checkpoint: | |
| value: 64 | |
| num_cycles_per_eval: | |
| value: 32 | |
| num_cycles_per_gifs: | |
| value: 1024 | |
| num_cycles_per_log: | |
| value: 32 | |
| num_env_steps_per_cycle: | |
| value: 128 | |
| num_epochs_per_cycle: | |
| value: 5 | |
| num_minibatches_per_epoch: | |
| value: 4 | |
| num_mutate_steps: | |
| value: 12 | |
| num_parallel_envs: | |
| value: 256 | |
| num_total_env_steps: | |
| value: 200000000 | |
| num_train_levels: | |
| value: 2048 | |
| obs_level_of_detail: | |
| value: 0 | |
| plr_buffer_size: | |
| value: 4096 | |
| plr_prob_replay: | |
| value: 0.5 | |
| plr_proxy_shaping: | |
| value: false | |
| plr_proxy_shaping_coeff: | |
| value: 0.5 | |
| plr_regret_estimator: | |
| value: maxmc-actor | |
| plr_robust: | |
| value: false | |
| plr_staleness_coeff: | |
| value: 0.1 | |
| plr_temperature: | |
| value: 0.1 | |
| ppo_clip_eps: | |
| value: 0.1 | |
| ppo_critic_coeff: | |
| value: 0.5 | |
| ppo_entropy_coeff: | |
| value: 0.001 | |
| ppo_gae_lambda: | |
| value: 0.95 | |
| ppo_gamma: | |
| value: 0.999 | |
| ppo_lr: | |
| value: 5e-05 | |
| ppo_lr_annealing: | |
| value: false | |
| ppo_max_grad_norm: | |
| value: 0.5 | |
| ppo_proxy_critic_coeff: | |
| value: 0.5 | |
| prob_mutate_shift: | |
| value: 0 | |
| prob_shift: | |
| value: 1 | |
| proxy_name: | |
| value: proxy_corner | |
| seed: | |
| value: 42 | |
| train_proxy_critic: | |
| value: false | |
| ued: | |
| value: plr | |
| use_fixed_eval_levels: | |
| value: false | |
| wall_prob: | |
| value: 0.7 | |