diff --git "a/sf_log.txt" "b/sf_log.txt"
--- "a/sf_log.txt"
+++ "b/sf_log.txt"
@@ -6304,3 +6304,1550 @@ main_loop: 2230.9332
 [2024-09-01 17:30:52,275][00194] Avg episode rewards: #0: 37.812, true rewards: #0: 14.512
 [2024-09-01 17:30:52,278][00194] Avg episode reward: 37.812, avg true_objective: 14.512
 [2024-09-01 17:32:32,823][00194] Replay video saved to /content/train_dir/default_experiment/replay.mp4!
+[2024-09-01 17:32:35,863][00194] The model has been pushed to https://huggingface.co/jarski/rl_course_vizdoom_health_gathering_supreme
+[2024-09-01 17:34:43,971][00194] Environment doom_basic already registered, overwriting...
+[2024-09-01 17:34:43,975][00194] Environment doom_two_colors_easy already registered, overwriting...
+[2024-09-01 17:34:43,979][00194] Environment doom_two_colors_hard already registered, overwriting...
+[2024-09-01 17:34:43,981][00194] Environment doom_dm already registered, overwriting...
+[2024-09-01 17:34:43,983][00194] Environment doom_dwango5 already registered, overwriting...
+[2024-09-01 17:34:43,985][00194] Environment doom_my_way_home_flat_actions already registered, overwriting...
+[2024-09-01 17:34:43,987][00194] Environment doom_defend_the_center_flat_actions already registered, overwriting...
+[2024-09-01 17:34:43,989][00194] Environment doom_my_way_home already registered, overwriting...
+[2024-09-01 17:34:43,991][00194] Environment doom_deadly_corridor already registered, overwriting...
+[2024-09-01 17:34:43,994][00194] Environment doom_defend_the_center already registered, overwriting...
+[2024-09-01 17:34:43,996][00194] Environment doom_defend_the_line already registered, overwriting...
+[2024-09-01 17:34:43,998][00194] Environment doom_health_gathering already registered, overwriting...
+[2024-09-01 17:34:44,000][00194] Environment doom_health_gathering_supreme already registered, overwriting...
+[2024-09-01 17:34:44,001][00194] Environment doom_battle already registered, overwriting...
+[2024-09-01 17:34:44,003][00194] Environment doom_battle2 already registered, overwriting...
+[2024-09-01 17:34:44,005][00194] Environment doom_duel_bots already registered, overwriting...
+[2024-09-01 17:34:44,008][00194] Environment doom_deathmatch_bots already registered, overwriting...
+[2024-09-01 17:34:44,010][00194] Environment doom_duel already registered, overwriting...
+[2024-09-01 17:34:44,012][00194] Environment doom_deathmatch_full already registered, overwriting...
+[2024-09-01 17:34:44,014][00194] Environment doom_benchmark already registered, overwriting...
+[2024-09-01 17:34:44,016][00194] register_encoder_factory:
+[2024-09-01 17:34:44,036][00194] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-09-01 17:34:44,037][00194] Overriding arg 'train_for_env_steps' with value 10000000 passed from command line
+[2024-09-01 17:34:44,053][00194] Experiment dir /content/train_dir/default_experiment already exists!
+[2024-09-01 17:34:44,063][00194] Resuming existing experiment from /content/train_dir/default_experiment...
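For context, a minimal sketch of how a resumed run like this one is typically launched. This is an assumption based on Sample Factory 2.x, where the sf_examples VizDoom entry point exposes register_vizdoom_components and parse_vizdoom_cfg and training is started via run_rl; the argv values mirror the command_line recorded in the config dump below, with the train_for_env_steps override logged above. Because restart_behavior=resume and the experiment dir already exists, rerunning the same experiment continues from the latest checkpoint instead of starting fresh.

    # Hedged sketch of the resume launch; helper names are assumptions based on
    # Sample Factory's sf_examples VizDoom entry point, not taken from this log.
    from sample_factory.train import run_rl
    from sf_examples.vizdoom.train_vizdoom import parse_vizdoom_cfg, register_vizdoom_components

    register_vizdoom_components()  # re-registers every doom_* env, producing the
                                   # "already registered, overwriting" lines above
    cfg = parse_vizdoom_cfg(argv=[
        "--env=doom_health_gathering_supreme",
        "--num_workers=8",
        "--num_envs_per_worker=4",
        "--device=cpu",
        "--train_for_env_steps=10000000",  # the override logged above
    ])
    run_rl(cfg)  # restart_behavior=resume: picks up /content/train_dir/default_experiment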
+[2024-09-01 17:34:44,064][00194] Weights and Biases integration disabled +[2024-09-01 17:34:44,073][00194] Environment var CUDA_VISIBLE_DEVICES is +[2024-09-01 17:34:46,068][00194] Starting experiment with the following configuration: +help=False +algo=APPO +env=doom_health_gathering_supreme +experiment=default_experiment +train_dir=/content/train_dir +restart_behavior=resume +device=cpu +seed=None +num_policies=1 +async_rl=True +serial_mode=False +batched_sampling=False +num_batches_to_accumulate=2 +worker_num_splits=2 +policy_workers_per_policy=1 +max_policy_lag=1000 +num_workers=8 +num_envs_per_worker=4 +batch_size=1024 +num_batches_per_epoch=1 +num_epochs=1 +rollout=32 +recurrence=32 +shuffle_minibatches=False +gamma=0.99 +reward_scale=1.0 +reward_clip=1000.0 +value_bootstrap=False +normalize_returns=True +exploration_loss_coeff=0.001 +value_loss_coeff=0.5 +kl_loss_coeff=0.0 +exploration_loss=symmetric_kl +gae_lambda=0.95 +ppo_clip_ratio=0.1 +ppo_clip_value=0.2 +with_vtrace=False +vtrace_rho=1.0 +vtrace_c=1.0 +optimizer=adam +adam_eps=1e-06 +adam_beta1=0.9 +adam_beta2=0.999 +max_grad_norm=4.0 +learning_rate=0.0001 +lr_schedule=constant +lr_schedule_kl_threshold=0.008 +lr_adaptive_min=1e-06 +lr_adaptive_max=0.01 +obs_subtract_mean=0.0 +obs_scale=255.0 +normalize_input=True +normalize_input_keys=None +decorrelate_experience_max_seconds=0 +decorrelate_envs_on_one_worker=True +actor_worker_gpus=[] +set_workers_cpu_affinity=True +force_envs_single_thread=False +default_niceness=0 +log_to_file=True +experiment_summaries_interval=10 +flush_summaries_interval=30 +stats_avg=100 +summaries_use_frameskip=True +heartbeat_interval=20 +heartbeat_reporting_interval=600 +train_for_env_steps=10000000 +train_for_seconds=10000000000 +save_every_sec=120 +keep_checkpoints=2 +load_checkpoint_kind=latest +save_milestones_sec=-1 +save_best_every_sec=5 +save_best_metric=reward +save_best_after=100000 +benchmark=False +encoder_mlp_layers=[512, 512] +encoder_conv_architecture=convnet_simple +encoder_conv_mlp_layers=[512] +use_rnn=True +rnn_size=512 +rnn_type=gru +rnn_num_layers=1 +decoder_mlp_layers=[] +nonlinearity=elu +policy_initialization=orthogonal +policy_init_gain=1.0 +actor_critic_share_weights=True +adaptive_stddev=True +continuous_tanh_scale=0.0 +initial_stddev=1.0 +use_env_info_cache=False +env_gpu_actions=False +env_gpu_observations=True +env_frameskip=4 +env_framestack=1 +pixel_format=CHW +use_record_episode_statistics=False +with_wandb=False +wandb_user=None +wandb_project=sample_factory +wandb_group=None +wandb_job_type=SF +wandb_tags=[] +with_pbt=False +pbt_mix_policies_in_one_env=True +pbt_period_env_steps=5000000 +pbt_start_mutation=20000000 +pbt_replace_fraction=0.3 +pbt_mutation_rate=0.15 +pbt_replace_reward_gap=0.1 +pbt_replace_reward_gap_absolute=1e-06 +pbt_optimize_gamma=False +pbt_target_objective=true_objective +pbt_perturb_min=1.1 +pbt_perturb_max=1.5 +num_agents=-1 +num_humans=0 +num_bots=-1 +start_bot_difficulty=None +timelimit=None +res_w=128 +res_h=72 +wide_aspect_ratio=False +eval_env_frameskip=1 +fps=35 +command_line=--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --device=cpu --train_for_env_steps=4000000 +cli_args={'env': 'doom_health_gathering_supreme', 'device': 'cpu', 'num_workers': 8, 'num_envs_per_worker': 4, 'train_for_env_steps': 4000000} +git_hash=unknown +git_repo_name=not a git repository +[2024-09-01 17:34:46,072][00194] Saving configuration to /content/train_dir/default_experiment/config.json... 
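A quick arithmetic check on the checkpoint filenames that appear below: with batch_size=1024 and env_frameskip=4 (summaries_use_frameskip=True), one policy version corresponds to 1024 * 4 = 4096 environment frames, assuming env frames per train step = batch_size * env_frameskip. That assumption reproduces every checkpoint_<version>_<env_steps>.pth name in this log:

    # Sanity check (assumption: env frames per train step = batch_size * env_frameskip)
    batch_size, env_frameskip = 1024, 4
    for version in (1956, 1976, 2003, 2030, 2057):
        print(f"checkpoint_{version:09d}_{version * batch_size * env_frameskip}.pth")
    # checkpoint_000001956_8011776.pth ... checkpoint_000002057_8425472.pth,
    # matching the files saved/loaded later in this log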
+[2024-09-01 17:34:46,078][00194] Rollout worker 0 uses device cpu +[2024-09-01 17:34:46,080][00194] Rollout worker 1 uses device cpu +[2024-09-01 17:34:46,083][00194] Rollout worker 2 uses device cpu +[2024-09-01 17:34:46,087][00194] Rollout worker 3 uses device cpu +[2024-09-01 17:34:46,089][00194] Rollout worker 4 uses device cpu +[2024-09-01 17:34:46,090][00194] Rollout worker 5 uses device cpu +[2024-09-01 17:34:46,092][00194] Rollout worker 6 uses device cpu +[2024-09-01 17:34:46,093][00194] Rollout worker 7 uses device cpu +[2024-09-01 17:34:46,238][00194] InferenceWorker_p0-w0: min num requests: 2 +[2024-09-01 17:34:46,277][00194] Starting all processes... +[2024-09-01 17:34:46,278][00194] Starting process learner_proc0 +[2024-09-01 17:34:46,332][00194] Starting all processes... +[2024-09-01 17:34:46,340][00194] Starting process inference_proc0-0 +[2024-09-01 17:34:46,343][00194] Starting process rollout_proc0 +[2024-09-01 17:34:46,343][00194] Starting process rollout_proc1 +[2024-09-01 17:34:46,343][00194] Starting process rollout_proc2 +[2024-09-01 17:34:46,344][00194] Starting process rollout_proc3 +[2024-09-01 17:34:46,349][00194] Starting process rollout_proc4 +[2024-09-01 17:34:46,349][00194] Starting process rollout_proc5 +[2024-09-01 17:34:46,349][00194] Starting process rollout_proc6 +[2024-09-01 17:34:46,356][00194] Starting process rollout_proc7 +[2024-09-01 17:35:00,678][47745] Worker 4 uses CPU cores [0] +[2024-09-01 17:35:01,036][47749] Worker 7 uses CPU cores [1] +[2024-09-01 17:35:01,112][47728] Starting seed is not provided +[2024-09-01 17:35:01,114][47728] Initializing actor-critic model on device cpu +[2024-09-01 17:35:01,115][47728] RunningMeanStd input shape: (3, 72, 128) +[2024-09-01 17:35:01,118][47728] RunningMeanStd input shape: (1,) +[2024-09-01 17:35:01,190][47728] ConvEncoder: input_channels=3 +[2024-09-01 17:35:01,267][47744] Worker 2 uses CPU cores [0] +[2024-09-01 17:35:01,290][47742] Worker 0 uses CPU cores [0] +[2024-09-01 17:35:01,378][47748] Worker 6 uses CPU cores [0] +[2024-09-01 17:35:01,415][47743] Worker 1 uses CPU cores [1] +[2024-09-01 17:35:01,568][47746] Worker 3 uses CPU cores [1] +[2024-09-01 17:35:01,595][47747] Worker 5 uses CPU cores [1] +[2024-09-01 17:35:01,663][47728] Conv encoder output size: 512 +[2024-09-01 17:35:01,663][47728] Policy head output size: 512 +[2024-09-01 17:35:01,689][47728] Created Actor Critic model with architecture: +[2024-09-01 17:35:01,690][47728] ActorCriticSharedWeights( + (obs_normalizer): ObservationNormalizer( + (running_mean_std): RunningMeanStdDictInPlace( + (running_mean_std): ModuleDict( + (obs): RunningMeanStdInPlace() + ) + ) + ) + (returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace) + (encoder): VizdoomEncoder( + (basic_encoder): ConvEncoder( + (enc): RecursiveScriptModule( + original_name=ConvEncoderImpl + (conv_head): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Conv2d) + (1): RecursiveScriptModule(original_name=ELU) + (2): RecursiveScriptModule(original_name=Conv2d) + (3): RecursiveScriptModule(original_name=ELU) + (4): RecursiveScriptModule(original_name=Conv2d) + (5): RecursiveScriptModule(original_name=ELU) + ) + (mlp_layers): RecursiveScriptModule( + original_name=Sequential + (0): RecursiveScriptModule(original_name=Linear) + (1): RecursiveScriptModule(original_name=ELU) + ) + ) + ) + ) + (core): ModelCoreRNN( + (core): GRU(512, 512) + ) + (decoder): MlpDecoder( + (mlp): Identity() + ) + (critic_linear): 
Linear(in_features=512, out_features=1, bias=True) + (action_parameterization): ActionParameterizationDefault( + (distribution_linear): Linear(in_features=512, out_features=5, bias=True) + ) +) +[2024-09-01 17:35:02,269][47728] Using optimizer +[2024-09-01 17:35:02,271][47728] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001956_8011776.pth... +[2024-09-01 17:35:02,317][47728] Loading model from checkpoint +[2024-09-01 17:35:02,342][47728] Loaded experiment state at self.train_step=1956, self.env_steps=8011776 +[2024-09-01 17:35:02,343][47728] Initialized policy 0 weights for model version 1956 +[2024-09-01 17:35:02,349][47741] RunningMeanStd input shape: (3, 72, 128) +[2024-09-01 17:35:02,351][47728] LearnerWorker_p0 finished initialization! +[2024-09-01 17:35:02,352][47741] RunningMeanStd input shape: (1,) +[2024-09-01 17:35:02,375][47741] ConvEncoder: input_channels=3 +[2024-09-01 17:35:02,542][47741] Conv encoder output size: 512 +[2024-09-01 17:35:02,544][47741] Policy head output size: 512 +[2024-09-01 17:35:02,569][00194] Inference worker 0-0 is ready! +[2024-09-01 17:35:02,571][00194] All inference workers are ready! Signal rollout workers to start! +[2024-09-01 17:35:02,682][47749] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 17:35:02,711][47743] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 17:35:02,717][47746] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 17:35:02,713][47747] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 17:35:02,717][47748] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 17:35:02,733][47745] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 17:35:02,745][47744] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 17:35:02,748][47742] Doom resolution: 160x120, resize resolution: (128, 72) +[2024-09-01 17:35:04,073][00194] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 8011776. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 17:35:04,181][47749] Decorrelating experience for 0 frames... +[2024-09-01 17:35:04,198][47747] Decorrelating experience for 0 frames... +[2024-09-01 17:35:04,196][47744] Decorrelating experience for 0 frames... +[2024-09-01 17:35:04,211][47746] Decorrelating experience for 0 frames... +[2024-09-01 17:35:04,209][47745] Decorrelating experience for 0 frames... +[2024-09-01 17:35:04,215][47742] Decorrelating experience for 0 frames... +[2024-09-01 17:35:05,153][47745] Decorrelating experience for 32 frames... +[2024-09-01 17:35:05,156][47744] Decorrelating experience for 32 frames... +[2024-09-01 17:35:05,490][47749] Decorrelating experience for 32 frames... +[2024-09-01 17:35:05,542][47746] Decorrelating experience for 32 frames... +[2024-09-01 17:35:05,625][47747] Decorrelating experience for 32 frames... +[2024-09-01 17:35:06,217][00194] Heartbeat connected on Batcher_0 +[2024-09-01 17:35:06,230][00194] Heartbeat connected on LearnerWorker_p0 +[2024-09-01 17:35:06,261][47749] Decorrelating experience for 64 frames... +[2024-09-01 17:35:06,286][00194] Heartbeat connected on InferenceWorker_p0-w0 +[2024-09-01 17:35:06,751][47742] Decorrelating experience for 32 frames... +[2024-09-01 17:35:06,972][47745] Decorrelating experience for 64 frames... +[2024-09-01 17:35:06,990][47744] Decorrelating experience for 64 frames... 
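The architecture printout above only confirms a three-layer Conv2d/ELU stack followed by a Linear+ELU head with a 512-dim output. A minimal PyTorch sketch, assuming the filter sizes of Sample Factory's convnet_simple architecture (32 8x8 stride-4, 64 4x4 stride-2, 128 3x3 stride-2; an assumption, since the log does not print them), shows how a (3, 72, 128) observation becomes the logged 512-dim feature:

    # Minimal sketch of the conv encoder printed above; filter sizes are an
    # assumption (convnet_simple), only the 512-dim output is confirmed by the log.
    import torch
    import torch.nn as nn

    conv_head = nn.Sequential(
        nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ELU(),
        nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ELU(),
        nn.Conv2d(64, 128, kernel_size=3, stride=2), nn.ELU(),
    )
    obs = torch.zeros(1, 3, 72, 128)        # CHW frame after resize, per the log
    flat = conv_head(obs).flatten(1)        # (1, 128 * 3 * 6) = (1, 2304)
    mlp_layers = nn.Sequential(nn.Linear(flat.shape[1], 512), nn.ELU())
    print(mlp_layers(flat).shape)           # torch.Size([1, 512]) -> "Conv encoder output size: 512"

From there, per the printout, the GRU core keeps the feature at 512 and feeds the critic head (512 -> 1) and the 5-way action head (512 -> 5).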
+[2024-09-01 17:35:07,222][47748] Decorrelating experience for 0 frames... +[2024-09-01 17:35:07,637][47742] Decorrelating experience for 64 frames... +[2024-09-01 17:35:07,752][47747] Decorrelating experience for 64 frames... +[2024-09-01 17:35:08,502][47749] Decorrelating experience for 96 frames... +[2024-09-01 17:35:08,563][47746] Decorrelating experience for 64 frames... +[2024-09-01 17:35:08,891][00194] Heartbeat connected on RolloutWorker_w7 +[2024-09-01 17:35:08,953][47743] Decorrelating experience for 0 frames... +[2024-09-01 17:35:09,078][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 8011776. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 17:35:09,939][47742] Decorrelating experience for 96 frames... +[2024-09-01 17:35:10,501][00194] Heartbeat connected on RolloutWorker_w0 +[2024-09-01 17:35:11,259][47745] Decorrelating experience for 96 frames... +[2024-09-01 17:35:12,147][00194] Heartbeat connected on RolloutWorker_w4 +[2024-09-01 17:35:12,263][47746] Decorrelating experience for 96 frames... +[2024-09-01 17:35:12,468][47743] Decorrelating experience for 32 frames... +[2024-09-01 17:35:13,205][00194] Heartbeat connected on RolloutWorker_w3 +[2024-09-01 17:35:14,074][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 8011776. Throughput: 0: 36.2. Samples: 362. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 17:35:14,080][00194] Avg episode reward: [(0, '0.435')] +[2024-09-01 17:35:14,145][47747] Decorrelating experience for 96 frames... +[2024-09-01 17:35:15,130][00194] Heartbeat connected on RolloutWorker_w5 +[2024-09-01 17:35:16,849][47748] Decorrelating experience for 32 frames... +[2024-09-01 17:35:17,911][47743] Decorrelating experience for 64 frames... +[2024-09-01 17:35:19,073][00194] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 8011776. Throughput: 0: 102.4. Samples: 1536. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0) +[2024-09-01 17:35:19,081][00194] Avg episode reward: [(0, '3.744')] +[2024-09-01 17:35:20,858][47728] Signal inference workers to stop experience collection... +[2024-09-01 17:35:20,896][47741] InferenceWorker_p0-w0: stopping experience collection +[2024-09-01 17:35:20,988][47744] Decorrelating experience for 96 frames... +[2024-09-01 17:35:21,148][00194] Heartbeat connected on RolloutWorker_w2 +[2024-09-01 17:35:21,180][47748] Decorrelating experience for 64 frames... +[2024-09-01 17:35:21,284][47743] Decorrelating experience for 96 frames... +[2024-09-01 17:35:21,385][00194] Heartbeat connected on RolloutWorker_w1 +[2024-09-01 17:35:21,850][47748] Decorrelating experience for 96 frames... +[2024-09-01 17:35:21,959][00194] Heartbeat connected on RolloutWorker_w6 +[2024-09-01 17:35:22,289][47728] Signal inference workers to resume experience collection... +[2024-09-01 17:35:22,289][47741] InferenceWorker_p0-w0: resuming experience collection +[2024-09-01 17:35:24,073][00194] Fps is (10 sec: 409.6, 60 sec: 204.8, 300 sec: 204.8). Total num frames: 8015872. Throughput: 0: 137.8. Samples: 2756. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-09-01 17:35:24,076][00194] Avg episode reward: [(0, '4.928')] +[2024-09-01 17:35:29,092][00194] Fps is (10 sec: 817.7, 60 sec: 327.4, 300 sec: 327.4). Total num frames: 8019968. Throughput: 0: 142.9. Samples: 3576. 
Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0) +[2024-09-01 17:35:29,097][00194] Avg episode reward: [(0, '6.433')] +[2024-09-01 17:35:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 409.6, 300 sec: 409.6). Total num frames: 8024064. Throughput: 0: 158.4. Samples: 4752. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:35:34,076][00194] Avg episode reward: [(0, '6.413')] +[2024-09-01 17:35:39,074][00194] Fps is (10 sec: 820.7, 60 sec: 468.1, 300 sec: 468.1). Total num frames: 8028160. Throughput: 0: 171.1. Samples: 5988. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:35:39,090][00194] Avg episode reward: [(0, '7.944')] +[2024-09-01 17:35:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 512.0, 300 sec: 512.0). Total num frames: 8032256. Throughput: 0: 166.9. Samples: 6678. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:35:44,077][00194] Avg episode reward: [(0, '8.864')] +[2024-09-01 17:35:49,073][00194] Fps is (10 sec: 819.3, 60 sec: 546.1, 300 sec: 546.1). Total num frames: 8036352. Throughput: 0: 181.3. Samples: 8160. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:35:49,077][00194] Avg episode reward: [(0, '9.813')] +[2024-09-01 17:35:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 573.4, 300 sec: 573.4). Total num frames: 8040448. Throughput: 0: 206.5. Samples: 9290. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:35:54,075][00194] Avg episode reward: [(0, '11.407')] +[2024-09-01 17:35:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 595.8, 300 sec: 595.8). Total num frames: 8044544. Throughput: 0: 215.6. Samples: 10064. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:35:59,083][00194] Avg episode reward: [(0, '13.735')] +[2024-09-01 17:36:03,866][47741] Updated weights for policy 0, policy_version 1966 (0.2506) +[2024-09-01 17:36:04,073][00194] Fps is (10 sec: 1228.8, 60 sec: 682.7, 300 sec: 682.7). Total num frames: 8052736. Throughput: 0: 229.0. Samples: 11842. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:36:04,075][00194] Avg episode reward: [(0, '15.147')] +[2024-09-01 17:36:09,073][00194] Fps is (10 sec: 1228.8, 60 sec: 751.0, 300 sec: 693.2). Total num frames: 8056832. Throughput: 0: 223.6. Samples: 12820. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 17:36:09,080][00194] Avg episode reward: [(0, '17.124')] +[2024-09-01 17:36:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 702.2). Total num frames: 8060928. Throughput: 0: 221.3. Samples: 13532. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 17:36:14,076][00194] Avg episode reward: [(0, '17.111')] +[2024-09-01 17:36:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 710.0). Total num frames: 8065024. Throughput: 0: 225.3. Samples: 14890. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:36:19,075][00194] Avg episode reward: [(0, '19.737')] +[2024-09-01 17:36:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 716.8). Total num frames: 8069120. Throughput: 0: 238.0. Samples: 16696. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:36:24,075][00194] Avg episode reward: [(0, '21.668')] +[2024-09-01 17:36:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.7, 300 sec: 722.8). Total num frames: 8073216. Throughput: 0: 228.7. Samples: 16968. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:36:29,080][00194] Avg episode reward: [(0, '22.343')] +[2024-09-01 17:36:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 728.2). 
Total num frames: 8077312. Throughput: 0: 222.9. Samples: 18192. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:36:34,081][00194] Avg episode reward: [(0, '24.009')] +[2024-09-01 17:36:39,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 776.1). Total num frames: 8085504. Throughput: 0: 237.8. Samples: 19992. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:36:39,076][00194] Avg episode reward: [(0, '24.846')] +[2024-09-01 17:36:44,076][00194] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 778.2). Total num frames: 8089600. Throughput: 0: 237.9. Samples: 20770. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:36:44,079][00194] Avg episode reward: [(0, '24.920')] +[2024-09-01 17:36:49,073][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 741.2). Total num frames: 8089600. Throughput: 0: 223.4. Samples: 21896. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:36:49,081][00194] Avg episode reward: [(0, '24.920')] +[2024-09-01 17:36:49,460][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001976_8093696.pth... +[2024-09-01 17:36:49,465][47741] Updated weights for policy 0, policy_version 1976 (0.1945) +[2024-09-01 17:36:49,579][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001941_7950336.pth +[2024-09-01 17:36:54,076][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 781.9). Total num frames: 8097792. Throughput: 0: 229.1. Samples: 23130. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 17:36:54,081][00194] Avg episode reward: [(0, '25.429')] +[2024-09-01 17:36:59,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 783.6). Total num frames: 8101888. Throughput: 0: 234.5. Samples: 24086. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 17:36:59,076][00194] Avg episode reward: [(0, '26.293')] +[2024-09-01 17:37:04,079][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 785.0). Total num frames: 8105984. Throughput: 0: 232.2. Samples: 25340. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:37:04,086][00194] Avg episode reward: [(0, '25.988')] +[2024-09-01 17:37:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 786.4). Total num frames: 8110080. Throughput: 0: 218.6. Samples: 26532. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:37:09,078][00194] Avg episode reward: [(0, '26.540')] +[2024-09-01 17:37:14,073][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 787.7). Total num frames: 8114176. Throughput: 0: 232.6. Samples: 27436. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:37:14,081][00194] Avg episode reward: [(0, '26.060')] +[2024-09-01 17:37:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 788.9). Total num frames: 8118272. Throughput: 0: 246.3. Samples: 29276. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:37:19,081][00194] Avg episode reward: [(0, '26.599')] +[2024-09-01 17:37:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 789.9). Total num frames: 8122368. Throughput: 0: 227.8. Samples: 30244. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:37:24,079][00194] Avg episode reward: [(0, '26.365')] +[2024-09-01 17:37:29,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 8130560. Throughput: 0: 223.6. Samples: 30830. 
Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 17:37:29,080][00194] Avg episode reward: [(0, '26.365')] +[2024-09-01 17:37:32,238][47741] Updated weights for policy 0, policy_version 1986 (0.1956) +[2024-09-01 17:37:34,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 8134656. Throughput: 0: 234.1. Samples: 32430. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 17:37:34,081][00194] Avg episode reward: [(0, '26.898')] +[2024-09-01 17:37:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 8138752. Throughput: 0: 236.5. Samples: 33774. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:37:39,079][00194] Avg episode reward: [(0, '27.384')] +[2024-09-01 17:37:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 8142848. Throughput: 0: 227.5. Samples: 34322. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:37:44,076][00194] Avg episode reward: [(0, '28.181')] +[2024-09-01 17:37:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 819.2). Total num frames: 8146944. Throughput: 0: 237.6. Samples: 36030. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:37:49,082][00194] Avg episode reward: [(0, '29.622')] +[2024-09-01 17:37:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 8151040. Throughput: 0: 242.1. Samples: 37426. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:37:54,076][00194] Avg episode reward: [(0, '30.945')] +[2024-09-01 17:37:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 819.2). Total num frames: 8155136. Throughput: 0: 234.9. Samples: 38006. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:37:59,076][00194] Avg episode reward: [(0, '31.694')] +[2024-09-01 17:38:04,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 842.0). Total num frames: 8163328. Throughput: 0: 226.4. Samples: 39464. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:38:04,085][00194] Avg episode reward: [(0, '32.151')] +[2024-09-01 17:38:09,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 841.3). Total num frames: 8167424. Throughput: 0: 237.5. Samples: 40932. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:38:09,080][00194] Avg episode reward: [(0, '32.851')] +[2024-09-01 17:38:11,855][47728] Saving new best policy, reward=32.851! +[2024-09-01 17:38:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 840.8). Total num frames: 8171520. Throughput: 0: 236.7. Samples: 41480. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:38:14,079][00194] Avg episode reward: [(0, '32.978')] +[2024-09-01 17:38:17,536][47728] Saving new best policy, reward=32.978! +[2024-09-01 17:38:17,549][47741] Updated weights for policy 0, policy_version 1996 (0.1004) +[2024-09-01 17:38:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 840.2). Total num frames: 8175616. Throughput: 0: 224.2. Samples: 42518. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:38:19,077][00194] Avg episode reward: [(0, '32.800')] +[2024-09-01 17:38:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 839.7). Total num frames: 8179712. Throughput: 0: 236.0. Samples: 44394. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:38:24,082][00194] Avg episode reward: [(0, '31.144')] +[2024-09-01 17:38:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 839.2). Total num frames: 8183808. 
Throughput: 0: 236.1. Samples: 44946. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:38:29,076][00194] Avg episode reward: [(0, '30.769')] +[2024-09-01 17:38:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 838.7). Total num frames: 8187904. Throughput: 0: 230.0. Samples: 46380. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:38:34,076][00194] Avg episode reward: [(0, '30.520')] +[2024-09-01 17:38:39,081][00194] Fps is (10 sec: 1227.8, 60 sec: 955.6, 300 sec: 857.3). Total num frames: 8196096. Throughput: 0: 226.1. Samples: 47602. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:38:39,086][00194] Avg episode reward: [(0, '29.990')] +[2024-09-01 17:38:44,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 856.4). Total num frames: 8200192. Throughput: 0: 236.8. Samples: 48662. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:38:44,076][00194] Avg episode reward: [(0, '29.219')] +[2024-09-01 17:38:46,962][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002003_8204288.pth... +[2024-09-01 17:38:47,084][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001956_8011776.pth +[2024-09-01 17:38:49,073][00194] Fps is (10 sec: 819.8, 60 sec: 955.7, 300 sec: 855.6). Total num frames: 8204288. Throughput: 0: 230.5. Samples: 49836. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:38:49,081][00194] Avg episode reward: [(0, '28.558')] +[2024-09-01 17:38:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 854.8). Total num frames: 8208384. Throughput: 0: 221.8. Samples: 50912. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:38:54,078][00194] Avg episode reward: [(0, '28.558')] +[2024-09-01 17:38:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 854.1). Total num frames: 8212480. Throughput: 0: 227.5. Samples: 51718. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:38:59,077][00194] Avg episode reward: [(0, '28.676')] +[2024-09-01 17:39:00,708][47741] Updated weights for policy 0, policy_version 2006 (0.0984) +[2024-09-01 17:39:02,980][47728] Signal inference workers to stop experience collection... (50 times) +[2024-09-01 17:39:03,022][47741] InferenceWorker_p0-w0: stopping experience collection (50 times) +[2024-09-01 17:39:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 853.3). Total num frames: 8216576. Throughput: 0: 245.3. Samples: 53556. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:39:04,078][00194] Avg episode reward: [(0, '28.719')] +[2024-09-01 17:39:04,605][47728] Signal inference workers to resume experience collection... (50 times) +[2024-09-01 17:39:04,605][47741] InferenceWorker_p0-w0: resuming experience collection (50 times) +[2024-09-01 17:39:09,076][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 852.6). Total num frames: 8220672. Throughput: 0: 229.5. Samples: 54724. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:39:09,082][00194] Avg episode reward: [(0, '28.698')] +[2024-09-01 17:39:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 852.0). Total num frames: 8224768. Throughput: 0: 225.4. Samples: 55088. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:39:14,076][00194] Avg episode reward: [(0, '29.010')] +[2024-09-01 17:39:19,073][00194] Fps is (10 sec: 1229.1, 60 sec: 955.7, 300 sec: 867.4). Total num frames: 8232960. Throughput: 0: 232.0. Samples: 56822. 
Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:39:19,076][00194] Avg episode reward: [(0, '28.808')] +[2024-09-01 17:39:24,079][00194] Fps is (10 sec: 1228.1, 60 sec: 955.6, 300 sec: 866.4). Total num frames: 8237056. Throughput: 0: 235.2. Samples: 58184. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:39:24,083][00194] Avg episode reward: [(0, '29.338')] +[2024-09-01 17:39:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 865.6). Total num frames: 8241152. Throughput: 0: 226.0. Samples: 58832. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:39:29,076][00194] Avg episode reward: [(0, '29.945')] +[2024-09-01 17:39:34,073][00194] Fps is (10 sec: 819.7, 60 sec: 955.7, 300 sec: 864.7). Total num frames: 8245248. Throughput: 0: 225.4. Samples: 59980. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:39:34,076][00194] Avg episode reward: [(0, '29.800')] +[2024-09-01 17:39:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 863.9). Total num frames: 8249344. Throughput: 0: 241.8. Samples: 61794. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:39:39,076][00194] Avg episode reward: [(0, '28.606')] +[2024-09-01 17:39:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 863.1). Total num frames: 8253440. Throughput: 0: 238.6. Samples: 62454. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:39:44,076][00194] Avg episode reward: [(0, '28.662')] +[2024-09-01 17:39:45,294][47741] Updated weights for policy 0, policy_version 2016 (0.0524) +[2024-09-01 17:39:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 862.3). Total num frames: 8257536. Throughput: 0: 221.2. Samples: 63512. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:39:49,079][00194] Avg episode reward: [(0, '29.399')] +[2024-09-01 17:39:54,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 875.7). Total num frames: 8265728. Throughput: 0: 228.1. Samples: 64990. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:39:54,082][00194] Avg episode reward: [(0, '28.389')] +[2024-09-01 17:39:59,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 874.7). Total num frames: 8269824. Throughput: 0: 241.8. Samples: 65970. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:39:59,076][00194] Avg episode reward: [(0, '28.925')] +[2024-09-01 17:40:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 888.6). Total num frames: 8273920. Throughput: 0: 226.4. Samples: 67010. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:40:04,080][00194] Avg episode reward: [(0, '28.617')] +[2024-09-01 17:40:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.8, 300 sec: 902.5). Total num frames: 8278016. Throughput: 0: 224.3. Samples: 68276. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:40:09,081][00194] Avg episode reward: [(0, '28.685')] +[2024-09-01 17:40:14,074][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 8282112. Throughput: 0: 227.1. Samples: 69050. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:40:14,077][00194] Avg episode reward: [(0, '28.195')] +[2024-09-01 17:40:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8286208. Throughput: 0: 236.0. Samples: 70602. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:40:19,076][00194] Avg episode reward: [(0, '28.054')] +[2024-09-01 17:40:24,074][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). 
Total num frames: 8290304. Throughput: 0: 222.0. Samples: 71786. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:40:24,076][00194] Avg episode reward: [(0, '27.097')] +[2024-09-01 17:40:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8294400. Throughput: 0: 217.0. Samples: 72218. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:40:29,083][00194] Avg episode reward: [(0, '27.508')] +[2024-09-01 17:40:29,848][47741] Updated weights for policy 0, policy_version 2026 (0.2936) +[2024-09-01 17:40:34,073][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8302592. Throughput: 0: 238.9. Samples: 74264. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:40:34,079][00194] Avg episode reward: [(0, '27.500')] +[2024-09-01 17:40:39,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8306688. Throughput: 0: 227.7. Samples: 75236. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:40:39,075][00194] Avg episode reward: [(0, '28.245')] +[2024-09-01 17:40:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8310784. Throughput: 0: 221.9. Samples: 75956. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:40:44,081][00194] Avg episode reward: [(0, '28.808')] +[2024-09-01 17:40:47,596][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002030_8314880.pth... +[2024-09-01 17:40:47,712][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000001976_8093696.pth +[2024-09-01 17:40:49,074][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8314880. Throughput: 0: 228.5. Samples: 77294. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:40:49,081][00194] Avg episode reward: [(0, '28.897')] +[2024-09-01 17:40:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8318976. Throughput: 0: 236.5. Samples: 78918. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:40:54,079][00194] Avg episode reward: [(0, '29.124')] +[2024-09-01 17:40:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8323072. Throughput: 0: 228.7. Samples: 79342. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:40:59,079][00194] Avg episode reward: [(0, '29.053')] +[2024-09-01 17:41:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8327168. Throughput: 0: 224.0. Samples: 80684. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:41:04,077][00194] Avg episode reward: [(0, '29.066')] +[2024-09-01 17:41:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8331264. Throughput: 0: 236.7. Samples: 82438. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:41:09,080][00194] Avg episode reward: [(0, '29.593')] +[2024-09-01 17:41:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8335360. Throughput: 0: 240.9. Samples: 83058. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:41:14,076][00194] Avg episode reward: [(0, '29.257')] +[2024-09-01 17:41:14,433][47741] Updated weights for policy 0, policy_version 2036 (0.2112) +[2024-09-01 17:41:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8339456. Throughput: 0: 221.8. Samples: 84246. 
Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:41:19,078][00194] Avg episode reward: [(0, '29.091')] +[2024-09-01 17:41:24,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8347648. Throughput: 0: 228.8. Samples: 85534. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:41:24,075][00194] Avg episode reward: [(0, '29.140')] +[2024-09-01 17:41:29,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8351744. Throughput: 0: 235.5. Samples: 86552. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:41:29,078][00194] Avg episode reward: [(0, '28.605')] +[2024-09-01 17:41:34,078][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 8355840. Throughput: 0: 231.9. Samples: 87732. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:41:34,081][00194] Avg episode reward: [(0, '27.803')] +[2024-09-01 17:41:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8359936. Throughput: 0: 221.4. Samples: 88882. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:41:39,081][00194] Avg episode reward: [(0, '27.708')] +[2024-09-01 17:41:44,073][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8364032. Throughput: 0: 232.6. Samples: 89810. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:41:44,080][00194] Avg episode reward: [(0, '27.955')] +[2024-09-01 17:41:49,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8368128. Throughput: 0: 243.6. Samples: 91646. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:41:49,077][00194] Avg episode reward: [(0, '28.333')] +[2024-09-01 17:41:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8372224. Throughput: 0: 224.8. Samples: 92552. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:41:54,076][00194] Avg episode reward: [(0, '28.403')] +[2024-09-01 17:41:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8376320. Throughput: 0: 225.5. Samples: 93206. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:41:59,078][00194] Avg episode reward: [(0, '28.141')] +[2024-09-01 17:41:59,104][47741] Updated weights for policy 0, policy_version 2046 (0.0538) +[2024-09-01 17:42:04,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8384512. Throughput: 0: 233.2. Samples: 94740. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:42:04,077][00194] Avg episode reward: [(0, '27.790')] +[2024-09-01 17:42:09,078][00194] Fps is (10 sec: 1228.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8388608. Throughput: 0: 233.7. Samples: 96050. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:42:09,082][00194] Avg episode reward: [(0, '27.775')] +[2024-09-01 17:42:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8392704. Throughput: 0: 226.2. Samples: 96732. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:42:14,076][00194] Avg episode reward: [(0, '27.054')] +[2024-09-01 17:42:19,073][00194] Fps is (10 sec: 819.6, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8396800. Throughput: 0: 226.8. Samples: 97938. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:42:19,076][00194] Avg episode reward: [(0, '27.615')] +[2024-09-01 17:42:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). 
Total num frames: 8400896. Throughput: 0: 243.1. Samples: 99820. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:42:24,076][00194] Avg episode reward: [(0, '28.009')] +[2024-09-01 17:42:29,077][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 8404992. Throughput: 0: 229.7. Samples: 100146. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:42:29,086][00194] Avg episode reward: [(0, '27.523')] +[2024-09-01 17:42:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8409088. Throughput: 0: 217.7. Samples: 101442. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:42:34,082][00194] Avg episode reward: [(0, '27.837')] +[2024-09-01 17:42:39,073][00194] Fps is (10 sec: 1229.3, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8417280. Throughput: 0: 233.2. Samples: 103044. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:42:39,075][00194] Avg episode reward: [(0, '27.536')] +[2024-09-01 17:42:42,661][47741] Updated weights for policy 0, policy_version 2056 (0.0997) +[2024-09-01 17:42:44,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8421376. Throughput: 0: 237.2. Samples: 103882. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:42:44,077][00194] Avg episode reward: [(0, '27.613')] +[2024-09-01 17:42:46,930][47728] Signal inference workers to stop experience collection... (100 times) +[2024-09-01 17:42:47,001][47741] InferenceWorker_p0-w0: stopping experience collection (100 times) +[2024-09-01 17:42:48,343][47728] Signal inference workers to resume experience collection... (100 times) +[2024-09-01 17:42:48,345][47741] InferenceWorker_p0-w0: resuming experience collection (100 times) +[2024-09-01 17:42:48,355][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002057_8425472.pth... +[2024-09-01 17:42:48,470][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002003_8204288.pth +[2024-09-01 17:42:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8425472. Throughput: 0: 225.7. Samples: 104896. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:42:49,078][00194] Avg episode reward: [(0, '27.437')] +[2024-09-01 17:42:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8429568. Throughput: 0: 233.2. Samples: 106542. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:42:54,077][00194] Avg episode reward: [(0, '27.518')] +[2024-09-01 17:42:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 8433664. Throughput: 0: 234.2. Samples: 107272. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:42:59,084][00194] Avg episode reward: [(0, '27.548')] +[2024-09-01 17:43:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8437760. Throughput: 0: 235.5. Samples: 108536. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:43:04,079][00194] Avg episode reward: [(0, '27.638')] +[2024-09-01 17:43:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8441856. Throughput: 0: 227.4. Samples: 110054. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:43:09,081][00194] Avg episode reward: [(0, '27.944')] +[2024-09-01 17:43:14,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8450048. Throughput: 0: 233.7. Samples: 110660. 
Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:43:14,076][00194] Avg episode reward: [(0, '27.805')] +[2024-09-01 17:43:19,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8454144. Throughput: 0: 237.1. Samples: 112112. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:43:19,078][00194] Avg episode reward: [(0, '27.478')] +[2024-09-01 17:43:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8458240. Throughput: 0: 224.4. Samples: 113140. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:43:24,081][00194] Avg episode reward: [(0, '27.109')] +[2024-09-01 17:43:27,810][47741] Updated weights for policy 0, policy_version 2066 (0.0994) +[2024-09-01 17:43:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.8, 300 sec: 930.3). Total num frames: 8462336. Throughput: 0: 227.8. Samples: 114132. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:43:29,076][00194] Avg episode reward: [(0, '28.299')] +[2024-09-01 17:43:34,079][00194] Fps is (10 sec: 818.7, 60 sec: 955.6, 300 sec: 916.4). Total num frames: 8466432. Throughput: 0: 238.6. Samples: 115636. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:43:34,084][00194] Avg episode reward: [(0, '28.297')] +[2024-09-01 17:43:39,078][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 8470528. Throughput: 0: 231.7. Samples: 116968. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:43:39,083][00194] Avg episode reward: [(0, '28.267')] +[2024-09-01 17:43:44,073][00194] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8474624. Throughput: 0: 222.4. Samples: 117278. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:43:44,080][00194] Avg episode reward: [(0, '28.089')] +[2024-09-01 17:43:49,073][00194] Fps is (10 sec: 1229.3, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8482816. Throughput: 0: 237.7. Samples: 119232. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:43:49,082][00194] Avg episode reward: [(0, '27.878')] +[2024-09-01 17:43:54,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8486912. Throughput: 0: 231.0. Samples: 120448. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:43:54,081][00194] Avg episode reward: [(0, '27.970')] +[2024-09-01 17:43:59,075][00194] Fps is (10 sec: 819.0, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8491008. Throughput: 0: 233.6. Samples: 121174. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:43:59,088][00194] Avg episode reward: [(0, '27.649')] +[2024-09-01 17:44:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8495104. Throughput: 0: 227.6. Samples: 122354. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:44:04,081][00194] Avg episode reward: [(0, '27.529')] +[2024-09-01 17:44:09,073][00194] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8499200. Throughput: 0: 246.3. Samples: 124224. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:44:09,082][00194] Avg episode reward: [(0, '27.160')] +[2024-09-01 17:44:10,731][47741] Updated weights for policy 0, policy_version 2076 (0.1024) +[2024-09-01 17:44:14,078][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 8503296. Throughput: 0: 234.5. Samples: 124684. 
Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:44:14,085][00194] Avg episode reward: [(0, '27.277')] +[2024-09-01 17:44:19,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8507392. Throughput: 0: 222.7. Samples: 125656. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:44:19,076][00194] Avg episode reward: [(0, '27.712')] +[2024-09-01 17:44:24,073][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8511488. Throughput: 0: 234.9. Samples: 127538. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:44:24,083][00194] Avg episode reward: [(0, '28.255')] +[2024-09-01 17:44:29,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8519680. Throughput: 0: 245.5. Samples: 128324. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 17:44:29,078][00194] Avg episode reward: [(0, '28.103')] +[2024-09-01 17:44:34,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 930.3). Total num frames: 8523776. Throughput: 0: 229.8. Samples: 129572. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:44:34,079][00194] Avg episode reward: [(0, '28.428')] +[2024-09-01 17:44:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.8, 300 sec: 930.3). Total num frames: 8527872. Throughput: 0: 228.4. Samples: 130728. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:44:39,076][00194] Avg episode reward: [(0, '28.993')] +[2024-09-01 17:44:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8531968. Throughput: 0: 231.5. Samples: 131592. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:44:44,080][00194] Avg episode reward: [(0, '29.369')] +[2024-09-01 17:44:45,596][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002084_8536064.pth... +[2024-09-01 17:44:45,702][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002030_8314880.pth +[2024-09-01 17:44:49,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8536064. Throughput: 0: 242.1. Samples: 133248. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:44:49,077][00194] Avg episode reward: [(0, '29.197')] +[2024-09-01 17:44:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8540160. Throughput: 0: 222.0. Samples: 134216. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:44:54,079][00194] Avg episode reward: [(0, '29.067')] +[2024-09-01 17:44:56,478][47741] Updated weights for policy 0, policy_version 2086 (0.2080) +[2024-09-01 17:44:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8544256. Throughput: 0: 221.9. Samples: 134670. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 17:44:59,077][00194] Avg episode reward: [(0, '28.834')] +[2024-09-01 17:45:04,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8552448. Throughput: 0: 244.7. Samples: 136668. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:45:04,082][00194] Avg episode reward: [(0, '28.215')] +[2024-09-01 17:45:09,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8556544. Throughput: 0: 225.3. Samples: 137676. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 17:45:09,078][00194] Avg episode reward: [(0, '28.374')] +[2024-09-01 17:45:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.8, 300 sec: 930.3). 
Total num frames: 8560640. Throughput: 0: 222.1. Samples: 138320. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:45:14,075][00194] Avg episode reward: [(0, '29.109')]
+[2024-09-01 17:45:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8564736. Throughput: 0: 225.9. Samples: 139738. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:45:19,084][00194] Avg episode reward: [(0, '29.334')]
+[2024-09-01 17:45:24,079][00194] Fps is (10 sec: 818.8, 60 sec: 955.6, 300 sec: 930.3). Total num frames: 8568832. Throughput: 0: 241.7. Samples: 141606. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:45:24,089][00194] Avg episode reward: [(0, '30.070')]
+[2024-09-01 17:45:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8572928. Throughput: 0: 228.0. Samples: 141850. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:45:29,076][00194] Avg episode reward: [(0, '29.908')]
+[2024-09-01 17:45:34,073][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8577024. Throughput: 0: 221.6. Samples: 143218. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:45:34,082][00194] Avg episode reward: [(0, '29.328')]
+[2024-09-01 17:45:39,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8585216. Throughput: 0: 236.2. Samples: 144844. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:45:39,076][00194] Avg episode reward: [(0, '29.064')]
+[2024-09-01 17:45:39,505][47741] Updated weights for policy 0, policy_version 2096 (0.1945)
+[2024-09-01 17:45:44,075][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8589312. Throughput: 0: 243.1. Samples: 145608. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:45:44,080][00194] Avg episode reward: [(0, '29.126')]
+[2024-09-01 17:45:49,073][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8589312. Throughput: 0: 224.9. Samples: 146790. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:45:49,084][00194] Avg episode reward: [(0, '29.148')]
+[2024-09-01 17:45:54,076][00194] Fps is (10 sec: 819.1, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8597504. Throughput: 0: 233.8. Samples: 148198. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:45:54,087][00194] Avg episode reward: [(0, '29.209')]
+[2024-09-01 17:45:59,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8601600. Throughput: 0: 236.6. Samples: 148968. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:45:59,076][00194] Avg episode reward: [(0, '29.140')]
+[2024-09-01 17:46:04,073][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8605696. Throughput: 0: 234.8. Samples: 150304. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:04,082][00194] Avg episode reward: [(0, '28.622')]
+[2024-09-01 17:46:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8609792. Throughput: 0: 219.6. Samples: 151488. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:09,075][00194] Avg episode reward: [(0, '28.744')]
+[2024-09-01 17:46:14,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8613888. Throughput: 0: 230.2. Samples: 152210. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:14,076][00194] Avg episode reward: [(0, '29.571')]
+[2024-09-01 17:46:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8617984. Throughput: 0: 242.9. Samples: 154148. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:19,076][00194] Avg episode reward: [(0, '28.882')]
+[2024-09-01 17:46:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8622080. Throughput: 0: 228.8. Samples: 155138. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:24,083][00194] Avg episode reward: [(0, '28.514')]
+[2024-09-01 17:46:25,234][47741] Updated weights for policy 0, policy_version 2106 (0.1972)
+[2024-09-01 17:46:27,556][47728] Signal inference workers to stop experience collection... (150 times)
+[2024-09-01 17:46:27,596][47741] InferenceWorker_p0-w0: stopping experience collection (150 times)
+[2024-09-01 17:46:28,461][47728] Signal inference workers to resume experience collection... (150 times)
+[2024-09-01 17:46:28,462][47741] InferenceWorker_p0-w0: resuming experience collection (150 times)
+[2024-09-01 17:46:29,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8630272. Throughput: 0: 225.1. Samples: 155736. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:29,082][00194] Avg episode reward: [(0, '28.457')]
+[2024-09-01 17:46:34,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8634368. Throughput: 0: 235.5. Samples: 157388. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:34,078][00194] Avg episode reward: [(0, '28.078')]
+[2024-09-01 17:46:39,079][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 930.3). Total num frames: 8638464. Throughput: 0: 229.1. Samples: 158510. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:39,082][00194] Avg episode reward: [(0, '28.232')]
+[2024-09-01 17:46:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8642560. Throughput: 0: 227.2. Samples: 159194. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:44,076][00194] Avg episode reward: [(0, '27.772')]
+[2024-09-01 17:46:45,914][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002111_8646656.pth...
+[2024-09-01 17:46:46,010][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002057_8425472.pth
+[2024-09-01 17:46:49,073][00194] Fps is (10 sec: 819.7, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8646656. Throughput: 0: 236.6. Samples: 160950. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:49,082][00194] Avg episode reward: [(0, '27.768')]
+[2024-09-01 17:46:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8650752. Throughput: 0: 239.2. Samples: 162254. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:54,080][00194] Avg episode reward: [(0, '27.986')]
+[2024-09-01 17:46:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8654848. Throughput: 0: 236.0. Samples: 162828. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:46:59,076][00194] Avg episode reward: [(0, '28.048')]
+[2024-09-01 17:47:04,075][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8663040. Throughput: 0: 226.7. Samples: 164350. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:47:04,084][00194] Avg episode reward: [(0, '28.526')]
+[2024-09-01 17:47:07,711][47741] Updated weights for policy 0, policy_version 2116 (0.1196)
+[2024-09-01 17:47:09,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8667136. Throughput: 0: 235.0. Samples: 165714. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:47:09,081][00194] Avg episode reward: [(0, '28.320')]
+[2024-09-01 17:47:14,073][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8671232. Throughput: 0: 236.3. Samples: 166370. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:47:14,078][00194] Avg episode reward: [(0, '28.787')]
+[2024-09-01 17:47:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8675328. Throughput: 0: 222.0. Samples: 167378. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:47:19,076][00194] Avg episode reward: [(0, '27.880')]
+[2024-09-01 17:47:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8679424. Throughput: 0: 240.2. Samples: 169318. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:47:24,078][00194] Avg episode reward: [(0, '27.446')]
+[2024-09-01 17:47:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8683520. Throughput: 0: 235.2. Samples: 169776. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:47:29,080][00194] Avg episode reward: [(0, '27.774')]
+[2024-09-01 17:47:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8687616. Throughput: 0: 226.0. Samples: 171122. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:47:34,076][00194] Avg episode reward: [(0, '27.756')]
+[2024-09-01 17:47:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 916.4). Total num frames: 8691712. Throughput: 0: 229.8. Samples: 172596. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:47:39,086][00194] Avg episode reward: [(0, '27.583')]
+[2024-09-01 17:47:44,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8699904. Throughput: 0: 232.7. Samples: 173300. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:47:44,076][00194] Avg episode reward: [(0, '27.100')]
+[2024-09-01 17:47:49,074][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8704000. Throughput: 0: 228.0. Samples: 174612. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:47:49,081][00194] Avg episode reward: [(0, '26.435')]
+[2024-09-01 17:47:53,501][47741] Updated weights for policy 0, policy_version 2126 (0.1473)
+[2024-09-01 17:47:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8708096. Throughput: 0: 221.4. Samples: 175676. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:47:54,080][00194] Avg episode reward: [(0, '26.903')]
+[2024-09-01 17:47:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8712192. Throughput: 0: 228.8. Samples: 176666. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:47:59,081][00194] Avg episode reward: [(0, '25.674')]
+[2024-09-01 17:48:04,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8716288. Throughput: 0: 244.0. Samples: 178360. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:48:04,076][00194] Avg episode reward: [(0, '26.020')]
+[2024-09-01 17:48:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8720384. Throughput: 0: 227.0. Samples: 179532. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:48:09,076][00194] Avg episode reward: [(0, '26.615')]
+[2024-09-01 17:48:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8724480. Throughput: 0: 224.8. Samples: 179894. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:48:14,082][00194] Avg episode reward: [(0, '27.573')]
+[2024-09-01 17:48:19,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8732672. Throughput: 0: 239.5. Samples: 181900. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:48:19,079][00194] Avg episode reward: [(0, '27.229')]
+[2024-09-01 17:48:24,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8736768. Throughput: 0: 232.2. Samples: 183044. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0)
+[2024-09-01 17:48:24,078][00194] Avg episode reward: [(0, '27.459')]
+[2024-09-01 17:48:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8740864. Throughput: 0: 232.8. Samples: 183776. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0)
+[2024-09-01 17:48:29,079][00194] Avg episode reward: [(0, '27.559')]
+[2024-09-01 17:48:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8744960. Throughput: 0: 230.8. Samples: 184996. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0)
+[2024-09-01 17:48:34,077][00194] Avg episode reward: [(0, '27.768')]
+[2024-09-01 17:48:37,069][47741] Updated weights for policy 0, policy_version 2136 (0.1021)
+[2024-09-01 17:48:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8749056. Throughput: 0: 244.8. Samples: 186692. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0)
+[2024-09-01 17:48:39,078][00194] Avg episode reward: [(0, '27.537')]
+[2024-09-01 17:48:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8753152. Throughput: 0: 235.1. Samples: 187244. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0)
+[2024-09-01 17:48:44,080][00194] Avg episode reward: [(0, '27.537')]
+[2024-09-01 17:48:46,362][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002138_8757248.pth...
+[2024-09-01 17:48:46,512][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002084_8536064.pth
+[2024-09-01 17:48:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8757248. Throughput: 0: 221.6. Samples: 188332. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0)
+[2024-09-01 17:48:49,076][00194] Avg episode reward: [(0, '27.440')]
+[2024-09-01 17:48:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8761344. Throughput: 0: 234.0. Samples: 190064. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:48:54,083][00194] Avg episode reward: [(0, '27.980')]
+[2024-09-01 17:48:59,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8769536. Throughput: 0: 246.8. Samples: 191000. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:48:59,077][00194] Avg episode reward: [(0, '28.400')]
+[2024-09-01 17:49:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8769536. Throughput: 0: 223.1. Samples: 191940. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:49:04,081][00194] Avg episode reward: [(0, '28.569')]
+[2024-09-01 17:49:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8777728. Throughput: 0: 225.1. Samples: 193174. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:49:09,080][00194] Avg episode reward: [(0, '28.653')]
+[2024-09-01 17:49:14,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8781824. Throughput: 0: 230.7. Samples: 194156. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:49:14,076][00194] Avg episode reward: [(0, '28.217')]
+[2024-09-01 17:49:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8785920. Throughput: 0: 236.4. Samples: 195632. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:49:19,081][00194] Avg episode reward: [(0, '28.416')]
+[2024-09-01 17:49:21,535][47741] Updated weights for policy 0, policy_version 2146 (0.2257)
+[2024-09-01 17:49:24,074][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8790016. Throughput: 0: 223.9. Samples: 196766. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:49:24,077][00194] Avg episode reward: [(0, '28.271')]
+[2024-09-01 17:49:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8794112. Throughput: 0: 228.2. Samples: 197512. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:49:29,081][00194] Avg episode reward: [(0, '27.301')]
+[2024-09-01 17:49:34,073][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8802304. Throughput: 0: 243.7. Samples: 199300. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:49:34,079][00194] Avg episode reward: [(0, '26.947')]
+[2024-09-01 17:49:39,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8806400. Throughput: 0: 226.1. Samples: 200240. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:49:39,079][00194] Avg episode reward: [(0, '26.947')]
+[2024-09-01 17:49:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8810496. Throughput: 0: 221.2. Samples: 200954. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:49:44,076][00194] Avg episode reward: [(0, '27.199')]
+[2024-09-01 17:49:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8814592. Throughput: 0: 234.3. Samples: 202482. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:49:49,076][00194] Avg episode reward: [(0, '27.960')]
+[2024-09-01 17:49:54,077][00194] Fps is (10 sec: 818.9, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8818688. Throughput: 0: 237.5. Samples: 203862. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:49:54,085][00194] Avg episode reward: [(0, '28.228')]
+[2024-09-01 17:49:59,078][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 8822784. Throughput: 0: 227.4. Samples: 204390. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:49:59,082][00194] Avg episode reward: [(0, '28.568')]
+[2024-09-01 17:50:04,073][00194] Fps is (10 sec: 819.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 8826880. Throughput: 0: 222.7. Samples: 205654. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:04,079][00194] Avg episode reward: [(0, '29.417')]
+[2024-09-01 17:50:05,233][47741] Updated weights for policy 0, policy_version 2156 (0.1494)
+[2024-09-01 17:50:07,590][47728] Signal inference workers to stop experience collection... (200 times)
+[2024-09-01 17:50:07,690][47741] InferenceWorker_p0-w0: stopping experience collection (200 times)
+[2024-09-01 17:50:09,055][47728] Signal inference workers to resume experience collection... (200 times)
+[2024-09-01 17:50:09,057][47741] InferenceWorker_p0-w0: resuming experience collection (200 times)
+[2024-09-01 17:50:09,073][00194] Fps is (10 sec: 1229.4, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8835072. Throughput: 0: 237.3. Samples: 207442. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:09,076][00194] Avg episode reward: [(0, '29.478')]
+[2024-09-01 17:50:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8835072. Throughput: 0: 238.4. Samples: 208240. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:14,077][00194] Avg episode reward: [(0, '28.928')]
+[2024-09-01 17:50:19,073][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8839168. Throughput: 0: 221.2. Samples: 209254. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:19,082][00194] Avg episode reward: [(0, '28.281')]
+[2024-09-01 17:50:24,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 930.3). Total num frames: 8847360. Throughput: 0: 234.2. Samples: 210780. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:24,080][00194] Avg episode reward: [(0, '28.533')]
+[2024-09-01 17:50:29,075][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8851456. Throughput: 0: 234.6. Samples: 211510. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:29,081][00194] Avg episode reward: [(0, '28.379')]
+[2024-09-01 17:50:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8855552. Throughput: 0: 226.9. Samples: 212694. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:34,080][00194] Avg episode reward: [(0, '28.409')]
+[2024-09-01 17:50:39,073][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8859648. Throughput: 0: 231.6. Samples: 214282. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:39,078][00194] Avg episode reward: [(0, '27.912')]
+[2024-09-01 17:50:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8863744. Throughput: 0: 235.4. Samples: 214984. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:44,081][00194] Avg episode reward: [(0, '28.844')]
+[2024-09-01 17:50:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8867840. Throughput: 0: 244.4. Samples: 216652. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:49,076][00194] Avg episode reward: [(0, '28.827')]
+[2024-09-01 17:50:49,466][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002166_8871936.pth...
+[2024-09-01 17:50:49,470][47741] Updated weights for policy 0, policy_version 2166 (0.0041)
+[2024-09-01 17:50:49,639][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002111_8646656.pth
+[2024-09-01 17:50:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8871936. Throughput: 0: 226.8. Samples: 217646. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:54,076][00194] Avg episode reward: [(0, '29.740')]
+[2024-09-01 17:50:59,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 930.3). Total num frames: 8880128. Throughput: 0: 226.8. Samples: 218448. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:50:59,082][00194] Avg episode reward: [(0, '29.780')]
+[2024-09-01 17:51:04,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8884224. Throughput: 0: 236.1. Samples: 219880. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:51:04,079][00194] Avg episode reward: [(0, '29.849')]
+[2024-09-01 17:51:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8888320. Throughput: 0: 227.7. Samples: 221028. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:51:09,076][00194] Avg episode reward: [(0, '29.272')]
+[2024-09-01 17:51:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8892416. Throughput: 0: 227.6. Samples: 221750. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:51:14,082][00194] Avg episode reward: [(0, '28.903')]
+[2024-09-01 17:51:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8896512. Throughput: 0: 238.1. Samples: 223408. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:51:19,076][00194] Avg episode reward: [(0, '29.191')]
+[2024-09-01 17:51:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8900608. Throughput: 0: 234.0. Samples: 224814. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:51:24,076][00194] Avg episode reward: [(0, '29.591')]
+[2024-09-01 17:51:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8904704. Throughput: 0: 229.0. Samples: 225288. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:51:29,075][00194] Avg episode reward: [(0, '29.685')]
+[2024-09-01 17:51:34,074][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8912896. Throughput: 0: 226.5. Samples: 226844. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:51:34,082][00194] Avg episode reward: [(0, '29.773')]
+[2024-09-01 17:51:34,477][47741] Updated weights for policy 0, policy_version 2176 (0.1454)
+[2024-09-01 17:51:39,074][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8916992. Throughput: 0: 236.5. Samples: 228290. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:51:39,078][00194] Avg episode reward: [(0, '30.039')]
+[2024-09-01 17:51:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8921088. Throughput: 0: 232.8. Samples: 228922. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:51:44,079][00194] Avg episode reward: [(0, '30.317')]
+[2024-09-01 17:51:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8925184. Throughput: 0: 223.9. Samples: 229954. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:51:49,083][00194] Avg episode reward: [(0, '30.376')]
+[2024-09-01 17:51:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8929280. Throughput: 0: 234.3. Samples: 231572. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:51:54,076][00194] Avg episode reward: [(0, '30.311')]
+[2024-09-01 17:51:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8933376. Throughput: 0: 229.8. Samples: 232090. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:51:59,078][00194] Avg episode reward: [(0, '30.406')]
+[2024-09-01 17:52:04,074][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8937472. Throughput: 0: 222.9. Samples: 233440. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:52:04,084][00194] Avg episode reward: [(0, '30.699')]
+[2024-09-01 17:52:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8941568. Throughput: 0: 223.0. Samples: 234848. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:52:09,084][00194] Avg episode reward: [(0, '30.460')]
+[2024-09-01 17:52:14,073][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8945664. Throughput: 0: 222.3. Samples: 235290. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 17:52:14,079][00194] Avg episode reward: [(0, '30.489')]
+[2024-09-01 17:52:18,948][47741] Updated weights for policy 0, policy_version 2186 (0.1046)
+[2024-09-01 17:52:19,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8953856. Throughput: 0: 228.2. Samples: 237114. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:52:19,085][00194] Avg episode reward: [(0, '30.118')]
+[2024-09-01 17:52:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8953856. Throughput: 0: 217.3. Samples: 238070. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:52:24,079][00194] Avg episode reward: [(0, '30.606')]
+[2024-09-01 17:52:29,074][00194] Fps is (10 sec: 819.1, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8962048. Throughput: 0: 222.6. Samples: 238938. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 17:52:29,085][00194] Avg episode reward: [(0, '30.184')]
+[2024-09-01 17:52:34,073][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 930.3). Total num frames: 8966144. Throughput: 0: 226.7. Samples: 240156. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:52:34,076][00194] Avg episode reward: [(0, '30.236')]
+[2024-09-01 17:52:39,073][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8970240. Throughput: 0: 222.0. Samples: 241562. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:52:39,081][00194] Avg episode reward: [(0, '30.433')]
+[2024-09-01 17:52:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8974336. Throughput: 0: 222.8. Samples: 242118. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:52:44,076][00194] Avg episode reward: [(0, '30.482')]
+[2024-09-01 17:52:46,236][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002192_8978432.pth...
+[2024-09-01 17:52:46,358][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002138_8757248.pth
+[2024-09-01 17:52:49,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8978432. Throughput: 0: 228.4. Samples: 243720. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:52:49,079][00194] Avg episode reward: [(0, '30.506')]
+[2024-09-01 17:52:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8982528. Throughput: 0: 230.7. Samples: 245228. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:52:54,076][00194] Avg episode reward: [(0, '29.454')]
+[2024-09-01 17:52:59,074][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8986624. Throughput: 0: 230.8. Samples: 245676. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:52:59,080][00194] Avg episode reward: [(0, '29.401')]
+[2024-09-01 17:53:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 8990720. Throughput: 0: 222.1. Samples: 247108. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:53:04,083][00194] Avg episode reward: [(0, '29.413')]
+[2024-09-01 17:53:04,385][47741] Updated weights for policy 0, policy_version 2196 (0.0984)
+[2024-09-01 17:53:09,073][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 8998912. Throughput: 0: 232.1. Samples: 248514. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:53:09,083][00194] Avg episode reward: [(0, '29.376')]
+[2024-09-01 17:53:14,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9003008. Throughput: 0: 230.3. Samples: 249302. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:53:14,079][00194] Avg episode reward: [(0, '28.254')]
+[2024-09-01 17:53:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9007104. Throughput: 0: 226.1. Samples: 250332. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:53:19,084][00194] Avg episode reward: [(0, '28.653')]
+[2024-09-01 17:53:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9011200. Throughput: 0: 233.2. Samples: 252054. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:53:24,076][00194] Avg episode reward: [(0, '28.125')]
+[2024-09-01 17:53:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9015296. Throughput: 0: 236.8. Samples: 252772. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:53:29,076][00194] Avg episode reward: [(0, '27.323')]
+[2024-09-01 17:53:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9019392. Throughput: 0: 229.4. Samples: 254044. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:53:34,080][00194] Avg episode reward: [(0, '27.434')]
+[2024-09-01 17:53:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9023488. Throughput: 0: 225.7. Samples: 255384. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:53:39,083][00194] Avg episode reward: [(0, '27.793')]
+[2024-09-01 17:53:44,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9031680. Throughput: 0: 235.4. Samples: 256270. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:53:44,079][00194] Avg episode reward: [(0, '28.300')]
+[2024-09-01 17:53:47,791][47741] Updated weights for policy 0, policy_version 2206 (0.0527)
+[2024-09-01 17:53:49,075][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9035776. Throughput: 0: 231.1. Samples: 257510. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:53:49,079][00194] Avg episode reward: [(0, '28.630')]
+[2024-09-01 17:53:51,967][47728] Signal inference workers to stop experience collection... (250 times)
+[2024-09-01 17:53:52,041][47741] InferenceWorker_p0-w0: stopping experience collection (250 times)
+[2024-09-01 17:53:53,579][47728] Signal inference workers to resume experience collection... (250 times)
+[2024-09-01 17:53:53,581][47741] InferenceWorker_p0-w0: resuming experience collection (250 times)
+[2024-09-01 17:53:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9039872. Throughput: 0: 222.8. Samples: 258538. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:53:54,076][00194] Avg episode reward: [(0, '28.255')]
+[2024-09-01 17:53:59,075][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9043968. Throughput: 0: 228.3. Samples: 259578. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:53:59,078][00194] Avg episode reward: [(0, '28.007')]
+[2024-09-01 17:54:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9048064. Throughput: 0: 241.5. Samples: 261200. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:54:04,078][00194] Avg episode reward: [(0, '27.672')]
+[2024-09-01 17:54:09,073][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9052160. Throughput: 0: 224.9. Samples: 262176. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:54:09,078][00194] Avg episode reward: [(0, '27.186')]
+[2024-09-01 17:54:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9056256. Throughput: 0: 221.8. Samples: 262752. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:54:14,076][00194] Avg episode reward: [(0, '27.156')]
+[2024-09-01 17:54:19,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9064448. Throughput: 0: 236.8. Samples: 264700. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:54:19,076][00194] Avg episode reward: [(0, '26.479')]
+[2024-09-01 17:54:24,076][00194] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9068544. Throughput: 0: 232.4. Samples: 265844. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:54:24,080][00194] Avg episode reward: [(0, '25.856')]
+[2024-09-01 17:54:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9072640. Throughput: 0: 228.8. Samples: 266564. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:54:29,083][00194] Avg episode reward: [(0, '26.118')]
+[2024-09-01 17:54:33,247][47741] Updated weights for policy 0, policy_version 2216 (0.1225)
+[2024-09-01 17:54:34,073][00194] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9076736. Throughput: 0: 227.7. Samples: 267758. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:54:34,081][00194] Avg episode reward: [(0, '26.107')]
+[2024-09-01 17:54:39,075][00194] Fps is (10 sec: 819.0, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9080832. Throughput: 0: 243.5. Samples: 269496. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:54:39,078][00194] Avg episode reward: [(0, '25.984')]
+[2024-09-01 17:54:44,079][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 9084928. Throughput: 0: 231.4. Samples: 269990. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:54:44,087][00194] Avg episode reward: [(0, '25.637')]
+[2024-09-01 17:54:46,478][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002219_9089024.pth...
+[2024-09-01 17:54:46,594][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002166_8871936.pth
+[2024-09-01 17:54:49,073][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9089024. Throughput: 0: 220.0. Samples: 271102. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:54:49,078][00194] Avg episode reward: [(0, '26.531')]
+[2024-09-01 17:54:54,073][00194] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9093120. Throughput: 0: 235.7. Samples: 272782. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:54:54,075][00194] Avg episode reward: [(0, '26.678')]
+[2024-09-01 17:54:59,076][00194] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9101312. Throughput: 0: 245.1. Samples: 273780. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:54:59,081][00194] Avg episode reward: [(0, '26.632')]
+[2024-09-01 17:55:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9101312. Throughput: 0: 224.0. Samples: 274778. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:55:04,079][00194] Avg episode reward: [(0, '26.347')]
+[2024-09-01 17:55:09,074][00194] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9109504. Throughput: 0: 224.2. Samples: 275932. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:55:09,081][00194] Avg episode reward: [(0, '26.557')]
+[2024-09-01 17:55:14,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9113600. Throughput: 0: 230.9. Samples: 276956. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:55:14,076][00194] Avg episode reward: [(0, '26.815')]
+[2024-09-01 17:55:17,193][47741] Updated weights for policy 0, policy_version 2226 (0.1650)
+[2024-09-01 17:55:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9117696. Throughput: 0: 230.7. Samples: 278140. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:55:19,082][00194] Avg episode reward: [(0, '26.721')]
+[2024-09-01 17:55:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9121792. Throughput: 0: 219.4. Samples: 279370. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:55:24,083][00194] Avg episode reward: [(0, '27.443')]
+[2024-09-01 17:55:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9125888. Throughput: 0: 225.0. Samples: 280112. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:55:29,078][00194] Avg episode reward: [(0, '27.041')]
+[2024-09-01 17:55:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9129984. Throughput: 0: 239.2. Samples: 281864. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:55:34,076][00194] Avg episode reward: [(0, '26.704')]
+[2024-09-01 17:55:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9134080. Throughput: 0: 230.0. Samples: 283134. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:55:39,080][00194] Avg episode reward: [(0, '25.751')]
+[2024-09-01 17:55:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 916.4). Total num frames: 9138176. Throughput: 0: 217.9. Samples: 283584. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:55:44,079][00194] Avg episode reward: [(0, '25.506')]
+[2024-09-01 17:55:49,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9146368. Throughput: 0: 231.1. Samples: 285176. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:55:49,082][00194] Avg episode reward: [(0, '25.400')]
+[2024-09-01 17:55:54,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9150464. Throughput: 0: 237.2. Samples: 286606. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:55:54,078][00194] Avg episode reward: [(0, '25.441')]
+[2024-09-01 17:55:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9154560. Throughput: 0: 228.1. Samples: 287220. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:55:59,076][00194] Avg episode reward: [(0, '25.553')]
+[2024-09-01 17:56:02,337][47741] Updated weights for policy 0, policy_version 2236 (0.2024)
+[2024-09-01 17:56:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9158656. Throughput: 0: 227.6. Samples: 288380. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:04,075][00194] Avg episode reward: [(0, '25.467')]
+[2024-09-01 17:56:09,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9162752. Throughput: 0: 242.8. Samples: 290294. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:09,077][00194] Avg episode reward: [(0, '25.422')]
+[2024-09-01 17:56:14,079][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 9166848. Throughput: 0: 235.7. Samples: 290720. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:14,087][00194] Avg episode reward: [(0, '25.323')]
+[2024-09-01 17:56:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9170944. Throughput: 0: 224.1. Samples: 291948. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:19,080][00194] Avg episode reward: [(0, '25.586')]
+[2024-09-01 17:56:24,073][00194] Fps is (10 sec: 1229.4, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9179136. Throughput: 0: 229.2. Samples: 293450. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:24,078][00194] Avg episode reward: [(0, '25.686')]
+[2024-09-01 17:56:29,075][00194] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9183232. Throughput: 0: 240.2. Samples: 294394. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:29,078][00194] Avg episode reward: [(0, '26.265')]
+[2024-09-01 17:56:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9187328. Throughput: 0: 227.3. Samples: 295404. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:34,077][00194] Avg episode reward: [(0, '26.472')]
+[2024-09-01 17:56:39,073][00194] Fps is (10 sec: 819.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9191424. Throughput: 0: 224.2. Samples: 296696. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:39,077][00194] Avg episode reward: [(0, '26.659')]
+[2024-09-01 17:56:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9195520. Throughput: 0: 230.7. Samples: 297602. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:44,081][00194] Avg episode reward: [(0, '26.592')]
+[2024-09-01 17:56:44,937][47741] Updated weights for policy 0, policy_version 2246 (0.1615)
+[2024-09-01 17:56:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9199616. Throughput: 0: 238.1. Samples: 299094. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:49,076][00194] Avg episode reward: [(0, '26.841')]
+[2024-09-01 17:56:50,624][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002247_9203712.pth...
+[2024-09-01 17:56:50,754][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002192_8978432.pth
+[2024-09-01 17:56:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9203712. Throughput: 0: 218.9. Samples: 300144. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:54,076][00194] Avg episode reward: [(0, '26.851')]
+[2024-09-01 17:56:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9207808. Throughput: 0: 226.5. Samples: 300910. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:56:59,082][00194] Avg episode reward: [(0, '26.917')]
+[2024-09-01 17:57:04,075][00194] Fps is (10 sec: 1228.5, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9216000. Throughput: 0: 233.8. Samples: 302468. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:57:04,079][00194] Avg episode reward: [(0, '27.535')]
+[2024-09-01 17:57:09,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9220096. Throughput: 0: 223.3. Samples: 303500. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:57:09,080][00194] Avg episode reward: [(0, '27.383')]
+[2024-09-01 17:57:14,073][00194] Fps is (10 sec: 819.4, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 9224192. Throughput: 0: 218.6. Samples: 304230. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:57:14,084][00194] Avg episode reward: [(0, '27.213')]
+[2024-09-01 17:57:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9228288. Throughput: 0: 227.6. Samples: 305646. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:57:19,084][00194] Avg episode reward: [(0, '26.823')]
+[2024-09-01 17:57:24,074][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9232384. Throughput: 0: 239.4. Samples: 307468. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:57:24,083][00194] Avg episode reward: [(0, '26.843')]
+[2024-09-01 17:57:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9236480. Throughput: 0: 226.6. Samples: 307798. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:57:29,089][00194] Avg episode reward: [(0, '26.074')]
+[2024-09-01 17:57:31,917][47741] Updated weights for policy 0, policy_version 2256 (0.1984)
+[2024-09-01 17:57:34,073][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9240576. Throughput: 0: 219.9. Samples: 308990. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:57:34,076][00194] Avg episode reward: [(0, '26.646')]
+[2024-09-01 17:57:34,305][47728] Signal inference workers to stop experience collection... (300 times)
+[2024-09-01 17:57:34,358][47741] InferenceWorker_p0-w0: stopping experience collection (300 times)
+[2024-09-01 17:57:35,270][47728] Signal inference workers to resume experience collection... (300 times)
+[2024-09-01 17:57:35,271][47741] InferenceWorker_p0-w0: resuming experience collection (300 times)
+[2024-09-01 17:57:39,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9248768. Throughput: 0: 235.0. Samples: 310720. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:57:39,075][00194] Avg episode reward: [(0, '26.778')]
+[2024-09-01 17:57:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9248768. Throughput: 0: 236.4. Samples: 311550. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:57:44,078][00194] Avg episode reward: [(0, '26.796')]
+[2024-09-01 17:57:49,073][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9252864. Throughput: 0: 221.5. Samples: 312434. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 17:57:49,080][00194] Avg episode reward: [(0, '27.605')]
+[2024-09-01 17:57:54,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9261056. Throughput: 0: 235.6. Samples: 314100. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:57:54,081][00194] Avg episode reward: [(0, '27.461')]
+[2024-09-01 17:57:59,075][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9265152. Throughput: 0: 236.3. Samples: 314866. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:57:59,086][00194] Avg episode reward: [(0, '28.027')]
+[2024-09-01 18:58:04,080][00194] Fps is (10 sec: 818.7, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 9269248. Throughput: 0: 232.0. Samples: 316086. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:58:04,089][00194] Avg episode reward: [(0, '27.889')]
+[2024-09-01 17:58:09,076][00194] Fps is (10 sec: 819.2, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 9273344. Throughput: 0: 218.7. Samples: 317310. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 17:58:09,082][00194] Avg episode reward: [(0, '27.408')]
+[2024-09-01 17:58:14,073][00194] Fps is (10 sec: 819.7, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9277440. Throughput: 0: 231.0. Samples: 318194. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:58:14,076][00194] Avg episode reward: [(0, '28.402')]
+[2024-09-01 17:58:14,541][47741] Updated weights for policy 0, policy_version 2266 (0.2102)
+[2024-09-01 17:58:19,073][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9281536. Throughput: 0: 242.9. Samples: 319920. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:58:19,076][00194] Avg episode reward: [(0, '29.030')]
+[2024-09-01 17:58:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9285632. Throughput: 0: 226.8. Samples: 320926. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:58:24,084][00194] Avg episode reward: [(0, '29.274')]
+[2024-09-01 17:58:29,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9293824. Throughput: 0: 223.5. Samples: 321606. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:58:29,075][00194] Avg episode reward: [(0, '29.318')]
+[2024-09-01 17:58:34,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9297920. Throughput: 0: 235.6. Samples: 323036. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:58:34,077][00194] Avg episode reward: [(0, '29.675')]
+[2024-09-01 17:58:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9302016. Throughput: 0: 227.6. Samples: 324340. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:58:39,081][00194] Avg episode reward: [(0, '29.264')]
+[2024-09-01 17:58:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9306112. Throughput: 0: 223.6. Samples: 324926. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 17:58:44,077][00194] Avg episode reward: [(0, '28.850')]
+[2024-09-01 17:58:46,026][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002273_9310208.pth...
+[2024-09-01 17:58:46,130][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002219_9089024.pth
+[2024-09-01 17:58:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9310208. Throughput: 0: 233.7. Samples: 326602. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0)
+[2024-09-01 17:58:49,081][00194] Avg episode reward: [(0, '29.030')]
+[2024-09-01 17:58:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9314304. Throughput: 0: 236.4. Samples: 327948. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0)
+[2024-09-01 17:58:54,076][00194] Avg episode reward: [(0, '28.378')]
+[2024-09-01 17:58:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9318400. Throughput: 0: 229.5. Samples: 328522. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 17:58:59,076][00194] Avg episode reward: [(0, '28.371')]
+[2024-09-01 17:59:00,085][47741] Updated weights for policy 0, policy_version 2276 (0.0995)
+[2024-09-01 17:59:04,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 930.3). Total num frames: 9326592. Throughput: 0: 225.4. Samples: 330062. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 17:59:04,081][00194] Avg episode reward: [(0, '27.959')]
+[2024-09-01 17:59:09,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 930.3). Total num frames: 9330688. Throughput: 0: 234.5. Samples: 331480. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 17:59:09,075][00194] Avg episode reward: [(0, '27.625')]
+[2024-09-01 17:59:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9334784. Throughput: 0: 234.6. Samples: 332162. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 17:59:14,078][00194] Avg episode reward: [(0, '27.248')]
+[2024-09-01 17:59:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9338880. Throughput: 0: 224.4. Samples: 333134. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 17:59:19,078][00194] Avg episode reward: [(0, '27.081')]
+[2024-09-01 17:59:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9342976. Throughput: 0: 238.6. Samples: 335078. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:59:24,076][00194] Avg episode reward: [(0, '27.300')]
+[2024-09-01 17:59:29,074][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9347072. Throughput: 0: 237.2. Samples: 335602. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 17:59:29,077][00194] Avg episode reward: [(0, '27.668')]
+[2024-09-01 17:59:34,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9351168. Throughput: 0: 230.2. Samples: 336962. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 17:59:34,077][00194] Avg episode reward: [(0, '27.926')]
+[2024-09-01 17:59:39,073][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9355264. Throughput: 0: 229.1. Samples: 338256. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 17:59:39,083][00194] Avg episode reward: [(0, '28.114')]
+[2024-09-01 17:59:42,981][47741] Updated weights for policy 0, policy_version 2286 (0.1024)
+[2024-09-01 17:59:44,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9363456. Throughput: 0: 239.0. Samples: 339276. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 17:59:44,083][00194] Avg episode reward: [(0, '27.599')]
+[2024-09-01 17:59:49,077][00194] Fps is (10 sec: 1228.4, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9367552. Throughput: 0: 229.8. Samples: 340404. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 17:59:49,081][00194] Avg episode reward: [(0, '27.545')]
+[2024-09-01 17:59:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9371648. Throughput: 0: 222.0. Samples: 341470. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 17:59:54,081][00194] Avg episode reward: [(0, '27.211')]
+[2024-09-01 17:59:59,073][00194] Fps is (10 sec: 819.5, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9375744. Throughput: 0: 225.5. Samples: 342310. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 17:59:59,082][00194] Avg episode reward: [(0, '27.063')]
+[2024-09-01 18:00:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9379840. Throughput: 0: 239.6. Samples: 343914. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 18:00:04,076][00194] Avg episode reward: [(0, '27.299')]
+[2024-09-01 18:00:09,077][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 9383936. Throughput: 0: 225.4. Samples: 345220. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 18:00:09,088][00194] Avg episode reward: [(0, '27.322')]
+[2024-09-01 18:00:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9388032. Throughput: 0: 221.6. Samples: 345574. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 18:00:14,082][00194] Avg episode reward: [(0, '27.807')]
+[2024-09-01 18:00:19,073][00194] Fps is (10 sec: 1229.3, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9396224. Throughput: 0: 231.0. Samples: 347358. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0)
+[2024-09-01 18:00:19,081][00194] Avg episode reward: [(0, '27.612')]
+[2024-09-01 18:00:24,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9400320. Throughput: 0: 229.0. Samples: 348560. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0)
+[2024-09-01 18:00:24,079][00194] Avg episode reward: [(0, '27.614')]
+[2024-09-01 18:00:28,520][47741] Updated weights for policy 0, policy_version 2296 (0.1670)
+[2024-09-01 18:00:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9404416. Throughput: 0: 222.4. Samples: 349284. Policy #0 lag: (min: 1.0, avg: 1.3, max: 3.0)
+[2024-09-01 18:00:29,078][00194] Avg episode reward: [(0, '27.375')]
+[2024-09-01 18:00:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9408512. Throughput: 0: 225.9. Samples: 350568. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 18:00:34,081][00194] Avg episode reward: [(0, '27.414')]
+[2024-09-01 18:00:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9412608. Throughput: 0: 240.0. Samples: 352270. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 18:00:39,082][00194] Avg episode reward: [(0, '27.178')]
+[2024-09-01 18:00:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9416704. Throughput: 0: 234.0. Samples: 352840. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 18:00:44,081][00194] Avg episode reward: [(0, '26.971')]
+[2024-09-01 18:00:46,282][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002300_9420800.pth...
+[2024-09-01 18:00:46,397][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002247_9203712.pth
+[2024-09-01 18:00:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9420800. Throughput: 0: 222.7. Samples: 353934. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 18:00:49,075][00194] Avg episode reward: [(0, '27.308')]
+[2024-09-01 18:00:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9424896. Throughput: 0: 232.3. Samples: 355672. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 18:00:54,076][00194] Avg episode reward: [(0, '27.894')]
+[2024-09-01 18:00:59,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9433088. Throughput: 0: 245.1. Samples: 356604. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 18:00:59,076][00194] Avg episode reward: [(0, '27.773')]
+[2024-09-01 18:01:04,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9437184. Throughput: 0: 231.5. Samples: 357774. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 18:01:04,081][00194] Avg episode reward: [(0, '27.797')]
+[2024-09-01 18:01:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.8, 300 sec: 930.3). Total num frames: 9441280. Throughput: 0: 226.8. Samples: 358764. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 18:01:09,076][00194] Avg episode reward: [(0, '28.278')]
+[2024-09-01 18:01:12,428][47741] Updated weights for policy 0, policy_version 2306 (0.1038)
+[2024-09-01 18:01:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9445376. Throughput: 0: 233.8. Samples: 359806. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 18:01:14,078][00194] Avg episode reward: [(0, '27.950')]
+[2024-09-01 18:01:14,656][47728] Signal inference workers to stop experience collection... (350 times)
+[2024-09-01 18:01:14,691][47741] InferenceWorker_p0-w0: stopping experience collection (350 times)
+[2024-09-01 18:01:16,083][47728] Signal inference workers to resume experience collection... (350 times)
+[2024-09-01 18:01:16,085][47741] InferenceWorker_p0-w0: resuming experience collection (350 times)
+[2024-09-01 18:01:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9449472. Throughput: 0: 233.6. Samples: 361082. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 18:01:19,076][00194] Avg episode reward: [(0, '27.933')]
+[2024-09-01 18:01:24,074][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9453568. Throughput: 0: 224.1. Samples: 362356. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 18:01:24,078][00194] Avg episode reward: [(0, '27.933')]
+[2024-09-01 18:01:29,074][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9457664. Throughput: 0: 225.9. Samples: 363004. Policy #0 lag: (min: 1.0, avg: 1.3, max: 2.0)
+[2024-09-01 18:01:29,083][00194] Avg episode reward: [(0, '28.447')]
+[2024-09-01 18:01:34,073][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9465856. Throughput: 0: 246.0. Samples: 365006. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 18:01:34,078][00194] Avg episode reward: [(0, '28.880')]
+[2024-09-01 18:01:39,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9465856. Throughput: 0: 231.2. Samples: 366074. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 18:01:39,077][00194] Avg episode reward: [(0, '29.244')]
+[2024-09-01 18:01:44,073][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9469952. Throughput: 0: 221.7. Samples: 366580. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 18:01:44,076][00194] Avg episode reward: [(0, '29.575')]
+[2024-09-01 18:01:49,073][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9478144. Throughput: 0: 229.7. Samples: 368112. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0)
+[2024-09-01 18:01:49,081][00194] Avg episode reward: [(0, '30.006')]
+[2024-09-01 18:01:54,081][00194] Fps is (10 sec: 1227.8, 60 sec: 955.6, 300 sec: 930.3). Total num frames: 9482240. Throughput: 0: 244.7. Samples: 369778. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:01:54,086][00194] Avg episode reward: [(0, '30.527')]
+[2024-09-01 18:01:56,942][47741] Updated weights for policy 0, policy_version 2316 (0.1467)
+[2024-09-01 18:01:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9486336. Throughput: 0: 229.6. Samples: 370138. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:01:59,080][00194] Avg episode reward: [(0, '30.686')]
+[2024-09-01 18:02:04,073][00194] Fps is (10 sec: 819.9, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9490432. Throughput: 0: 230.4. Samples: 371450. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 18:02:04,076][00194] Avg episode reward: [(0, '31.365')]
+[2024-09-01 18:02:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9494528. Throughput: 0: 241.0. Samples: 373202. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 18:02:09,082][00194] Avg episode reward: [(0, '31.352')]
+[2024-09-01 18:02:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9498624. Throughput: 0: 241.0. Samples: 373850. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 18:02:14,076][00194] Avg episode reward: [(0, '32.387')]
+[2024-09-01 18:02:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9502720. Throughput: 0: 221.9. Samples: 374992. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 18:02:19,075][00194] Avg episode reward: [(0, '33.308')]
+[2024-09-01 18:02:23,747][47728] Saving new best policy, reward=33.308!
+[2024-09-01 18:02:24,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9510912. Throughput: 0: 229.1. Samples: 376384. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 18:02:24,076][00194] Avg episode reward: [(0, '32.880')]
+[2024-09-01 18:02:29,082][00194] Fps is (10 sec: 1227.8, 60 sec: 955.6, 300 sec: 930.3). Total num frames: 9515008. Throughput: 0: 239.0. Samples: 377338. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:02:29,087][00194] Avg episode reward: [(0, '32.728')]
+[2024-09-01 18:02:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9519104. Throughput: 0: 227.9. Samples: 378366. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:02:34,076][00194] Avg episode reward: [(0, '32.728')]
+[2024-09-01 18:02:39,073][00194] Fps is (10 sec: 819.9, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9523200. Throughput: 0: 216.3. Samples: 379510. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:02:39,076][00194] Avg episode reward: [(0, '32.549')]
+[2024-09-01 18:02:41,630][47741] Updated weights for policy 0, policy_version 2326 (0.0521)
+[2024-09-01 18:02:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9527296. Throughput: 0: 228.6. Samples: 380426. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:02:44,076][00194] Avg episode reward: [(0, '32.306')]
+[2024-09-01 18:02:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9531392. Throughput: 0: 241.6. Samples: 382324. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:02:49,076][00194] Avg episode reward: [(0, '32.847')]
+[2024-09-01 18:02:50,069][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002328_9535488.pth...
+[2024-09-01 18:02:50,201][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002273_9310208.pth
+[2024-09-01 18:02:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 916.4). Total num frames: 9535488. Throughput: 0: 224.4. Samples: 383300. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:02:54,077][00194] Avg episode reward: [(0, '32.847')]
+[2024-09-01 18:02:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9539584. Throughput: 0: 218.2. Samples: 383670. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 18:02:59,079][00194] Avg episode reward: [(0, '32.767')]
+[2024-09-01 18:03:04,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9547776. Throughput: 0: 233.2. Samples: 385484. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 18:03:04,076][00194] Avg episode reward: [(0, '32.921')]
+[2024-09-01 18:03:09,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9551872. Throughput: 0: 228.9. Samples: 386686. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 18:03:09,078][00194] Avg episode reward: [(0, '32.756')]
+[2024-09-01 18:03:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9555968. Throughput: 0: 222.8. Samples: 387362. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 18:03:14,077][00194] Avg episode reward: [(0, '32.784')]
+[2024-09-01 18:03:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9560064. Throughput: 0: 230.0. Samples: 388718. Policy #0 lag: (min: 1.0, avg: 1.4, max: 3.0)
+[2024-09-01 18:03:19,075][00194] Avg episode reward: [(0, '32.518')]
+[2024-09-01 18:03:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9564160. Throughput: 0: 249.3. Samples: 390728. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 18:03:24,077][00194] Avg episode reward: [(0, '32.294')]
+[2024-09-01 18:03:25,777][47741] Updated weights for policy 0, policy_version 2336 (0.0519)
+[2024-09-01 18:03:29,075][00194] Fps is (10 sec: 819.0, 60 sec: 887.6, 300 sec: 916.4). Total num frames: 9568256. Throughput: 0: 233.9. Samples: 390952. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0)
+[2024-09-01 18:03:29,081][00194] Avg episode reward: [(0, '32.163')]
+[2024-09-01 18:03:34,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9572352. Throughput: 0: 220.5. Samples: 392246. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:03:34,091][00194] Avg episode reward: [(0, '32.098')]
+[2024-09-01 18:03:39,073][00194] Fps is (10 sec: 1229.1, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9580544. Throughput: 0: 232.5. Samples: 393764. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:03:39,076][00194] Avg episode reward: [(0, '32.891')]
+[2024-09-01 18:03:44,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9584640. Throughput: 0: 246.9. Samples: 394782. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:03:44,076][00194] Avg episode reward: [(0, '33.006')]
+[2024-09-01 18:03:49,073][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9584640. Throughput: 0: 225.4. Samples: 395626. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:03:49,080][00194] Avg episode reward: [(0, '32.666')]
+[2024-09-01 18:03:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9592832. Throughput: 0: 227.8. Samples: 396936. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:03:54,076][00194] Avg episode reward: [(0, '32.172')]
+[2024-09-01 18:03:59,075][00194] Fps is (10 sec: 1228.6, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9596928. Throughput: 0: 234.8. Samples: 397930. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:03:59,082][00194] Avg episode reward: [(0, '30.917')]
+[2024-09-01 18:04:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9601024. Throughput: 0: 231.9. Samples: 399152. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:04:04,076][00194] Avg episode reward: [(0, '30.305')]
+[2024-09-01 18:04:09,073][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9605120. Throughput: 0: 216.1. Samples: 400454. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:04:09,078][00194] Avg episode reward: [(0, '30.406')]
+[2024-09-01 18:04:11,342][47741] Updated weights for policy 0, policy_version 2346 (0.0520)
+[2024-09-01 18:04:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9609216. Throughput: 0: 226.6. Samples: 401150. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:04:14,082][00194] Avg episode reward: [(0, '30.061')]
+[2024-09-01 18:04:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9613312. Throughput: 0: 236.5. Samples: 402890. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:04:19,081][00194] Avg episode reward: [(0, '30.212')]
+[2024-09-01 18:04:24,078][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 916.4). Total num frames: 9617408. Throughput: 0: 228.1. Samples: 404028. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:04:24,081][00194] Avg episode reward: [(0, '28.819')]
+[2024-09-01 18:04:29,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.8, 300 sec: 930.3). Total num frames: 9625600. Throughput: 0: 220.6. Samples: 404710. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:04:29,076][00194] Avg episode reward: [(0, '28.755')]
+[2024-09-01 18:04:34,073][00194] Fps is (10 sec: 1229.4, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9629696. Throughput: 0: 233.4. Samples: 406130. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:04:34,076][00194] Avg episode reward: [(0, '28.670')]
+[2024-09-01 18:04:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9633792. Throughput: 0: 234.4. Samples: 407486. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:04:39,076][00194] Avg episode reward: [(0, '28.003')]
+[2024-09-01 18:04:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9637888. Throughput: 0: 226.1. Samples: 408104. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:04:44,079][00194] Avg episode reward: [(0, '27.878')]
+[2024-09-01 18:04:46,661][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002354_9641984.pth...
+[2024-09-01 18:04:46,779][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002300_9420800.pth
+[2024-09-01 18:04:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9641984. Throughput: 0: 231.4. Samples: 409564. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:04:49,076][00194] Avg episode reward: [(0, '27.461')]
+[2024-09-01 18:04:54,074][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9646080. Throughput: 0: 238.2. Samples: 411172. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 18:04:54,081][00194] Avg episode reward: [(0, '27.707')]
+[2024-09-01 18:04:54,880][47741] Updated weights for policy 0, policy_version 2356 (0.1951)
+[2024-09-01 18:04:58,825][47728] Signal inference workers to stop experience collection... (400 times)
+[2024-09-01 18:04:58,938][47741] InferenceWorker_p0-w0: stopping experience collection (400 times)
+[2024-09-01 18:04:59,076][00194] Fps is (10 sec: 819.0, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9650176. Throughput: 0: 231.8. Samples: 411580. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0)
+[2024-09-01 18:04:59,087][00194] Avg episode reward: [(0, '27.191')]
+[2024-09-01 18:05:00,578][47728] Signal inference workers to resume experience collection... (400 times)
+[2024-09-01 18:05:00,578][47741] InferenceWorker_p0-w0: resuming experience collection (400 times)
+[2024-09-01 18:05:04,073][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9654272. Throughput: 0: 224.4. Samples: 412990. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0)
+[2024-09-01 18:05:04,080][00194] Avg episode reward: [(0, '27.787')]
+[2024-09-01 18:05:09,077][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9662464. Throughput: 0: 230.0. Samples: 414378. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0)
+[2024-09-01 18:05:09,085][00194] Avg episode reward: [(0, '27.479')]
+[2024-09-01 18:05:14,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9666560. Throughput: 0: 233.1. Samples: 415198.
Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 18:05:14,079][00194] Avg episode reward: [(0, '28.152')] +[2024-09-01 18:05:19,073][00194] Fps is (10 sec: 819.5, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9670656. Throughput: 0: 223.9. Samples: 416204. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 18:05:19,076][00194] Avg episode reward: [(0, '28.340')] +[2024-09-01 18:05:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.8, 300 sec: 916.4). Total num frames: 9674752. Throughput: 0: 232.1. Samples: 417932. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 18:05:24,076][00194] Avg episode reward: [(0, '28.312')] +[2024-09-01 18:05:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9678848. Throughput: 0: 234.4. Samples: 418650. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 18:05:29,078][00194] Avg episode reward: [(0, '28.669')] +[2024-09-01 18:05:34,074][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9682944. Throughput: 0: 229.9. Samples: 419910. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 18:05:34,080][00194] Avg episode reward: [(0, '29.460')] +[2024-09-01 18:05:39,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9687040. Throughput: 0: 227.9. Samples: 421428. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 18:05:39,084][00194] Avg episode reward: [(0, '28.777')] +[2024-09-01 18:05:39,969][47741] Updated weights for policy 0, policy_version 2366 (0.0062) +[2024-09-01 18:05:44,073][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9695232. Throughput: 0: 235.1. Samples: 422158. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 18:05:44,078][00194] Avg episode reward: [(0, '28.424')] +[2024-09-01 18:05:49,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 930.3). Total num frames: 9699328. Throughput: 0: 233.3. Samples: 423490. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 18:05:49,079][00194] Avg episode reward: [(0, '28.188')] +[2024-09-01 18:05:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9703424. Throughput: 0: 226.4. Samples: 424564. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 18:05:54,075][00194] Avg episode reward: [(0, '27.966')] +[2024-09-01 18:05:59,076][00194] Fps is (10 sec: 819.0, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9707520. Throughput: 0: 231.1. Samples: 425600. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 18:05:59,083][00194] Avg episode reward: [(0, '28.226')] +[2024-09-01 18:06:04,075][00194] Fps is (10 sec: 409.5, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 9707520. Throughput: 0: 231.2. Samples: 426610. Policy #0 lag: (min: 1.0, avg: 1.5, max: 3.0) +[2024-09-01 18:06:04,094][00194] Avg episode reward: [(0, '28.003')] +[2024-09-01 18:06:09,076][00194] Fps is (10 sec: 409.6, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 9711616. Throughput: 0: 209.3. Samples: 427352. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:06:09,086][00194] Avg episode reward: [(0, '28.003')] +[2024-09-01 18:06:14,073][00194] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 9715712. Throughput: 0: 201.1. Samples: 427700. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:06:14,076][00194] Avg episode reward: [(0, '28.529')] +[2024-09-01 18:06:19,073][00194] Fps is (10 sec: 819.5, 60 sec: 819.2, 300 sec: 902.5). 
Total num frames: 9719808. Throughput: 0: 205.4. Samples: 429152. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:06:19,075][00194] Avg episode reward: [(0, '28.474')] +[2024-09-01 18:06:24,074][00194] Fps is (10 sec: 819.1, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 9723904. Throughput: 0: 208.0. Samples: 430786. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:06:24,085][00194] Avg episode reward: [(0, '29.285')] +[2024-09-01 18:06:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 888.6). Total num frames: 9728000. Throughput: 0: 204.7. Samples: 431370. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:06:29,076][00194] Avg episode reward: [(0, '28.261')] +[2024-09-01 18:06:30,004][47741] Updated weights for policy 0, policy_version 2376 (0.1544) +[2024-09-01 18:06:34,073][00194] Fps is (10 sec: 819.3, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 9732096. Throughput: 0: 200.8. Samples: 432528. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:06:34,082][00194] Avg episode reward: [(0, '28.600')] +[2024-09-01 18:06:39,073][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9740288. Throughput: 0: 209.6. Samples: 433996. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:06:39,075][00194] Avg episode reward: [(0, '29.039')] +[2024-09-01 18:06:44,074][00194] Fps is (10 sec: 1228.8, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 9744384. Throughput: 0: 205.1. Samples: 434828. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:06:44,078][00194] Avg episode reward: [(0, '29.022')] +[2024-09-01 18:06:48,050][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002380_9748480.pth... +[2024-09-01 18:06:48,214][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002328_9535488.pth +[2024-09-01 18:06:49,080][00194] Fps is (10 sec: 818.7, 60 sec: 819.1, 300 sec: 902.5). Total num frames: 9748480. Throughput: 0: 205.1. Samples: 435840. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:06:49,084][00194] Avg episode reward: [(0, '28.680')] +[2024-09-01 18:06:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 9752576. Throughput: 0: 216.9. Samples: 437114. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:06:54,075][00194] Avg episode reward: [(0, '28.503')] +[2024-09-01 18:06:59,074][00194] Fps is (10 sec: 819.7, 60 sec: 819.2, 300 sec: 902.5). Total num frames: 9756672. Throughput: 0: 228.8. Samples: 437996. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:06:59,077][00194] Avg episode reward: [(0, '28.928')] +[2024-09-01 18:07:04,075][00194] Fps is (10 sec: 819.1, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9760768. Throughput: 0: 229.1. Samples: 439462. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:07:04,087][00194] Avg episode reward: [(0, '30.595')] +[2024-09-01 18:07:09,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9764864. Throughput: 0: 216.2. Samples: 440516. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:07:09,085][00194] Avg episode reward: [(0, '31.551')] +[2024-09-01 18:07:14,073][00194] Fps is (10 sec: 819.3, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9768960. Throughput: 0: 218.8. Samples: 441218. 
Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:07:14,076][00194] Avg episode reward: [(0, '31.883')] +[2024-09-01 18:07:14,662][47741] Updated weights for policy 0, policy_version 2386 (0.1149) +[2024-09-01 18:07:19,073][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9777152. Throughput: 0: 233.0. Samples: 443014. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:07:19,076][00194] Avg episode reward: [(0, '31.932')] +[2024-09-01 18:07:24,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9777152. Throughput: 0: 224.4. Samples: 444096. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:07:24,076][00194] Avg episode reward: [(0, '32.186')] +[2024-09-01 18:07:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9785344. Throughput: 0: 218.2. Samples: 444648. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:07:29,084][00194] Avg episode reward: [(0, '31.895')] +[2024-09-01 18:07:34,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9789440. Throughput: 0: 229.2. Samples: 446154. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:07:34,082][00194] Avg episode reward: [(0, '30.308')] +[2024-09-01 18:07:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9793536. Throughput: 0: 231.8. Samples: 447546. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:07:39,079][00194] Avg episode reward: [(0, '30.303')] +[2024-09-01 18:07:44,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9797632. Throughput: 0: 226.8. Samples: 448200. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:07:44,077][00194] Avg episode reward: [(0, '30.197')] +[2024-09-01 18:07:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 9801728. Throughput: 0: 223.7. Samples: 449526. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:07:49,082][00194] Avg episode reward: [(0, '30.171')] +[2024-09-01 18:07:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9805824. Throughput: 0: 238.4. Samples: 451244. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:07:54,079][00194] Avg episode reward: [(0, '29.821')] +[2024-09-01 18:07:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9809920. Throughput: 0: 237.3. Samples: 451896. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:07:59,076][00194] Avg episode reward: [(0, '29.821')] +[2024-09-01 18:08:00,456][47741] Updated weights for policy 0, policy_version 2396 (0.1299) +[2024-09-01 18:08:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9814016. Throughput: 0: 218.1. Samples: 452828. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:08:04,081][00194] Avg episode reward: [(0, '29.633')] +[2024-09-01 18:08:09,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9822208. Throughput: 0: 228.3. Samples: 454370. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:08:09,081][00194] Avg episode reward: [(0, '29.469')] +[2024-09-01 18:08:14,074][00194] Fps is (10 sec: 1228.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9826304. Throughput: 0: 237.4. Samples: 455330. 
Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:08:14,081][00194] Avg episode reward: [(0, '30.367')] +[2024-09-01 18:08:19,076][00194] Fps is (10 sec: 819.0, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 9830400. Throughput: 0: 226.1. Samples: 456328. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:08:19,084][00194] Avg episode reward: [(0, '31.097')] +[2024-09-01 18:08:24,073][00194] Fps is (10 sec: 819.3, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9834496. Throughput: 0: 228.7. Samples: 457838. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:08:24,077][00194] Avg episode reward: [(0, '31.731')] +[2024-09-01 18:08:29,073][00194] Fps is (10 sec: 819.4, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9838592. Throughput: 0: 229.9. Samples: 458544. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:08:29,077][00194] Avg episode reward: [(0, '31.720')] +[2024-09-01 18:08:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9842688. Throughput: 0: 235.6. Samples: 460128. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:08:34,077][00194] Avg episode reward: [(0, '31.138')] +[2024-09-01 18:08:39,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9846784. Throughput: 0: 221.2. Samples: 461198. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:08:39,084][00194] Avg episode reward: [(0, '30.651')] +[2024-09-01 18:08:44,079][00194] Fps is (10 sec: 1228.1, 60 sec: 955.6, 300 sec: 916.4). Total num frames: 9854976. Throughput: 0: 224.2. Samples: 461986. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:08:44,090][00194] Avg episode reward: [(0, '30.588')] +[2024-09-01 18:08:44,579][47741] Updated weights for policy 0, policy_version 2406 (0.1492) +[2024-09-01 18:08:46,895][47728] Signal inference workers to stop experience collection... (450 times) +[2024-09-01 18:08:46,933][47741] InferenceWorker_p0-w0: stopping experience collection (450 times) +[2024-09-01 18:08:47,909][47728] Signal inference workers to resume experience collection... (450 times) +[2024-09-01 18:08:47,910][47741] InferenceWorker_p0-w0: resuming experience collection (450 times) +[2024-09-01 18:08:47,912][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002407_9859072.pth... +[2024-09-01 18:08:48,022][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002354_9641984.pth +[2024-09-01 18:08:49,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9859072. Throughput: 0: 237.6. Samples: 463522. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:08:49,078][00194] Avg episode reward: [(0, '30.072')] +[2024-09-01 18:08:54,073][00194] Fps is (10 sec: 819.7, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9863168. Throughput: 0: 226.3. Samples: 464552. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:08:54,081][00194] Avg episode reward: [(0, '30.718')] +[2024-09-01 18:08:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9867264. Throughput: 0: 220.9. Samples: 465270. Policy #0 lag: (min: 1.0, avg: 1.7, max: 3.0) +[2024-09-01 18:08:59,079][00194] Avg episode reward: [(0, '30.633')] +[2024-09-01 18:09:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9871360. Throughput: 0: 231.8. Samples: 466760. 
Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:09:04,076][00194] Avg episode reward: [(0, '30.259')] +[2024-09-01 18:09:09,077][00194] Fps is (10 sec: 818.9, 60 sec: 887.4, 300 sec: 902.5). Total num frames: 9875456. Throughput: 0: 231.8. Samples: 468270. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:09:09,081][00194] Avg episode reward: [(0, '29.800')] +[2024-09-01 18:09:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9879552. Throughput: 0: 223.7. Samples: 468610. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 18:09:14,088][00194] Avg episode reward: [(0, '29.922')] +[2024-09-01 18:09:19,073][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9883648. Throughput: 0: 217.3. Samples: 469908. Policy #0 lag: (min: 1.0, avg: 1.7, max: 2.0) +[2024-09-01 18:09:19,076][00194] Avg episode reward: [(0, '29.090')] +[2024-09-01 18:09:24,074][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9891840. Throughput: 0: 232.0. Samples: 471638. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:09:24,084][00194] Avg episode reward: [(0, '28.728')] +[2024-09-01 18:09:29,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9891840. Throughput: 0: 227.6. Samples: 472226. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:09:29,077][00194] Avg episode reward: [(0, '28.489')] +[2024-09-01 18:09:29,460][47741] Updated weights for policy 0, policy_version 2416 (0.1013) +[2024-09-01 18:09:34,074][00194] Fps is (10 sec: 409.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9895936. Throughput: 0: 220.9. Samples: 473464. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:09:34,082][00194] Avg episode reward: [(0, '28.908')] +[2024-09-01 18:09:39,073][00194] Fps is (10 sec: 1228.9, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9904128. Throughput: 0: 227.5. Samples: 474788. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:09:39,076][00194] Avg episode reward: [(0, '28.223')] +[2024-09-01 18:09:44,073][00194] Fps is (10 sec: 1228.8, 60 sec: 887.6, 300 sec: 902.5). Total num frames: 9908224. Throughput: 0: 232.3. Samples: 475724. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:09:44,077][00194] Avg episode reward: [(0, '29.015')] +[2024-09-01 18:09:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9912320. Throughput: 0: 222.4. Samples: 476766. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:09:49,081][00194] Avg episode reward: [(0, '29.693')] +[2024-09-01 18:09:54,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9916416. Throughput: 0: 217.4. Samples: 478054. Policy #0 lag: (min: 1.0, avg: 1.6, max: 3.0) +[2024-09-01 18:09:54,076][00194] Avg episode reward: [(0, '30.028')] +[2024-09-01 18:09:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9920512. Throughput: 0: 228.7. Samples: 478902. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:09:59,081][00194] Avg episode reward: [(0, '30.401')] +[2024-09-01 18:10:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9924608. Throughput: 0: 236.1. Samples: 480534. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:10:04,076][00194] Avg episode reward: [(0, '29.947')] +[2024-09-01 18:10:09,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). 
Total num frames: 9928704. Throughput: 0: 219.7. Samples: 481526. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:10:09,076][00194] Avg episode reward: [(0, '29.859')] +[2024-09-01 18:10:14,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9932800. Throughput: 0: 220.9. Samples: 482166. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:10:14,076][00194] Avg episode reward: [(0, '29.654')] +[2024-09-01 18:10:14,473][47741] Updated weights for policy 0, policy_version 2426 (0.2010) +[2024-09-01 18:10:19,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9940992. Throughput: 0: 229.9. Samples: 483810. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:10:19,079][00194] Avg episode reward: [(0, '29.392')] +[2024-09-01 18:10:24,073][00194] Fps is (10 sec: 1228.8, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9945088. Throughput: 0: 223.9. Samples: 484862. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 18:10:24,076][00194] Avg episode reward: [(0, '29.002')] +[2024-09-01 18:10:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9949184. Throughput: 0: 218.8. Samples: 485570. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 18:10:29,079][00194] Avg episode reward: [(0, '28.779')] +[2024-09-01 18:10:34,073][00194] Fps is (10 sec: 819.2, 60 sec: 955.7, 300 sec: 902.5). Total num frames: 9953280. Throughput: 0: 227.9. Samples: 487022. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 18:10:34,081][00194] Avg episode reward: [(0, '28.756')] +[2024-09-01 18:10:39,074][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9957376. Throughput: 0: 235.3. Samples: 488642. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 18:10:39,081][00194] Avg episode reward: [(0, '27.985')] +[2024-09-01 18:10:44,078][00194] Fps is (10 sec: 818.8, 60 sec: 887.4, 300 sec: 888.6). Total num frames: 9961472. Throughput: 0: 223.0. Samples: 488938. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:10:44,085][00194] Avg episode reward: [(0, '27.912')] +[2024-09-01 18:10:46,673][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002433_9965568.pth... +[2024-09-01 18:10:46,784][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002380_9748480.pth +[2024-09-01 18:10:49,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9965568. Throughput: 0: 218.4. Samples: 490360. Policy #0 lag: (min: 1.0, avg: 1.6, max: 2.0) +[2024-09-01 18:10:49,085][00194] Avg episode reward: [(0, '27.814')] +[2024-09-01 18:10:54,073][00194] Fps is (10 sec: 819.6, 60 sec: 887.5, 300 sec: 888.6). Total num frames: 9969664. Throughput: 0: 233.8. Samples: 492046. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 18:10:54,085][00194] Avg episode reward: [(0, '28.309')] +[2024-09-01 18:10:59,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9973760. Throughput: 0: 235.5. Samples: 492762. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 18:10:59,076][00194] Avg episode reward: [(0, '28.908')] +[2024-09-01 18:10:59,718][47741] Updated weights for policy 0, policy_version 2436 (0.0040) +[2024-09-01 18:11:04,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 902.5). Total num frames: 9977856. Throughput: 0: 221.2. Samples: 493766. 
Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 18:11:04,077][00194] Avg episode reward: [(0, '28.967')] +[2024-09-01 18:11:09,073][00194] Fps is (10 sec: 1228.8, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9986048. Throughput: 0: 226.7. Samples: 495062. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 18:11:09,076][00194] Avg episode reward: [(0, '28.999')] +[2024-09-01 18:11:14,076][00194] Fps is (10 sec: 1228.4, 60 sec: 955.7, 300 sec: 916.4). Total num frames: 9990144. Throughput: 0: 233.6. Samples: 496082. Policy #0 lag: (min: 1.0, avg: 1.5, max: 2.0) +[2024-09-01 18:11:14,079][00194] Avg episode reward: [(0, '28.937')] +[2024-09-01 18:11:19,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9994240. Throughput: 0: 223.0. Samples: 497058. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 18:11:19,081][00194] Avg episode reward: [(0, '29.295')] +[2024-09-01 18:11:24,073][00194] Fps is (10 sec: 819.5, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 9998336. Throughput: 0: 218.9. Samples: 498492. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 18:11:24,082][00194] Avg episode reward: [(0, '30.067')] +[2024-09-01 18:11:29,073][00194] Fps is (10 sec: 819.2, 60 sec: 887.5, 300 sec: 916.4). Total num frames: 10002432. Throughput: 0: 227.8. Samples: 499188. Policy #0 lag: (min: 1.0, avg: 1.4, max: 2.0) +[2024-09-01 18:11:29,081][00194] Avg episode reward: [(0, '30.010')] +[2024-09-01 18:11:30,298][47728] Stopping Batcher_0... +[2024-09-01 18:11:30,300][47728] Loop batcher_evt_loop terminating... +[2024-09-01 18:11:30,298][00194] Component Batcher_0 stopped! +[2024-09-01 18:11:30,443][47741] Weights refcount: 2 0 +[2024-09-01 18:11:30,445][47741] Stopping InferenceWorker_p0-w0... +[2024-09-01 18:11:30,450][47741] Loop inference_proc0-0_evt_loop terminating... +[2024-09-01 18:11:30,446][00194] Component InferenceWorker_p0-w0 stopped! +[2024-09-01 18:11:30,788][00194] Component RolloutWorker_w0 stopped! +[2024-09-01 18:11:30,797][47742] Stopping RolloutWorker_w0... +[2024-09-01 18:11:30,798][47742] Loop rollout_proc0_evt_loop terminating... +[2024-09-01 18:11:30,801][00194] Component RolloutWorker_w2 stopped! +[2024-09-01 18:11:30,810][47744] Stopping RolloutWorker_w2... +[2024-09-01 18:11:30,810][47744] Loop rollout_proc2_evt_loop terminating... +[2024-09-01 18:11:30,824][00194] Component RolloutWorker_w4 stopped! +[2024-09-01 18:11:30,836][47745] Stopping RolloutWorker_w4... +[2024-09-01 18:11:30,837][47745] Loop rollout_proc4_evt_loop terminating... +[2024-09-01 18:11:30,861][47747] Stopping RolloutWorker_w5... +[2024-09-01 18:11:30,870][47747] Loop rollout_proc5_evt_loop terminating... +[2024-09-01 18:11:30,861][00194] Component RolloutWorker_w5 stopped! +[2024-09-01 18:11:30,876][00194] Component RolloutWorker_w6 stopped! +[2024-09-01 18:11:30,885][47748] Stopping RolloutWorker_w6... +[2024-09-01 18:11:30,886][47748] Loop rollout_proc6_evt_loop terminating... +[2024-09-01 18:11:30,935][47743] Stopping RolloutWorker_w1... +[2024-09-01 18:11:30,935][00194] Component RolloutWorker_w1 stopped! +[2024-09-01 18:11:30,959][47743] Loop rollout_proc1_evt_loop terminating... +[2024-09-01 18:11:30,988][47749] Stopping RolloutWorker_w7... +[2024-09-01 18:11:30,988][00194] Component RolloutWorker_w7 stopped! +[2024-09-01 18:11:30,988][47749] Loop rollout_proc7_evt_loop terminating... +[2024-09-01 18:11:31,044][47746] Stopping RolloutWorker_w3... +[2024-09-01 18:11:31,051][47746] Loop rollout_proc3_evt_loop terminating... 
+[2024-09-01 18:11:31,046][00194] Component RolloutWorker_w3 stopped!
+[2024-09-01 18:11:36,252][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002444_10010624.pth...
+[2024-09-01 18:11:36,375][47728] Removing /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002407_9859072.pth
+[2024-09-01 18:11:36,389][47728] Saving /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002444_10010624.pth...
+[2024-09-01 18:11:36,596][47728] Stopping LearnerWorker_p0...
+[2024-09-01 18:11:36,598][47728] Loop learner_proc0_evt_loop terminating...
+[2024-09-01 18:11:36,603][00194] Component LearnerWorker_p0 stopped!
+[2024-09-01 18:11:36,608][00194] Waiting for process learner_proc0 to stop...
+[2024-09-01 18:11:37,494][00194] Waiting for process inference_proc0-0 to join...
+[2024-09-01 18:11:37,502][00194] Waiting for process rollout_proc0 to join...
+[2024-09-01 18:11:37,589][00194] Waiting for process rollout_proc1 to join...
+[2024-09-01 18:11:37,782][00194] Waiting for process rollout_proc2 to join...
+[2024-09-01 18:11:37,788][00194] Waiting for process rollout_proc3 to join...
+[2024-09-01 18:11:37,799][00194] Waiting for process rollout_proc4 to join...
+[2024-09-01 18:11:37,807][00194] Waiting for process rollout_proc5 to join...
+[2024-09-01 18:11:37,815][00194] Waiting for process rollout_proc6 to join...
+[2024-09-01 18:11:37,820][00194] Waiting for process rollout_proc7 to join...
+[2024-09-01 18:11:37,826][00194] Batcher 0 profile tree view:
+batching: 10.3939, releasing_batches: 0.1251
+[2024-09-01 18:11:37,829][00194] InferenceWorker_p0-w0 profile tree view:
+wait_policy: 0.0054
+ wait_policy_total: 30.4037
+update_model: 76.2508
+ weight_update: 0.1034
+one_step: 0.0511
+ handle_policy_step: 1401.8384
+ deserialize: 44.4015, stack: 7.9121, obs_to_device_normalize: 242.5023, forward: 1021.7831, send_messages: 32.5878
+ prepare_outputs: 16.4232
+ to_cpu: 1.7060
+[2024-09-01 18:11:37,832][00194] Learner 0 profile tree view:
+misc: 0.0031, prepare_batch: 612.7118
+train: 1557.2614
+ epoch_init: 0.0054, minibatch_init: 0.0073, losses_postprocess: 0.0876, kl_divergence: 0.2529, after_optimizer: 1.2758
+ calculate_losses: 758.4823
+ losses_init: 0.0021, forward_head: 677.4795, bptt_initial: 1.9713, tail: 1.6671, advantages_returns: 0.1196, losses: 0.8321
+ bptt: 76.1046
+ bptt_forward_core: 75.6967
+ update: 796.8090
+ clip: 1.8411
+[2024-09-01 18:11:37,834][00194] RolloutWorker_w0 profile tree view:
+wait_for_trajectories: 0.5442, enqueue_policy_requests: 27.1591, env_step: 810.8432, overhead: 19.9112, complete_rollouts: 8.6195
+save_policy_outputs: 21.0470
+ split_output_tensors: 6.7242
+[2024-09-01 18:11:37,835][00194] RolloutWorker_w7 profile tree view:
+wait_for_trajectories: 0.3561, enqueue_policy_requests: 27.7497, env_step: 799.2099, overhead: 18.5998, complete_rollouts: 8.5441
+save_policy_outputs: 19.9770
+ split_output_tensors: 6.8093
+[2024-09-01 18:11:37,837][00194] Loop Runner_EvtLoop terminating...
+[2024-09-01 18:11:37,839][00194] Runner profile tree view:
+main_loop: 2211.5626
+[2024-09-01 18:11:37,841][00194] Collected {0: 10010624}, FPS: 903.8
+[2024-09-01 18:11:37,890][00194] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json
+[2024-09-01 18:11:37,898][00194] Overriding arg 'num_workers' with value 1 passed from command line
+[2024-09-01 18:11:37,903][00194] Adding new argument 'no_render'=True that is not in the saved config file!
+[2024-09-01 18:11:37,905][00194] Adding new argument 'save_video'=True that is not in the saved config file!
+[2024-09-01 18:11:37,909][00194] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
+[2024-09-01 18:11:37,912][00194] Adding new argument 'video_name'=None that is not in the saved config file!
+[2024-09-01 18:11:37,913][00194] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
+[2024-09-01 18:11:37,919][00194] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
+[2024-09-01 18:11:37,921][00194] Adding new argument 'push_to_hub'=True that is not in the saved config file!
+[2024-09-01 18:11:37,922][00194] Adding new argument 'hf_repository'='jarski/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
+[2024-09-01 18:11:37,924][00194] Adding new argument 'policy_index'=0 that is not in the saved config file!
+[2024-09-01 18:11:37,925][00194] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
+[2024-09-01 18:11:37,926][00194] Adding new argument 'train_script'=None that is not in the saved config file!
+[2024-09-01 18:11:37,928][00194] Adding new argument 'enjoy_script'=None that is not in the saved config file!
+[2024-09-01 18:11:37,930][00194] Using frameskip 1 and render_action_repeat=4 for evaluation
+[2024-09-01 18:11:37,944][00194] RunningMeanStd input shape: (3, 72, 128)
+[2024-09-01 18:11:37,955][00194] RunningMeanStd input shape: (1,)
+[2024-09-01 18:11:37,980][00194] ConvEncoder: input_channels=3
+[2024-09-01 18:11:38,038][00194] Conv encoder output size: 512
+[2024-09-01 18:11:38,041][00194] Policy head output size: 512
+[2024-09-01 18:11:38,070][00194] Loading state from checkpoint /content/train_dir/default_experiment/checkpoint_p0/checkpoint_000002444_10010624.pth...
+[2024-09-01 18:11:38,731][00194] Num frames 100...
+[2024-09-01 18:11:38,935][00194] Num frames 200...
+[2024-09-01 18:11:39,168][00194] Num frames 300...
+[2024-09-01 18:11:39,401][00194] Num frames 400...
+[2024-09-01 18:11:39,613][00194] Num frames 500...
+[2024-09-01 18:11:39,834][00194] Num frames 600...
+[2024-09-01 18:11:39,946][00194] Avg episode rewards: #0: 12.250, true rewards: #0: 6.250
+[2024-09-01 18:11:39,948][00194] Avg episode reward: 12.250, avg true_objective: 6.250
+[2024-09-01 18:11:40,113][00194] Num frames 700...
+[2024-09-01 18:11:40,322][00194] Num frames 800...
+[2024-09-01 18:11:40,521][00194] Num frames 900...
+[2024-09-01 18:11:40,738][00194] Num frames 1000...
+[2024-09-01 18:11:40,946][00194] Num frames 1100...
+[2024-09-01 18:11:41,160][00194] Num frames 1200...
+[2024-09-01 18:11:41,357][00194] Num frames 1300...
+[2024-09-01 18:11:41,473][00194] Avg episode rewards: #0: 13.145, true rewards: #0: 6.645
+[2024-09-01 18:11:41,476][00194] Avg episode reward: 13.145, avg true_objective: 6.645
+[2024-09-01 18:11:41,625][00194] Num frames 1400...
+[2024-09-01 18:11:41,835][00194] Num frames 1500...
+[2024-09-01 18:11:42,042][00194] Num frames 1600...
+[2024-09-01 18:11:42,279][00194] Num frames 1700...
+[2024-09-01 18:11:42,473][00194] Num frames 1800...
+[2024-09-01 18:11:42,684][00194] Avg episode rewards: #0: 12.243, true rewards: #0: 6.243
+[2024-09-01 18:11:42,686][00194] Avg episode reward: 12.243, avg true_objective: 6.243
+[2024-09-01 18:11:42,743][00194] Num frames 1900...
+[2024-09-01 18:11:42,953][00194] Num frames 2000...
+[2024-09-01 18:11:43,175][00194] Num frames 2100...
+[2024-09-01 18:11:43,408][00194] Num frames 2200...
+[2024-09-01 18:11:43,817][00194] Num frames 2300...
+[2024-09-01 18:11:44,193][00194] Num frames 2400...
+[2024-09-01 18:11:44,413][00194] Num frames 2500...
+[2024-09-01 18:11:44,636][00194] Num frames 2600...
+[2024-09-01 18:11:44,857][00194] Num frames 2700...
+[2024-09-01 18:11:44,927][00194] Avg episode rewards: #0: 12.763, true rewards: #0: 6.762
+[2024-09-01 18:11:44,930][00194] Avg episode reward: 12.763, avg true_objective: 6.762
+[2024-09-01 18:11:45,137][00194] Num frames 2800...
+[2024-09-01 18:11:45,356][00194] Num frames 2900...
+[2024-09-01 18:11:45,566][00194] Num frames 3000...
+[2024-09-01 18:11:45,802][00194] Num frames 3100...
+[2024-09-01 18:11:46,017][00194] Num frames 3200...
+[2024-09-01 18:11:46,239][00194] Num frames 3300...
+[2024-09-01 18:11:46,460][00194] Num frames 3400...
+[2024-09-01 18:11:46,684][00194] Num frames 3500...
+[2024-09-01 18:11:46,918][00194] Num frames 3600...
+[2024-09-01 18:11:47,248][00194] Num frames 3700...
+[2024-09-01 18:11:47,533][00194] Num frames 3800...
+[2024-09-01 18:11:47,831][00194] Num frames 3900...
+[2024-09-01 18:11:48,120][00194] Num frames 4000...
+[2024-09-01 18:11:48,416][00194] Num frames 4100...
+[2024-09-01 18:11:48,725][00194] Num frames 4200...
+[2024-09-01 18:11:49,072][00194] Num frames 4300...
+[2024-09-01 18:11:49,281][00194] Avg episode rewards: #0: 19.292, true rewards: #0: 8.692
+[2024-09-01 18:11:49,285][00194] Avg episode reward: 19.292, avg true_objective: 8.692
+[2024-09-01 18:11:49,485][00194] Num frames 4400...
+[2024-09-01 18:11:49,830][00194] Num frames 4500...
+[2024-09-01 18:11:50,087][00194] Num frames 4600...
+[2024-09-01 18:11:50,306][00194] Num frames 4700...
+[2024-09-01 18:11:50,510][00194] Num frames 4800...
+[2024-09-01 18:11:50,725][00194] Num frames 4900...
+[2024-09-01 18:11:50,953][00194] Num frames 5000...
+[2024-09-01 18:11:51,203][00194] Num frames 5100...
+[2024-09-01 18:11:51,365][00194] Avg episode rewards: #0: 19.243, true rewards: #0: 8.577
+[2024-09-01 18:11:51,367][00194] Avg episode reward: 19.243, avg true_objective: 8.577
+[2024-09-01 18:11:51,488][00194] Num frames 5200...
+[2024-09-01 18:11:51,699][00194] Num frames 5300...
+[2024-09-01 18:11:51,911][00194] Num frames 5400...
+[2024-09-01 18:11:52,132][00194] Num frames 5500...
+[2024-09-01 18:11:52,337][00194] Num frames 5600...
+[2024-09-01 18:11:52,550][00194] Num frames 5700...
+[2024-09-01 18:11:52,780][00194] Num frames 5800...
+[2024-09-01 18:11:53,000][00194] Num frames 5900...
+[2024-09-01 18:11:53,226][00194] Num frames 6000...
+[2024-09-01 18:11:53,435][00194] Num frames 6100...
+[2024-09-01 18:11:53,656][00194] Num frames 6200...
+[2024-09-01 18:11:53,877][00194] Num frames 6300...
+[2024-09-01 18:11:54,108][00194] Num frames 6400...
+[2024-09-01 18:11:54,340][00194] Num frames 6500...
+[2024-09-01 18:11:54,555][00194] Num frames 6600...
+[2024-09-01 18:11:54,777][00194] Num frames 6700...
+[2024-09-01 18:11:55,000][00194] Num frames 6800...
+[2024-09-01 18:11:55,233][00194] Num frames 6900...
+[2024-09-01 18:11:55,448][00194] Num frames 7000...
+[2024-09-01 18:11:55,668][00194] Num frames 7100...
+[2024-09-01 18:11:55,878][00194] Num frames 7200...
+[2024-09-01 18:11:56,038][00194] Avg episode rewards: #0: 24.637, true rewards: #0: 10.351
+[2024-09-01 18:11:56,040][00194] Avg episode reward: 24.637, avg true_objective: 10.351
+[2024-09-01 18:11:56,170][00194] Num frames 7300...
+[2024-09-01 18:11:56,380][00194] Num frames 7400...
+[2024-09-01 18:11:56,602][00194] Num frames 7500...
+[2024-09-01 18:11:56,812][00194] Num frames 7600...
+[2024-09-01 18:11:57,023][00194] Num frames 7700...
+[2024-09-01 18:11:57,268][00194] Num frames 7800...
+[2024-09-01 18:11:57,487][00194] Num frames 7900...
+[2024-09-01 18:11:57,722][00194] Num frames 8000...
+[2024-09-01 18:11:57,956][00194] Num frames 8100...
+[2024-09-01 18:11:58,193][00194] Num frames 8200...
+[2024-09-01 18:11:58,425][00194] Num frames 8300...
+[2024-09-01 18:11:58,640][00194] Num frames 8400...
+[2024-09-01 18:11:58,850][00194] Num frames 8500...
+[2024-09-01 18:11:59,069][00194] Num frames 8600...
+[2024-09-01 18:11:59,285][00194] Num frames 8700...
+[2024-09-01 18:11:59,488][00194] Num frames 8800...
+[2024-09-01 18:11:59,710][00194] Avg episode rewards: #0: 26.972, true rewards: #0: 11.097
+[2024-09-01 18:11:59,711][00194] Avg episode reward: 26.972, avg true_objective: 11.097
+[2024-09-01 18:11:59,758][00194] Num frames 8900...
+[2024-09-01 18:11:59,995][00194] Num frames 9000...
+[2024-09-01 18:12:00,319][00194] Num frames 9100...
+[2024-09-01 18:12:00,590][00194] Num frames 9200...
+[2024-09-01 18:12:00,872][00194] Num frames 9300...
+[2024-09-01 18:12:01,151][00194] Num frames 9400...
+[2024-09-01 18:12:01,434][00194] Num frames 9500...
+[2024-09-01 18:12:01,727][00194] Num frames 9600...
+[2024-09-01 18:12:02,050][00194] Num frames 9700...
+[2024-09-01 18:12:02,386][00194] Num frames 9800...
+[2024-09-01 18:12:02,696][00194] Num frames 9900...
+[2024-09-01 18:12:03,013][00194] Num frames 10000...
+[2024-09-01 18:12:03,293][00194] Num frames 10100...
+[2024-09-01 18:12:03,519][00194] Num frames 10200...
+[2024-09-01 18:12:03,735][00194] Num frames 10300...
+[2024-09-01 18:12:03,973][00194] Num frames 10400...
+[2024-09-01 18:12:04,211][00194] Num frames 10500...
+[2024-09-01 18:12:04,451][00194] Num frames 10600...
+[2024-09-01 18:12:04,551][00194] Avg episode rewards: #0: 28.908, true rewards: #0: 11.797
+[2024-09-01 18:12:04,553][00194] Avg episode reward: 28.908, avg true_objective: 11.797
+[2024-09-01 18:12:04,735][00194] Num frames 10700...
+[2024-09-01 18:12:04,952][00194] Num frames 10800...
+[2024-09-01 18:12:05,173][00194] Num frames 10900...
+[2024-09-01 18:12:05,378][00194] Num frames 11000...
+[2024-09-01 18:12:05,604][00194] Num frames 11100...
+[2024-09-01 18:12:05,821][00194] Num frames 11200...
+[2024-09-01 18:12:06,025][00194] Num frames 11300...
+[2024-09-01 18:12:06,240][00194] Num frames 11400...
+[2024-09-01 18:12:06,449][00194] Num frames 11500...
+[2024-09-01 18:12:06,664][00194] Num frames 11600...
+[2024-09-01 18:12:06,878][00194] Num frames 11700...
+[2024-09-01 18:12:07,103][00194] Num frames 11800...
+[2024-09-01 18:12:07,318][00194] Num frames 11900...
+[2024-09-01 18:12:07,539][00194] Num frames 12000...
+[2024-09-01 18:12:07,751][00194] Num frames 12100...
+[2024-09-01 18:12:07,865][00194] Avg episode rewards: #0: 30.226, true rewards: #0: 12.126
+[2024-09-01 18:12:07,867][00194] Avg episode reward: 30.226, avg true_objective: 12.126
+[2024-09-01 18:13:29,016][00194] Replay video saved to /content/train_dir/default_experiment/replay.mp4!