{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 2.895887851715088,
"min": 2.882753849029541,
"max": 3.295729160308838,
"count": 226
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 57454.4140625,
"min": 25651.0703125,
"max": 105463.2890625,
"count": 226
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 146.83333333333334,
"min": 105.3695652173913,
"max": 999.0,
"count": 226
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 21144.0,
"min": 12844.0,
"max": 28068.0,
"count": 226
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1316.8759414108188,
"min": 1198.1496415677789,
"max": 1319.7872740790808,
"count": 219
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 94815.06778157895,
"min": 2400.1128337129485,
"max": 121098.57561141098,
"count": 219
},
"SoccerTwos.Step.mean": {
"value": 2259678.0,
"min": 9050.0,
"max": 2259678.0,
"count": 226
},
"SoccerTwos.Step.sum": {
"value": 2259678.0,
"min": 9050.0,
"max": 2259678.0,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": 0.07486466318368912,
"min": -0.07765861600637436,
"max": 0.11771281063556671,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": 5.3153910636901855,
"min": -2.096782684326172,
"max": 7.782065391540527,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": 0.07389528304338455,
"min": -0.07905006408691406,
"max": 0.12164086103439331,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": 5.246565341949463,
"min": -2.1343517303466797,
"max": 7.895816802978516,
"count": 226
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 226
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.016436619657865713,
"min": -0.7077000001445413,
"max": 0.5363320775751798,
"count": 226
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 1.1669999957084656,
"min": -16.336400032043457,
"max": 28.425600111484528,
"count": 226
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.016436619657865713,
"min": -0.7077000001445413,
"max": 0.5363320775751798,
"count": 226
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 1.1669999957084656,
"min": -16.336400032043457,
"max": 28.425600111484528,
"count": 226
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 226
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 226
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.016837426551501267,
"min": 0.01198086826504247,
"max": 0.024409048135081927,
"count": 106
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.016837426551501267,
"min": 0.01198086826504247,
"max": 0.024409048135081927,
"count": 106
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.03583123578379552,
"min": 0.00015843015717109665,
"max": 0.04081849145392577,
"count": 106
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.03583123578379552,
"min": 0.00015843015717109665,
"max": 0.04081849145392577,
"count": 106
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.036340306202570595,
"min": 0.00016699927970572996,
"max": 0.04152804675201575,
"count": 106
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.036340306202570595,
"min": 0.00016699927970572996,
"max": 0.04152804675201575,
"count": 106
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 106
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 106
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 106
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 106
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 106
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 106
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1735163089",
"python_version": "3.10.12 (main, Jul 5 2023, 15:34:07) [Clang 14.0.6 ]",
"command_line_arguments": "/usr/local/anaconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos/SoccerTwos.app --run-id=SoccerTwos --no-graphics",
"mlagents_version": "1.2.0.dev0",
"mlagents_envs_version": "1.2.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "2.2.2",
"numpy_version": "1.23.5",
"end_time_seconds": "1735172997"
},
"total": 9908.326268667006,
"count": 1,
"self": 0.416268749977462,
"children": {
"run_training.setup": {
"total": 0.030486042029224336,
"count": 1,
"self": 0.030486042029224336
},
"TrainerController.start_learning": {
"total": 9907.879513875,
"count": 1,
"self": 1.8073161386419088,
"children": {
"TrainerController._reset_env": {
"total": 3.9221587099600583,
"count": 12,
"self": 3.9221587099600583
},
"TrainerController.advance": {
"total": 9902.007278526435,
"count": 146925,
"self": 1.6621134666493163,
"children": {
"env_step": {
"total": 7814.5486751960125,
"count": 146925,
"self": 7559.627500034054,
"children": {
"SubprocessEnvManager._take_step": {
"total": 253.58589280885644,
"count": 146925,
"self": 8.642564501496963,
"children": {
"TorchPolicy.evaluate": {
"total": 244.94332830735948,
"count": 289916,
"self": 244.94332830735948
}
}
},
"workers": {
"total": 1.3352823531022295,
"count": 146924,
"self": 0.0,
"children": {
"worker_root": {
"total": 9902.382326544495,
"count": 146924,
"is_parallel": true,
"self": 2541.4676400698954,
"children": {
"steps_from_proto": {
"total": 0.020308081060647964,
"count": 24,
"is_parallel": true,
"self": 0.002620324376039207,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.017687756684608757,
"count": 96,
"is_parallel": true,
"self": 0.017687756684608757
}
}
},
"UnityEnvironment.step": {
"total": 7360.894378393539,
"count": 146924,
"is_parallel": true,
"self": 18.734035621280782,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 123.97077023831662,
"count": 146924,
"is_parallel": true,
"self": 123.97077023831662
},
"communicator.exchange": {
"total": 6978.391198601341,
"count": 146924,
"is_parallel": true,
"self": 6978.391198601341
},
"steps_from_proto": {
"total": 239.79837393260095,
"count": 293848,
"is_parallel": true,
"self": 28.120688659138978,
"children": {
"_process_rank_one_or_two_observation": {
"total": 211.67768527346198,
"count": 1175392,
"is_parallel": true,
"self": 211.67768527346198
}
}
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 2085.796489863773,
"count": 146924,
"self": 14.571734376833774,
"children": {
"process_trajectory": {
"total": 277.76263502566144,
"count": 146924,
"self": 277.1710022756597,
"children": {
"RLTrainer._checkpoint": {
"total": 0.591632750001736,
"count": 4,
"self": 0.591632750001736
}
}
},
"_update_policy": {
"total": 1793.4621204612777,
"count": 106,
"self": 173.52280944015365,
"children": {
"TorchPOCAOptimizer.update": {
"total": 1619.939311021124,
"count": 3180,
"self": 1619.939311021124
}
}
}
}
}
}
},
"trainer_threads": {
"total": 6.670597940683365e-07,
"count": 1,
"self": 6.670597940683365e-07
},
"TrainerController._save_models": {
"total": 0.14275983290281147,
"count": 1,
"self": 0.0012041248846799135,
"children": {
"RLTrainer._checkpoint": {
"total": 0.14155570801813155,
"count": 1,
"self": 0.14155570801813155
}
}
}
}
}
}
}