Add poca SoccerTwos MARL model (c968226)
{
"name": "root",
"gauges": {
"SoccerTwos.Policy.Entropy.mean": {
"value": 3.183333396911621,
"min": 3.175123691558838,
"max": 3.295633316040039,
"count": 100
},
"SoccerTwos.Policy.Entropy.sum": {
"value": 55008.0,
"min": 32932.35546875,
"max": 146079.828125,
"count": 100
},
"SoccerTwos.Environment.EpisodeLength.mean": {
"value": 999.0,
"min": 378.9230769230769,
"max": 999.0,
"count": 100
},
"SoccerTwos.Environment.EpisodeLength.sum": {
"value": 19980.0,
"min": 16392.0,
"max": 23396.0,
"count": 100
},
"SoccerTwos.Self-play.ELO.mean": {
"value": 1225.009838754105,
"min": 1195.553510493238,
"max": 1225.009838754105,
"count": 84
},
"SoccerTwos.Self-play.ELO.sum": {
"value": 9800.07871003284,
"min": 2392.555713048816,
"max": 16858.30160533153,
"count": 84
},
"SoccerTwos.Step.mean": {
"value": 999726.0,
"min": 9446.0,
"max": 999726.0,
"count": 100
},
"SoccerTwos.Step.sum": {
"value": 999726.0,
"min": 9446.0,
"max": 999726.0,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.mean": {
"value": -0.006927570793777704,
"min": -0.08005482703447342,
"max": 0.018411612138152122,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicBaselineEstimate.sum": {
"value": -0.06927570700645447,
"min": -1.8708744049072266,
"max": 0.2761741876602173,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.mean": {
"value": -0.005961025599390268,
"min": -0.0799911767244339,
"max": 0.018505746498703957,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicValueEstimate.sum": {
"value": -0.05961025506258011,
"min": -1.986641526222229,
"max": 0.2775861918926239,
"count": 100
},
"SoccerTwos.Environment.CumulativeReward.mean": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 100
},
"SoccerTwos.Environment.CumulativeReward.sum": {
"value": 0.0,
"min": 0.0,
"max": 0.0,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicReward.mean": {
"value": 0.0,
"min": -0.3333333333333333,
"max": 0.23001904714675175,
"count": 100
},
"SoccerTwos.Policy.ExtrinsicReward.sum": {
"value": 0.0,
"min": -7.63320004940033,
"max": 5.381599962711334,
"count": 100
},
"SoccerTwos.Environment.GroupCumulativeReward.mean": {
"value": 0.0,
"min": -0.3333333333333333,
"max": 0.23001904714675175,
"count": 100
},
"SoccerTwos.Environment.GroupCumulativeReward.sum": {
"value": 0.0,
"min": -7.63320004940033,
"max": 5.381599962711334,
"count": 100
},
"SoccerTwos.IsTraining.mean": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
},
"SoccerTwos.IsTraining.sum": {
"value": 1.0,
"min": 1.0,
"max": 1.0,
"count": 100
},
"SoccerTwos.Losses.PolicyLoss.mean": {
"value": 0.020165784686772287,
"min": 0.012667080728958051,
"max": 0.021834981829548874,
"count": 47
},
"SoccerTwos.Losses.PolicyLoss.sum": {
"value": 0.020165784686772287,
"min": 0.012667080728958051,
"max": 0.021834981829548874,
"count": 47
},
"SoccerTwos.Losses.ValueLoss.mean": {
"value": 0.0033051648642867804,
"min": 1.966373759690517e-05,
"max": 0.006081393399896721,
"count": 47
},
"SoccerTwos.Losses.ValueLoss.sum": {
"value": 0.0033051648642867804,
"min": 1.966373759690517e-05,
"max": 0.006081393399896721,
"count": 47
},
"SoccerTwos.Losses.BaselineLoss.mean": {
"value": 0.0032961511014339825,
"min": 1.9480713702553962e-05,
"max": 0.005507372653422256,
"count": 47
},
"SoccerTwos.Losses.BaselineLoss.sum": {
"value": 0.0032961511014339825,
"min": 1.9480713702553962e-05,
"max": 0.005507372653422256,
"count": 47
},
"SoccerTwos.Policy.LearningRate.mean": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 47
},
"SoccerTwos.Policy.LearningRate.sum": {
"value": 0.0003,
"min": 0.0003,
"max": 0.0003,
"count": 47
},
"SoccerTwos.Policy.Epsilon.mean": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 47
},
"SoccerTwos.Policy.Epsilon.sum": {
"value": 0.20000000000000007,
"min": 0.20000000000000007,
"max": 0.20000000000000007,
"count": 47
},
"SoccerTwos.Policy.Beta.mean": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 47
},
"SoccerTwos.Policy.Beta.sum": {
"value": 0.005000000000000001,
"min": 0.005000000000000001,
"max": 0.005000000000000001,
"count": 47
}
},
"metadata": {
"timer_format_version": "0.1.0",
"start_time_seconds": "1679716667",
"python_version": "3.9.16 (main, Mar 8 2023, 14:00:05) \n[GCC 11.2.0]",
"command_line_arguments": "/home/butch2/miniconda3/envs/rl/bin/mlagents-learn ./config/poca/SoccerTwos.yaml --env=./training-envs-executables/SoccerTwos.x86_64 --run-id=SoccerTwos --no-graphics",
"mlagents_version": "0.31.0.dev0",
"mlagents_envs_version": "0.31.0.dev0",
"communication_protocol_version": "1.5.0",
"pytorch_version": "1.11.0+cu102",
"numpy_version": "1.21.2",
"end_time_seconds": "1679722719"
},
"total": 6052.4713283070005,
"count": 1,
"self": 10.029550414001278,
"children": {
"run_training.setup": {
"total": 0.025254018999476102,
"count": 1,
"self": 0.025254018999476102
},
"TrainerController.start_learning": {
"total": 6042.416523874,
"count": 1,
"self": 3.090648444080216,
"children": {
"TrainerController._reset_env": {
"total": 8.808450219000406,
"count": 48,
"self": 8.808450219000406
},
"TrainerController.advance": {
"total": 6030.014997633918,
"count": 85130,
"self": 3.51347794809044,
"children": {
"env_step": {
"total": 3387.099965223041,
"count": 85130,
"self": 2857.35695773122,
"children": {
"SubprocessEnvManager._take_step": {
"total": 527.5617834530613,
"count": 85130,
"self": 23.003023083606422,
"children": {
"TorchPolicy.evaluate": {
"total": 504.55876036945483,
"count": 169136,
"self": 504.55876036945483
}
}
},
"workers": {
"total": 2.181224038759865,
"count": 85130,
"self": 0.0,
"children": {
"worker_root": {
"total": 6030.703722336968,
"count": 85130,
"is_parallel": true,
"self": 3571.9866418877964,
"children": {
"run_training.setup": {
"total": 0.0,
"count": 0,
"is_parallel": true,
"self": 0.0,
"children": {
"steps_from_proto": {
"total": 0.0064254680000885855,
"count": 2,
"is_parallel": true,
"self": 0.0024875500002963236,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.003937917999792262,
"count": 8,
"is_parallel": true,
"self": 0.003937917999792262
}
}
},
"UnityEnvironment.step": {
"total": 0.03386911899997358,
"count": 1,
"is_parallel": true,
"self": 0.0004891250009677606,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 0.0003880499998558662,
"count": 1,
"is_parallel": true,
"self": 0.0003880499998558662
},
"communicator.exchange": {
"total": 0.03149792000021989,
"count": 1,
"is_parallel": true,
"self": 0.03149792000021989
},
"steps_from_proto": {
"total": 0.0014940239989300608,
"count": 2,
"is_parallel": true,
"self": 0.00033843100027297623,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.0011555929986570845,
"count": 8,
"is_parallel": true,
"self": 0.0011555929986570845
}
}
}
}
}
}
},
"UnityEnvironment.step": {
"total": 2458.576440918172,
"count": 85129,
"is_parallel": true,
"self": 84.49239270501675,
"children": {
"UnityEnvironment._generate_step_input": {
"total": 63.64907529815446,
"count": 85129,
"is_parallel": true,
"self": 63.64907529815446
},
"communicator.exchange": {
"total": 2052.7096116610655,
"count": 85129,
"is_parallel": true,
"self": 2052.7096116610655
},
"steps_from_proto": {
"total": 257.72536125393526,
"count": 170258,
"is_parallel": true,
"self": 53.75866134829448,
"children": {
"_process_rank_one_or_two_observation": {
"total": 203.96669990564078,
"count": 681032,
"is_parallel": true,
"self": 203.96669990564078
}
}
}
}
},
"steps_from_proto": {
"total": 0.1406395309995787,
"count": 94,
"is_parallel": true,
"self": 0.02893658700031665,
"children": {
"_process_rank_one_or_two_observation": {
"total": 0.11170294399926206,
"count": 376,
"is_parallel": true,
"self": 0.11170294399926206
}
}
}
}
}
}
}
}
},
"trainer_advance": {
"total": 2639.4015544627864,
"count": 85130,
"self": 19.574614394817218,
"children": {
"process_trajectory": {
"total": 310.0487905389673,
"count": 85130,
"self": 308.67745128396746,
"children": {
"RLTrainer._checkpoint": {
"total": 1.3713392549998389,
"count": 2,
"self": 1.3713392549998389
}
}
},
"_update_policy": {
"total": 2309.778149529002,
"count": 47,
"self": 241.54033889600487,
"children": {
"TorchPOCAOptimizer.update": {
"total": 2068.237810632997,
"count": 1410,
"self": 2068.237810632997
}
}
}
}
}
}
},
"trainer_threads": {
"total": 1.1920001270482317e-06,
"count": 1,
"self": 1.1920001270482317e-06
},
"TrainerController._save_models": {
"total": 0.5024263850009447,
"count": 1,
"self": 0.015784387001986033,
"children": {
"RLTrainer._checkpoint": {
"total": 0.4866419979989587,
"count": 1,
"self": 0.4866419979989587
}
}
}
}
}
}
}
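
A minimal sketch of how a file like this can be inspected, assuming it is the run_logs/timers.json that mlagents-learn writes under results/<run-id>/ (here --run-id=SoccerTwos). The path results/SoccerTwos/run_logs/timers.json and the walk helper are illustrative assumptions, not part of the repository; only keys that appear in the JSON above are read.

import json

# Assumed location for this run; adjust to wherever the file actually lives.
with open("results/SoccerTwos/run_logs/timers.json") as f:
    timers = json.load(f)

# Each gauge stores the last recorded value plus its min, max, and count.
elo = timers["gauges"]["SoccerTwos.Self-play.ELO.mean"]
print(f"Final self-play ELO: {elo['value']:.1f} (min {elo['min']:.1f}, max {elo['max']:.1f})")

steps = timers["gauges"]["SoccerTwos.Step.mean"]["value"]
print(f"Training steps: {steps:.0f}")

# The top-level timer tree records wall-clock seconds and call counts per block.
print(f"Total wall-clock time: {timers['total']:.1f} s")

def walk(node, name="root", depth=0):
    # Recursively print the hierarchical timer breakdown (illustrative helper).
    print(f"{'  ' * depth}{name}: {node.get('total', 0.0):.1f} s ({node.get('count', 0)} calls)")
    for child_name, child in node.get("children", {}).items():
        walk(child, child_name, depth + 1)

walk(timers)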