PPO playing MountainCar-v0 from https://github.com/sgoodfriend/rl-algo-impls/tree/983cb75e43e51cf4ef57f177194ab9a4a1a8808b
3cc5c1d
| from typing import Sequence, Type | |
| import numpy as np | |
| import torch.nn as nn | |
def mlp(
    layer_sizes: Sequence[int],
    activation: Type[nn.Module],
    output_activation: Type[nn.Module] = nn.Identity,
    init_layers_orthogonal: bool = False,
    final_layer_gain: float = np.sqrt(2),
    hidden_layer_gain: float = np.sqrt(2),
) -> nn.Module:
    """Assemble a stack of ``nn.Linear`` layers into an ``nn.Sequential``.

    Consecutive entries of ``layer_sizes`` give the input and output widths
    of each linear layer, so ``len(layer_sizes) - 1`` linear layers are
    created. Every linear layer except the last is followed by a fresh
    ``activation()`` instance; the last is followed by ``output_activation()``.

    Args:
        layer_sizes: Layer widths, input size first and output size last.
            Fewer than two entries raises ``IndexError``.
        activation: Module class instantiated (with no arguments) after each
            hidden linear layer.
        output_activation: Module class instantiated (with no arguments)
            after the final linear layer.
        init_layers_orthogonal: Forwarded to ``layer_init`` for every linear
            layer.
        final_layer_gain: Passed as ``std`` to ``layer_init`` for the final
            linear layer.
        hidden_layer_gain: Passed as ``std`` to ``layer_init`` for every
            other linear layer.

    Returns:
        The ``nn.Sequential`` wrapping the layers in creation order.
    """

    def _dense(first: int, gain: float) -> nn.Module:
        # Linear layer spanning layer_sizes[first] -> layer_sizes[first + 1];
        # first=-2 therefore addresses the final pair of sizes.
        linear = nn.Linear(layer_sizes[first], layer_sizes[first + 1])
        return layer_init(linear, init_layers_orthogonal, std=gain)

    hidden_count = len(layer_sizes) - 2
    modules = []
    for position in range(hidden_count):
        # Tuple elements are evaluated left to right, so the linear layer is
        # built (and initialized) before its activation is instantiated.
        modules.extend((_dense(position, hidden_layer_gain), activation()))
    modules.extend((_dense(-2, final_layer_gain), output_activation()))
    return nn.Sequential(*modules)
def layer_init(
    layer: nn.Module, init_layers_orthogonal: bool, std: float = np.sqrt(2)
) -> nn.Module:
    """Optionally apply orthogonal initialization to ``layer`` in place.

    Args:
        layer: Module to initialize. It must expose a ``weight`` tensor when
            ``init_layers_orthogonal`` is true.
        init_layers_orthogonal: When false, ``layer`` is returned untouched.
        std: Gain handed to ``nn.init.orthogonal_`` for the weight.

    Returns:
        The same ``layer`` object that was passed in.
    """
    if not init_layers_orthogonal:
        return layer
    nn.init.orthogonal_(layer.weight, std)  # type: ignore
    # A layer built with ``bias=False`` carries ``bias=None``; zero-filling
    # that would raise, so only touch the bias when one actually exists.
    bias = getattr(layer, "bias", None)
    if bias is not None:
        nn.init.constant_(bias, 0.0)
    return layer