从examples入手,理解ExperimentGrid
ppo_pytorch
from spinup.algos.pytorch.ppo.ppo import ppo as ppo_pytorch
----ppo
self.pi 是 actor 网络(动作空间为 Box(连续)时使用 MLPGaussianActor;动作空间为 Discrete(离散)时使用 MLPCategoricalActor),self.v 是 critic(评论家)网络 MLPCritic。
当参数是[32,32]时的网络。
--------------当从脚本启动多个实验时----
理解是:本次实验中,hidden_sizes=(32,) 时只有一层隐藏层,整个网络共两层 Linear;hidden_sizes=(64,64) 时有两层隐藏层,整个网络共三层 Linear。即网络最少两层,最多三层。
from spinup.utils.run_utils import ExperimentGrid
from spinup import ppo_pytorch
import torch
if __name__ == '__main__':
    # Launch a small PPO (PyTorch) benchmark on CartPole-v0 through
    # ExperimentGrid. Every parameter added with a list of values becomes a
    # grid axis; with the defaults below the grid is
    # 3 seeds x 2 hidden_sizes x 2 activations = 12 runs.
    import argparse

    parser = argparse.ArgumentParser()
    # Value forwarded to eg.run(..., num_cpu=...).
    parser.add_argument('--cpu', type=int, default=4)
    # Number of random seeds to sweep over (seeds are 0, 10, 20, ...).
    parser.add_argument('--num_runs', type=int, default=3)
    args = parser.parse_args()

    eg = ExperimentGrid(name='ppo-pyt-bench')

    # Positional args after the value are (shorthand, in_name): an empty
    # shorthand, and in_name=True so the env name is included in the
    # experiment name (see the ExperimentGrid.add documentation).
    eg.add('env_name', 'CartPole-v0', '', True)
    eg.add('seed', [10*i for i in range(args.num_runs)])
    eg.add('epochs', 10)
    eg.add('steps_per_epoch', 4000)

    # The 'ac_kwargs:' prefix addresses a key nested inside the ac_kwargs
    # dict passed to ppo. (32,) gives one hidden layer, (64, 64) gives two.
    eg.add('ac_kwargs:hidden_sizes', [(32,), (64,64)], 'hid')
    eg.add('ac_kwargs:activation', [torch.nn.Tanh, torch.nn.ReLU], '')

    # Run every variant in the grid with the PyTorch PPO implementation.
    eg.run(ppo_pytorch, num_cpu=args.cpu)
critic_net MLPCritic(
(v_net): Sequential(
(0): Linear(in_features=4, out_features=32, bias=True)
(1): Tanh()
(2): Linear(in_features=32, out_features=1, bias=True)
(3): Identity()
)
)
actor_net MLPCategoricalActor(
(logits_net): Sequential(
(0): Linear(in_features=4, out_features=32, bias=True)
(1): ReLU()
(2): Linear(in_features=32, out_features=2, bias=True)
(3): Identity()
)
)
actor_net MLPCategoricalActor(
(logits_net): Sequential(
(0): Linear(in_features=4, out_features=64, bias=True)
(1): ReLU()
(2): Linear(in_features=64, out_features=64, bias=True)
(3): ReLU()
(4): Linear(in_features=64, out_features=2, bias=True)
(5): Identity()
)
)
actor_net MLPCategoricalActor(
(logits_net): Sequential(
(0): Linear(in_features=4, out_features=64, bias=True)
(1): Tanh()
(2): Linear(in_features=64, out_features=64, bias=True)
(3): Tanh()
(4): Linear(in_features=64, out_features=2, bias=True)
(5): Identity()
)
)
本次脚本的结果对比图
(spinningup) rlon@ubuntu:~/spinningup/spinningup$ python -m spinup.run plot /home/rlon/spinningup/spinningup/data/