# References:
# https://zhuanlan.zhihu.com/p/26985029
# https://morvanzhou.github.io/tutorials/machine-learning/reinforcement-learning/4-4-gym/
import gym  # import the Gym module
from policynet import PolicyGradient
import matplotlib.pyplot as plt
import time
# NOTE(review): not referenced in the visible part of this script; presumably
# the reward level above which rendering gets switched on -- confirm against
# the rest of the training loop.
DISPLAY_REWARD_THRESHOLD = 1000
# When True, the training loop calls env.render() on every step.
RENDER = False

# Create the environment: the CartPole (cart with an inverted pendulum) task.
env = gym.make('CartPole-v0')
env.seed(1)  # fixed seed passed to the environment
env = env.unwrapped  # work with the raw environment underneath gym's wrappers

# Print the action/observation spaces and the observation bounds for inspection.
print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

# Build the policy-gradient learner, sized from this environment's spaces.
RL = PolicyGradient(
    n_actions=env.action_space.n,
    n_features=env.observation_space.shape[0],
    learning_rate=0.02,
    reward_decay=0.99,
)
#学习过程
for i_episode in range(85):
observation = env.reset()
while True:
if RENDER: env.render()
#采样动作,探索环境
# action = RL.choos