import sys
import time
sys.path.append("/home/caomaolin/miniconda3/envs/tf/lib/python3.9/site-packages")
import gym
# Balance the pole in Gym's CartPole-v1 with a fixed linear feedback rule
# (weighted sum of the four observation components, thresholded at zero).
env = gym.make("CartPole-v1")
observation = env.reset()
index = 0  # number of steps taken in the current episode

# Feedback gains, one per observation component.
p = 0.1  # proportional gain, applied to the pole angle
d = 0.005  # derivative gain, applied to the pole angular velocity
i = 0.0  # gain on the cart velocity (0.0 switches this term off)
ii = 0.0001  # gain on the cart position

try:
    for _ in range(50000):
        index += 1
        env.render()
        # Observation layout: cart position, cart velocity, pole angle,
        # pole angular velocity.
        cart_pos, cart_vel, pole_angle, pole_rate = observation
        # Same term order as the original expression so the floating-point
        # result is unchanged.
        control = (
            pole_angle * p + cart_vel * i + pole_rate * d + cart_pos * ii
        )
        action = 1 if control > 0 else 0
        observation, reward, done, info = env.step(action)
        # Pace the loop (presumably so the rendering can be followed by eye).
        time.sleep(0.01)
        if done:
            # Episode finished: report how many steps it lasted and restart.
            observation = env.reset()
            print("game over, index : " + str(index))
            index = 0
finally:
    # Release the environment even if the loop is interrupted (e.g. Ctrl-C)
    # or render()/step() raises.
    env.close()
执行结果:
将每个回合的最大步数调到了5000步(CartPole-v1 默认每回合上限为500步,这一修改未体现在上面的代码中),每个回合都能运行满5000步。
如果只使用杆子角度的比例项和角速度的微分项(p、d),杆子可以保持直立,但小车会逐渐向一侧漂移;加上小车位置的反馈项(ii)后,小车不再漂移,杆子可以稳定地保持直立。
(tf) xx@DESKTOP-EHHP25L:~/works/python/tensorflow/chapter14$ python chapter14.py
game over, index : 5000
game over, index : 5000
game over, index : 5000
game over, index : 5000
game over, index : 5000
game over, index : 5000
game over, index : 5000
game over, index : 5000
game over, index : 5000
game over, index : 5000