如何玩转gym游戏:

小车代码:

import gym

# MountainCar random-action demo.
# NOTE(review): render_mode= and the 5-value step unpack below are the
# gym >= 0.26 API; the post elsewhere mentions gym 0.25.2 — confirm version.
env = gym.make('MountainCar-v0', render_mode='human')
for i_episode in range(10):
    # gym >= 0.26: reset() returns (observation, info).
    observation, info = env.reset()
    for t in range(100):
        env.render()
        print(observation)
        # Sample a random action from the action space.
        action = env.action_space.sample()
        # gym >= 0.26: step() returns (obs, reward, terminated, truncated, info).
        observation, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        # Check for episode end after every step (was previously outside the
        # step loop, so it could never end an episode early).
        if done:
            print("Episode finished after {} timesteps".format(t + 1))
            break
env.close()
倒立摆代码:

import gym

# CartPole random-action demo: run 10 episodes of 100 random steps each.
env = gym.make('CartPole-v1', render_mode="human")
for episode in range(10):
    env.reset()
    # The original message claimed "finished after N timesteps" but printed
    # the episode index right after reset(); report what actually happens.
    print("Starting episode {}".format(episode))
    for _ in range(100):
        env.render()
        # Take a random action; the step result is deliberately ignored here.
        env.step(env.action_space.sample())
env.close()

验证gym安装是否成功:

运行以下示例代码

import gym  # import gym

# Random-interaction demo using the classic 4-tuple step API (gym 0.25.x).
env = gym.make("MountainCar-v0")  # create MountainCar-v0; try others, e.g. CartPole-v0
env.reset()  # initialise the environment
for step in range(1000):  # interaction loop
    env.render()  # render the current environment state
    # Sample a random action from the action space.
    action = env.action_space.sample()
    # Submit the action; gym 0.25.x returns (observation, reward, done, info).
    # NOTE(review): the episode is never reset when done — gym will warn after
    # the episode terminates; confirm whether that is intended for this demo.
    observation, reward, done, info = env.step(action)
    print(observation, reward, done, info)  # print the environment feedback
env.close()

需要安装gym-0.25.2

gym小车登顶游戏

import gym
import numpy as np

env = gym.make("MountainCar-v0") # build the experiment environment


class BespokeAgent:  # hand-crafted agent
    """Hand-crafted (non-learning) agent for MountainCar-v0.

    It pushes right (action 2) when the car's velocity lies inside a
    hand-tuned, position-dependent band, and pushes left (action 0)
    otherwise.
    """

    def __init__(self, env):
        # The environment is accepted for interface symmetry but unused.
        pass

    def decide(self, observation):  # decision rule
        """Return an action for a ``(position, velocity)`` observation.

        Returns 2 (push right) when the velocity is strictly between the
        hand-tuned lower and upper bounds, else 0 (push left).
        """
        position, velocity = observation
        # Lower/upper velocity bounds of the "push right" band.
        lb = min(-0.09 * (position + 0.25) ** 2 + 0.03,
                 0.3 * (position + 0.9) ** 4 - 0.008)
        ub = -0.07 * (position + 0.38) ** 2 + 0.07
        if lb < velocity < ub:
            action = 2
        else:
            action = 0
        return action

    def learn(self, *args):  # learning step
        """No-op: this agent does not learn."""
        pass


def play_montecarlo(env, agent, render=False, train=False):
    """Run one episode and return its total (undiscounted) reward.

    Args:
        env: environment using the classic 4-tuple step API
            ``(observation, reward, done, info)`` — matches gym 0.25.x.
        agent: object providing ``decide(observation) -> action`` and
            ``learn(observation, action, reward, done)``.
        render: if True, render the environment every step.
        train: if True, call ``agent.learn`` after every step.

    Returns:
        The sum of all rewards collected during the episode.
    """
    episode_reward = 0.
    observation = env.reset()
    while True:
        if render:
            env.render()
        action = agent.decide(observation)
        next_observation, reward, done, _ = env.step(action)
        episode_reward += reward
        if train:
            agent.learn(observation, action, reward, done)
        if done:
            break
        observation = next_observation
    return episode_reward


agent = BespokeAgent(env)
env.reset(seed=0) # reset for a new episode; the seed is only set to make results reproducible and can normally be dropped

# Play one rendered episode and report its total reward.
episode_reward = play_montecarlo(env, agent, render=True)
print("回合奖励={}".format(episode_reward))

# Evaluate the agent: average reward over 100 non-rendered episodes.
episode_rewards = [play_montecarlo(env, agent) for _ in range(100)]
print("平均回合奖励={}".format(np.mean(episode_rewards)))
env.close() # close the environment

  • 23
    点赞
  • 23
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值