gym库的基本使用方法

1、gym编程范式

import gym

# Basic gym interaction loop: run 20 episodes of CartPole with random actions.
env = gym.make('CartPole-v0')
for episode in range(20):
    observation = env.reset()  # reset the environment at the start of each episode
    for timestep in range(100):
        env.render()  # draw the current frame
        print(observation)
        action = env.action_space.sample()  # sample a random action
        observation, reward, done, info = env.step(action)  # advance one step
        if done:
            print(observation)
            # The loop variable is `timestep`; the original referenced the
            # undefined name `timesteps`, which raised NameError here.
            print("Episode {} finished after {} timestep".format(episode, timestep + 1))
            break
env.close()

2、环境对象env

属性:

  • env.observation_space:状态空间 (Box连续型还有属性low / high)
  • env.action_space:动作空间 (Box连续型还有属性low / high)

方法:

  • observation = env.reset():环境重置
  • env.render() :可视化
  • observation, reward, done, info = env.step(action):单步交互
  • env.close():关闭环境
  • env.seed(seed):设置环境的随机数种子,使实验结果可复现

3、gym.spaces

定义状态空间(observation space)、动作空间(action space)。

from gym import spaces

离散对象:

  • spaces.Discrete(3, start=-1) # {-1, 0, 1}

连续对象:

  • spaces.Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) #Box(3, 4)
  • spaces.Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32) #Box(2,)

方法:

  • sample():在空间中抽样
  • contains(x):判断x是否属于空间

4、gym.envs

from gym import envs

# Fetch every environment spec currently registered with gym, then print them.
registered_envs = envs.registry.all()
print(registered_envs)

5、自定义环境

import gym
import numpy as np
from gym import spaces
from gym.envs.classic_control import rendering

class GridMapEnv(gym.Env):
    """Grid-map environment with walls, an exit and a triangular agent.

    The state is a length-2 integer array ``[x, y]``. The four discrete
    actions move the agent by one cell: 0 -> +x, 1 -> -y, 2 -> -x, 3 -> +y.

    Rewards returned by ``step``:
      * -1   when the new state is a cell of ``obstacle_region`` (episode continues)
      * +1   when the new state is a cell of ``terminate_states`` (episode ends)
      * -0.1 otherwise (episode continues)
    """

    # Pixel layout used by render(): cell (i, j) has its lower-left corner at
    # (_GRID_MIN + _CELL * i, _GRID_MIN + _CELL * j).
    _GRID_MIN = 100
    _GRID_MAX = 500
    _CELL = 80

    def __init__(self):
        # Per-coordinate spaces; reset() samples the start cell from them.
        self.x = spaces.Discrete(4)
        self.y = spaces.Discrete(4)
        # NOTE(review): this is a NumPy object array holding two Space
        # objects, not a gym Space itself; kept as-is so existing callers
        # are unaffected. Confirm whether a proper Space is wanted.
        self.observation_space = np.array([self.x, self.y])
        self.action_space = spaces.Discrete(4)

        # Action index -> displacement added to the state.
        self.action_dict = {
            0: np.array([1, 0]),
            1: np.array([0, -1]),
            2: np.array([-1, 0]),
            3: np.array([0, 1]),
        }

        self.viewer = None
        self.terminate_states = np.array([[4, 2],])
        # NOTE(review): these cells do not all coincide with the wall
        # rectangles drawn in render(), and [0, 2] appears twice; confirm
        # which layout is intended.
        self.obstacle_region = np.array([[3,0], [3,1], [0,2], [1,2], [0,2], [0,3], [0,4]])

    @staticmethod
    def _contains_cell(cells, state):
        """Return True when ``state`` equals one complete row of ``cells``.

        ``state in cells`` on NumPy arrays is true as soon as ANY single
        coordinate matches, so the rows must be compared as a whole.
        """
        return bool(np.any(np.all(cells == state, axis=1)))

    def seed(self, seed=None):
        """Accept a seed for API compatibility; currently does nothing."""
        pass

    def reset(self):
        """Sample a start cell from ``self.x`` / ``self.y`` and return it."""
        self.state = np.array([self.x.sample(), self.y.sample()])
        return self.state

    def step(self, action):
        """Apply ``action`` and return ``(state, reward, done, info)``.

        ``action`` must be a key of ``self.action_dict`` (0-3); any other
        value raises ``KeyError``.
        """
        displacement = self.action_dict[action]
        self.state = self.state + displacement  # state transition
        # NOTE(review): no bounds check is applied, so the state can leave
        # the drawn grid; confirm whether clipping is desired.
        if self._contains_cell(self.obstacle_region, self.state):
            reward = -1
            done = False
        elif self._contains_cell(self.terminate_states, self.state):
            reward = 1
            done = True
        else:
            reward = -0.1
            done = False
        return self.state, reward, done, {}

    def _build_scene(self):
        """Create the viewer window and all shapes (grid, walls, exit, agent)."""
        screen_width = 600
        screen_height = 600
        self.viewer = rendering.Viewer(screen_width, screen_height)

        lo, hi, cell = self._GRID_MIN, self._GRID_MAX, self._CELL
        offsets = range(lo, hi + 1, cell)

        # Grid lines: horizontal ones first, then vertical ones, each given
        # by its two end points and coloured black.
        self.grid_lines = [rendering.Line((lo, y), (hi, y)) for y in offsets]
        self.grid_lines += [rendering.Line((x, lo), (x, hi)) for x in offsets]
        for line in self.grid_lines:
            line.set_color(0, 0, 0)

        # Obstacles: black rectangles given by their four corner points.
        wall_corners = [
            [(100,260), (260,260), (260,340), (100,340)],
            [(340,340), (420,340), (420,500), (340,500)],
            [(260,100), (500,100), (500,180), (260,180)],
        ]
        self.walls = [rendering.FilledPolygon(corners) for corners in wall_corners]
        for wall in self.walls:
            wall.set_color(0, 0, 0)

        # Exit: a circle of radius 40 moved to its centre via a Transform.
        self.exit = rendering.make_circle(40)
        self.circle_trans = rendering.Transform(translation=(460, 300))
        self.exit.add_attr(self.circle_trans)
        self.exit.set_color(1, 0.9, 0)

        # Agent: a red triangle anchored at the lower-left corner of its
        # cell. left_vertex is stored on self so later frames can compute
        # the agent's displacement relative to this initial position.
        self.left_vertex = lo + cell * self.state
        right_vertex = (self.left_vertex[0]+80, self.left_vertex[1])
        up_vertex = (self.left_vertex[0]+40, self.left_vertex[1]+80)
        self.agent = rendering.FilledPolygon([self.left_vertex, right_vertex, up_vertex])
        self.agentrans = rendering.Transform()
        # Later frames move the agent through agentrans.set_translation().
        self.agent.add_attr(self.agentrans)
        self.agent.set_color(1, 0, 0)

        # Register the shapes with the viewer; order determines draw order.
        for geom in (*self.grid_lines, *self.walls, self.exit, self.agent):
            self.viewer.add_geom(geom)

    def render(self, mode="human"):
        """Draw the current state; return the viewer's render result.

        The scene is built lazily on the first call, so ``reset`` must have
        been called beforehand (the agent shape is placed from ``self.state``).
        An RGB array is requested from the viewer when ``mode == "rgb_array"``.
        """
        if self.viewer is None:
            self._build_scene()

        # Move the agent by its displacement relative to the initial position.
        trans = self._GRID_MIN + self._CELL * self.state - self.left_vertex
        self.agentrans.set_translation(trans[0], trans[1])

        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值