gym库的基本使用方法
1、gym编程范式
import gym
# Minimal gym interaction loop: run 20 episodes of CartPole with random
# actions, at most 100 steps each, rendering and printing every observation.
env = gym.make('CartPole-v0')
try:
    for episode in range(20):
        observation = env.reset()  # start a fresh episode
        for timestep in range(100):
            env.render()  # draw the current frame
            print(observation)
            action = env.action_space.sample()  # pick a random action
            observation, reward, done, info = env.step(action)  # advance one step
            if done:
                print(observation)
                # The loop variable is `timestep`; the original referenced the
                # undefined name `timesteps`, raising NameError at episode end.
                print("Episode {} finished after {} timestep".format(episode, timestep+1))
                break
finally:
    # Release the render window even if the loop above raises.
    env.close()
2、环境对象env
属性:
- env.observation_space:状态空间 (Box连续型还有属性low / high)
- env.action_space:动作空间 (Box连续型还有属性low / high)
方法:
- observation = env.reset():环境重置
- env.render() :可视化
- observation, reward, done, info = env.step(action):单步交互
- env.close():关闭环境
- env.seed(seed):设置环境的随机数种子,使随机采样与实验结果可复现
3、gym.spaces
定义状态空间(observation space)、动作空间(action space)。
from gym import spaces
离散对象:
- spaces.Discrete(3, start=-1) # {-1, 0, 1}
连续对象:
- spaces.Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) #Box(3, 4)
- spaces.Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32) #Box(2,)
方法:
- sample():在空间中抽样
- contains(x):判断x是否属于空间
4、gym.envs
from gym import envs
print(envs.registry.all()) # list every environment currently registered with gym
5、自定义环境
import gym
import numpy as np
from gym import spaces
from gym.envs.classic_control import rendering
class GridMapEnv(gym.Env):
    """Grid-world environment with obstacle cells and a single exit cell.

    The state is a length-2 integer array ``[x, y]``. The four discrete
    actions move the agent by ``(+1, 0)``, ``(0, -1)``, ``(-1, 0)`` and
    ``(0, +1)`` respectively. Each step yields a reward of ``-1`` on an
    obstacle cell, ``+1`` on the exit cell (which also ends the episode) and
    ``-0.1`` anywhere else.

    NOTE(review): ``step`` does not clamp the state, so the agent can leave
    the drawn grid; confirm whether that is intended.
    """

    def __init__(self):
        super().__init__()
        # Per-axis spaces; reset() samples the start cell from them.
        # NOTE(review): these cover 0..3 while render() draws a 5x5 grid and
        # the exit sits at x == 4 -- confirm the intended grid size.
        self.x = spaces.Discrete(4)
        self.y = spaces.Discrete(4)
        # A real gym Space (the original stored a NumPy object array of
        # spaces, which offers neither sample() nor contains()).
        self.observation_space = spaces.MultiDiscrete([self.x.n, self.y.n])
        self.action_space = spaces.Discrete(4)
        # Action index -> displacement added to the state.
        self.action_dict = {
            0: np.array([1, 0]),
            1: np.array([0, -1]),
            2: np.array([-1, 0]),
            3: np.array([0, 1]),
        }
        self.viewer = None  # created lazily by render()
        self.terminate_states = np.array([[4, 2]])
        # NOTE(review): the rectangles drawn in render() cover cells
        # [0,2] [1,2] [3,3] [3,4] [2,0] [3,0] [4,0], which differs from this
        # list (and [0,2] appears twice) -- confirm which one is intended.
        self.obstacle_region = np.array(
            [[3, 0], [3, 1], [0, 2], [1, 2], [0, 2], [0, 3], [0, 4]]
        )

    @staticmethod
    def _contains_cell(cells, cell):
        """Return True when ``cell`` equals one full row of ``cells``.

        ``cell in cells`` on NumPy arrays is true as soon as any single
        coordinate matches, so whole rows have to be compared explicitly.
        """
        return bool(np.any(np.all(cells == cell, axis=1)))

    def seed(self, seed=None):
        """Accept a seed for API compatibility; currently does nothing."""
        pass

    def reset(self):
        """Sample a start cell from the per-axis spaces and return it."""
        self.state = np.array([self.x.sample(), self.y.sample()])
        return self.state

    def step(self, action):
        """Apply ``action`` and return ``(state, reward, done, info)``.

        Args:
            action: key of ``self.action_dict`` (0-3).

        Raises:
            KeyError: if ``action`` is not a key of ``self.action_dict``.
        """
        self.state = self.state + self.action_dict[action]  # state transition
        if self._contains_cell(self.obstacle_region, self.state):
            reward, done = -1, False
        elif self._contains_cell(self.terminate_states, self.state):
            reward, done = 1, True
        else:
            reward, done = -0.1, False
        return self.state, reward, done, {}

    def render(self, mode="human"):
        """Draw the grid, walls, exit and agent; return the viewer's result.

        The scene is built on the first call; later calls only move the agent.
        ``mode == "rgb_array"`` is forwarded to the viewer as
        ``return_rgb_array=True``.
        """
        if self.viewer is None:
            # Window.
            self.viewer = rendering.Viewer(600, 600)

            # Grid: six horizontal and six vertical lines, 80 px apart.
            marks = range(100, 501, 80)
            horizontals = [rendering.Line((100, y), (500, y)) for y in marks]
            verticals = [rendering.Line((x, 100), (x, 500)) for x in marks]

            # Obstacles, each given by its four corner points.
            walls = [
                rendering.FilledPolygon(corners)
                for corners in (
                    [(100, 260), (260, 260), (260, 340), (100, 340)],
                    [(340, 340), (420, 340), (420, 500), (340, 500)],
                    [(260, 100), (500, 100), (500, 180), (260, 180)],
                )
            ]
            for geom in horizontals + verticals + walls:
                geom.set_color(0, 0, 0)

            # Exit: a circle of radius 40 translated to its centre.
            self.exit = rendering.make_circle(40)
            self.circle_trans = rendering.Transform(translation=(460, 300))
            self.exit.add_attr(self.circle_trans)
            self.exit.set_color(1, 0.9, 0)

            # Agent: a triangle anchored at the lower-left corner of its
            # cell. The anchor is kept on self so later calls can compute the
            # translation relative to where the triangle was first drawn.
            self.left_vertex = 100 + 80 * self.state
            right_vertex = (self.left_vertex[0] + 80, self.left_vertex[1])
            up_vertex = (self.left_vertex[0] + 40, self.left_vertex[1] + 80)
            self.agent = rendering.FilledPolygon(
                [self.left_vertex, right_vertex, up_vertex]
            )
            self.agentrans = rendering.Transform()
            self.agent.add_attr(self.agentrans)
            self.agent.set_color(1, 0, 0)

            # Register everything with the viewer, in drawing order.
            for geom in (*horizontals, *verticals, *walls, self.exit, self.agent):
                self.viewer.add_geom(geom)

        # Move the agent by its offset from the initially drawn position.
        offset = 100 + 80 * self.state - self.left_vertex
        self.agentrans.set_translation(offset[0], offset[1])
        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def close(self):
        """Close the render window, if one was opened."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None