import numpy as np
import gym
from gym.spaces import Discrete
class SnakeEnv(gym.Env):
    """Snakes-and-ladders board exposed as a gym environment.

    The player starts on square 1 and wins by landing exactly on square
    ``SIZE``. Each action selects one of the available dice; the die is
    rolled uniformly in ``[1, max_face]`` and the player advances by the
    result. Overshooting the last square bounces the player back by the
    excess. Landing on either end of a ladder moves the player to the
    other end.
    """

    SIZE = 100  # number of squares on the board

    def __init__(self, ladder_num, dices):
        """Create a board with randomly placed ladders.

        Args:
            ladder_num: Number of ladder pairs to draw.
            dices: Sequence with the maximum face value of each selectable
                die; action ``a`` rolls ``dices[a]``.
        """
        super().__init__()
        self.dices = dices  # maximum face value of each die
        self.ladder_num = ladder_num  # number of ladders
        # Build the ladders: random (start, end) pairs drawn from [1, SIZE).
        # Duplicate start squares overwrite each other, so fewer than
        # ladder_num distinct ladders may remain.
        pairs = np.random.randint(1, self.SIZE, size=(self.ladder_num, 2))
        self.ladders = {int(start): int(end) for start, end in pairs}
        # Ladders work in both directions: add the reversed mapping as well.
        self.ladders.update({end: start for start, end in self.ladders.items()})
        self.pos = 1
        self.observation_space = Discrete(self.SIZE + 1)  # state space
        self.action_space = Discrete(len(dices))  # one action per die

    def reset(self):
        """Put the player back on square 1 and return that position."""
        self.pos = 1
        return self.pos

    def step(self, a):
        """Roll die ``a`` and move the player.

        Args:
            a: Index into ``self.dices`` of the die to roll.

        Returns:
            A ``(state, reward, done, info)`` tuple. ``done`` is 1 when the
            player lands exactly on the last square (reward ``SIZE``) and 0
            otherwise (reward -1). ``info`` is always an empty dict.
        """
        roll = np.random.randint(1, self.dices[a] + 1)
        self.pos += roll
        if self.pos == self.SIZE:
            return self.SIZE, self.SIZE, 1, {}
        if self.pos > self.SIZE:
            # Overshot the goal: bounce back by the excess.
            self.pos = self.SIZE * 2 - self.pos
        if self.pos in self.ladders:
            # Landed on a ladder end: move to its other end.
            self.pos = self.ladders[self.pos]
        return self.pos, -1, 0, {}

    def reward(self, s):
        """Return the reward for state ``s``: ``SIZE`` at the goal, else -1."""
        if s == self.SIZE:
            return self.SIZE
        return -1

    def render(self):
        """Rendering is not implemented; this is a no-op."""
        pass
# Test
# Play one episode, always rolling the first die (max face 3), and print
# the reward and state after every move until the environment reports
# termination.
env = SnakeEnv(10, [3, 6])
env.reset()
terminate = 0
while terminate != 1:
    state, reward, terminate, _ = env.step(0)
    print(reward, state)