附加补全:基于一个微分对策问题的机器学习.pdf-机器学习文档类资源-CSDN下载
test12就是第一问和第二问文件
以下是test3_904_19.py
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from math import cos,sin,pi,sqrt,asin
import random
from os import path
class TEST3(gym.Env):
    """Sheep-vs-dog pursuit environment (a differential game) on a disc.

    A sheep starts near the centre of a circular field of radius ``R`` and
    tries to escape across the boundary; a dog runs along the circumference
    trying to intercept it.  The state is kept in polar coordinates, all
    angles in degrees: ``(theta_sheep, theta_dog, r_sheep)``.

    Fixes applied relative to the original:
      * observation_space low/high were ``[-179, 180, r]`` / ``[-179, 180, R]``
        (low == high on dim 0, low > high on dim 1) — corrected.
      * ``cos()`` in :meth:`step` was fed degrees — now converted to radians.
      * the helpers return *absolute* angles, so :meth:`step` assigns instead
        of the original ``+=`` (which double-counted the angle).
      * law-of-sines angle in :meth:`sheep_nextstep` had mis-placed
        parentheses and mixed radians into degree arithmetic.
      * the dog's per-tick angular step was the inverse of the arc-length
        relation; an arc of length ``r_dog`` on a circle of radius ``R``
        subtends ``r_dog / R`` radians.
      * :meth:`reset` now clears the running reward and cached distance,
        which the original leaked across episodes.
    """

    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}

    def __init__(self):
        self.v_sheep = 3                              # sheep speed
        self.v_dog = 15                               # dog speed
        self.dt = 0.1                                 # simulation time step
        self.r_fix_sheep = self.v_sheep * self.dt     # sheep step length per tick
        self.r_dog = self.v_dog * self.dt             # dog arc length per tick
        self.R = 5                                    # field radius
        # Degrees the dog sweeps per tick: arc r_dog on radius R subtends
        # r_dog / R radians (original had the reciprocal, pi*R/(r_dog*180)).
        self.include_angle_dog = self.r_dog / self.R * 180 / pi
        self.dis_now_dogsheep = self.R                # previous dog-sheep distance
        self.reward = 0                               # running episode reward
        self.viewer = None
        # action: sheep escape direction theta in [0, 180] degrees
        self.action_space = spaces.Box(
            low=0, high=180, shape=(1,), dtype=np.float32
        )
        # observation: (theta_sheep, theta_dog, r_sheep);
        # angles wrapped into (-180, 180], radius in [r_fix_sheep, R].
        self.observation_space = spaces.Box(
            low=np.array([-179, -179, self.r_fix_sheep]),
            high=np.array([180, 180, self.R]),
            dtype=np.float32,
        )
        self.seed()

    @staticmethod
    def _wrap_angle(angle):
        """Wrap a degree angle into (-180, 180] (single-step wrap suffices
        because per-tick angle changes are far smaller than 360)."""
        if angle > 180:
            angle -= 360
        elif angle <= -180:
            angle += 360
        return angle

    def sheep_nextstep(self, theta_sheep, th, r0):
        """Advance the sheep one step of length ``r_fix_sheep``.

        Args:
            theta_sheep: current polar angle of the sheep (degrees).
            th: escape direction in [0, 180] degrees relative to the
                outward radial direction (90 = straight outward).
            r0: current radial distance of the sheep.

        Returns:
            ``[angle, r1]`` — new polar angle (degrees) and new radius.
        """
        if th == 90:
            # Straight outward along the radius: angle unchanged.
            return [theta_sheep, r0 + self.r_fix_sheep]
        # Interior angle of the (r0, step, r1) triangle at the sheep;
        # lies in (90, 180) for both branches, so r1 > 0 always.
        alpha = th + 90 if th < 90 else 270 - th
        alpha_rad = alpha / 180 * pi
        # Law of cosines: radius after the step.
        r1 = sqrt(r0 ** 2 + self.r_fix_sheep ** 2
                  - 2 * r0 * self.r_fix_sheep * cos(alpha_rad))
        # Law of sines: angle swept at the origin, converted to degrees.
        # min() guards asin's domain against float rounding.
        beta = asin(min(1.0, self.r_fix_sheep * sin(alpha_rad) / r1)) * 180 / pi
        angle = theta_sheep - beta if th < 90 else theta_sheep + beta
        return [angle, r1]

    def dog_nextstep(self, theta_sheep, theta_dog):
        """Return the dog's next polar angle (degrees).

        The dog moves one angular step along the circumference toward the
        sheep, choosing the rotation direction with the smaller angular gap
        (wraparound-aware).
        """
        # Signed angular difference wrapped into (-180, 180].
        diff = theta_sheep - theta_dog
        while diff > 180:
            diff -= 360
        while diff <= -180:
            diff += 360
        if diff >= 0:
            return theta_dog + self.include_angle_dog
        return theta_dog - self.include_angle_dog

    def seed(self, seed=None):
        """Seed the environment's RNG; returns the list of seeds used."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, state):
        """Advance the game one tick from the given state.

        NOTE(review): unconventionally, this env's ``step`` takes the current
        *state* instead of an action; the sheep's escape direction is sampled
        uniformly from [0, 180] degrees inside this method.  Callers pass the
        previous observation back in.

        Returns:
            ``(observation, reward, done, info)`` where reward is the running
            episode total (original semantics preserved).
        """
        self.state = state
        theta_sheep, theta_dog, r_sheep_old = self.state
        done = 0
        # Random escape direction for the sheep (acts as the action).
        th = random.randint(0, 180)
        self.last_th = th
        # Both helpers return absolute angles — assign, don't accumulate.
        theta_sheep, r_sheep = self.sheep_nextstep(theta_sheep, th, r_sheep_old)
        theta_sheep = self._wrap_angle(theta_sheep)
        theta_dog = self._wrap_angle(self.dog_nextstep(theta_sheep, theta_dog))
        self.state = np.array([theta_sheep, theta_dog, r_sheep])
        # Dog-sheep distance by the law of cosines; the dog sits on the
        # boundary circle (radius R).  Angles are degrees -> convert for cos.
        include_angle = abs(theta_sheep - theta_dog)
        dis_sheep_dog = sqrt(self.R ** 2 + r_sheep ** 2
                             - 2 * self.R * r_sheep * cos(include_angle / 180 * pi))
        # Penalise the sheep when the dog closed in, reward it when the
        # distance grew; no change -> no reward delta.
        if self.dis_now_dogsheep > dis_sheep_dog:
            self.reward -= 1
        elif self.dis_now_dogsheep < dis_sheep_dog:
            self.reward += 1
        self.dis_now_dogsheep = dis_sheep_dog
        # Episode ends when the sheep escapes the circle, or — checked
        # second — when the dog gets within one dog-step of the sheep.
        if self.R <= r_sheep or dis_sheep_dog <= self.r_dog:
            done = 1
        return self._get_obs(), self.reward, done, {}

    def reset(self):
        """Start a new episode and return the initial observation.

        Also resets the running reward and the cached dog-sheep distance,
        both of which the original implementation leaked across episodes.
        """
        # state = (theta_sheep, theta_dog, r_sheep); sheep starts within one
        # step length of the centre, both angles uniform on the circle.
        self.state = self.np_random.uniform(
            low=np.array([-179, -179, 0]),
            high=np.array([180, 180, self.v_sheep * self.dt]),
        )
        self.last_th = None
        self.reward = 0
        self.dis_now_dogsheep = self.R
        return self._get_obs()

    def _get_obs(self):
        """Return the observation as a numpy array (theta_sheep, theta_dog, r_sheep)."""
        theta_sheep, theta_dog, r_sheep = self.state
        return np.array([theta_sheep, theta_dog, r_sheep])

    def render(self, mode="human"):
        """Draw the field, the sheep and the dog with gym's classic viewer."""
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-50, 50, -50, 50)
            # Boundary circle of the field.
            line = rendering.make_circle(self.R, filled=0)
            self.line_transfrom = rendering.Transform()
            line.add_attr(self.line_transfrom)
            self.viewer.add_geom(line)
            # Sheep (pale pink disc).
            yang = rendering.make_circle(1)
            yang.set_color(1, 240 / 255, 245 / 255)
            self.sheep_transfrom = rendering.Transform()
            yang.add_attr(self.sheep_transfrom)
            self.viewer.add_geom(yang)
            # Dog (salmon disc).
            gou = rendering.make_circle(1)
            gou.set_color(1, 106 / 255, 106 / 255)
            self.dog_transfrom = rendering.Transform()
            gou.add_attr(self.dog_transfrom)
            self.viewer.add_geom(gou)
        # Polar -> cartesian: dog rides the boundary (radius R),
        # sheep at its current radius.
        self.dog_transfrom.set_translation(
            self.R * cos(self.state[1] / 180 * pi),
            self.R * sin(self.state[1] / 180 * pi))
        self.sheep_transfrom.set_translation(
            self.state[2] * cos(self.state[0] / 180 * pi),
            self.state[2] * sin(self.state[0] / 180 * pi))
        return self.viewer.render(return_rgb_array=mode == "rgb_array")

    def close(self):
        """Release the viewer window, if one was opened."""
        if self.viewer:
            self.viewer.close()
            self.viewer = None
以下是test3_main.py
import gym
import time
import random
# env = gym.make('test3-v0')
# # env = gym.make('Pendulum-v0')
# env. reset ()
# env. render ()
# time.sleep(10) # 停顿10s
# env.close() # 关闭图形化界面
class TEST3:
    """Trivial agent that drives the custom env by feeding each observation
    straight back into ``env.step`` (the env samples its own action)."""

    def __init__(self, env):
        # The agent is stateless; env is accepted only for interface symmetry.
        pass

    def play_ones(self, env, render=False):
        """Run one episode to completion and return its total reward.

        Args:
            env: environment whose ``step`` takes the current observation.
            render: when True, draw every frame via ``env.render()``.

        Returns:
            The accumulated reward over the episode.
        """
        total = 0                    # running episode reward
        obs = env.reset()            # fresh episode, initial observation
        done = False
        while not done:
            if render:
                env.render()
            next_obs, reward, done, _ = env.step(obs)
            total += reward
            if not done:
                obs = next_obs       # only advance when the episode continues
        return total
if __name__ == '__main__':
    # Build the registered custom environment and the trivial agent.
    env = gym.make('test3-v0')
    env.seed(0)  # fixed seed for exact reproducibility; remove for random runs
    agent = TEST3(env)
    # Play 100 episodes, rendering each one, and report the episode rewards.
    for episode in range(100):
        episode_reward = agent.play_ones(env, render=True)
        print('第', episode, '回合奖励={}'.format(episode_reward))
    time.sleep(10)  # pause 10 s so the final frame stays visible
    env.close()     # tear down the rendering window