[Dedicated link] File re-upload

Supplementary material: 基于一个微分对策问题的机器学习.pdf (Machine Learning Based on a Differential Game Problem), a machine-learning document resource on CSDN downloads.

test12 is the file for Problem 1 and Problem 2.


Below is test3_904_19.py:

import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from math import cos, sin, pi, sqrt, asin
import random




class TEST3(gym.Env):
    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 30}


    def __init__(self):
        self.v_sheep = 3
        self.v_dog = 15
        self.dt = 0.1
        self.r_fix_sheep = self.v_sheep * self.dt   # sheep step length per tick
        self.r_dog = self.v_dog * self.dt           # dog arc length per tick
        self.R = 5                                   # radius of the circular field
        # angle (in degrees) the dog sweeps per tick: arc length / radius, converted to degrees
        self.include_angle_dog = self.r_dog / self.R * 180 / pi
        self.dis_now_dogsheep = self.R

        self.viewer = None

        # action: the sheep's heading theta
        self.action_space = spaces.Box(
            low=0, high=180, shape=(1,), dtype=np.float32
        )
        # observation: theta_sheep, theta_dog, r_sheep
        self.observation_space = spaces.Box(low=np.array([-179, -179, self.r_fix_sheep]),
                                            high=np.array([180, 180, self.R]),
                                            dtype=np.float32)

        self.seed()

    def sheep_nextstep(self, theta_sheep, th, r0):
        # th is the sheep's heading relative to the tangent; the step length is r_fix_sheep
        if th < 90:
            alpha = th + 90
            # law of cosines: new distance from the center
            r1 = sqrt(r0 ** 2 + self.r_fix_sheep ** 2 - 2 * r0 * self.r_fix_sheep * cos(alpha / 180 * pi))
            # law of sines: included angle swept around the center, converted to degrees
            beta = asin(self.r_fix_sheep * sin(alpha / 180 * pi) / r1) * 180 / pi
            angle = theta_sheep - beta
        elif th > 90:
            alpha = 270 - th
            r1 = sqrt(r0 ** 2 + self.r_fix_sheep ** 2 - 2 * r0 * self.r_fix_sheep * cos(alpha / 180 * pi))
            beta = asin(self.r_fix_sheep * sin(alpha / 180 * pi) / r1) * 180 / pi  # included angle
            angle = theta_sheep + beta
        else:  # th == 90: move straight away from the center
            angle = theta_sheep
            r1 = self.r_fix_sheep + r0

        return [angle, r1]

    def dog_nextstep(self, theta_sheep, theta_dog):
        # chase the sheep along the shorter arc of the boundary circle
        diff = (theta_sheep - theta_dog) % 360  # angular gap in [0, 360)
        if diff < 180:
            angle_dog = theta_dog + self.include_angle_dog
        else:
            angle_dog = theta_dog - self.include_angle_dog
        return angle_dog

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]


    def step(self, state):
        self.state = state
        theta_sheep, theta_dog, r_sheep_old = self.state  # th := theta

        r_dog = self.r_dog
        R = self.R

        done = False

        # the sheep's heading is drawn at random here (placeholder for a learned policy)
        th = random.randint(0, 180)

        self.last_th = th
        next_sheep = self.sheep_nextstep(theta_sheep, th, r_sheep_old)
        theta_sheep = next_sheep[0]  # sheep_nextstep returns the new absolute angle
        if theta_sheep > 180:
            theta_sheep -= 360
        elif theta_sheep <= -180:
            theta_sheep += 360
        r_sheep = next_sheep[1]
        theta_dog = self.dog_nextstep(theta_sheep, theta_dog)  # likewise an absolute angle
        if theta_dog > 180:
            theta_dog -= 360
        elif theta_dog <= -180:
            theta_dog += 360

        self.state = np.array([theta_sheep, theta_dog, r_sheep])

        include_angle = abs(theta_sheep - theta_dog)

        # law of cosines; the included angle is in degrees and must be converted to radians
        dis_sheep_dog = sqrt(R ** 2 + r_sheep ** 2 - 2 * R * r_sheep * cos(include_angle / 180 * pi))
        # if the current distance is smaller than last step, deduct a point; if larger, add one
        if self.dis_now_dogsheep > dis_sheep_dog:
            reward = -1
        elif self.dis_now_dogsheep < dis_sheep_dog:
            reward = 1
        else:
            reward = 0
        self.dis_now_dogsheep = dis_sheep_dog
        # first check whether the sheep has escaped, then whether the dog has caught it
        if R <= r_sheep:
            done = True
        elif dis_sheep_dog <= r_dog:
            done = True

        return self._get_obs(), reward, done, {}


    def reset(self):
        # theta_sheep, theta_dog, r_sheep
        self.state = self.np_random.uniform(low=np.array([-179, -179, 0]),
                                            high=np.array([180, 180, self.v_sheep * self.dt]))
        self.last_th = None
        self.dis_now_dogsheep = self.R  # restart the distance-change bookkeeping
        return self._get_obs()


    def _get_obs(self):
        theta_sheep, theta_dog, r_sheep = self.state
        return np.array([theta_sheep, theta_dog, r_sheep])


    def render(self, mode="human"):
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(500, 500)
            self.viewer.set_bounds(-50, 50, -50, 50)
            # boundary circle of the field
            line = rendering.make_circle(self.R, filled=0)
            self.line_transform = rendering.Transform()
            line.add_attr(self.line_transform)
            self.viewer.add_geom(line)
            # sheep
            yang = rendering.make_circle(1)
            yang.set_color(1, 240 / 255, 245 / 255)
            self.sheep_transform = rendering.Transform()
            yang.add_attr(self.sheep_transform)
            self.viewer.add_geom(yang)
            # dog
            gou = rendering.make_circle(1)
            gou.set_color(1, 106 / 255, 106 / 255)
            self.dog_transform = rendering.Transform()
            gou.add_attr(self.dog_transform)
            self.viewer.add_geom(gou)

        # the dog sits on the boundary circle, the sheep inside it (angles are in degrees)
        self.dog_transform.set_translation(self.R * cos(self.state[1] / 180 * pi), self.R * sin(self.state[1] / 180 * pi))
        self.sheep_transform.set_translation(self.state[2] * cos(self.state[0] / 180 * pi), self.state[2] * sin(self.state[0] / 180 * pi))

        return self.viewer.render(return_rgb_array=mode == "rgb_array")


    def close(self):
        if self.viewer:
            self.viewer.close()
            self.viewer = None
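
For gym.make('test3-v0') in test3_main.py to work, the environment has to be registered with gym first. Below is a minimal registration sketch, assuming the TEST3 class above is saved as test3_env.py; the module name and the step cap are assumptions, not part of the original upload:

from gym.envs.registration import register

register(
    id='test3-v0',
    entry_point='test3_env:TEST3',  # assumed module path for the class above
    max_episode_steps=1000,         # assumed cap; the env itself never truncates
)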


Below is test3_main.py:

import gym
import time
# env = gym.make('test3-v0')
# # env = gym.make('Pendulum-v0')
# env.reset()
# env.render()
# time.sleep(10)  # pause for 10 s
# env.close()  # close the rendering window


class TEST3:
    def __init__(self, env):
        pass

    def play_once(self, env, render=False):
        episode_reward = 0  # accumulate the episode's total reward, starting from 0
        observation = env.reset()  # reset the environment to start a new episode
        while True:  # loop until the episode ends
            if render:  # whether to draw the GUI
                env.render()  # show the window; it can be closed with env.close()

            next_observation, reward, done, _ = env.step(observation)  # take one step (this env takes the observation as its input)
            episode_reward += reward  # accumulate the reward
            if done:  # episode finished?
                break
            observation = next_observation
        return episode_reward  # return the episode's total reward

if __name__ == '__main__':
    env = gym.make('test3-v0')
    env.seed(0)  # seed the RNG so results are exactly reproducible; usually this can be removed

    agent = TEST3(env)
    for i in range(100):
        episode_reward = agent.play_once(env, render=True)
        print('Episode', i, 'reward = {}'.format(episode_reward))

    time.sleep(10)  # pause for 10 s
    env.close()  # close the rendering window
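
As a quick sanity check of the geometry above, the sketch below exercises sheep_nextstep and the law-of-cosines distance directly. It assumes TEST3 is importable from the hypothetical test3_env module mentioned in the registration sketch:

from math import cos, pi, sqrt
from test3_env import TEST3  # hypothetical module name, see above

env = TEST3()
# a straight radial step (th == 90) from r0 = 1 should land at r0 + v_sheep * dt = 1.3
angle, r1 = env.sheep_nextstep(0.0, 90, 1.0)
print(angle, r1)  # expected: 0.0 1.3

# dog-sheep distance for a 90-degree separation with r_sheep = 3 and R = 5
d = sqrt(env.R ** 2 + 3 ** 2 - 2 * env.R * 3 * cos(90 / 180 * pi))
print(d)  # sqrt(34) ≈ 5.83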
