gym atari 游戏安装和使用

最新推荐文章于 2024-03-28 10:18:58 发布

阿豪boy

最新推荐文章于 2024-03-28 10:18:58 发布

阅读量561

点赞数

文章标签：游戏强化学习 bluetooth interop timestamp

本文链接：https://blog.csdn.net/qq_35516360/article/details/122065848

版权

原文链接: gym atari 游戏安装和使用

上一篇: 使用 tkinter 监听键盘和鼠标事件

下一篇: opencv HITMISS 基本操作

github

https://github.com/openai/gym

安装

pip install gym
pip install --no-index -f https://github.com/Kojoley/atari-py/releases atari_py

测试是否成功

import gym
import time

# Install the dependencies first:
#   pip install gym
#   pip install --no-index -f https://github.com/Kojoley/atari-py/releases atari_py

# Other environment ids that can be used here: 'Pong-v0', 'Breakout-v4'.
env = gym.make('SpaceInvaders-ram-v4')

try:
    for i_episode in range(1):
        env.reset()
        while True:
            env.render()
            # Play with a randomly sampled action on every step.
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished")
                break
            # Pause between frames so the rendering can be followed by eye.
            time.sleep(.1)
finally:
    # Close the environment to avoid an error at exit; doing it in
    # ``finally`` also covers Ctrl-C and exceptions raised while playing.
    env.close()

打砖块游戏

返回的 observation 是一个形状为 (h, w, c) 的 uint8 数组，表示一帧游戏画面。

0 等待

1 发射小球（仅在场上没有小球时生效；已有小球时不执行任何操作）

2 右

3 左

import gym
import time
env = gym.make('Breakout-v4')
# Discrete(4) set{0, 1, 2, 3}
print(env.action_space)
# Box(210, 160, 3)
print(env.observation_space)

try:
    for i_episode in range(1):
        env.reset()
        while True:
            env.render()
            # Play with a randomly sampled action on every step.
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished")
                break
            # Pause between frames so the rendering can be followed by eye.
            time.sleep(.05)
finally:
    # Close the environment to avoid an error at exit; doing it in
    # ``finally`` also covers Ctrl-C and exceptions raised while playing.
    env.close()

、

随机动作采样，符合均匀分布

import gym
from collections import Counter
env = gym.make('Breakout-v4')
# Reset once up front; further resets happen only when an episode ends.
env.reset()

# Every action drawn from the action space, in sampling order.
a = []
try:
    for _ in range(1000):
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        a.append(action)
        if done:
            # The episode is over: start a new one before the next step.
            env.reset()
finally:
    # Release the environment even if stepping raises.
    env.close()

# Tally how often each action was sampled.
print(Counter(a))

Counter({0: 255, 1: 253, 3: 251, 2: 241})

手动玩, 标准动作

# Maps each action index to its name. The index of a name is its position
# in the tuple below, starting at 0.
ACTION_MEANING = dict(enumerate((
    "NOOP",
    "FIRE",
    "UP",
    "RIGHT",
    "LEFT",
    "DOWN",
    "UPRIGHT",
    "UPLEFT",
    "DOWNRIGHT",
    "DOWNLEFT",
    "UPFIRE",
    "RIGHTFIRE",
    "LEFTFIRE",
    "DOWNFIRE",
    "UPRIGHTFIRE",
    "UPLEFTFIRE",
    "DOWNRIGHTFIRE",
    "DOWNLEFTFIRE",
)))

通过线程实现控制，但是不是很流畅

import gym
import time
from threading import Thread


class Game(Thread):
    """Thread that keeps rendering and stepping a Breakout environment.

    On every step the thread applies whatever value is currently stored in
    ``self.action``; other threads control the game by assigning a new
    value to that attribute.
    """

    def __init__(self):
        super().__init__()
        self.env = gym.make('Breakout-v4')
        self.env.reset()
        # Action applied on each step until another thread overwrites it.
        self.action = 0

    def run(self):
        """Render and step the environment until the episode is done.

        The environment is closed when the loop exits, whether the episode
        finished normally or rendering/stepping raised.
        """
        try:
            while True:
                self.env.render()
                observation, reward, done, info = self.env.step(self.action)
                if done:
                    print("Episode finished")
                    break
                # Pause between steps.
                time.sleep(.2)
        finally:
            self.env.close()


g = Game()
g.start()
print('start')

# Read action numbers from stdin for as long as the game thread is running.
while g.is_alive():
    s = input('action:\n')
    if s == '':
        continue
    try:
        action = int(s)
    except ValueError:
        # Non-numeric input used to crash the loop; ask again instead.
        print(f'not an integer: {s!r}')
        continue
    # Reject values outside the environment's action space so an invalid
    # action is never handed to the game thread.
    if not g.env.action_space.contains(action):
        print(f'invalid action: {action}')
        continue
    print(s, action)
    g.action = action

使用多线程

一个线程渲染游戏画面，主线程获取键盘事件并控制游戏中的动作

注意执行动作返回的reward 是该次动作的奖励，只有该动作打到砖块后才会返回正值，全局奖励需要进行求和

import gym
import time
from threading import Thread
from tkinter import *


class Game(Thread):
    """Thread that renders and steps Breakout while summing the reward.

    On every step the thread applies whatever value is currently stored in
    ``self.action``; other threads control the game by assigning a new
    value to that attribute and may read ``self.total_reward``.
    """

    def __init__(self):
        super().__init__()
        self.env = gym.make('Breakout-v4')
        self.env.reset()
        # Action applied on each step until another thread overwrites it.
        self.action = 0
        # Sum of the per-step rewards returned by ``env.step`` so far.
        self.total_reward = 0

    def run(self):
        """Render and step the environment until the episode is done.

        Each step's reward is added to ``self.total_reward``. The
        environment is closed when the loop exits, whether the episode
        finished normally or rendering/stepping raised.
        """
        try:
            while True:
                self.env.render()
                observation, reward, done, info = self.env.step(self.action)

                self.total_reward += reward
                if done:
                    print("Episode finished")
                    print(self.total_reward)
                    break
                # Pause between steps.
                time.sleep(.05)
        finally:
            self.env.close()


# Create the game thread and start it; Game.run() then renders and steps
# the environment concurrently with the code below.
g = Game()
g.start()
print('start')

# Tk root window, created on the main thread.
root = Tk()


def key(event):
    """Handle a Tk ``<Key>`` event by updating the game's current action.

    W/A/S/D (in either case) select an action for the game thread. Any
    other key, or an event that carries no character, leaves the current
    action unchanged instead of raising ``KeyError``. The accumulated
    reward is printed after every key press.

    Args:
        event: Tk event object; only its ``char`` attribute is read.
    """
    key_map = {
        'w': 1,  # fire (launch the ball)
        's': 0,  # stop
        'a': 3,  # left
        'd': 2,  # right
    }
    # ``dict.get`` returns None for unmapped keys and for the empty string
    # that Tk reports for keys without a character (e.g. modifiers).
    action = key_map.get(event.char.lower())
    if action is not None:
        g.action = action

    print(f'reword:{g.total_reward}')


# A 100x100 frame inside the root window that receives the key events.
frame = Frame(root, width=100, height=100)
# Give the frame keyboard focus so the <Key> binding below gets key presses.
frame.focus_set()
# Call key() for every key press delivered to the frame.
frame.bind("<Key>", key)
frame.pack()
# Run the Tk event loop on the main thread.
root.mainloop()