gym atari 游戏安装和使用

原文链接: gym atari 游戏安装和使用

上一篇: 使用 tkinter 监听键盘和鼠标事件

下一篇: opencv HITMISS 基本操作

github

https://github.com/openai/gym

安装

pip install gym
pip install --no-index -f https://github.com/Kojoley/atari-py/releases atari_py

测试是否成功

import gym
import time

'''
pip install gym
pip install --no-index -f https://github.com/Kojoley/atari-py/releases atari_py
'''
# Other environment ids that can be used here: 'Pong-v0', 'Breakout-v4'.
env = gym.make('SpaceInvaders-ram-v4')

# Play one episode with randomly sampled actions.
for i_episode in range(1):
    env.reset()
    done = False
    while not done:
        env.render()
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if not done:
            # Pause 0.1 s between steps.
            time.sleep(.1)
    print("Episode finished")
# Close the environment explicitly to avoid an error at exit.
env.close()

打砖块游戏

返回的 observation 是一个形状为 (h, w, c) 的 uint8 数组,表示一帧游戏画面。

0 等待

1 发射小球(仅当场上没有小球时生效;已有小球时不做任何事)

2 右

3 左

import gym
import time
env = gym.make('Breakout-v4')
# Prints: Discrete(4), i.e. the action set {0, 1, 2, 3}
print(env.action_space)
# Prints: Box(210, 160, 3)
print(env.observation_space)

# Play one episode with randomly sampled actions.
for i_episode in range(1):
    env.reset()
    done = False
    while not done:
        env.render()
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)
        if not done:
            # Pause 0.05 s between steps.
            time.sleep(.05)
    print("Episode finished")
# Close the environment explicitly to avoid an error at exit.
env.close()

3b632556260412cebb53922f5dc6b03bdd7.jpg

随机动作采样,符合均匀分布

import gym
from collections import Counter
env = gym.make('Breakout-v4')

# Sample 1000 random actions and tally them to inspect their distribution.
a = []
# Reset once up front; resetting on every iteration is unnecessary and made
# the `done` check below redundant.
env.reset()
for i_step in range(1000):
    action = env.action_space.sample()
    observation, reward, done, info = env.step(action)
    a.append(action)
    if done:
        # The episode ended: start a new one so stepping can continue.
        env.reset()

print(Counter(a))
# Close the environment explicitly, as the other snippets do, to avoid an
# error at exit.
env.close()

Counter({0: 255, 1: 253, 3: 251, 2: 241})

手动玩, 标准动作

# Standard action ids mapped to their names: each name's position in the
# tuple is its integer action id (0 = "NOOP" ... 17 = "DOWNLEFTFIRE").
ACTION_MEANING = dict(enumerate((
    "NOOP",
    "FIRE",
    "UP",
    "RIGHT",
    "LEFT",
    "DOWN",
    "UPRIGHT",
    "UPLEFT",
    "DOWNRIGHT",
    "DOWNLEFT",
    "UPFIRE",
    "RIGHTFIRE",
    "LEFTFIRE",
    "DOWNFIRE",
    "UPRIGHTFIRE",
    "UPLEFTFIRE",
    "DOWNRIGHTFIRE",
    "DOWNLEFTFIRE",
)))


通过线程实现控制,但操作起来不是很流畅

import gym
import time
from threading import Thread


class Game(Thread):
    """Background thread that steps a Breakout environment.

    The thread repeatedly renders the environment and applies
    ``self.action``; other threads steer the game by assigning a new
    integer action id to ``action``.
    """

    def __init__(self):
        super().__init__()
        self.env = gym.make('Breakout-v4')
        self.env.reset()
        # Action applied on every step; stays 0 until another thread
        # assigns a different value.
        self.action = 0

    def run(self):
        """Render and step the environment until the episode is done.

        The environment is closed when the loop exits, whether the
        episode finished normally or an exception was raised.
        """
        try:
            while True:
                self.env.render()
                observation, reward, done, info = self.env.step(self.action)
                if done:
                    print("Episode finished")
                    break
                time.sleep(.2)
        finally:
            # Close from the thread that rendered, so the environment is
            # always released.
            self.env.close()


g = Game()
g.start()
print('start')

# Read action ids from stdin and hand them to the game thread.
while True:
    s = input('action:\n')
    if s == '':
        continue
    try:
        action = int(s)
    except ValueError:
        # Non-numeric input must not kill the control loop.
        print(f'invalid action: {s!r}')
        continue
    if not g.env.action_space.contains(action):
        # Reject ids outside the environment's action space before they
        # reach env.step() in the game thread.
        print(f'action out of range: {action}')
        continue
    print(s, action)
    g.action = action

使用多线程

一个线程渲染游戏画面,主线程获取键盘事件并控制游戏中的动作

注意:执行动作返回的 reward 只是该次动作的奖励,只有该动作打到砖块时才会返回正值;整局的总奖励需要自行累加。

import gym
import time
from threading import Thread
from tkinter import *


class Game(Thread):
    """Background thread that steps a Breakout environment and sums rewards.

    The thread repeatedly renders the environment and applies
    ``self.action``; other threads steer the game by assigning a new
    integer action id to ``action`` and may read ``total_reward``.
    """

    def __init__(self):
        super().__init__()
        self.env = gym.make('Breakout-v4')
        self.env.reset()
        # Action applied on every step; stays 0 until another thread
        # assigns a different value.
        self.action = 0
        # Sum of the per-step rewards returned by env.step() so far.
        self.total_reward = 0

    def run(self):
        """Render and step the environment until the episode is done.

        Each step's reward is added to ``total_reward``. The environment
        is closed when the loop exits, whether the episode finished
        normally or an exception was raised.
        """
        try:
            while True:
                self.env.render()
                observation, reward, done, info = self.env.step(self.action)

                self.total_reward += reward
                if done:
                    print("Episode finished")
                    print(self.total_reward)
                    break
                time.sleep(.05)
        finally:
            # Close from the thread that rendered, so the environment is
            # always released.
            self.env.close()


# Create the game thread and start it; its run() loop renders and steps the
# environment in the background.
g = Game()
g.start()
print('start')

# Create the Tk root window.
root = Tk()


def key(event):
    """Handle a Tk key event by updating the game thread's current action.

    Keys listed in ``key_map`` set ``g.action``; any other key (including
    events whose ``char`` is empty) is ignored instead of raising
    ``KeyError``. The accumulated reward is printed after every key event.

    Args:
        event: the Tk key event; only its ``char`` attribute is read.
    """
    key_map = {
        'w': 1,  # fire
        's': 0,  # stop (no-op)
        'a': 3,  # left
        'd': 2,  # right
    }
    action = key_map.get(event.char)
    if action is not None:
        g.action = action

    print(f'reword:{g.total_reward}')


# A 100x100 frame inside the root window; it is given keyboard focus and
# carries the <Key> binding.
frame = Frame(root, width=100, height=100)
frame.focus_set()
# Route every key event on the frame to key().
frame.bind("<Key>", key)
frame.pack()
# Enter the Tk event loop.
root.mainloop()

  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值