原文链接: gym atari 游戏安装和使用
上一篇: 使用 tkinter 监听键盘和鼠标事件
下一篇: opencv HITMISS 基本操作
github
安装
pip install gym
pip install --no-index -f https://github.com/Kojoley/atari-py/releases atari_py
测试是否安装成功
import gym
import time
# Installation:
#   pip install gym
#   pip install --no-index -f https://github.com/Kojoley/atari-py/releases atari_py

# Smoke test: play one episode with random actions while rendering it.
# Other environments that can be tried the same way:
#   env = gym.make('Pong-v0')
#   env = gym.make('Breakout-v4')
env = gym.make('SpaceInvaders-ram-v4')
try:
    for i_episode in range(1):
        env.reset()
        while True:
            env.render()
            action = env.action_space.sample()
            # Only the termination flag is needed for this demo.
            _, _, done, _ = env.step(action)
            if done:
                print("Episode finished")
                break
            # Slow the loop down so the rendered frames can be followed.
            time.sleep(.1)
finally:
    # Close explicitly (even if the loop raised) to avoid an error on exit.
    env.close()
打砖块游戏
返回的 observation 是一个形状为 (h, w, c) 的 uint8 数组,表示一帧游戏画面
0 等待
1 发射小球(仅当场上没有小球时生效,已有小球时什么都不做)
2 右
3 左
import gym
import time
# Play one Breakout episode with random actions while rendering it.
env = gym.make('Breakout-v4')
try:
    # Discrete(4) set{0, 1, 2, 3}
    print(env.action_space)
    # Box(210, 160, 3)
    print(env.observation_space)
    for i_episode in range(1):
        env.reset()
        while True:
            env.render()
            action = env.action_space.sample()
            # Only the termination flag is needed for this demo.
            _, _, done, _ = env.step(action)
            if done:
                print("Episode finished")
                break
            # Slow the loop down so the rendered frames can be followed.
            time.sleep(.05)
finally:
    # Close explicitly (even if the loop raised) to avoid an error on exit.
    env.close()
随机动作采样,符合均匀分布
import gym
from collections import Counter
# Draw 1000 random actions and count how often each one was sampled.
env = gym.make('Breakout-v4')
actions = []
try:
    # Reset once up front; afterwards only when an episode actually ends.
    env.reset()
    for _ in range(1000):
        action = env.action_space.sample()
        _, _, done, _ = env.step(action)
        actions.append(action)
        if done:
            env.reset()
finally:
    # Release the environment even if stepping raised.
    env.close()
print(Counter(actions))
Counter({0: 255, 1: 253, 3: 251, 2: 241})
手动玩, 标准动作
# Action id -> action name for the standard action set. The id of each
# action is its position in the tuple below, starting at 0.
ACTION_MEANING = dict(enumerate((
    "NOOP",
    "FIRE",
    "UP",
    "RIGHT",
    "LEFT",
    "DOWN",
    "UPRIGHT",
    "UPLEFT",
    "DOWNRIGHT",
    "DOWNLEFT",
    "UPFIRE",
    "RIGHTFIRE",
    "LEFTFIRE",
    "DOWNFIRE",
    "UPRIGHTFIRE",
    "UPLEFTFIRE",
    "DOWNRIGHTFIRE",
    "DOWNLEFTFIRE",
)))
通过线程实现控制,但操作不是很流畅
import gym
import time
from threading import Thread
class Game(Thread):
    """Background thread that renders and steps a gym environment.

    The thread repeatedly applies ``self.action``; another thread changes the
    action being played by assigning a new value to that attribute.

    Args:
        env_id: Id passed to ``gym.make``.
        frame_delay: Seconds to sleep between two consecutive steps.
    """

    def __init__(self, env_id='Breakout-v4', frame_delay=.2):
        super().__init__()
        self.env = gym.make(env_id)
        self.env.reset()
        # Action applied on every step until it is overwritten.
        self.action = 0
        self.frame_delay = frame_delay

    def run(self):
        """Render and step the environment until the episode is done."""
        try:
            while True:
                self.env.render()
                _, _, done, _ = self.env.step(self.action)
                if done:
                    print("Episode finished")
                    break
                time.sleep(self.frame_delay)
        finally:
            # Close the environment when the loop ends, also on errors.
            self.env.close()
# Start the game thread, then read action ids from the console and hand them
# to the running game.
g = Game()
g.start()
print('start')
# Stop prompting once the game thread has finished its episode.
while g.is_alive():
    s = input('action:\n')
    if s == '':
        continue
    try:
        action = int(s)
    except ValueError:
        # A typo must not kill the control loop.
        print(f'ignored non-numeric input: {s!r}')
        continue
    if not g.env.action_space.contains(action):
        # Reject ids the environment does not define instead of letting the
        # game thread fail inside env.step().
        print(f'ignored invalid action: {action}')
        continue
    print(s, action)
    g.action = action
使用多线程
一个线程渲染游戏画面,主线程获取键盘事件并控制游戏中的动作
注意:执行动作返回的 reward 只是该次动作的即时奖励,只有该动作打到砖块时才为正值;整局的总奖励需要对每一步的 reward 求和
import gym
import time
from threading import Thread
from tkinter import *
class Game(Thread):
    """Background thread that renders and steps a gym environment.

    The thread repeatedly applies ``self.action`` and accumulates the per-step
    reward in ``self.total_reward``; another thread changes the action being
    played by assigning a new value to ``self.action``.

    Args:
        env_id: Id passed to ``gym.make``.
        frame_delay: Seconds to sleep between two consecutive steps.
    """

    def __init__(self, env_id='Breakout-v4', frame_delay=.05):
        super().__init__()
        self.env = gym.make(env_id)
        self.env.reset()
        # Action applied on every step until it is overwritten.
        self.action = 0
        # Sum of the rewards returned by every step so far.
        self.total_reward = 0
        self.frame_delay = frame_delay

    def run(self):
        """Render and step the environment until the episode is done."""
        try:
            while True:
                self.env.render()
                _, reward, done, _ = self.env.step(self.action)
                # step() only reports the reward of this single action.
                self.total_reward += reward
                if done:
                    print("Episode finished")
                    print(self.total_reward)
                    break
                time.sleep(self.frame_delay)
        finally:
            # Close the environment when the loop ends, also on errors.
            self.env.close()
# Create the game thread and start it so the game runs in the background.
g = Game()
g.start()
print('start')
# Tk root window; the key handler defined below is attached to a frame in it.
root = Tk()
# Keyboard character -> Breakout action id.
_KEY_TO_ACTION = {
    'w': 1,  # fire
    's': 0,  # stop (no-op)
    'a': 3,  # left
    'd': 2,  # right
}


def key(event):
    """Translate a Tk key event into the action played by the game thread.

    Keys without a mapping (including events whose ``char`` is empty) are
    ignored instead of raising ``KeyError``. Upper-case letters are accepted
    as well.

    Args:
        event: Tk key event; only its ``char`` attribute is read.
    """
    action = _KEY_TO_ACTION.get(event.char.lower())
    if action is None:
        return
    g.action = action
    print(f'reword:{g.total_reward}')
# A small frame that serves as the target widget for keyboard events.
frame = Frame(root, width=100, height=100)
# Give the frame keyboard focus so that it receives the key events.
frame.focus_set()
# Call key() for every key press.
frame.bind("<Key>", key)
frame.pack()
# Run the Tk event loop on the main thread.
root.mainloop()