The original DQN, implemented in PyTorch

import os
import random
import sys
import time

import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

from game.flappy_bird import GameState


class NeuralNetwork(nn.Module):

    def __init__(self):
        super(NeuralNetwork, self).__init__()

        self.number_of_actions = 2
        self.gamma = 0.99
        self.final_epsilon = 0.0001
        self.initial_epsilon = 0.1
        self.number_of_iterations = 2000000
        self.replay_memory_size = 10000
        self.minibatch_size = 32

        self.conv1 = nn.Conv2d(4, 32, 8, 4)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(32, 64, 4, 2)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = nn.Conv2d(64, 64, 3, 1)
        self.relu3 = nn.ReLU(inplace=True)
        self.fc4 = nn.Linear(3136, 512)
        self.relu4 = nn.ReLU(inplace=True)
        self.fc5 = nn.Linear(512, self.number_of_actions)
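        # For reference, fc4's input size of 3136 follows from the 84x84, 4-channel input:
        #   conv1 (kernel 8, stride 4): (84 - 8) / 4 + 1 = 20  -> 32 x 20 x 20
        #   conv2 (kernel 4, stride 2): (20 - 4) / 2 + 1 = 9   -> 64 x 9 x 9
        #   conv3 (kernel 3, stride 1): (9 - 3) / 1 + 1 = 7    -> 64 x 7 x 7 = 3136 features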

    def forward(self, x):
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.relu2(out)
        out = self.conv3(out)
        out = self.relu3(out)
        out = out.view(out.size()[0], -1)
        out = self.fc4(out)
        out = self.relu4(out)
        out = self.fc5(out)

        return out


def init_weights(m):
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        torch.nn.init.uniform_(m.weight, -0.01, 0.01)  # in-place uniform init (uniform is deprecated)
        m.bias.data.fill_(0.01)


def image_to_tensor(image):
    image_tensor = image.transpose(2, 0, 1)  # HWC -> CHW for PyTorch convolutions
    image_tensor = image_tensor.astype(np.float32)
    image_tensor = torch.from_numpy(image_tensor)
    if torch.cuda.is_available():  # put on GPU if CUDA is available
        image_tensor = image_tensor.cuda()
    return image_tensor


def resize_and_bgr2gray(image):
    # crop away the ground at the bottom of the screen, then resize to 84x84 and convert to grayscale
    image = image[0:288, 0:404]
    image_data = cv2.cvtColor(cv2.resize(image, (84, 84)), cv2.COLOR_BGR2GRAY)
    # binarize: every non-black pixel becomes white
    image_data[image_data > 0] = 255
    image_data = np.reshape(image_data, (84, 84, 1))
    return image_data


def train(model, start):
    # define Adam optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-6)

    # initialize mean squared error loss
    criterion = nn.MSELoss()

    # instantiate game
    game_state = GameState()

    # initialize replay memory
    replay_memory = []

    # initial action is do nothing
    action = torch.zeros([model.number_of_actions], dtype=torch.float32)
    action[0] = 1
    image_data, reward, terminal = game_state.frame_step(action)
    image_data = resize_and_bgr2gray(image_data)
    image_data = image_to_tensor(image_data)
    # the state is a stack of the last 4 frames; bootstrap it by repeating the first frame
    state = torch.cat((image_data, image_data, image_data, image_data)).unsqueeze(0)

    # initialize epsilon value
    epsilon = model.initial_epsilon
    iteration = 0

    # anneal epsilon linearly from initial_epsilon to final_epsilon over the whole run
    epsilon_decrements = np.linspace(model.initial_epsilon, model.final_epsilon, model.number_of_iterations)

    # lists used to plot how the reward evolves during training
    rewards = []

    # x-axis values (training iterations, plotted as "Episode")
    episodes = []

    # create the live plot; interactive mode lets it refresh after every update
    plt.ion()
    fig, ax = plt.subplots(figsize=(10, 6))
    line, = ax.plot(episodes, rewards)
    plt.title("DQN Reward")
    plt.xlabel("Episode")
    plt.ylabel("Reward")
    # main infinite loop
    while iteration < model.number_of_iterations:
        # get output from the neural network
        output = model(state)[0]

        # initialize action
        action = torch.zeros([model.number_of_actions], dtype=torch.float32)
        if torch.cuda.is_available():  # put on GPU if CUDA is available
            action = action.cuda()

        # epsilon greedy exploration
        random_action = random.random() <= epsilon
        if random_action:
            print("Performed random action!")
            action_index = torch.randint(model.number_of_actions, torch.Size([]), dtype=torch.int)
        else:
            action_index = torch.argmax(output)

        if torch.cuda.is_available():  # put on GPU if CUDA is available
            action_index = action_index.cuda()

        action[action_index] = 1

        # get next state and reward
        image_data_1, reward, terminal = game_state.frame_step(action)
        image_data_1 = resize_and_bgr2gray(image_data_1)
        image_data_1 = image_to_tensor(image_data_1)
        # roll the frame stack: drop the oldest frame and append the newest one
        state_1 = torch.cat((state.squeeze(0)[1:, :, :], image_data_1)).unsqueeze(0)

        action = action.unsqueeze(0)
        reward = torch.from_numpy(np.array([reward], dtype=np.float32)).unsqueeze(0)

        # save transition to replay memory
        replay_memory.append((state, action, reward, state_1, terminal))

        # if replay memory is full, remove the oldest transition
        if len(replay_memory) > model.replay_memory_size:
            replay_memory.pop(0)

        # epsilon annealing
        epsilon = epsilon_decrements[iteration]

        # sample random minibatch
        minibatch = random.sample(replay_memory, min(len(replay_memory), model.minibatch_size))

        # unpack minibatch
        state_batch = torch.cat(tuple(d[0] for d in minibatch))
        action_batch = torch.cat(tuple(d[1] for d in minibatch))
        reward_batch = torch.cat(tuple(d[2] for d in minibatch))
        state_1_batch = torch.cat(tuple(d[3] for d in minibatch))

        if torch.cuda.is_available():  # put on GPU if CUDA is available
            state_batch = state_batch.cuda()
            action_batch = action_batch.cuda()
            reward_batch = reward_batch.cuda()
            state_1_batch = state_1_batch.cuda()

        # get output for the next state
        output_1_batch = model(state_1_batch)

        # set y_j to r_j for terminal state, otherwise to r_j + gamma*max(Q)
        y_batch = torch.cat(tuple(reward_batch[i] if minibatch[i][4]
                                  else reward_batch[i] + model.gamma * torch.max(output_1_batch[i])
                                  for i in range(len(minibatch))))

        # extract Q(s, a) for the action actually taken: action_batch is one-hot,
        # so the elementwise product summed over dim 1 selects that single Q-value
        q_value = torch.sum(model(state_batch) * action_batch, dim=1)

        # PyTorch accumulates gradients by default, so they need to be reset in each pass
        optimizer.zero_grad()

        # returns a new Tensor, detached from the current graph, the result will never require gradient
        y_batch = y_batch.detach()

        # calculate loss
        loss = criterion(q_value, y_batch)

        # do backward pass
        loss.backward()
        optimizer.step()

        # set state to be state_1
        state = state_1
        iteration += 1

        # update the plot
        rewards.append(reward.numpy()[0][0])
        episodes.append(iteration)
        line.set_xdata(episodes)
        line.set_ydata(rewards)
        ax.relim()  # recompute the data limits
        ax.autoscale_view(True, True, True)  # update the axes
        fig.canvas.draw()
        plt.pause(0.001)  # brief pause so the figure window can redraw

        # save the model every 25000 iterations (the whole module is pickled,
        # so loading it later requires this class definition to be importable)
        if iteration % 25000 == 0:
            torch.save(model, "pretrained_model/current_model_" + str(iteration) + ".pth")

        # print the current status
        print("iteration:", iteration, "elapsed time:", time.time() - start, "epsilon:", epsilon, "action:",
              action_index.cpu().detach().numpy(), "reward:", reward.numpy()[0][0], "Q max:",
              np.max(output.cpu().detach().numpy()))

    # turn off interactive mode and keep the final plot on screen
    plt.ioff()
    plt.show(block=True)  # block=True keeps the window open until it is closed


def test(model):
    game_state = GameState()

    # initial action is do nothing
    action = torch.zeros([model.number_of_actions], dtype=torch.float32)
    action[0] = 1
    image_data, reward, terminal = game_state.frame_step(action)
    image_data = resize_and_bgr2gray(image_data)
    image_data = image_to_tensor(image_data)
    state = torch.cat((image_data, image_data, image_data, image_data)).unsqueeze(0)

    while True:
        # get output from the neural network
        output = model(state)[0]

        action = torch.zeros([model.number_of_actions], dtype=torch.float32)
        if torch.cuda.is_available():  # put on GPU if CUDA is available
            action = action.cuda()

        # get action
        action_index = torch.argmax(output)
        if torch.cuda.is_available():  # put on GPU if CUDA is available
            action_index = action_index.cuda()
        action[action_index] = 1

        # get next state
        image_data_1, reward, terminal = game_state.frame_step(action)
        image_data_1 = resize_and_bgr2gray(image_data_1)
        image_data_1 = image_to_tensor(image_data_1)
        state_1 = torch.cat((state.squeeze(0)[1:, :, :], image_data_1)).unsqueeze(0)

        # set state to be state_1
        state = state_1


def main(mode):
    cuda_is_available = torch.cuda.is_available()

    if mode == 'test':
        model = torch.load(
            'pretrained_model/current_model_2000000.pth',
            map_location='cpu' if not cuda_is_available else None
        ).eval()

        if cuda_is_available:  # put on GPU if CUDA is available
            model = model.cuda()

        test(model)

    elif mode == 'train':
        if not os.path.exists('pretrained_model/'):
            os.mkdir('pretrained_model/')

        model = NeuralNetwork()

        if cuda_is_available:  # put on GPU if CUDA is available
            model = model.cuda()

        model.apply(init_weights)
        start = time.time()

        train(model, start)


if __name__ == "__main__":
    main(sys.argv[1])
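
Usage note: the post does not name the script file, but assuming it is saved as, say, dqn.py, running "python dqn.py train" starts training from scratch (checkpoints are written to pretrained_model/), and "python dqn.py test" loads pretrained_model/current_model_2000000.pth and lets the trained agent play.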
