代码使用 Python 2.x 版本,TensorFlow 使用 1.1(CPU)版本
论文地址:https://arxiv.org/abs/1509.06461
===============第一个文件 replay_memory.py================================
import numpy as np
MEMORYSIZE = 600000  # capacity of the circular replay buffer


class Replay_memory:
    """Fixed-size circular experience-replay buffer for DQN-style training.

    Transitions (previous_state, action, reward, next_state, terminal) are
    stored in pre-allocated numpy arrays for speed (rather than a deque).
    Once the buffer is full, the oldest entries are overwritten.
    """

    def __init__(self):
        # One row per transition; states carry 4 features
        # (assumes a 4-dim observation, e.g. CartPole -- TODO confirm).
        self.previous_state = np.empty((MEMORYSIZE, 4), dtype=np.float32)
        # 0 is the 1st action, 1 is the 2nd action
        self.action = np.empty((MEMORYSIZE, 1), dtype=np.uint8)
        self.reward = np.empty((MEMORYSIZE, 1), dtype=np.float32)
        self.next_state = np.empty((MEMORYSIZE, 4), dtype=np.float32)
        # `bool`, not `np.bool`: that alias was deprecated in NumPy 1.20
        # and removed in 1.24; the builtin is what it aliased anyway.
        self.terminal = np.empty((MEMORYSIZE, 1), dtype=bool)
        self.index = 0  # next write position
        self.full_memory = False  # True once the buffer has wrapped around

    def memory_in(self, previous_state, action, reward, next_state, terminal):
        """Store one transition, overwriting the oldest entry when full."""
        self.previous_state[self.index] = previous_state
        self.action[self.index] = action
        self.reward[self.index] = reward
        self.next_state[self.index] = next_state
        self.terminal[self.index] = terminal
        self.index += 1
        if self.index == MEMORYSIZE:  # wrap around; start overwriting
            self.index = 0
            self.full_memory = True

    def memory_out(self, size_minibatch):
        """Sample `size_minibatch` transitions uniformly at random.

        Returns five numpy arrays, each of shape (size_minibatch, num_fea):
        previous_state, action, reward, next_state, terminal.
        Raises ValueError if the buffer is empty (same exception type the
        underlying `randint(0, 0)` would have produced).
        """
        limit = MEMORYSIZE if self.full_memory else self.index
        if limit == 0:
            raise ValueError("cannot sample from an empty replay memory")
        idx = np.random.randint(0, limit, size=size_minibatch)
        # Fancy indexing copies the sampled rows in one vectorized step
        # instead of a Python-level append loop.
        return (self.previous_state[idx],
                self.action[idx],
                self.reward[idx],
                self.next_state[idx],
                self.terminal[idx])

    def test_mempry_in(self):
        """Load 100 dummy transitions (test helper; typo'd name kept for
        backward compatibility with existing callers)."""
        for _ in range(100):
            self.memory_in([1., 1., 1., 1.], [0], [0.1], [1., 1., 1., 1.], [False])
#test#test#test#test#test#test#test#test#test#test#test#test
# Earlier manual test, kept for reference (superseded by test_mempry_in).
'''
if __name__ == "__main__":
rm = Replay_memory()
for i in range(10):
rm.memory_in((1., 2., 3., 4.), [1], [0.1], [1., 2., 3., 4.], True)
rm.memory_in((2, 2, 3, 4), [0], [0.1], [2, 2, 3, 4], False)
rm.memory_in((3, 2, 3, 4), [1], [0.1], [3, 2, 3, 4], False)
s,a,r,ss,t = rm.memory_out(32)
print ss
'''
if __name__ == "__main__":
    # Smoke test: fill the buffer with dummy data, then sample a minibatch.
    rm = Replay_memory()
    rm.test_mempry_in()
    s, a, r, ss, t = rm.memory_out(32)
    # print(...) works under both Python 2 and Python 3; the bare
    # `print ss` statement form is a SyntaxError on Python 3.
    print(ss)
Replay_memory主要有两个方法:memory_in和memory_out,分别用来往Replay_memory放置经验和从Replay_memory中提取经验。为了效率,使用numpy数组实现,而没有使用deque。方法test_mempry_in是测试时使用的,使用该方法后,将加载一些经验进入Replay_memory。
===========第二个文件 nn.py============================
import tensorflow as tf
import math
class Fcnn:
def __init__(self):
self.batch_size = 32
self.h1_size = 20
self