An exercise on Double DQN (Deep Reinforcement Learning with Double Q-learning, H. van Hasselt et al., arXiv, 2015)

The code uses Python 2.x and TensorFlow 1.1 (CPU).

Paper: https://arxiv.org/abs/1509.06461

=============== File 1: replay_memory.py ================================

import numpy as np


MEMORYSIZE = 600000

class Replay_memory:
    def __init__(self):

        self.previous_state = np.empty((MEMORYSIZE, 4), dtype=np.float32)
        self.action = np.empty((MEMORYSIZE, 1), dtype=np.uint8)#0 is the 1st action,1 is the 2nd action
        self.reward = np.empty((MEMORYSIZE, 1), dtype=np.float32)
        self.next_state = np.empty((MEMORYSIZE, 4), dtype=np.float32)
        self.terminal = np.empty((MEMORYSIZE, 1), dtype=np.bool)

        self.index = 0
        self.full_memory = False

    def memory_in(self, previous_state, action, reward, next_state, terminal):
        # Store one transition; the buffer is circular, so once MEMORYSIZE
        # transitions have been written the oldest entries are overwritten.
        self.previous_state[self.index] = previous_state
        self.action[self.index] = action
        self.reward[self.index] = reward
        self.next_state[self.index] = next_state

        self.terminal[self.index] = terminal

        self.index += 1
        if self.index == MEMORYSIZE:
            self.index = 0
            self.full_memory = True


    def memory_out(self, size_minibatch):
        # Sample size_minibatch transitions uniformly at random from the filled part of the buffer.
        minib_previous_state = []
        minib_action = []
        minib_reward = []
        minib_next_state = []
        minib_terminal = []

        if self.full_memory:
            index_sample = np.random.randint(0,MEMORYSIZE,size=size_minibatch).tolist()
        else:
            index_sample = np.random.randint(0, self.index, size=size_minibatch).tolist()

        for i in index_sample:
            minib_previous_state.append(self.previous_state[i])
            minib_action.append(self.action[i])
            minib_reward.append(self.reward[i])
            minib_next_state.append(self.next_state[i])
            minib_terminal.append(self.terminal[i])

        rs_minib_previous_state=np.asarray(minib_previous_state)
        rs_minib_action=np.asarray(minib_action)
        rs_minib_reward=np.asarray(minib_reward)
        rs_minib_next_state=np.asarray(minib_next_state)
        rs_minib_terminal=np.asarray(minib_terminal)
        # returns 5 numpy arrays, each with shape (size_minibatch, num_features)
        return rs_minib_previous_state, rs_minib_action, rs_minib_reward, rs_minib_next_state, rs_minib_terminal

    def test_memory_in(self):
        # Load a batch of dummy transitions so memory_out can be exercised.
        for i in range(100):
            self.memory_in([1., 1., 1., 1.], [0], [0.1], [1., 1., 1., 1.], [False])
            #self.memory_in([1., 1., 1., 1.], [1], [0.1], [1., 1., 1., 1.], [False])
            #self.memory_in([1., 1, 1., 1.], [0], [-1], [1., 1., 1., 1.], [True])



#test#test#test#test#test#test#test#test#test#test#test#test
'''
if __name__ == "__main__":
    rm = Replay_memory()
    for i in range(10):
        rm.memory_in((1., 2., 3., 4.), [1], [0.1], [1., 2., 3., 4.], True)
        rm.memory_in((2, 2, 3, 4), [0], [0.1], [2, 2, 3, 4], False)
        rm.memory_in((3, 2, 3, 4), [1], [0.1], [3, 2, 3, 4], False)
    s,a,r,ss,t = rm.memory_out(32)
    print ss

'''

if __name__ == "__main__":
    rm = Replay_memory()
    rm.test_memory_in()
    s,a,r,ss,t = rm.memory_out(32)
    print ss

Replay_memory has two main methods, memory_in and memory_out, which respectively store experiences into the replay memory and sample experiences out of it. For efficiency it is implemented with pre-allocated numpy arrays rather than a deque. The method test_memory_in is only used for testing; calling it loads some dummy experiences into the replay memory. A deque-based alternative is sketched below for comparison.
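For comparison, a deque-based replay memory (the more common textbook implementation) would look roughly like the sketch below. The class name DequeReplayMemory and its defaults are illustrative only and not part of the original code; note that it has to re-pack the sampled tuples into arrays on every call, which is the overhead the pre-allocated numpy version above avoids.

import random
from collections import deque

import numpy as np


class DequeReplayMemory:
    def __init__(self, max_size=600000):
        # deque(maxlen=...) drops the oldest transition automatically once full.
        self.buffer = deque(maxlen=max_size)

    def memory_in(self, previous_state, action, reward, next_state, terminal):
        self.buffer.append((previous_state, action, reward, next_state, terminal))

    def memory_out(self, size_minibatch):
        batch = random.sample(self.buffer, size_minibatch)
        # Re-pack the sampled tuples into five arrays, one per field.
        s, a, r, ss, t = map(np.asarray, zip(*batch))
        return s, a, r, ss, t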

=========== File 2: nn.py ============================

import tensorflow as tf
import math

class Fcnn:
    def __init__(self):
        self.batch_size = 32
        self.h1_size = 20

        self
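The original nn.py listing is cut off at this point. Purely as a hedged illustration of the core idea from the paper, and not a reconstruction of the missing file, the Double DQN target y = r + gamma * Q_target(s', argmax_a Q_online(s', a)) could be written in TensorFlow 1.x roughly as follows. The scope names "online"/"target", NUM_ACTIONS = 2, and GAMMA = 0.99 are assumptions, chosen to be consistent with the 4-feature states and two actions used in replay_memory.py.

import tensorflow as tf

GAMMA = 0.99        # discount factor (assumed value)
NUM_ACTIONS = 2     # assumed: two discrete actions, as in the replay memory comments
H1_SIZE = 20        # matches h1_size above


def q_network(state, scope):
    # A small fully connected Q-network: one hidden layer of H1_SIZE units.
    with tf.variable_scope(scope):
        h1 = tf.layers.dense(state, H1_SIZE, activation=tf.nn.relu)
        return tf.layers.dense(h1, NUM_ACTIONS)


# Placeholders fed from the minibatch returned by Replay_memory.memory_out.
next_state = tf.placeholder(tf.float32, [None, 4])
reward = tf.placeholder(tf.float32, [None, 1])
terminal = tf.placeholder(tf.float32, [None, 1])   # 1.0 if the episode ended, else 0.0

q_online_next = q_network(next_state, "online")    # online net selects the action
q_target_next = q_network(next_state, "target")    # target net evaluates that action

# Double DQN target: no bootstrapping on terminal transitions.
best_action = tf.argmax(q_online_next, axis=1)
double_q = tf.reduce_sum(q_target_next * tf.one_hot(best_action, NUM_ACTIONS),
                         axis=1, keep_dims=True)
y = reward + (1.0 - terminal) * GAMMA * double_q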