import os
import time
import numpy as np
import gym
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input
from keras.layers.merge import Add, Concatenate
from keras.optimizers import Adam
import keras.backend as K
import random
from collections import deque
def stack_samples(samples):
    """Unpack a batch of replay-buffer transitions into batched 2-D arrays.

    Each sample is a 5-tuple ``(state, action, reward, next_state, done)``.
    The components are stacked column-wise and flattened to shape
    ``(batch_size, -1)`` so they can be fed directly to the networks.

    Args:
        samples: Non-empty sequence of 5-tuples drawn from the replay memory.

    Returns:
        Tuple of five ``np.ndarray``s, each of shape ``(len(samples), -1)``:
        states, actions, rewards, next states, done flags.
    """
    # zip(*...) transposes the list of tuples into five column sequences.
    # This avoids np.array() over a ragged list of tuples, which NumPy >= 1.24
    # rejects (implicit object-array creation was removed).
    states, actions, rewards, next_states, dones = zip(*samples)
    batch_size = len(samples)

    def _batch(column):
        # Stack the per-sample entries and flatten each to a row vector.
        return np.stack(column).reshape((batch_size, -1))

    return (_batch(states), _batch(actions), _batch(rewards),
            _batch(next_states), _batch(dones))
# DDPG-style actor-critic agent using the TF1 graph API.
# NOTE(review): this class is TRUNCATED in the source listing -- __init__ is
# cut off mid-statement at `actor_model_w` below; the critic setup and all
# other methods are missing from this chunk.
class Agent(object):
    def __init__(self,sess):
        # TF1 session, kept for manually-run gradient ops elsewhere in the class.
        self.sess = sess
        self.epsilon = 0.9            # exploration rate -- presumably decayed each step; verify against training loop
        self.gamma = 0.99             # discount factor for future rewards
        self.epsilon_decay = 0.99995  # multiplicative decay factor applied to epsilon
        self.tau = 0.01               # soft-update mixing rate for target networks -- TODO confirm
        # Replay buffer; stack_samples() above expects entries shaped
        # (state, action, reward, next_state, done).
        self.memory = deque(maxlen=4000)
        # Online actor plus its input placeholder; create_actor_model is not
        # visible in this chunk.
        self.actor_state_input, self.actor_model = self.create_actor_model()
        # Target actor (input placeholder discarded) used for stable TD targets.
        _, self.target_actor_model = self.create_actor_model()
        # Placeholder for dQ/da fed in from the critic when training the actor.
        # tf.placeholder is TF1-only (removed in TF2 eager mode).
        self.actor_critic_grad = tf.placeholder(tf.float32,[None,1])
        # NOTE(review): source truncates here mid-statement.
        actor_model_w
keras-ddpg
最新推荐文章于 2024-04-11 13:44:31 发布
本文详细介绍了如何利用Keras库来实现Deep Deterministic Policy Gradient (DDPG)算法,这是一种用于连续动作空间强化学习的方法。通过实例代码,展示了从环境设置到模型构建、训练过程,帮助读者理解DDPG在实际问题中的应用。
摘要由CSDN通过智能技术生成