import gym
import tensorflow as tf
import numpy as np
import random
from collections import deque
# Hyper Parameters
GAMMA = 0.95 # discount factor for future rewards
LEARNING_RATE=0.01 # optimizer step size (consumer not visible in this excerpt -- presumably the actor/critic train ops)
class Actor():  # PI: the policy network of an Actor-Critic agent
    """Softmax policy ("actor") over a Gym environment's discrete actions.

    The TensorFlow session is supplied by the caller so the actor and
    critic can share one session.
    """

    def __init__(self, env, sess):
        # Step counter used during training.
        self.time_step = 0
        # Network dimensions come straight from the environment:
        # flat observation vector in, one logit per discrete action out.
        self.state_dim = env.observation_space.shape[0]
        self.action_dim = env.action_space.n
        # Build the softmax policy network (method defined on this class).
        self.create_softmax_network()
        # Keep the shared session and initialize all TF variables.
        self.session = sess
        self.session.run(tf.global_variables_initializer())
def create_softmax_network(self):
# Build the policy network: state input -> 20-unit ReLU hidden layer -> action logits.
# NOTE(review): weight_variable / bias_variable are helper methods not shown in this excerpt.
# network weights
W1 = self.weight_variable([self.state_dim, 20])
b1 = self.bias_variable([20])
W2 = self.weight_variable([20, self.action_dim])
b2 = self.bias_variable([self.action_dim])
# input layer
self.state_input = tf.placeholder("float", [None, self.state_dim])
# NOTE(review): shape [None,2] looks suspicious for action indices -- comparable
# implementations use a 1-D [None,] batch of int action ids; confirm against how
# tf_acts is consumed in the (missing) loss computation below.
self.tf_acts = tf.placeholder(tf.int32, [None,2], name="actions_num")
self.td_error = tf.placeholder(tf.float32, None, "td_error") # TD_error fed in from the critic
# hidden layers
h_layer = tf.nn.relu(tf.matmul(self.state_input, W1) + b1)
# softmax layer: raw logits; the softmax output, policy-gradient loss, and
# training op are missing -- the source article is truncated (paywalled) here.
self.softmax_input = tf.matmul(h_layer, W2) + b2
强化学习AC框架
最新推荐文章于 2025-01-05 13:16:53 发布
本文深入探讨了强化学习中的Actor-Critic框架,解释了其工作原理和优缺点。通过实例,展示了如何在实践中应用这一框架进行智能体的学习和决策。同时,讨论了它在解决复杂环境问题中的潜力和挑战。

最低0.47元/天 解锁文章
156

被折叠的评论
为什么被折叠?



