强化学习:A3C算法 TensorFlow 实现
最近在看A3C,理论知识很容易理解,但代码还是有一定难度。先分享本人学习莫烦大佬 A3C 代码的注释,理论知识后续补充。
具体的算法伪代码如下:
TensorFlow 代码如下:
"""
Asynchronous Advantage Actor Critic (A3C) with continuous action space, Reinforcement Learning.
The Pendulum example.
View more on my tutorial page: https://morvanzhou.github.io/tutorials/
Using:
tensorflow 1.8.0
gym 0.10.5
"""
import multiprocessing # standard-library multiprocessing; used here only for cpu_count()
import threading # each A3C worker runs in its own thread
import tensorflow as tf
import numpy as np
import gym
import os
import shutil # file-tree operations (used for the TensorBoard log directory)
import matplotlib.pyplot as plt
GAME = 'Pendulum-v0'
OUTPUT_GRAPH = True # if True, export the TF graph for TensorBoard
LOG_DIR = './log'
N_WORKERS = multiprocessing.cpu_count() # one independent worker (actor-learner) per CPU core
MAX_EP_STEP = 200 # max steps per episode
MAX_GLOBAL_EP = 2000 # max number of episodes before training stops
GLOBAL_NET_SCOPE = 'Global_Net' # variable-scope name of the global (central) network
UPDATE_GLOBAL_ITER = 10 # workers sync with the global net every N steps
GAMMA = 0.9 # reward discount factor
ENTROPY_BETA = 0.01 # entropy regularization coefficient (encourages exploration)
LR_A = 0.0001 # learning rate for actor
LR_C = 0.001 # learning rate for critic
GLOBAL_RUNNING_R = [] # running episode rewards shared across workers
GLOBAL_EP = 0 # global episode counter
env = gym.make(GAME) # environment instance; used here to read space dimensions
N_S = env.observation_space.shape[0] # number of state (observation) dimensions
N_A = env.action_space.shape[0] # number of action dimensions
A_BOUND = [env.action_space.low, env.action_space.high] # action bounds [low, high]
# 这个 class 可以被调用生成一个 global net.
# 也能被调用生成一个 worker 的 net, 因为他们的结构是一样的,
# 所以这个 class 可以被重复利用.
class ACNet(object):
def __init__(self, scope, globalAC=None):
if scope == GLOBAL_NET_SCOPE: # get global network
with tf.variable_scope(scope):
self.s = tf.placeholder(tf.float32, [None, N_S], 'S') # [None, N_S]数据形状,None代表batch,N_S是每个state的观测值个数
self.a_params, self.c_params = self._build_net(scope)[-2:] # 定义中央大脑actor和critic的参数
else: # local net, calculate losses
with tf.variable_scope(scope):
self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A')
self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget')
mu, sigma, self.v, self.a_params, self.c_params = self._build_net(scope) # 均值μ,方差σ,
td = tf.subtract(self