This article belongs to a complete mini-project; readers are advised to follow the series in order.
Only the most essential code is shown here, not every implementation detail; readers with an RL background should have no trouble following along.
The full AI code can be found via the open-source address given in 【Python小游戏】用AI玩Python小游戏FlappyBird【源码】.
If this article helps you, a like is much appreciated!
I. Wrapping the Basic Operations
1. Convolution
def __conv2d(self, input, weights, stride, padding='SAME'):
    layer = tf.nn.conv2d(input=input,    # the input tensor, shape (batch, height, width, in_channels)
                         filter=weights, # the kernel tensor, shape (filter_height, filter_width, in_channels, out_channels)
                         strides=[1, stride, stride, 1],
                         padding=padding)
    return layer
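With 'SAME' padding, the output spatial size is ceil(input_size / stride). A minimal standalone check (a sketch assuming TensorFlow 1.x; the 80×80×4 input and the 8×8 kernel are illustrative, borrowed from the network in Section III):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 80, 80, 4])                 # e.g. a stack of 4 game frames
w = tf.Variable(tf.truncated_normal([8, 8, 4, 32], stddev=0.01))  # 8x8 kernel, 4 -> 32 channels
y = tf.nn.conv2d(input=x, filter=w, strides=[1, 4, 4, 1], padding='SAME')
print(y.get_shape())  # (?, 20, 20, 32), since ceil(80 / 4) = 20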
2. Max Pooling
def __maxpool2d(self, input, stride=2, padding='SAME'):
    layer = tf.nn.max_pool(value=input,                    # a 4-D float32 tensor of shape (batch, height, width, channels)
                           ksize=[1, stride, stride, 1],   # a list of ints: the window size along each dimension
                           strides=[1, stride, stride, 1], # the stride of the window along each dimension
                           padding=padding)                # 'VALID' or 'SAME'
    return layer
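Note that the wrapper reuses stride as both the window size and the stride, so the default call halves each spatial dimension. Continuing the sketch above:

p = tf.nn.max_pool(value=y, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
print(p.get_shape())  # (?, 10, 10, 32)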
3. Flattening
def __flattenlayer(self, layer):
    layer_shape = layer.get_shape()                  # before flattening, e.g. (?, 8, 8, 64)
    num_features = layer_shape[1:4].num_elements()   # [1:4] is (8, 8, 64), so num_features = 4096
    re_layer = tf.reshape(layer, [-1, num_features]) # after flattening: (?, 4096)
    return re_layer
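The slice [1:4] deliberately skips dimension 0, the unknown batch size, and multiplies only the known height, width and channel dimensions. Applied to the pooled tensor from the sketch above:

flat = tf.reshape(p, [-1, p.get_shape()[1:4].num_elements()])
print(flat.get_shape())  # (?, 3200), i.e. 10 * 10 * 32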
II. Wrapping the Basic Layers
1. Fully Connected Layer
def _define_fc_layer(self, inputs,             # input tensor
                     num_inputs,               # number of input units
                     num_outputs,              # number of output units
                     activation_function,      # activation function
                     layer_name,               # name of the layer
                     c_names=None,
                     regularizer__function=None,
                     is_historgram=True):
    """Define a fully connected layer."""
    with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE):
        weights = self.__define_weights(shape=[num_inputs, num_outputs], c_names=c_names, regularizer__function=regularizer__function)
        biases = self.__define_biases(size=num_outputs, c_names=c_names)
        with tf.variable_scope('wx_plus_b'):
            # pre-activation value: matrix multiplication plus bias
            wx_plus_b = tf.matmul(inputs, weights) + biases
        # apply the activation function, if any
        if activation_function is None:
            outputs = wx_plus_b
        else:
            outputs = activation_function(wx_plus_b)
        if is_historgram:  # whether to record this tensor for display in TensorBoard
            tf.summary.histogram(layer_name + '/outputs', outputs)
    # return the layer's output
    return outputs
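Both layer wrappers call self.__define_weights and self.__define_biases, which this excerpt omits. A minimal sketch of what they might look like, inferred from the call sites (the initializers and the 'losses' collection are assumptions; the open-source project may differ):

def __define_weights(self, shape, c_names=None, regularizer__function=None):
    weights = tf.get_variable('weights', shape=shape,
                              initializer=tf.truncated_normal_initializer(stddev=0.01),
                              collections=c_names)
    if regularizer__function is not None:
        # assumed convention: collect the regularization term into a 'losses' collection
        tf.add_to_collection('losses', regularizer__function(weights))
    return weights

def __define_biases(self, size, c_names=None):
    return tf.get_variable('biases', shape=[size],
                           initializer=tf.constant_initializer(0.01),
                           collections=c_names)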
2. Convolutional Layer
def _define_conv2d_layer(self, inputs,         # input tensor
                         num_input_channels,   # number of input channels
                         conv_filter_size,     # kernel size
                         num_filters,          # number of kernels, i.e. output channels
                         stride,               # kernel stride
                         activation_function,  # activation function
                         layer_name,           # name of the layer
                         c_names=None,
                         regularizer__function=None,
                         is_historgram=True):
    """Define a convolutional layer."""
    with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE):
        weights = self.__define_weights(shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters], c_names=c_names, regularizer__function=regularizer__function)
        biases = self.__define_biases(size=num_filters, c_names=c_names)
        with tf.variable_scope('conv_plus_b'):
            # pre-activation value: convolution plus bias
            conv_plus_b = self.__conv2d(inputs, weights, stride) + biases
        # apply the activation function, if any
        if activation_function is None:
            outputs = conv_plus_b
        else:
            outputs = activation_function(conv_plus_b)
        if is_historgram:  # whether to record this tensor for display in TensorBoard
            tf.summary.histogram(layer_name + '/outputs', outputs)
    # return the layer's output
    return outputs
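Both wrappers record activation histograms through tf.summary.histogram, but nothing shows up in TensorBoard until the training code also merges and writes the summaries. A generic TF1 sketch (sess, feed and the 'logs/' path are illustrative):

merged = tf.summary.merge_all()
writer = tf.summary.FileWriter('logs/', sess.graph)
summary = sess.run(merged, feed_dict=feed)
writer.add_summary(summary, global_step=step)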
III. Wrapping the Network
# define convolution + pooling layer L1
layer_conv1 = self._define_conv2d_layer(inputs=input,
                                        conv_filter_size=8,
                                        num_input_channels=4,
                                        num_filters=32,
                                        stride=4,
                                        activation_function=tf.nn.relu,
                                        c_names=c_names,
                                        layer_name='layer_conv1')
layer_conv_pool1 = self.__maxpool2d(layer_conv1)
# define convolutional layer L2
layer_conv2 = self._define_conv2d_layer(inputs=layer_conv_pool1,
conv_filter_size=4,
num_input_channels=32,
num_filters=64,
stride=2,
activation_function=tf.nn.relu,
c_names=c_names,
layer_name='layer_conv2')
# define convolutional layer L3
layer_conv3 = self._define_conv2d_layer(inputs=layer_conv2,
conv_filter_size=3,
num_input_channels=64,
num_filters=64,
stride=1,
activation_function=tf.nn.relu,
c_names=c_names,
layer_name='layer_conv3')
layer_conv3_flat = self.__flattenlayer(layer_conv3)  # (?, 5, 5, 64) -> (?, 1600)
# define fully connected layer L4
layer_fnn4 = self._define_fc_layer(inputs=layer_conv3_flat,
num_inputs=1600,
num_outputs=512,
activation_function=tf.nn.relu,
c_names=c_names,
layer_name='layer_fnn4')
# define fully connected layer L5
output = self._define_fc_layer(inputs=layer_fnn4,
num_inputs=512,
num_outputs=self.n_actions,
activation_function=None,
c_names=c_names,
layer_name='layer_fnn5')
return output
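A quick shape trace shows where the flattened width of 1600 comes from (assuming the usual 80×80×4 stacked-frame input of the FlappyBird DQN; with 'SAME' padding each op outputs ceil(size / stride)):

input              (?, 80, 80, 4)
layer_conv1        (?, 20, 20, 32)   # 80 / 4
layer_conv_pool1   (?, 10, 10, 32)   # 20 / 2
layer_conv2        (?,  5,  5, 64)   # 10 / 2
layer_conv3        (?,  5,  5, 64)   # stride 1 keeps the size
layer_conv3_flat   (?, 1600)         # 5 * 5 * 64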