《Natural Evolution Strategy (NES).py》详解

最新推荐文章于 2024-04-26 14:33:29 发布

王铁柱子哟-

最新推荐文章于 2024-04-26 14:33:29 发布

阅读量304

点赞数

文章标签：机器学习算法 python

本文链接：https://blog.csdn.net/wtzszzx/article/details/132745076

版权

"""
The basic idea of the Natural Evolution Strategy (NES), with visualization.

Visit my tutorial website for more: https://mofanpy.com/tutorials/

Dependencies:
TensorFlow >= r1.2 and < 2.0 (the script imports tf.contrib, which was removed in 2.0)
numpy
matplotlib
"""
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib.distributions import MultivariateNormalFullCovariance

DNA_SIZE = 2         # number of parameters in each candidate solution
N_POP = 20           # number of candidates sampled per generation
N_GENERATION = 100   # number of training iterations (generations)
LR = 0.02            # learning rate passed to the gradient-descent optimizer


# fitness function
def get_fitness(pred):
    """Return the fitness of every candidate in ``pred``.

    ``pred`` is indexed as ``pred[:, 0]`` and ``pred[:, 1]``: one candidate per
    row, with its two coordinates in the first two columns. The fitness is the
    negated sum of the squared coordinates, so it is at most 0 and reaches 0
    only at the origin. One value is returned per row.
    """
    first_sq = pred[:, 0] ** 2
    second_sq = pred[:, 1] ** 2
    return -(first_sq + second_sq)


# In TensorFlow 1.x graph mode the statements below only declare graph nodes;
# concrete values are produced when a node is evaluated through a Session.

# Build the multivariate normal search distribution.
# The mean is a trainable variable with one entry per solution parameter,
# initialised from a normal distribution with mean 13 and stddev 1. The shape
# uses DNA_SIZE (not a literal 2) so it always agrees with the covariance and
# with the `tfkids` placeholder.
mean = tf.Variable(tf.random_normal([DNA_SIZE, ], 13., 1.), dtype=tf.float32)

# The covariance is a trainable DNA_SIZE x DNA_SIZE matrix, initialised to
# 5 * identity (tf.eye builds the identity matrix).
cov = tf.Variable(5. * tf.eye(DNA_SIZE), dtype=tf.float32)

# Distribution object parameterised by the two variables above.
mvn = MultivariateNormalFullCovariance(loc=mean, covariance_matrix=cov)
# Sampling operation: each evaluation draws N_POP candidates from mvn.
make_kid = mvn.sample(N_POP)

# Training graph: update mean and cov from sampled candidates and their fitness.
# Both placeholders are fed at run time with the values for one generation.
# Fitness of each candidate.
tfkids_fit = tf.placeholder(tf.float32, [N_POP, ])
# The candidates themselves, one row per candidate.
tfkids = tf.placeholder(tf.float32, [N_POP, DNA_SIZE])
# Log-density of each candidate under the current distribution, weighted by
# that candidate's fitness.
weighted_log_prob = mvn.log_prob(tfkids) * tfkids_fit
# Gradient descent on the negated mean, i.e. gradient ascent on
# mean(log_prob * fitness).
loss = -tf.reduce_mean(weighted_log_prob)
optimizer = tf.train.GradientDescentOptimizer(LR)
# One run of train_op computes and applies the gradients for mean and cov.
train_op = optimizer.minimize(loss)

# Open a session on the graph built above and run the initializer so that the
# tf variables (mean and cov) receive their starting values.
sess = tf.Session()
init_op = tf.global_variables_initializer()
sess.run(init_op)

# something about plotting (can be ignored)
# Draw the fitness landscape once as a filled contour plot; the training loop
# scatters each generation's candidates on top of it.
n = 300
x = np.linspace(-20, 20, n)
# meshgrid pairs every x value with every y value: X[i, j] = x[j], Y[i, j] = x[i].
X, Y = np.meshgrid(x, x)
# Evaluate the fitness at all grid points in a single call. Each row of
# grid_points is (X[i, j], Y[i, j]), so Z[i, j] lines up with the coordinates
# that contourf receives. This replaces an n*n Python loop that assigned
# 1-element arrays into scalar slots.
grid_points = np.stack([X.ravel(), Y.ravel()], axis=1)
Z = get_fitness(grid_points).reshape(X.shape)

# Filled contour plot of -Z (the squared distance from the origin), 100 levels.
plt.contourf(X, Y, -Z, 100, cmap=plt.cm.rainbow)
plt.ylim(-20, 20)
plt.xlim(-20, 20)
# Interactive mode, so the figure can be refreshed during training.
plt.ion()

# training
# Handle of the scatter artist drawn for the previous generation; None until
# the first generation has been plotted.
sca = None
for g in range(N_GENERATION):
    # Sample a population from the current distribution and score it.
    kids = sess.run(make_kid)
    kids_fit = get_fitness(kids)
    # Update the distribution parameters from the samples and their fitness.
    sess.run(train_op, {tfkids_fit: kids_fit, tfkids: kids})

    # plotting update: replace the previous generation's points with the new ones
    if sca is not None:
        sca.remove()
    sca = plt.scatter(kids[:, 0], kids[:, 1], s=30, c='k')
    plt.pause(0.01)
    print('-'*20 + str(g))

# Training is done; release the resources held by the session.
sess.close()

print('Finished')
# Leave interactive mode so that show() keeps the final figure open.
plt.ioff()
plt.show()