tf学习笔记–BP算法(下)
这里主要介绍:BP单层算法应用,BP多层神经网络应用
即:存在隐藏层的BP算法
1.单层:
样本设计:
特征数据:病人年龄, 肿瘤大小
标签:良性肿瘤, 恶性肿瘤
# -*- coding: utf-8 -*-
# import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle
# Simulated data points
def generate(sample_size, mean, cov, diff, regression):
    """Draw labelled samples from several Gaussian clusters.

    The first cluster is centred on ``mean``; every entry ``d`` of ``diff``
    adds one more cluster centred on ``mean + d``. All clusters share ``cov``
    and receive ``int(sample_size / num_classes)`` points each, so fewer than
    ``sample_size`` rows are returned when ``sample_size`` is not a multiple
    of the number of clusters.

    Args:
        sample_size: Total number of samples requested.
        mean: Mean vector of the first cluster.
        cov: Covariance matrix shared by all clusters.
        diff: Sequence of offsets added to ``mean``, one per extra cluster.
        regression: If true, labels are returned as a 1-D array of class
            indices (0.0, 1.0, ...); otherwise as a one-hot float32 matrix
            of shape ``(n_samples, num_classes)``.

    Returns:
        Tuple ``(X, Y)`` of features and labels, shuffled in unison.
    """
    mean = np.asarray(mean)
    # One base cluster plus one cluster per offset in `diff`.
    num_classes = len(diff) + 1
    samples_per_class = int(sample_size / num_classes)

    # Base cluster first, then one shifted cluster per offset.
    features = [np.random.multivariate_normal(mean, cov, samples_per_class)]
    labels = [np.zeros(samples_per_class)]
    for ci, d in enumerate(diff):
        features.append(
            np.random.multivariate_normal(mean + d, cov, samples_per_class))
        labels.append((ci + 1) * np.ones(samples_per_class))
    X0 = np.concatenate(features)
    Y0 = np.concatenate(labels)

    if not regression:
        # One-hot encode: row i has a 1.0 in the column of its class index.
        # (The original read an undefined `Y` here and then threw the result
        # away by shuffling the un-encoded labels.)
        Y0 = (Y0[:, np.newaxis] == np.arange(num_classes)).astype(np.float32)

    # Shuffle features and labels with the same permutation so pairs stay aligned.
    order = np.random.permutation(len(Y0))
    return X0[order], Y0[order]
# Two input features per sample (plotted below as scaled age and tumor size).
input_dim = 2
np.random.seed(10)  # fixed seed so the generated data is reproducible
num_classes = 2
# The Gaussian parameters live in feature space, so they are sized by
# input_dim. (They were previously sized by num_classes, which only worked
# because both values are 2.)
mean = np.random.randn(input_dim)
cov = np.eye(input_dim)
X, Y = generate(1000, mean, cov, [3.0], True)

# Colour setup: class 0 in red, everything else in blue.
colors = ['r' if label == 0 else 'b' for label in Y]
plt.scatter(X[:, 0], X[:, 1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show()
模型网络构建
# Width of the label/output vector. It was referenced below without ever
# being defined; labels are fed as a single column, so the width is 1.
lab_dim = 1

# tf Graph Input -- placeholders
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_lables = tf.placeholder(tf.float32, [None, lab_dim])

# Set model weights -- trainable parameters
W = tf.Variable(tf.random_normal([input_dim, lab_dim]), name="weight")
b = tf.Variable(tf.zeros([lab_dim]), name="bias")

# Forward pass: a single sigmoid unit.
output = tf.nn.sigmoid(tf.matmul(input_features, W) + b)

# Binary cross-entropy written out by hand, then averaged over the batch.
# NOTE(review): tf.log(output) becomes -inf if the sigmoid saturates;
# tf.nn.sigmoid_cross_entropy_with_logits is the numerically safer form.
cross_entropy = -(input_lables * tf.log(output)
                  + (1 - input_lables) * tf.log(1 - output))
loss = tf.reduce_mean(cross_entropy)

# Mean squared error, used only for reporting (it is not optimised).
ser = tf.square(input_lables - output)
err = tf.reduce_mean(ser)

# Adam is the preferred optimizer here: it converges quickly and adapts the
# step size on its own.
optimizer = tf.train.AdamOptimizer(0.04)
train = optimizer.minimize(loss)  # training op
模型训练:
maxEpochs = 50  # number of passes over the data set
minibatchSize = 25  # samples fed to the model per training step

# Start the session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Number of full mini-batches per epoch; a trailing partial batch is skipped.
    num_batches = len(Y) // minibatchSize
    for epoch in range(maxEpochs):
        sumerr = 0
        for i in range(num_batches):
            x1 = X[i * minibatchSize:(i + 1) * minibatchSize, :]
            # Labels are reshaped into a column to match the label placeholder.
            # (A stray tf.reshape call used to sit here; its result was never
            # used and it added a new op to the graph on every batch.)
            y1 = np.reshape(Y[i * minibatchSize:(i + 1) * minibatchSize], [-1, 1])
            _, lossval, outputval, errval = sess.run(
                [train, loss, output, err],
                feed_dict={input_features: x1, input_lables: y1})
            sumerr = sumerr + errval  # running sum of the per-batch mean squared errors
        # "cost" is the loss of the last batch of the epoch.
        # NOTE(review): "err" divides the summed batch errors by minibatchSize,
        # not by the number of batches, so it is not the true epoch mean. It is
        # kept as-is so the output stays comparable with the recorded log.
        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(lossval), "err=", sumerr / minibatchSize)
Epoch: 0001 cost= 0.268924475 err= 0.25267374858260155
Epoch: 0002 cost= 0.178438604 err= 0.106455118060112
Epoch: 0003 cost= 0.134621084 err= 0.06998902931809425
Epoch: 0004 cost= 0.108830765 err= 0.05257240012288093
Epoch: 0005 cost= 0.092330381 err= 0.04308566868305206
Epoch: 0006 cost= 0.081110679 err= 0.0373367234878242
Epoch: 0007 cost= 0.073094845 err= 0.03355297286994755
Epoch: 0008 cost= 0.067109369 err= 0.030904537616297603
Epoch: 0009 cost= 0.062455665 err= 0.028961292980238796
Epoch: 0010 cost= 0.058705330 err= 0.027481697462499143
Epoch: 0011 cost= 0.055590209 err= 0.026321355565451086
Epoch: 0012 cost= 0.052938920 err= 0.025389384776353836
Epoch: 0013 cost= 0.050638434 err= 0.02462607268244028
Epoch: 0014 cost= 0.048612185 err= 0.02399067289894447
Epoch: 0015 cost= 0.046806142 err= 0.02345448062987998
Epoch: 0016 cost= 0.045180764 err= 0.022996704415418207
Epoch: 0017 cost= 0.043706585 err= 0.022601926571223886
Epoch: 0018 cost= 0.042360697 err= 0.022258484875783323
Epoch: 0019 cost= 0.041125130 err= 0.02195738084963523
Epoch: 0020 cost= 0.039985355 err= 0.021691581240156667
Epoch: 0021 cost= 0.038929585 err= 0.021455503829056397
Epoch: 0022 cost= 0.037947953 err= 0.021244666749844328
Epoch: 0023 cost= 0.037032306 err= 0.02105543064768426
Epoch: 0024 cost= 0.036175679 err= 0.020884824533713982
Epoch: 0025 cost= 0.035372160 err= 0.02073037245310843
Epoch: 0026 cost= 0.034616470 err= 0.020589999506482853
Epoch: 0027 cost= 0.033904381 err= 0.020461992079508492
Epoch: 0028 cost= 0.033231940 err= 0.020344889392144978
Epoch: 0029 cost= 0.032595668 err= 0.020237436260213144
Epoch: 0030 cost= 0.031992689 err= 0.020138559713959694
Epoch: 0031 cost= 0.031420287 err= 0.020047353037516587
Epoch: 0032 cost= 0.030876029 err= 0.019963002075674013
Epoch: 0033 cost= 0.030357935 err= 0.01988483140768949
Epoch: 0034 cost= 0.029864036 err= 0.019812226174981334
Epoch: 0035 cost= 0.029392464 err= 0.01974464670056477
Epoch: 0036 cost= 0.028941941 err= 0.019681639238842762
Epoch: 0037 cost= 0.028511010 err= 0.01962278827879345
Epoch: 0038 cost= 0.028098190 err= 0.019567722373176367
Epoch: 0039 cost= 0.027702546 err= 0.019516119370236992
Epoch: 0040 cost= 0.027322976 err= 0.019467693150800186
Epoch: 0041 cost= 0.026958434 err= 0.019422175618528853
Epoch: 0042 cost= 0.026608067 err= 0.019379322144086473
Epoch: 0043 cost= 0.026271122 err= 0.019338947585201823
Epoch: 0044 cost= 0.025946712 err= 0.019300847756967415
Epoch: 0045 cost= 0.025634322 err= 0.01926485348085407
Epoch: 0046 cost= 0.025333069 err= 0.019230803142127117
Epoch: 0047 cost= 0.025042571 err= 0.01919855373300379
Epoch: 0048 cost= 0.024762176 err= 0.01916799627462751
Epoch: 0049 cost= 0.024491362 err= 0.019138996220281115
Epoch: 0050 cost= 0.024229772 err= 0.019111458481202135
结果输出
# NOTE: this fragment reads the trained W and b through `sess`, so it has to
# run while the training session is still open.
train_X, train_Y = generate(100, mean, cov, [3.0], True)
colors = ['r' if label == 0 else 'b' for label in train_Y]
plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)

# Fetch the learned parameters once and draw the line w0*x + w1*y + b = 0.
w_val, b_val = sess.run([W, b])
x = np.linspace(-1, 8, 200)
slope = w_val[0] / w_val[1]
intercept = b_val / w_val[1]
y = -x * slope - intercept
plt.plot(x, y, label='Fitted line')
plt.legend()
plt.show()
2.多层神经网络
数据集采用MNIST手写数字的图片封装集
# -*- coding: utf-8 -*-
import pylab
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
# 导入 MINST 数据集
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST with one-hot labels.
# NOTE(review): tensorflow.examples.tutorials is, as far as I know, not shipped
# with TensorFlow 2.x packages -- confirm it is importable in the target
# environment.
mnist = input_data.read_data_sets("/MNIST_data/", one_hot=True)

# Training hyperparameters
learning_rate = 0.001
training_epochs = 50
batch_size = 100
display_step = 1  # print the cost every `display_step` epochs

# Network parameters
n_hidden_1 = 256  # units in the first hidden layer
n_hidden_2 = 256  # units in the second hidden layer
n_input = 784  # MNIST input size (28 * 28 pixels, flattened)
n_classes = 10  # MNIST classes (digits 0-9)

# tf Graph input: flattened images and their one-hot labels
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
# Create model
def multilayer_perceptron(x, weights, biases):
    """Build a two-hidden-layer perceptron and return its output logits.

    Args:
        x: Input tensor fed to the first hidden layer.
        weights: Dict with weight tensors under 'h1', 'h2' and 'out'.
        biases: Dict with bias tensors under 'b1', 'b2' and 'out'.

    Returns:
        The output layer tensor; no activation is applied to it.
    """
    hidden = x
    # Two hidden layers, each an affine map followed by ReLU.
    for w_key, b_key in (('h1', 'b1'), ('h2', 'b2')):
        affine = tf.add(tf.matmul(hidden, weights[w_key]), biases[b_key])
        hidden = tf.nn.relu(affine)
    # Output layer with linear activation.
    return tf.matmul(hidden, weights['out']) + biases['out']
# Store layers weight & bias.
# Every variable starts from a standard-normal draw. The tables below keep the
# original creation order (h1, h2, out, then b1, b2, out).
_weight_shapes = (
    ('h1', [n_input, n_hidden_1]),
    ('h2', [n_hidden_1, n_hidden_2]),
    ('out', [n_hidden_2, n_classes]),
)
_bias_shapes = (
    ('b1', [n_hidden_1]),
    ('b2', [n_hidden_2]),
    ('out', [n_classes]),
)
weights = {
    key: tf.Variable(tf.random_normal(shape)) for key, shape in _weight_shapes
}
biases = {
    key: tf.Variable(tf.random_normal(shape)) for key, shape in _bias_shapes
}
# Build the model: `pred` holds the raw logits of the network.
pred = multilayer_perceptron(x, weights, biases)

# Define loss and optimizer: softmax cross-entropy averaged over the batch,
# minimised with Adam. Note that `optimizer` is bound to the training op.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
cost = tf.reduce_mean(per_example_loss)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Variable initializer, plus a saver and the checkpoint path it writes to.
init = tf.global_variables_initializer()
saver = tf.train.Saver()
model_path = "logtest/521model.ckpt"
import os

# Start the session
with tf.Session() as sess:
    sess.run(init)

    # Number of full batches per epoch (hoisted out of the epoch loop).
    total_batch = int(mnist.train.num_examples / batch_size)

    # Training loop
    for epoch in range(training_epochs):
        avg_cost = 0.
        # Walk through the whole training set once.
        for _batch in range(total_batch):
            # mnist.train.next_batch is a helper that ships with the
            # TensorFlow MNIST tutorial. It shuffles the training image/label
            # pairs up front and returns the next batch_size pairs on each
            # call; once every pair has been used, the pairs are reshuffled
            # and the process repeats.
            # Source: https://www.zhihu.com/question/305605841/answer/915298352
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: batch_x, y: batch_y})
            # Accumulate the average loss over the epoch
            avg_cost += c / total_batch
        # Show progress details
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=",
                  "{:.9f}".format(avg_cost))
    print(" Finished!")

    # Raw network outputs for the last training batch, next to its labels.
    ppp = sess.run([pred], feed_dict={x: batch_x})
    print("ppp : ", ppp)
    print("y-lables", batch_y)

    # Test the model: a prediction is correct when the arg-max class matches.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))

    # Saver.save does not create missing parent directories, so make sure
    # the checkpoint directory exists before writing.
    checkpoint_dir = os.path.dirname(model_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)
    saver.save(sess, model_path)

    # Compute accuracy on the test set
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))

    # Predict ten more samples and display each image.
    output = tf.argmax(pred, 1)
    batch_xs, batch_ys = mnist.train.next_batch(10)
    outputval, predv = sess.run([output, pred], feed_dict={x: batch_xs})
    print(outputval, predv, batch_ys)
    for im in batch_xs:
        im = im.reshape(-1, 28)  # flat 784-vector back to a 28-column image
        pylab.imshow(im)
        pylab.show()
模型精度:Accuracy: 0.9632
下面是我尝试识别自己手写数字图片的代码,但图片的数据预处理可能还存在一些问题,导致预测精度较低,等后面完善之后再补充。
# Disabled draft, kept as a bare string literal so it never executes: it
# restores the checkpoint from model_path, predicts a batch of 10 training
# images and displays them.
# NOTE(review): the display loop uses range(11) although only 10 samples are
# fetched, so enabling the snippet as written would index past the batch.
'''
with tf.Session() as sess:
sess.run(init)
saver.restore(sess, save_path=model_path)
output = tf.argmax(pred, 1)
batch_xs, batch_ys = mnist.train.next_batch(10)
outputval, predv = sess.run([output, pred], feed_dict={x: batch_xs})
print(outputval, predv, batch_ys)
# im = batch_xs[0]
# im = im.reshape(-1, 28)
# pylab.imshow(im)
# pylab.show()
#
# im = batch_xs[1]
# im = im.reshape(-1, 28)
# pylab.imshow(im)
# pylab.show()
for i in range(11):
im = batch_xs[i]
im = im.reshape(-1, 28)
pylab.imshow(im)
pylab.show()
'''
对于模型中出现欠拟合跟过拟合问题,一般解决方法如下
欠拟合:
1.增加“节点”数量
2.增加“隐藏层”数量
过拟合:
1.正则化(L1范数:所有学习参数绝对值的和; L2范数:所有学习参数的平方和的开根号)
2.dropout(训练时随机丢弃一部分神经元节点,使其暂时不参与计算)