7.1.1 用线性单分逻辑回归分析肿瘤是良性还是恶性的
假设某肿瘤医院想用神经网络对已有的病例数据进行分类。数据的样本特征包括病人的年龄和肿瘤的大小,对应的标签为该病例是良性肿瘤还是恶性肿瘤。
程序:
#1 生成样本集
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle
# 模拟数据点
def generate(sample_size, mean, cov, diff, regression):
    """Draw Gaussian clusters and return shuffled samples with their labels.

    Class 0 is sampled from N(mean, cov); class ``i + 1`` is sampled from
    N(mean + diff[i], cov).

    Args:
        sample_size: Total number of samples requested. It is split evenly
            over the ``len(diff) + 1`` classes; any remainder is dropped.
        mean: Mean vector of class 0.
        cov: Covariance matrix shared by every class.
        diff: Sequence of offsets added to ``mean``, one per extra class.
        regression: If truthy, labels are a 1-D float array of class indices.
            Otherwise labels are one-hot encoded as a float32 array of shape
            ``(n_samples, n_classes)``.

    Returns:
        Tuple ``(X, Y)`` of features and labels, shuffled with one shared
        permutation drawn from NumPy's global random state.
    """
    # The original hard-coded two classes; deriving the count from ``diff``
    # gives the same split for the single-offset case used in this file.
    num_classes = len(diff) + 1
    samples_per_class = int(sample_size / num_classes)

    features = [np.random.multivariate_normal(mean, cov, samples_per_class)]
    labels = [np.zeros(samples_per_class)]
    for ci, d in enumerate(diff):
        features.append(
            np.random.multivariate_normal(mean + d, cov, samples_per_class))
        labels.append((ci + 1) * np.ones(samples_per_class))
    X0 = np.concatenate(features)
    Y0 = np.concatenate(labels)

    if not regression:
        # One row per sample, one column per class. The original computed an
        # encoding here but then shuffled and returned the raw indices.
        Y0 = (Y0[:, None] == np.arange(num_classes)).astype(np.float32)

    order = np.random.permutation(len(X0))
    return X0[order], Y0[order]
# 1. Build and plot the two-class sample set.
input_dim = 2
np.random.seed(10)
num_classes = 2
mean = np.random.randn(num_classes)
cov = np.eye(num_classes)
X, Y = generate(1000, mean, cov, [3.0], True)
colors = ['r' if l == 0 else 'b' for l in Y[:]]
plt.scatter(X[:, 0], X[:, 1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show()
lab_dim = 1
print('-------------------------------------------- ')

# 2. Build the network: a single linear unit followed by a sigmoid.
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_lables = tf.placeholder(tf.float32, [None, lab_dim])
W = tf.Variable(tf.random_normal([input_dim, lab_dim]), name="weight")
b = tf.Variable(tf.zeros([lab_dim]), name="bias")
logits = tf.matmul(input_features, W) + b
output = tf.nn.sigmoid(logits)
# Computing the cross-entropy from the logits avoids log(0) -> NaN once the
# sigmoid saturates, which the hand-written -(y*log(p) + (1-y)*log(1-p)) form
# was exposed to.
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
    labels=input_lables, logits=logits)
ser = tf.square(input_lables - output)
loss = tf.reduce_mean(cross_entropy)
err = tf.reduce_mean(ser)  # mean squared error of the current batch
optimizer = tf.train.AdamOptimizer(0.04)
train = optimizer.minimize(loss)
print('-------------------------------------------- ')

# 3. Train.
maxEpochs = 50  # number of passes over the data
minibatchSize = 25
num_batches = len(Y) // minibatchSize
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(maxEpochs):
        sumerr = 0
        for i in range(num_batches):
            start = i * minibatchSize
            x1 = X[start:start + minibatchSize, :]
            # Labels are 1-D; the placeholder expects a column vector.
            y1 = np.reshape(Y[start:start + minibatchSize], [-1, 1])
            _, lossval, errval = sess.run(
                [train, loss, err],
                feed_dict={input_features: x1, input_lables: y1})
            sumerr = sumerr + errval
        # sumerr holds one per-batch mean for each batch, so the epoch average
        # divides by the number of batches (the original divided by the
        # minibatch size).
        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(lossval), "err=", sumerr / num_batches)
    print('-------------------------------------------- ')

    # 4. Visualise fresh samples together with the learned boundary.
    train_X, train_Y = generate(100, mean, cov, [3.0], True)
    colors = ['r' if l == 0 else 'b' for l in train_Y[:]]
    plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)
    # Boundary: x1*w1 + x2*w2 + b = 0  =>  x2 = -x1 * w1/w2 - b/w2
    x = np.linspace(-1, 8, 200)
    w_val, b_val = sess.run([W, b])  # fetch the parameters once
    y = -x * (w_val[0] / w_val[1]) - b_val / w_val[1]
    plt.plot(x, y, label='Fitted line')
    plt.legend()
    plt.show()
结果:
--------------------------------------------
--------------------------------------------
Epoch: 0001 cost= 0.419653773 err= 0.46671962559223173
Epoch: 0002 cost= 0.252073109 err= 0.19052305445075035
Epoch: 0003 cost= 0.181691363 err= 0.104353082254529
Epoch: 0004 cost= 0.144048572 err= 0.07146473601460457
Epoch: 0005 cost= 0.120770387 err= 0.05554016921669245
Epoch: 0006 cost= 0.104860924 err= 0.046304389499127865
Epoch: 0007 cost= 0.093224488 err= 0.04033656094223261
Epoch: 0008 cost= 0.084319353 err= 0.036203236244618894
Epoch: 0009 cost= 0.077273600 err= 0.03319647421129048
Epoch: 0010 cost= 0.071551934 err= 0.030926703792065382
Epoch: 0011 cost= 0.066806793 err= 0.029162730602547527
Epoch: 0012 cost= 0.062802620 err= 0.027759284852072595
Epoch: 0013 cost= 0.059374008 err= 0.02662088710349053
Epoch: 0014 cost= 0.056401681 err= 0.02568237894680351
Epoch: 0015 cost= 0.053797163 err= 0.02489794176071882
Epoch: 0016 cost= 0.051493753 err= 0.02423448855057359
Epoch: 0017 cost= 0.049440056 err= 0.0236675892630592
Epoch: 0018 cost= 0.047595870 err= 0.023178818095475437
Epoch: 0019 cost= 0.045929298 err= 0.02275408128276467
Epoch: 0020 cost= 0.044414707 err= 0.022382401695940645
Epoch: 0021 cost= 0.043031238 err= 0.022055116919800638
Epoch: 0022 cost= 0.041761700 err= 0.021765318417456003
Epoch: 0023 cost= 0.040592026 err= 0.0215074169379659
Epoch: 0024 cost= 0.039510176 err= 0.021276861702790483
Epoch: 0025 cost= 0.038506214 err= 0.021069888626225292
Epoch: 0026 cost= 0.037571594 err= 0.020883415575372054
Epoch: 0027 cost= 0.036698919 err= 0.02071481025428511
Epoch: 0028 cost= 0.035882033 err= 0.020561894453130662
Epoch: 0029 cost= 0.035115529 err= 0.020422800251981243
Epoch: 0030 cost= 0.034394540 err= 0.020295942740049214
Epoch: 0031 cost= 0.033715032 err= 0.020179953374317848
Epoch: 0032 cost= 0.033073138 err= 0.020073665907257236
Epoch: 0033 cost= 0.032466043 err= 0.019976052087149584
Epoch: 0034 cost= 0.031890534 err= 0.01988623993413057
Epoch: 0035 cost= 0.031344324 err= 0.01980343531933613
Epoch: 0036 cost= 0.030824943 err= 0.01972696366778109
Epoch: 0037 cost= 0.030330520 err= 0.019656223325873724
Epoch: 0038 cost= 0.029859124 err= 0.0195906922762515
Epoch: 0039 cost= 0.029409129 err= 0.019529875045991504
Epoch: 0040 cost= 0.028979069 err= 0.019473378873663023
Epoch: 0041 cost= 0.028567536 err= 0.019420802227105013
Epoch: 0042 cost= 0.028173458 err= 0.019371829834999518
Epoch: 0043 cost= 0.027795522 err= 0.019326153407164384
Epoch: 0044 cost= 0.027432874 err= 0.01928348909743363
Epoch: 0045 cost= 0.027084429 err= 0.019243609809782358
Epoch: 0046 cost= 0.026749423 err= 0.01920627281884663
Epoch: 0047 cost= 0.026426943 err= 0.019171290711092297
Epoch: 0048 cost= 0.026116420 err= 0.019138470003090335
Epoch: 0049 cost= 0.025817052 err= 0.019107659834844526
Epoch: 0050 cost= 0.025528323 err= 0.01907870334020117
--------------------------------------------
7.1.2 用线性逻辑回归处理多分类问题
构建网络模型,完成将3类样本分开的任务:先生成3类样本的模拟数据,再构造神经网络,通过softmax分类的方法计算神经网络的输出值,并据此将3类样本分开。
程序:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap
# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder
def onehot(y, start, end):
    """One-hot encode ``y`` against the integer categories start..end-1.

    A ``OneHotEncoder`` is fitted on a single column holding the integers
    ``start, start + 1, ..., end - 1`` and then applied to ``y``.

    Args:
        y: Labels handed straight to ``OneHotEncoder.transform``; the caller
            in this file passes an ``(n, 1)`` int32 column.
        start: First category value (inclusive).
        end: Last category value (exclusive).

    Returns:
        The dense array produced by ``transform(y).toarray()``.
    """
    category_column = (
        np.linspace(start, end - 1, end - start)
        .reshape(-1, 1)
        .astype(np.int32)
    )
    encoder = OneHotEncoder()
    encoder.fit(category_column)
    return encoder.transform(y).toarray()
#
def generate(sample_size, num_classes, diff, regression=False, seed=10):
    """Draw 2-D Gaussian clusters and return shuffled samples with labels.

    Class 0 is sampled from N(mean, I) with a randomly drawn ``mean``; class
    ``i + 1`` is sampled from N(mean + diff[i], I).

    Args:
        sample_size: Total number of samples requested; split evenly over
            ``num_classes`` (remainder dropped).
        num_classes: Number of classes, used for the per-class sample count
            and for the one-hot width.
        diff: Sequence of offsets added to ``mean``, one per extra class.
        regression: If truthy, labels are a 1-D float array of class indices;
            otherwise they are one-hot encoded through ``onehot``.
        seed: Value passed to ``np.random.seed`` before sampling. The default
            of 10 keeps the historical behaviour, where every call returns the
            same draw. Pass ``None`` to leave the global random state alone
            and get fresh samples on each call.

    Returns:
        Tuple ``(X, Y)`` of features and labels, shuffled with one shared
        permutation drawn from NumPy's global random state.
    """
    if seed is not None:
        np.random.seed(seed)
    mean = np.random.randn(2)
    cov = np.eye(2)
    samples_per_class = int(sample_size / num_classes)

    features = [np.random.multivariate_normal(mean, cov, samples_per_class)]
    labels = [np.zeros(samples_per_class)]
    for ci, d in enumerate(diff):
        features.append(
            np.random.multivariate_normal(mean + d, cov, samples_per_class))
        labels.append((ci + 1) * np.ones(samples_per_class))
    X0 = np.concatenate(features)
    Y0 = np.concatenate(labels)

    if not regression:
        # onehot() receives the labels as an (n, 1) int32 column.
        Y0 = onehot(np.reshape(Y0, [-1, 1]).astype(np.int32), 0, num_classes)

    order = np.random.permutation(len(X0))
    return X0[order], Y0[order]
# 1. Generate and plot the three-class sample set.
np.random.seed(10)
input_dim = 2
num_classes = 3
X, Y = generate(2000, num_classes, [[3.0], [3.0, 0]], False)
aa = [np.argmax(l) for l in Y]
colors = ['r' if l == 0 else 'b' if l == 1 else 'y' for l in aa[:]]
plt.scatter(X[:, 0], X[:, 1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show()
print('----------------------------------')

# 2. Build the network: one linear layer with a softmax output.
lab_dim = num_classes
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_lables = tf.placeholder(tf.float32, [None, lab_dim])
W = tf.Variable(tf.random_normal([input_dim, lab_dim]), name="weight")
b = tf.Variable(tf.zeros([lab_dim]), name="bias")
output = tf.matmul(input_features, W) + b
z = tf.nn.softmax(output)
a1 = tf.argmax(z, axis=1)             # predicted class per row
b1 = tf.argmax(input_lables, axis=1)  # true class per row
err = tf.count_nonzero(a1 - b1)       # number of misclassified rows
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=input_lables, logits=output)
loss = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(0.04)
train = optimizer.minimize(loss)
print('----------------------------------')

# 3. Train.
maxEpochs = 50
minibatchSize = 25
num_batches = len(Y) // minibatchSize
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(maxEpochs):
        sumerr = 0
        for i in range(num_batches):
            start = i * minibatchSize
            x1 = X[start:start + minibatchSize, :]
            y1 = Y[start:start + minibatchSize, :]
            _, lossval, errval = sess.run(
                [train, loss, err],
                feed_dict={input_features: x1, input_lables: y1})
            sumerr = sumerr + errval
        # Error rate = misclassified samples / samples seen this epoch.
        # The original divided by the minibatch size twice, which produced
        # "rates" above 1.
        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(lossval), "err=", sumerr / (num_batches * minibatchSize))
    print('----------------------------------')

    # 4. Plot samples with one line per class where that class's logit is 0.
    train_X, train_Y = generate(200, num_classes, [[3.0], [3.0, 0]], False)
    aa = [np.argmax(l) for l in train_Y]
    colors = ['r' if l == 0 else 'b' if l == 1 else 'y' for l in aa[:]]
    plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)
    x = np.linspace(-1, 8, 200)
    w_val, b_val = sess.run([W, b])  # fetch the parameters once
    line_styles = (('first line', 3), ('second line', 2), ('third line', 1))
    for k, (line_label, line_width) in enumerate(line_styles):
        # x1*w1k + x2*w2k + bk = 0  =>  x2 = -x1 * w1k/w2k - bk/w2k
        y = -x * (w_val[0][k] / w_val[1][k]) - b_val[k] / w_val[1][k]
        plt.plot(x, y, label=line_label, lw=line_width)
    plt.legend()
    plt.show()
    print(w_val, b_val)
    print('----------------------------------')

    # 5. Plot the predicted class of every point on a 200 x 200 grid.
    train_X, train_Y = generate(200, num_classes, [[3.0], [3.0, 0]], False)
    aa = [np.argmax(l) for l in train_Y]
    colors = ['r' if l == 0 else 'b' if l == 1 else 'y' for l in aa[:]]
    plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)
    nb_of_xs = 200
    xs1 = np.linspace(-1, 8, num=nb_of_xs)
    xs2 = np.linspace(-1, 8, num=nb_of_xs)
    xx, yy = np.meshgrid(xs1, xs2)
    # One batched run replaces 40,000 single-point runs; reshaping the result
    # restores the [i, j] layout of the grid.
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    grid_classes = sess.run(a1, feed_dict={input_features: grid_points})
    classification_plane = grid_classes.reshape(xx.shape).astype(np.float64)
    cmap = ListedColormap([
        colorConverter.to_rgba('r', alpha=0.30),
        colorConverter.to_rgba('b', alpha=0.30),
        colorConverter.to_rgba('y', alpha=0.30)])
    plt.contourf(xx, yy, classification_plane, cmap=cmap)
    plt.show()
结果:
----------------------------------
----------------------------------
Epoch: 0001 cost= 0.566720724 err= 1.3375999999999997
Epoch: 0002 cost= 0.385914028 err= 0.43200000000000005
Epoch: 0003 cost= 0.347154856 err= 0.33920000000000017
Epoch: 0004 cost= 0.335927367 err= 0.3280000000000001
Epoch: 0005 cost= 0.334472388 err= 0.3168000000000002
Epoch: 0006 cost= 0.337103307 err= 0.3056000000000002
Epoch: 0007 cost= 0.341525763 err= 0.3072000000000002
Epoch: 0008 cost= 0.346683890 err= 0.3024000000000002
Epoch: 0009 cost= 0.352053642 err= 0.2976000000000002
Epoch: 0010 cost= 0.357365191 err= 0.2976000000000002
Epoch: 0011 cost= 0.362479329 err= 0.2960000000000002
Epoch: 0012 cost= 0.367326468 err= 0.2912000000000002
Epoch: 0013 cost= 0.371876359 err= 0.2896000000000002
Epoch: 0014 cost= 0.376120597 err= 0.2832000000000002
Epoch: 0015 cost= 0.380063146 err= 0.2816000000000002
Epoch: 0016 cost= 0.383714765 err= 0.2816000000000002
Epoch: 0017 cost= 0.387089968 err= 0.2816000000000002
Epoch: 0018 cost= 0.390204966 err= 0.2816000000000002
Epoch: 0019 cost= 0.393076658 err= 0.2800000000000002
Epoch: 0020 cost= 0.395721585 err= 0.2800000000000002
Epoch: 0021 cost= 0.398156136 err= 0.2784000000000002
Epoch: 0022 cost= 0.400395811 err= 0.2784000000000002
Epoch: 0023 cost= 0.402455151 err= 0.2784000000000002
Epoch: 0024 cost= 0.404347986 err= 0.2784000000000002
Epoch: 0025 cost= 0.406087160 err= 0.27680000000000016
Epoch: 0026 cost= 0.407684505 err= 0.27680000000000016
Epoch: 0027 cost= 0.409151524 err= 0.27520000000000017
Epoch: 0028 cost= 0.410498321 err= 0.27520000000000017
Epoch: 0029 cost= 0.411734283 err= 0.27520000000000017
Epoch: 0030 cost= 0.412868589 err= 0.27520000000000017
Epoch: 0031 cost= 0.413909316 err= 0.27520000000000017
Epoch: 0032 cost= 0.414863855 err= 0.27520000000000017
Epoch: 0033 cost= 0.415739596 err= 0.27520000000000017
Epoch: 0034 cost= 0.416542560 err= 0.27520000000000017
Epoch: 0035 cost= 0.417278677 err= 0.27680000000000016
Epoch: 0036 cost= 0.417953581 err= 0.27680000000000016
Epoch: 0037 cost= 0.418572336 err= 0.27680000000000016
Epoch: 0038 cost= 0.419139326 err= 0.27680000000000016
Epoch: 0039 cost= 0.419658959 err= 0.27680000000000016
Epoch: 0040 cost= 0.420135260 err= 0.2784000000000002
Epoch: 0041 cost= 0.420571566 err= 0.2784000000000002
Epoch: 0042 cost= 0.420971215 err= 0.2784000000000002
Epoch: 0043 cost= 0.421337485 err= 0.2784000000000002
Epoch: 0044 cost= 0.421672970 err= 0.2784000000000002
Epoch: 0045 cost= 0.421980351 err= 0.2784000000000002
Epoch: 0046 cost= 0.422261894 err= 0.2784000000000002
Epoch: 0047 cost= 0.422519714 err= 0.2784000000000002
Epoch: 0048 cost= 0.422755808 err= 0.2784000000000002
Epoch: 0049 cost= 0.422972143 err= 0.2784000000000002
Epoch: 0050 cost= 0.423170060 err= 0.2784000000000002
----------------------------------
[[-1.8466457 1.1504047 1.2538404]
[-0.8650342 2.1870062 -0.8544501]] [ 6.7945867 -8.398895 -1.5356051]
----------------------------------
7.2.1 使用带隐藏层的神经网络拟合异或操作
通过构建符合异或规律的数据集作为模拟样本,构建一个简单的多层神经网络来拟合其样本特征完成分类任务
程序:
import tensorflow as tf
import numpy as np
# Network layout: 2-D input -> 2-unit hidden layer -> 1-D output.
learning_rate = 1e-4
n_input = 2
n_label = 1
n_hidden = 2
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_label])

# Trainable parameters.
weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
}
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
}

# Model: ReLU hidden layer, tanh output.
# The original notes list other output activations that were tried: relu
# (reported as getting stuck in a local optimum), sigmoid, and leaky relu
# (reported as working after about 40000 steps).
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))
y_pred = tf.nn.tanh(tf.add(tf.matmul(layer_1, weights['h2']), biases['h2']))
loss = tf.reduce_mean((y_pred - y) ** 2)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# XOR truth table. Both arrays are float32 to match the placeholders.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
Y = np.array([[0], [1], [1], [0]], dtype=np.float32)

# The context manager closes the session when the script is done.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Train for 10000 full-batch steps.
    for _ in range(10000):
        sess.run(train_step, feed_dict={x: X, y: Y})
    # Predictions for the four XOR inputs.
    print(sess.run(y_pred, feed_dict={x: X}))
    # Hidden-layer activations for the same inputs.
    print(sess.run(layer_1, feed_dict={x: X}))
结果:
7.3 利用全连接网络将图片进行分类
构建一个简单的多层神经网络,以拟合MNIST样本特征完成分类任务
程序:
import tensorflow as tf
# 导入 MINST 数据集
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data set from a local directory, with one-hot labels.
mnist = input_data.read_data_sets("F:/shendu/MNIST_data/", one_hot=True)
# Training hyper-parameters
# (display_step: the training loop prints the cost every display_step epochs)
learning_rate = 0.001
training_epochs = 25
batch_size = 100
display_step = 1
# Network Parameters
n_hidden_1 = 256 # number of units in the first hidden layer
n_hidden_2 = 256 # number of units in the second hidden layer
n_input = 784 # MNIST input size (img shape: 28*28 = 784)
n_classes = 10 # MNIST classes (digits 0-9, 10 in total)
# tf Graph input
# Placeholders for a batch of flattened images and their one-hot labels
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
# Create model
def multilayer_perceptron(x, weights, biases):
    """Build a two-hidden-layer perceptron and return its linear output.

    Args:
        x: Input tensor fed to the first layer.
        weights: Mapping with the weight tensors under 'h1', 'h2' and 'out'.
        biases: Mapping with the bias tensors under 'b1', 'b2' and 'out'.

    Returns:
        The output-layer tensor; no activation is applied to it.
    """
    # Two hidden layers, each an affine map followed by ReLU.
    hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    hidden_2 = tf.nn.relu(
        tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2']))
    # Linear output layer.
    return tf.matmul(hidden_2, weights['out']) + biases['out']
# Trainable parameters: weights and biases for both hidden layers and the
# output layer.
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes])),
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes])),
}

# Network output.
pred = multilayer_perceptron(x, weights, biases)

# Loss and optimizer.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
cost = tf.reduce_mean(per_example_loss)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # One pass over the training set.
        for _ in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run one optimisation step and fetch the batch loss.
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: batch_x, y: batch_y})
            # Running mean of the batch losses.
            avg_cost += c / total_batch
        # Progress report.
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=",
                  "{:.9f}".format(avg_cost))
    print(" Finished!")

    # Evaluate on the test split: fraction of rows whose arg-max prediction
    # equals the arg-max of the one-hot label.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    test_feed = {x: mnist.test.images, y: mnist.test.labels}
    print("Accuracy:", sess.run(accuracy, feed_dict=test_feed))
结果:
Extracting F:/shendu/MNIST_data/train-images-idx3-ubyte.gz
Extracting F:/shendu/MNIST_data/train-labels-idx1-ubyte.gz
Extracting F:/shendu/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting F:/shendu/MNIST_data/t10k-labels-idx1-ubyte.gz
Epoch: 0001 cost= 172.031030853
Epoch: 0002 cost= 40.217121029
Epoch: 0003 cost= 25.387143721
Epoch: 0004 cost= 17.630999616
Epoch: 0005 cost= 12.666866638
Epoch: 0006 cost= 9.388582074
Epoch: 0007 cost= 7.051628594
Epoch: 0008 cost= 5.250864131
Epoch: 0009 cost= 3.878638424
Epoch: 0010 cost= 3.044112996
Epoch: 0011 cost= 2.090644927
Epoch: 0012 cost= 1.621755744
Epoch: 0013 cost= 1.274370165
Epoch: 0014 cost= 1.030953568
Epoch: 0015 cost= 0.856483016
Epoch: 0016 cost= 0.633580475
Epoch: 0017 cost= 0.563832541
Epoch: 0018 cost= 0.594583921
Epoch: 0019 cost= 0.486230183
Epoch: 0020 cost= 0.441275119
Epoch: 0021 cost= 0.369737626
Epoch: 0022 cost= 0.412133130
Epoch: 0023 cost= 0.339861759
Epoch: 0024 cost= 0.408908168
Epoch: 0025 cost= 0.360051092
Finished!
Accuracy: 0.9533
7.4.1 利用异或数据集演示过拟合问题
程序:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap
# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder
def onehot(y, start, end):
    """One-hot encode ``y`` against the integer categories start..end-1.

    A ``OneHotEncoder`` is fitted on a single column holding the integers
    ``start, start + 1, ..., end - 1`` and then applied to ``y``.

    Args:
        y: Labels handed straight to ``OneHotEncoder.transform``; the caller
            in this file passes an ``(n, 1)`` int32 column.
        start: First category value (inclusive).
        end: Last category value (exclusive).

    Returns:
        The dense array produced by ``transform(y).toarray()``.
    """
    category_column = (
        np.linspace(start, end - 1, end - start)
        .reshape(-1, 1)
        .astype(np.int32)
    )
    encoder = OneHotEncoder()
    encoder.fit(category_column)
    return encoder.transform(y).toarray()
def generate(sample_size, num_classes, diff, regression=False, seed=10):
    """Draw 2-D Gaussian clusters and return shuffled samples with labels.

    Class 0 is sampled from N(mean, I) with a randomly drawn ``mean``; class
    ``i + 1`` is sampled from N(mean + diff[i], I).

    Args:
        sample_size: Total number of samples requested; split evenly over
            ``num_classes`` (remainder dropped).
        num_classes: Number of classes, used for the per-class sample count
            and for the one-hot width.
        diff: Sequence of offsets added to ``mean``, one per extra class.
        regression: If truthy, labels are a 1-D float array of class indices;
            otherwise they are one-hot encoded through ``onehot``.
        seed: Value passed to ``np.random.seed`` before sampling. The default
            of 10 keeps the historical behaviour, where every call returns the
            same draw. Pass ``None`` to leave the global random state alone
            and get fresh samples on each call.

    Returns:
        Tuple ``(X, Y)`` of features and labels, shuffled with one shared
        permutation drawn from NumPy's global random state.
    """
    if seed is not None:
        np.random.seed(seed)
    mean = np.random.randn(2)
    cov = np.eye(2)
    samples_per_class = int(sample_size / num_classes)

    features = [np.random.multivariate_normal(mean, cov, samples_per_class)]
    labels = [np.zeros(samples_per_class)]
    for ci, d in enumerate(diff):
        features.append(
            np.random.multivariate_normal(mean + d, cov, samples_per_class))
        labels.append((ci + 1) * np.ones(samples_per_class))
    X0 = np.concatenate(features)
    Y0 = np.concatenate(labels)

    if not regression:
        # onehot() receives the labels as an (n, 1) int32 column.
        Y0 = onehot(np.reshape(Y0, [-1, 1]).astype(np.int32), 0, num_classes)

    order = np.random.permutation(len(X0))
    return X0[order], Y0[order]
def _scatter_by_label(points, labels):
    """Scatter label-0 points as red '+' and all other points as blue 'o'."""
    flat_labels = np.ravel(labels)
    zeros = points[flat_labels == 0.0]
    others = points[flat_labels != 0.0]
    plt.scatter(zeros[:, 0], zeros[:, 1], c='r', marker='+')
    plt.scatter(others[:, 0], others[:, 1], c='b', marker='o')


def _decision_plane(sess, lo=-1, hi=8, steps=200):
    """Evaluate ``y_pred`` on a steps x steps grid and threshold it at 0.5.

    The whole grid is fed in one ``sess.run`` call. Returns ``(xx, yy,
    plane)`` where ``plane[i, j]`` is 1.0 when the prediction at
    ``(xx[i, j], yy[i, j])`` exceeds 0.5 and 0.0 otherwise.
    """
    axis = np.linspace(lo, hi, num=steps)
    xx, yy = np.meshgrid(axis, axis)
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    scores = sess.run(y_pred, feed_dict={x: grid_points})
    # The original applied int(), which truncates toward zero and therefore
    # mapped every tanh output strictly inside (-1, 1) to class 0.
    plane = (scores.reshape(xx.shape) > 0.5).astype(np.float64)
    return xx, yy, plane


def _evaluate_and_plot(sess, n_points, cmap):
    """Generate ``n_points`` XOR samples, print their loss, plot the plane."""
    x_eval, y_eval = generate(
        n_points, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
    y_eval = np.reshape(y_eval % 2, [-1, 1])
    # Plot the freshly generated points; the original 120-point section
    # plotted the training set (X, Y) here by mistake.
    _scatter_by_label(x_eval, y_eval)
    print("loss:\n", sess.run(loss, feed_dict={x: x_eval, y: y_eval}))
    xx, yy, plane = _decision_plane(sess)
    plt.contourf(xx, yy, plane, cmap=cmap)
    plt.show()


# Build the XOR data set: four clusters, labels folded to 0/1 by parity.
np.random.seed(10)
input_dim = 2
num_classes = 4
X, Y = generate(320, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
Y = Y % 2
_scatter_by_label(X, Y)
plt.show()
Y = np.reshape(Y, [-1, 1])
print('-------------------------------------------')

# Hyper-parameters.
learning_rate = 1e-4
n_input = 2
n_label = 1
n_hidden = 2  # the accompanying notes raise this to 200 to show overfitting
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_label])

# Trainable parameters.
weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
}
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
}

# Model: ReLU hidden layer, tanh output, mean-squared-error loss.
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))
y_pred = tf.nn.tanh(tf.add(tf.matmul(layer_1, weights['h2']), biases['h2']))
loss = tf.reduce_mean((y_pred - y) ** 2)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Two-colour map for the binary decision plane.
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])

# The context manager closes the session when the script is done.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
        _, loss_val = sess.run([train_step, loss], feed_dict={x: X, y: Y})
        if i % 1000 == 0:
            print("Step:", i, "Current loss:", loss_val)
    print('------------------------------------------')

    # Visualise 120 generated points on top of the decision plane.
    _evaluate_and_plot(sess, 120, cmap)
    print('---------------------------------------------------------------')

    # Overfitting check on a small 12-point set.
    _evaluate_and_plot(sess, 12, cmap)
结果:
将隐藏层节点提高到200:
n_hidden = 200
新结果:
7-7 异或集的L2_loss
构建异或数据集模拟样本,使用多层神经网络将其分类,并使用L2正则化技术来改善过拟合情况。
程序:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap
# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder
def onehot(y, start, end):
    """One-hot encode ``y`` against the integer categories start..end-1.

    A ``OneHotEncoder`` is fitted on a single column holding the integers
    ``start, start + 1, ..., end - 1`` and then applied to ``y``.

    Args:
        y: Labels handed straight to ``OneHotEncoder.transform``; the caller
            in this file passes an ``(n, 1)`` int32 column.
        start: First category value (inclusive).
        end: Last category value (exclusive).

    Returns:
        The dense array produced by ``transform(y).toarray()``.
    """
    category_column = (
        np.linspace(start, end - 1, end - start)
        .reshape(-1, 1)
        .astype(np.int32)
    )
    encoder = OneHotEncoder()
    encoder.fit(category_column)
    return encoder.transform(y).toarray()
def generate(sample_size, num_classes, diff, regression=False, seed=10):
    """Draw 2-D Gaussian clusters and return shuffled samples with labels.

    Class 0 is sampled from N(mean, I) with a randomly drawn ``mean``; class
    ``i + 1`` is sampled from N(mean + diff[i], I).

    Args:
        sample_size: Total number of samples requested; split evenly over
            ``num_classes`` (remainder dropped).
        num_classes: Number of classes, used for the per-class sample count
            and for the one-hot width.
        diff: Sequence of offsets added to ``mean``, one per extra class.
        regression: If truthy, labels are a 1-D float array of class indices;
            otherwise they are one-hot encoded through ``onehot``.
        seed: Value passed to ``np.random.seed`` before sampling. The default
            of 10 keeps the historical behaviour, where every call returns the
            same draw. Pass ``None`` to leave the global random state alone
            and get fresh samples on each call.

    Returns:
        Tuple ``(X, Y)`` of features and labels, shuffled with one shared
        permutation drawn from NumPy's global random state.
    """
    if seed is not None:
        np.random.seed(seed)
    mean = np.random.randn(2)
    cov = np.eye(2)
    samples_per_class = int(sample_size / num_classes)

    features = [np.random.multivariate_normal(mean, cov, samples_per_class)]
    labels = [np.zeros(samples_per_class)]
    for ci, d in enumerate(diff):
        features.append(
            np.random.multivariate_normal(mean + d, cov, samples_per_class))
        labels.append((ci + 1) * np.ones(samples_per_class))
    X0 = np.concatenate(features)
    Y0 = np.concatenate(labels)

    if not regression:
        # onehot() receives the labels as an (n, 1) int32 column.
        Y0 = onehot(np.reshape(Y0, [-1, 1]).astype(np.int32), 0, num_classes)

    order = np.random.permutation(len(X0))
    return X0[order], Y0[order]
def _scatter_colored(points, labels):
    """Scatter ``points`` in red (label 0) or blue and set the axis labels."""
    point_colors = ['r' if l == 0.0 else 'b' for l in np.ravel(labels)]
    plt.scatter(points[:, 0], points[:, 1], c=point_colors)
    plt.xlabel("Scaled age (in yrs)")
    plt.ylabel("Tumor size (in cm)")


def _decision_plane(sess, lo=-1, hi=8, steps=200):
    """Evaluate ``y_pred`` on a steps x steps grid and threshold it at 0.5.

    The whole grid is fed in one ``sess.run`` call. Returns ``(xx, yy,
    plane)`` where ``plane[i, j]`` is 1.0 when the prediction at
    ``(xx[i, j], yy[i, j])`` exceeds 0.5 and 0.0 otherwise.
    """
    axis = np.linspace(lo, hi, num=steps)
    xx, yy = np.meshgrid(axis, axis)
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    scores = sess.run(y_pred, feed_dict={x: grid_points})
    # The original applied int(), which truncates toward zero, so every
    # prediction below 1.0 (for example 0.99) was drawn as class 0.
    plane = (scores.reshape(xx.shape) > 0.5).astype(np.float64)
    return xx, yy, plane


# Ensure we always get the same amount of randomness.
np.random.seed(10)
input_dim = 2
num_classes = 4
X, Y = generate(120, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
Y = Y % 2  # fold the four cluster ids into XOR labels 0/1
_scatter_colored(X, Y)
plt.show()
Y = np.reshape(Y, [-1, 1])

learning_rate = 1e-4
n_input = 2
n_label = 1
n_hidden = 200
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_label])
weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
}
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
}
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))
# Leaky ReLU on the output layer.
layer2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['h2'])
y_pred = tf.maximum(layer2, 0.01 * layer2)
reg = 0.01  # weight of the L2 penalty
# Mean squared error plus an L2 penalty on both weight matrices.
loss = (tf.reduce_mean((y_pred - y) ** 2)
        + tf.nn.l2_loss(weights['h1']) * reg
        + tf.nn.l2_loss(weights['h2']) * reg)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Two-colour map for the binary decision plane.
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])

# The context manager closes the session when the script is done.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(20000):
        # NOTE(review): generate() as defined in this file reseeds NumPy on
        # every call, so each iteration receives the same 1000 points.
        X, Y = generate(1000, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
        Y = Y % 2
        Y = np.reshape(Y, [-1, 1])
        _, loss_val = sess.run([train_step, loss], feed_dict={x: X, y: Y})
        if i % 1000 == 0:
            print("Step:", i, "Current loss:", loss_val)

    # Training points on top of the decision plane.
    _scatter_colored(X, Y)
    xx, yy, classification_plane = _decision_plane(sess)
    plt.contourf(xx, yy, classification_plane, cmap=cmap)
    plt.show()

    # Small 12-point check set on top of the same decision plane.
    xTrain, yTrain = generate(12, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
    yTrain = yTrain % 2
    _scatter_colored(xTrain, yTrain)
    yTrain = np.reshape(yTrain, [-1, 1])
    print("loss:\n", sess.run(loss, feed_dict={x: xTrain, y: yTrain}))
    xx, yy, classification_plane = _decision_plane(sess)
    plt.contourf(xx, yy, classification_plane, cmap=cmap)
    plt.show()
结果:
7-8 异或集dropout
使用dropout配合退化学习率的技术来改善过拟合
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap
# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder
def onehot(y, start, end):
    """One-hot encode ``y`` against the integer categories start..end-1.

    A ``OneHotEncoder`` is fitted on a single column holding the integers
    ``start, start + 1, ..., end - 1`` and then applied to ``y``.

    Args:
        y: Labels handed straight to ``OneHotEncoder.transform``; the caller
            in this file passes an ``(n, 1)`` int32 column.
        start: First category value (inclusive).
        end: Last category value (exclusive).

    Returns:
        The dense array produced by ``transform(y).toarray()``.
    """
    category_column = (
        np.linspace(start, end - 1, end - start)
        .reshape(-1, 1)
        .astype(np.int32)
    )
    encoder = OneHotEncoder()
    encoder.fit(category_column)
    return encoder.transform(y).toarray()
def generate(sample_size, num_classes, diff, regression=False, seed=10):
    """Draw 2-D Gaussian clusters and return shuffled samples with labels.

    Class 0 is sampled from N(mean, I) with a randomly drawn ``mean``; class
    ``i + 1`` is sampled from N(mean + diff[i], I).

    Args:
        sample_size: Total number of samples requested; split evenly over
            ``num_classes`` (remainder dropped).
        num_classes: Number of classes, used for the per-class sample count
            and for the one-hot width.
        diff: Sequence of offsets added to ``mean``, one per extra class.
        regression: If truthy, labels are a 1-D float array of class indices;
            otherwise they are one-hot encoded through ``onehot``.
        seed: Value passed to ``np.random.seed`` before sampling. The default
            of 10 keeps the historical behaviour, where every call returns the
            same draw. Pass ``None`` to leave the global random state alone
            and get fresh samples on each call.

    Returns:
        Tuple ``(X, Y)`` of features and labels, shuffled with one shared
        permutation drawn from NumPy's global random state.
    """
    if seed is not None:
        np.random.seed(seed)
    mean = np.random.randn(2)
    cov = np.eye(2)
    samples_per_class = int(sample_size / num_classes)

    features = [np.random.multivariate_normal(mean, cov, samples_per_class)]
    labels = [np.zeros(samples_per_class)]
    for ci, d in enumerate(diff):
        features.append(
            np.random.multivariate_normal(mean + d, cov, samples_per_class))
        labels.append((ci + 1) * np.ones(samples_per_class))
    X0 = np.concatenate(features)
    Y0 = np.concatenate(labels)

    if not regression:
        # onehot() receives the labels as an (n, 1) int32 column.
        Y0 = onehot(np.reshape(Y0, [-1, 1]).astype(np.int32), 0, num_classes)

    order = np.random.permutation(len(X0))
    return X0[order], Y0[order]
# Ensure we always get the same amount of randomness
np.random.seed(10)
input_dim = 2
num_classes = 4
X, Y = generate(120, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
Y = Y % 2
# colors = ['r' if l == 0.0 else 'b' for l in Y[:]]
# plt.scatter(X[:,0], X[:,1], c=colors)
xr = []
xb = []
for (l, k) in zip(Y[:], X[:]):
if l == 0.0:
xr.append([k[0], k[1]])
else:
xb.append([k[0], k[1]])
xr = np.array(xr)
xb = np.array(xb)
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')
plt.show()
Y = np.reshape(Y, [-1, 1])
# Hyper-parameters and layer sizes.
learning_rate = 0.01  # alternative noted by the author: 1e-4
n_input = 2
n_label = 1
n_hidden = 200

# Graph inputs: one row of features / one target value per sample.
x = tf.placeholder(tf.float32, shape=[None, n_input])
y = tf.placeholder(tf.float32, shape=[None, n_label])

# Trainable parameters, keyed by layer ('h1' = hidden, 'h2' = output).
weights = dict(
    h1=tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    h2=tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1)),
)
biases = dict(
    h1=tf.Variable(tf.zeros([n_hidden])),
    h2=tf.Variable(tf.zeros([n_label])),
)

# Hidden layer: affine transform + ReLU, followed by dropout whose keep
# probability is fed at run time.
layer_1 = tf.nn.relu(tf.matmul(x, weights['h1']) + biases['h1'])
keep_prob = tf.placeholder("float")
layer_1_drop = tf.nn.dropout(layer_1, keep_prob)

# Output layer with a leaky-ReLU activation written as max(z, 0.01 * z).
layer2 = tf.matmul(layer_1_drop, weights['h2']) + biases['h2']
y_pred = tf.maximum(layer2, 0.01 * layer2)

# Coefficient for an L2 penalty on both weight matrices; the penalised loss
# is currently disabled, so only the plain mean squared error is minimised.
reg = 0.01
loss = tf.reduce_mean(tf.pow(y_pred - y, 2))

# Step counter advanced by the optimizer; it drives the learning-rate decay
# (decay factor 0.9 with a decay period of 1000 steps).
global_step = tf.Variable(0, trainable=False)
decaylearning_rate = tf.train.exponential_decay(
    learning_rate, global_step, decay_steps=1000, decay_rate=0.9)
train_step = tf.train.AdamOptimizer(decaylearning_rate).minimize(
    loss, global_step=global_step)
# Open a session and initialise every variable in the graph.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
for step in range(20000):
    # A fresh batch is generated on every step rather than reusing one set.
    X, Y = generate(1000, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
    # Binary parity labels as a column vector, matching the `y` placeholder.
    Y = np.reshape(Y % 2, [-1, 1])
    # keep_prob < 1 keeps dropout active during training.
    _, loss_val = sess.run(
        [train_step, loss],
        feed_dict={x: X, y: Y, keep_prob: 0.6},
    )
    # Report the loss once every 1000 steps.
    if not step % 1000:
        print("Step:", step, "Current loss:", loss_val)
# Scatter the most recent training batch, split by label. Y was reshaped to a
# column vector for training, so flatten it before building the masks.
labels = np.ravel(Y)
xr = X[labels == 0.0]
xb = X[labels != 0.0]
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')

# Regular grid covering [-1, 8] x [-1, 8].
nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid

# Evaluate the network on every grid point in a single session call (dropout
# disabled via keep_prob=1.0) instead of one call per point. Row-major
# flattening keeps grid_pred[k] aligned with (xx.ravel()[k], yy.ravel()[k]).
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
grid_pred = sess.run(y_pred, feed_dict={x: grid_points, keep_prob: 1.0})
# Truncate toward zero (what int() did per element) to get the class plane.
classification_plane = np.trunc(
    grid_pred.reshape(xx.shape).astype(np.float64))

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()
# Draw a small fresh sample set. Despite the "Train" names it is only used
# for evaluation below: the loss is computed, no training step is run.
xTrain, yTrain = generate(12, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
# Fold the class labels into two groups by parity (even -> 0, odd -> 1).
yTrain = yTrain % 2
# Split the samples by label with boolean masks rather than a Python loop.
xr = xTrain[yTrain == 0.0]
xb = xTrain[yTrain != 0.0]
# No plt.show() here: the points share a figure with the contour plot below.
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')
# Column-vector labels to match the `y` placeholder; dropout is disabled
# (keep_prob=1.0) when computing the loss.
yTrain = np.reshape(yTrain, [-1, 1])
print("loss:\n", sess.run(loss, feed_dict={x: xTrain, y: yTrain, keep_prob: 1.0}))

# Regular grid covering [-1, 8] x [-1, 8].
nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid

# Evaluate the network on every grid point in a single session call instead
# of one call per point. Row-major flattening keeps grid_pred[k] aligned with
# (xx.ravel()[k], yy.ravel()[k]).
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
grid_pred = sess.run(y_pred, feed_dict={x: grid_points, keep_prob: 1.0})
# Truncate toward zero (what int() did per element) to get the class plane.
classification_plane = np.trunc(
    grid_pred.reshape(xx.shape).astype(np.float64))

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()
结果: