7 多层神经网络——解决非线性问题

最新推荐文章于 2024-06-26 00:28:45 发布

大春SSC

最新推荐文章于 2024-06-26 00:28:45 发布

阅读量1.9k

点赞数

分类专栏：深度学习之TensorFlow入门、原理与进阶实战

深度学习之TensorFlow入门、原理与进阶实战专栏收录该内容

19 篇文章 2 订阅

订阅专栏

7.1.1 用线性单分逻辑回归分析肿瘤是良性还是恶性的
假设某肿瘤医院想用神经网络对已有的病例数据进行分类：数据的样本特征包括病人的年龄和肿瘤的大小，对应的标签为该病例是良性肿瘤还是恶性肿瘤。

程序：

#1 生成样本集
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle


# 模拟数据点
def generate(sample_size, mean, cov, diff, regression):
    """Draw one Gaussian cluster per class and return the samples shuffled.

    Class 0 is sampled around ``mean``; class ``i + 1`` is sampled around
    ``mean + diff[i]``. All classes share the covariance ``cov``.

    Args:
        sample_size: Total number of samples requested. It is split evenly
            over the ``len(diff) + 1`` classes (any remainder is dropped).
        mean: Centre of class 0.
        cov: Covariance matrix used for every class.
        diff: Sequence of offsets, one per additional class.
        regression: When truthy, labels are returned as class indices
            (floats 0.0, 1.0, ...). Otherwise they are returned as one-hot
            ``float32`` rows, one column per class.

    Returns:
        Tuple ``(X, Y)`` of features and labels, shuffled together.
    """
    # One class for the base cluster plus one per offset in ``diff``.
    num_classes = len(diff) + 1
    samples_per_class = int(sample_size / num_classes)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)

    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean + d, cov, samples_per_class)
        Y1 = (ci + 1) * np.ones(samples_per_class)

        X0 = np.concatenate((X0, X1))
        Y0 = np.concatenate((Y0, Y1))

    if not regression:
        # Encode BEFORE shuffling so that the encoded labels are what gets
        # returned; comparing each label against every class index yields
        # one row per sample with a single 1.0 in the matching column.
        Y0 = (Y0[:, None] == np.arange(num_classes)).astype(np.float32)
    X, Y = shuffle(X0, Y0)

    return X, Y


# Problem dimensions: two input features, two classes, one output unit.
input_dim = 2
num_classes = 2
lab_dim = 1

# Seed first so the random class centre and the samples are reproducible.
np.random.seed(10)
mean = np.random.randn(num_classes)
cov = np.eye(num_classes)
X, Y = generate(1000, mean, cov, [3.0], True)

# Label 0 is drawn in red, every other label in blue.
colors = np.where(Y == 0, 'r', 'b').tolist()
plt.scatter(X[:, 0], X[:, 1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show()
print('-------------------------------------------- ')


#2 Build the network structure
# Graph inputs: a batch of feature rows and the matching 0/1 labels.
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_lables = tf.placeholder(tf.float32, [None, lab_dim])
# Model parameters
W = tf.Variable(tf.random_normal([input_dim, lab_dim]), name="weight")
b = tf.Variable(tf.zeros([lab_dim]), name="bias")

# Keep the pre-activation around: the loss is computed from the logits, not
# from the sigmoid output.
logits = tf.matmul(input_features, W) + b
output = tf.nn.sigmoid(logits)
# The hand-written -(y*log(p) + (1-y)*log(1-p)) evaluates log(0) as soon as
# the sigmoid saturates to exactly 0 or 1, which turns the loss into NaN.
# The fused op computes the same quantity in a numerically stable way.
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=input_lables, logits=logits)
ser = tf.square(input_lables - output)
loss = tf.reduce_mean(cross_entropy)
err = tf.reduce_mean(ser)  # mean squared error, reported for monitoring only
optimizer = tf.train.AdamOptimizer(0.04)  # Adam adapts its step size per parameter
train = optimizer.minimize(loss)  # let the optimizer train
print('-------------------------------------------- ')

#3 Training configuration
maxEpochs = 50  # number of passes over the training set
minibatchSize = 25

# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Only full mini-batches are used; a trailing partial batch is skipped.
    num_batches = len(Y) // minibatchSize

    # Feed the data to the model
    for epoch in range(maxEpochs):
        sumerr = 0
        for i in range(num_batches):
            x1 = X[i * minibatchSize:(i + 1) * minibatchSize, :]
            # Labels are fed as a column vector to match the placeholder.
            y1 = np.reshape(Y[i * minibatchSize:(i + 1) * minibatchSize], [-1, 1])
            _, lossval, outputval, errval = sess.run([train, loss, output, err],
                                                     feed_dict={input_features: x1, input_lables: y1})
            sumerr = sumerr + errval

        # sumerr accumulates one mean squared error per batch, so the epoch
        # average divides by the number of batches (not by the batch size).
        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(lossval), "err=", sumerr / num_batches)
    print('-------------------------------------------- ')

    #4 Data visualisation
    train_X, train_Y = generate(100, mean, cov, [3.0], True)
    colors = ['r' if l == 0 else 'b' for l in train_Y[:]]
    plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)

    # Decision boundary: z = x1*w1 + x2*w2 + b = 0
    #   =>  x2 = -x1 * w1 / w2 - b / w2      (x2 is plotted as y, x1 as x)
    x = np.linspace(-1, 8, 200)
    # Fetch both parameters once instead of running the session per use.
    w_val, b_val = sess.run([W, b])
    y = -x * (w_val[0] / w_val[1]) - b_val / w_val[1]
    plt.plot(x, y, label='Fitted line')
    plt.legend()
    plt.show()

结果：
在这里插入图片描述

-------------------------------------------- 


-------------------------------------------- 
Epoch: 0001 cost= 0.419653773 err= 0.46671962559223173
Epoch: 0002 cost= 0.252073109 err= 0.19052305445075035
Epoch: 0003 cost= 0.181691363 err= 0.104353082254529
Epoch: 0004 cost= 0.144048572 err= 0.07146473601460457
Epoch: 0005 cost= 0.120770387 err= 0.05554016921669245
Epoch: 0006 cost= 0.104860924 err= 0.046304389499127865
Epoch: 0007 cost= 0.093224488 err= 0.04033656094223261
Epoch: 0008 cost= 0.084319353 err= 0.036203236244618894
Epoch: 0009 cost= 0.077273600 err= 0.03319647421129048
Epoch: 0010 cost= 0.071551934 err= 0.030926703792065382
Epoch: 0011 cost= 0.066806793 err= 0.029162730602547527
Epoch: 0012 cost= 0.062802620 err= 0.027759284852072595
Epoch: 0013 cost= 0.059374008 err= 0.02662088710349053
Epoch: 0014 cost= 0.056401681 err= 0.02568237894680351
Epoch: 0015 cost= 0.053797163 err= 0.02489794176071882
Epoch: 0016 cost= 0.051493753 err= 0.02423448855057359
Epoch: 0017 cost= 0.049440056 err= 0.0236675892630592
Epoch: 0018 cost= 0.047595870 err= 0.023178818095475437
Epoch: 0019 cost= 0.045929298 err= 0.02275408128276467
Epoch: 0020 cost= 0.044414707 err= 0.022382401695940645
Epoch: 0021 cost= 0.043031238 err= 0.022055116919800638
Epoch: 0022 cost= 0.041761700 err= 0.021765318417456003
Epoch: 0023 cost= 0.040592026 err= 0.0215074169379659
Epoch: 0024 cost= 0.039510176 err= 0.021276861702790483
Epoch: 0025 cost= 0.038506214 err= 0.021069888626225292
Epoch: 0026 cost= 0.037571594 err= 0.020883415575372054
Epoch: 0027 cost= 0.036698919 err= 0.02071481025428511
Epoch: 0028 cost= 0.035882033 err= 0.020561894453130662
Epoch: 0029 cost= 0.035115529 err= 0.020422800251981243
Epoch: 0030 cost= 0.034394540 err= 0.020295942740049214
Epoch: 0031 cost= 0.033715032 err= 0.020179953374317848
Epoch: 0032 cost= 0.033073138 err= 0.020073665907257236
Epoch: 0033 cost= 0.032466043 err= 0.019976052087149584
Epoch: 0034 cost= 0.031890534 err= 0.01988623993413057
Epoch: 0035 cost= 0.031344324 err= 0.01980343531933613
Epoch: 0036 cost= 0.030824943 err= 0.01972696366778109
Epoch: 0037 cost= 0.030330520 err= 0.019656223325873724
Epoch: 0038 cost= 0.029859124 err= 0.0195906922762515
Epoch: 0039 cost= 0.029409129 err= 0.019529875045991504
Epoch: 0040 cost= 0.028979069 err= 0.019473378873663023
Epoch: 0041 cost= 0.028567536 err= 0.019420802227105013
Epoch: 0042 cost= 0.028173458 err= 0.019371829834999518
Epoch: 0043 cost= 0.027795522 err= 0.019326153407164384
Epoch: 0044 cost= 0.027432874 err= 0.01928348909743363
Epoch: 0045 cost= 0.027084429 err= 0.019243609809782358
Epoch: 0046 cost= 0.026749423 err= 0.01920627281884663
Epoch: 0047 cost= 0.026426943 err= 0.019171290711092297
Epoch: 0048 cost= 0.026116420 err= 0.019138470003090335
Epoch: 0049 cost= 0.025817052 err= 0.019107659834844526
Epoch: 0050 cost= 0.025528323 err= 0.01907870334020117
--------------------------------------------

7.1.2 用线性逻辑回归处理多分类问题

构建网络模型完成将3类样本分开的任务：先生成3类样本模拟数据，构造神经网络，通过softmax分类的方法计算神经网络的输出值，并将其分开

程序：

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap

# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder


def onehot(y, start, end):
    """One-hot encode integer labels taken from the range ``[start, end)``.

    The encoder is fitted on every integer from ``start`` to ``end - 1`` so
    that the output has one column per admissible label, whether or not the
    label occurs in ``y``.

    Args:
        y: Labels to encode; passed straight to ``OneHotEncoder.transform``
            (assumed to be an ``(n, 1)`` integer array -- confirm at callers).
        start: Smallest label value.
        end: One past the largest label value.

    Returns:
        Dense array produced by calling ``toarray()`` on the encoded result.
    """
    # Column vector holding every admissible label exactly once.
    categories = np.linspace(start, end - 1, end - start).astype(np.int32).reshape(-1, 1)
    encoder = OneHotEncoder()
    encoder.fit(categories)
    return encoder.transform(y).toarray()


#

def generate(sample_size, num_classes, diff, regression=False):
    """Draw one 2-D Gaussian cluster per class and return them shuffled.

    Class 0 is centred on a fixed pseudo-random ``mean``; class ``i + 1`` is
    centred on ``mean + diff[i]``. Every class uses the identity covariance.

    Args:
        sample_size: Total number of samples requested; each class receives
            ``int(sample_size / num_classes)`` of them.
        num_classes: Number of classes (expected to equal ``len(diff) + 1``).
        diff: Sequence of offsets added to ``mean``, one per additional class.
        regression: When truthy, labels are returned as class indices;
            otherwise they are one-hot encoded with ``onehot``.

    Returns:
        Tuple ``(X, Y)`` of features and labels, shuffled together.
    """
    # Take the class-0 centre from a private generator with a fixed seed, so
    # every call uses the same cluster centres WITHOUT reseeding NumPy's
    # global RNG. Reseeding the global generator here made each call replay
    # the same random stream, so repeated calls returned the same draws
    # rather than fresh samples.
    mean = np.random.RandomState(10).randn(2)
    cov = np.eye(2)

    samples_per_class = int(sample_size / num_classes)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)

    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean + d, cov, samples_per_class)
        Y1 = (ci + 1) * np.ones(samples_per_class)

        X0 = np.concatenate((X0, X1))
        Y0 = np.concatenate((Y0, Y1))

    if not regression:
        # The encoder expects an integer column vector.
        Y0 = np.reshape(Y0, [-1, 1])
        Y0 = onehot(Y0.astype(np.int32), 0, num_classes)
    X, Y = shuffle(X0, Y0)
    return X, Y


#1 Build the sample set
# Seed the global generator so repeated runs produce the same data.
np.random.seed(10)

input_dim = 2
num_classes = 3
X, Y = generate(2000, num_classes, [[3.0], [3.0, 0]], False)

# Recover the class index of every one-hot row, then colour by class:
# class 0 red, class 1 blue, anything else yellow.
aa = list(np.argmax(Y, axis=1))
class_colors = ('r', 'b', 'y')
colors = [class_colors[min(label, 2)] for label in aa]
plt.scatter(X[:, 0], X[:, 1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show()
print('----------------------------------')


#2 Build the network structure: a single linear layer followed by softmax
lab_dim = num_classes
# Placeholders (graph inputs): feature rows and one-hot label rows
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_lables = tf.placeholder(tf.float32, [None, lab_dim])
# Set model weights
W = tf.Variable(tf.random_normal([input_dim, lab_dim]), name="weight")
b = tf.Variable(tf.zeros([lab_dim]), name="bias")
# Raw class scores (logits); softmax is applied separately below
output = tf.matmul(input_features, W) + b

# Class probabilities
z = tf.nn.softmax(output)

a1 = tf.argmax(tf.nn.softmax(output), axis=1)  # index of the largest entry in each row (predicted class)
b1 = tf.argmax(input_lables, axis=1)  # index of the 1 in each one-hot label row (true class)
err = tf.count_nonzero(a1 - b1)  # the difference is non-zero exactly where the prediction is wrong

# The loss op takes the logits, not the softmax output
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=input_lables, logits=output)
loss = tf.reduce_mean(cross_entropy)  # average the per-sample cross-entropy over the batch

optimizer = tf.train.AdamOptimizer(0.04)  # Adam: fast convergence, adapts the step size dynamically
train = optimizer.minimize(loss)  # let the optimizer train
print('----------------------------------')


#3 Training configuration
maxEpochs = 50
minibatchSize = 25

# Launch the session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Only full mini-batches are used; a trailing partial batch is skipped.
    num_batches = len(Y) // minibatchSize
    for epoch in range(maxEpochs):
        sumerr = 0
        for i in range(num_batches):
            x1 = X[i * minibatchSize:(i + 1) * minibatchSize, :]
            y1 = Y[i * minibatchSize:(i + 1) * minibatchSize, :]

            _, lossval, outputval, errval = sess.run([train, loss, output, err],
                                                     feed_dict={input_features: x1, input_lables: y1})
            # errval counts the misclassified rows of this batch; turn it
            # into a per-batch error rate before accumulating.
            sumerr = sumerr + (errval / minibatchSize)

        # Average the per-batch rates over the number of batches. Dividing
        # by the batch size instead produced "rates" above 1.
        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(lossval), "err=", sumerr / num_batches)
    print('----------------------------------')

    #4 Data visualisation
    train_X, train_Y = generate(200, num_classes, [[3.0], [3.0, 0]], False)
    aa = [np.argmax(l) for l in train_Y]
    colors = ['r' if l == 0 else 'b' if l == 1 else 'y' for l in aa[:]]
    plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)

    x = np.linspace(-1, 8, 200)

    # Fetch the trained parameters once and reuse them for all three lines.
    w_val, b_val = sess.run([W, b])
    # For output unit k, draw the line on which its logit is zero:
    #   x * W[0][k] + y * W[1][k] + b[k] = 0
    line_styles = [('first line', 3), ('second line', 2), ('third line', 1)]
    for k, (line_label, line_width) in enumerate(line_styles):
        y = -x * (w_val[0][k] / w_val[1][k]) - b_val[k] / w_val[1][k]
        plt.plot(x, y, label=line_label, lw=line_width)

    plt.legend()
    plt.show()
    print(w_val, b_val)
    print('----------------------------------')

    #5 Model visualisation
    train_X, train_Y = generate(200, num_classes, [[3.0], [3.0, 0]], False)
    aa = [np.argmax(l) for l in train_Y]
    colors = ['r' if l == 0 else 'b' if l == 1 else 'y' for l in aa[:]]
    plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)

    nb_of_xs = 200
    xs1 = np.linspace(-1, 8, num=nb_of_xs)
    xs2 = np.linspace(-1, 8, num=nb_of_xs)
    xx, yy = np.meshgrid(xs1, xs2)  # create the grid
    # Classify every grid point in ONE session call instead of one call per
    # point; reshaping back to the grid shape keeps entry [i, j] aligned
    # with the point (xx[i, j], yy[i, j]).
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    grid_classes = sess.run(a1, feed_dict={input_features: grid_points})
    classification_plane = grid_classes.reshape(xx.shape).astype(np.float64)

    # Create a color map to show the classification colors of each grid point
    cmap = ListedColormap([
        colorConverter.to_rgba('r', alpha=0.30),
        colorConverter.to_rgba('b', alpha=0.30),
        colorConverter.to_rgba('y', alpha=0.30)])
    # Plot the classification plane with decision boundary and input samples
    plt.contourf(xx, yy, classification_plane, cmap=cmap)
    plt.show()

结果：
在这里插入图片描述

----------------------------------

----------------------------------

Epoch: 0001 cost= 0.566720724 err= 1.3375999999999997
Epoch: 0002 cost= 0.385914028 err= 0.43200000000000005
Epoch: 0003 cost= 0.347154856 err= 0.33920000000000017
Epoch: 0004 cost= 0.335927367 err= 0.3280000000000001
Epoch: 0005 cost= 0.334472388 err= 0.3168000000000002
Epoch: 0006 cost= 0.337103307 err= 0.3056000000000002
Epoch: 0007 cost= 0.341525763 err= 0.3072000000000002
Epoch: 0008 cost= 0.346683890 err= 0.3024000000000002
Epoch: 0009 cost= 0.352053642 err= 0.2976000000000002
Epoch: 0010 cost= 0.357365191 err= 0.2976000000000002
Epoch: 0011 cost= 0.362479329 err= 0.2960000000000002
Epoch: 0012 cost= 0.367326468 err= 0.2912000000000002
Epoch: 0013 cost= 0.371876359 err= 0.2896000000000002
Epoch: 0014 cost= 0.376120597 err= 0.2832000000000002
Epoch: 0015 cost= 0.380063146 err= 0.2816000000000002
Epoch: 0016 cost= 0.383714765 err= 0.2816000000000002
Epoch: 0017 cost= 0.387089968 err= 0.2816000000000002
Epoch: 0018 cost= 0.390204966 err= 0.2816000000000002
Epoch: 0019 cost= 0.393076658 err= 0.2800000000000002
Epoch: 0020 cost= 0.395721585 err= 0.2800000000000002
Epoch: 0021 cost= 0.398156136 err= 0.2784000000000002
Epoch: 0022 cost= 0.400395811 err= 0.2784000000000002
Epoch: 0023 cost= 0.402455151 err= 0.2784000000000002
Epoch: 0024 cost= 0.404347986 err= 0.2784000000000002
Epoch: 0025 cost= 0.406087160 err= 0.27680000000000016
Epoch: 0026 cost= 0.407684505 err= 0.27680000000000016
Epoch: 0027 cost= 0.409151524 err= 0.27520000000000017
Epoch: 0028 cost= 0.410498321 err= 0.27520000000000017
Epoch: 0029 cost= 0.411734283 err= 0.27520000000000017
Epoch: 0030 cost= 0.412868589 err= 0.27520000000000017
Epoch: 0031 cost= 0.413909316 err= 0.27520000000000017
Epoch: 0032 cost= 0.414863855 err= 0.27520000000000017
Epoch: 0033 cost= 0.415739596 err= 0.27520000000000017
Epoch: 0034 cost= 0.416542560 err= 0.27520000000000017
Epoch: 0035 cost= 0.417278677 err= 0.27680000000000016
Epoch: 0036 cost= 0.417953581 err= 0.27680000000000016
Epoch: 0037 cost= 0.418572336 err= 0.27680000000000016
Epoch: 0038 cost= 0.419139326 err= 0.27680000000000016
Epoch: 0039 cost= 0.419658959 err= 0.27680000000000016
Epoch: 0040 cost= 0.420135260 err= 0.2784000000000002
Epoch: 0041 cost= 0.420571566 err= 0.2784000000000002
Epoch: 0042 cost= 0.420971215 err= 0.2784000000000002
Epoch: 0043 cost= 0.421337485 err= 0.2784000000000002
Epoch: 0044 cost= 0.421672970 err= 0.2784000000000002
Epoch: 0045 cost= 0.421980351 err= 0.2784000000000002
Epoch: 0046 cost= 0.422261894 err= 0.2784000000000002
Epoch: 0047 cost= 0.422519714 err= 0.2784000000000002
Epoch: 0048 cost= 0.422755808 err= 0.2784000000000002
Epoch: 0049 cost= 0.422972143 err= 0.2784000000000002
Epoch: 0050 cost= 0.423170060 err= 0.2784000000000002
----------------------------------

[[-1.8466457  1.1504047  1.2538404]
 [-0.8650342  2.1870062 -0.8544501]] [ 6.7945867 -8.398895  -1.5356051]
----------------------------------

7.2.1 使用带隐藏层的神经网络拟合异或操作
通过构建符合异或规律的数据集作为模拟样本，构建一个简单的多层神经网络来拟合其样本特征，完成分类任务。

程序：

import tensorflow as tf
import numpy as np

# Network structure: 2-D input --> 2-unit hidden layer --> 1-D output
# Hyper-parameters
learning_rate = 1e-4
n_input = 2
n_label = 1
n_hidden = 2

# Placeholders for a batch of inputs and the matching targets
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_label])
# Learnable parameters: 'h1' belongs to the hidden layer, 'h2' to the output layer
weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
}
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
}

# Network model: ReLU hidden layer followed by a tanh output
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))
y_pred = tf.nn.tanh(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))
# Alternative output activations tried by the author:
# y_pred = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))  # ends in a local optimum

# y_pred = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))

# Leaky ReLU: reported to work after 40000 iterations
# layer2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['h2'])
# # y_pred = tf.maximum(layer2, 0.01 * layer2)

# Mean squared error between prediction and target, minimised with Adam
loss = tf.reduce_mean((y_pred - y) ** 2)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Simulated data: the four rows of the XOR truth table and their outputs
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype='float32')
Y = np.array([[0], [1], [1], [0]], dtype='int16')

# Open a session and initialise every variable
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# Training: 10000 full-batch updates on the same four samples
xor_feed = {x: X, y: Y}
for i in range(10000):
    sess.run(train_step, feed_dict=xor_feed)

# Predictions for the four inputs after training
print(sess.run(y_pred, feed_dict={x: X}))

# Activations of the hidden layer for the same inputs
print(sess.run(layer_1, feed_dict={x: X}))

结果：

在这里插入图片描述

7.3 利用全连接网络将图片进行分类

构建一个简单的多层神经网络，以拟合MNIST样本特征完成分类任务

程序：

import tensorflow as tf
# 导入 MNIST 数据集
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from a local directory with one-hot encoded labels
mnist = input_data.read_data_sets("F:/shendu/MNIST_data/", one_hot=True)

# Network hyper-parameters
# Training settings
learning_rate = 0.001
training_epochs = 25
batch_size = 100
display_step = 1  # print the cost every `display_step` epochs

# Network Parameters
n_hidden_1 = 256  # number of units in the first hidden layer
n_hidden_2 = 256  # number of units in the second hidden layer
n_input = 784  # MNIST input size: 28 * 28 pixels per image
n_classes = 10  # MNIST classes: the digits 0-9

# Graph inputs
# Placeholders for a batch of flattened images and their one-hot labels
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])


# Create model
def multilayer_perceptron(x, weights, biases):
    """Build a network with two ReLU hidden layers and a linear output.

    Args:
        x: Input tensor fed to the first layer.
        weights: Mapping with keys ``'h1'``, ``'h2'`` and ``'out'`` holding
            the weight matrices of the two hidden layers and the output layer.
        biases: Mapping with keys ``'b1'``, ``'b2'`` and ``'out'`` holding the
            matching bias vectors.

    Returns:
        The output-layer tensor; no activation is applied to it.
    """
    # First hidden layer: affine transform followed by ReLU.
    hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    # Second hidden layer, same shape of computation.
    hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2']))
    # Output layer stays linear.
    return tf.matmul(hidden_2, weights['out']) + biases['out']


# Learnable parameters: weights and biases of each layer
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Network output (logits; the softmax is applied inside the loss op below)
pred = multilayer_perceptron(x, weights, biases)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Op that initialises all variables
init = tf.global_variables_initializer()

# Launch the session
with tf.Session() as sess:
    sess.run(init)

    # Training loop
    for epoch in range(training_epochs):
        total_batch = int(mnist.train.num_examples / batch_size)
        avg_cost = 0.
        # One pass over the whole training set, batch by batch
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # One optimisation step; also fetch the loss of this batch
            batch_feed = {x: batch_x, y: batch_y}
            _, c = sess.run([optimizer, cost], feed_dict=batch_feed)
            # Running average of the loss over the epoch
            avg_cost += c / total_batch
        # Progress report
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost=",
                  "{:.9f}".format(avg_cost))
    print(" Finished!")

    # Evaluation: a prediction is correct when the largest logit sits at the
    # same index as the 1 in the one-hot label.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Accuracy is the mean of the 0/1 correctness flags
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    test_feed = {x: mnist.test.images, y: mnist.test.labels}
    print("Accuracy:", sess.run(accuracy, feed_dict=test_feed))

结果：

Extracting F:/shendu/MNIST_data/train-images-idx3-ubyte.gz

Extracting F:/shendu/MNIST_data/train-labels-idx1-ubyte.gz
 
Extracting F:/shendu/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting F:/shendu/MNIST_data/t10k-labels-idx1-ubyte.gz

Epoch: 0001 cost= 172.031030853
Epoch: 0002 cost= 40.217121029
Epoch: 0003 cost= 25.387143721
Epoch: 0004 cost= 17.630999616
Epoch: 0005 cost= 12.666866638
Epoch: 0006 cost= 9.388582074
Epoch: 0007 cost= 7.051628594
Epoch: 0008 cost= 5.250864131
Epoch: 0009 cost= 3.878638424
Epoch: 0010 cost= 3.044112996
Epoch: 0011 cost= 2.090644927
Epoch: 0012 cost= 1.621755744
Epoch: 0013 cost= 1.274370165
Epoch: 0014 cost= 1.030953568
Epoch: 0015 cost= 0.856483016
Epoch: 0016 cost= 0.633580475
Epoch: 0017 cost= 0.563832541
Epoch: 0018 cost= 0.594583921
Epoch: 0019 cost= 0.486230183
Epoch: 0020 cost= 0.441275119
Epoch: 0021 cost= 0.369737626
Epoch: 0022 cost= 0.412133130
Epoch: 0023 cost= 0.339861759
Epoch: 0024 cost= 0.408908168
Epoch: 0025 cost= 0.360051092
 Finished!
Accuracy: 0.9533

7.4.1 利用异或数据集演示过拟合问题

程序：

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap
# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder

def onehot(y, start, end):
    """One-hot encode integer labels taken from the range ``[start, end)``.

    The encoder is fitted on every integer from ``start`` to ``end - 1`` so
    that the output has one column per admissible label, whether or not the
    label occurs in ``y``.

    Args:
        y: Labels to encode; passed straight to ``OneHotEncoder.transform``
            (assumed to be an ``(n, 1)`` integer array -- confirm at callers).
        start: Smallest label value.
        end: One past the largest label value.

    Returns:
        Dense array produced by calling ``toarray()`` on the encoded result.
    """
    # Column vector holding every admissible label exactly once.
    categories = np.linspace(start, end - 1, end - start).astype(np.int32).reshape(-1, 1)
    encoder = OneHotEncoder()
    encoder.fit(categories)
    return encoder.transform(y).toarray()

def generate(sample_size, num_classes, diff, regression=False):
    """Draw one 2-D Gaussian cluster per class and return them shuffled.

    Class 0 is centred on a fixed pseudo-random ``mean``; class ``i + 1`` is
    centred on ``mean + diff[i]``. Every class uses the identity covariance.

    Args:
        sample_size: Total number of samples requested; each class receives
            ``int(sample_size / num_classes)`` of them.
        num_classes: Number of classes (expected to equal ``len(diff) + 1``).
        diff: Sequence of offsets added to ``mean``, one per additional class.
        regression: When truthy, labels are returned as class indices;
            otherwise they are one-hot encoded with ``onehot``.

    Returns:
        Tuple ``(X, Y)`` of features and labels, shuffled together.
    """
    # Take the class-0 centre from a private generator with a fixed seed, so
    # every call uses the same cluster centres WITHOUT reseeding NumPy's
    # global RNG. Reseeding the global generator here made each call replay
    # the same random stream, so the evaluation sets drawn after training
    # were not independent of the training draw.
    mean = np.random.RandomState(10).randn(2)
    cov = np.eye(2)

    samples_per_class = int(sample_size / num_classes)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)

    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean + d, cov, samples_per_class)
        Y1 = (ci + 1) * np.ones(samples_per_class)

        X0 = np.concatenate((X0, X1))
        Y0 = np.concatenate((Y0, Y1))

    if not regression:
        # The encoder expects an integer column vector.
        Y0 = np.reshape(Y0, [-1, 1])
        Y0 = onehot(Y0.astype(np.int32), 0, num_classes)
    X, Y = shuffle(X0, Y0)
    return X, Y

# Build the XOR data set
# Seed the global generator so repeated runs produce the same data.
np.random.seed(10)

input_dim = 2
num_classes = 4
X, Y = generate(320, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
# Fold the four cluster labels into two classes by parity: clusters 0 and 2
# become class 0, clusters 1 and 3 become class 1.
Y = Y % 2

# Split the points by class so each class gets its own marker.
xr = np.array([[k[0], k[1]] for l, k in zip(Y, X) if l == 0.0])
xb = np.array([[k[0], k[1]] for l, k in zip(Y, X) if not l == 0.0])
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')

plt.show()

# Labels become a column vector to match the label placeholder.
Y = np.reshape(Y, [-1, 1])
print('-------------------------------------------')

# Hyper-parameters
learning_rate = 1e-4
n_input = 2
n_label = 1
# n_hidden = 2  # under-fits
n_hidden = 2

# Placeholders for a batch of inputs and the matching targets
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_label])

# Learnable parameters: 'h1' belongs to the hidden layer, 'h2' to the output layer
weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
}
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
}

# Network model: ReLU hidden layer followed by a tanh output
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))
y_pred = tf.nn.tanh(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))
# Alternative output activations tried by the author:
# y_pred = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))  # ends in a local optimum

# y_pred = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))

# Leaky ReLU: reported to work after 40000 iterations
# layer2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['h2'])
# y_pred = tf.maximum(layer2, 0.01 * layer2)

# Mean squared error between prediction and target, minimised with Adam
loss = tf.reduce_mean((y_pred - y) ** 2)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Open a session and initialise every variable
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# 20000 full-batch updates on the fixed training set; the loss is reported
# every 1000 steps.
train_feed = {x: X, y: Y}
for i in range(20000):
    _, loss_val = sess.run([train_step, loss], feed_dict=train_feed)
    if not i % 1000:
        print("Step:", i, "Current loss:", loss_val)

print('------------------------------------------')

# Visualisation
# Draw 120 points, evaluate the model on them and show them in the plane.
xTrain, yTrain = generate(120, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
yTrain = yTrain % 2
# Scatter the points that were just generated (the loss printed below is
# computed on these same points), not the training set X / Y.
xr = xTrain[yTrain == 0.0]
xb = xTrain[yTrain != 0.0]
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')
yTrain = np.reshape(yTrain, [-1, 1])
print("loss:\n", sess.run(loss, feed_dict={x: xTrain, y: yTrain}))

nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid
# Evaluate the network on every grid point in ONE session call instead of
# one call per point; reshaping back to the grid shape keeps entry [i, j]
# aligned with the point (xx[i, j], yy[i, j]).
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
grid_pred = sess.run(y_pred, feed_dict={x: grid_points})
# Targets are 0 / 1, so the class boundary is at 0.5. Truncating with int()
# mapped every prediction below 1.0 to class 0.
classification_plane = (grid_pred.reshape(xx.shape) >= 0.5).astype(np.float64)

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()
print('---------------------------------------------------------------')

# Over-fitting check: evaluate the trained model on a small, separate sample
xTrain, yTrain = generate(12, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
yTrain = yTrain % 2

# Split the points by class so each class gets its own marker.
xr = []
xb = []
for (l, k) in zip(yTrain[:], xTrain[:]):
    if l == 0.0:
        xr.append([k[0], k[1]])
    else:
        xb.append([k[0], k[1]])
xr = np.array(xr)
xb = np.array(xb)
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')

# plt.show()
yTrain = np.reshape(yTrain, [-1, 1])
print("loss:\n", sess.run(loss, feed_dict={x: xTrain, y: yTrain}))

nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid
# Evaluate the network on every grid point in ONE session call instead of
# one call per point; reshaping back to the grid shape keeps entry [i, j]
# aligned with the point (xx[i, j], yy[i, j]).
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
grid_pred = sess.run(y_pred, feed_dict={x: grid_points})
# Targets are 0 / 1, so the class boundary is at 0.5. Truncating with int()
# mapped every prediction below 1.0 to class 0.
classification_plane = (grid_pred.reshape(xx.shape) >= 0.5).astype(np.float64)

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()

结果：
在这里插入图片描述

将隐藏层节点提高到200：

n_hidden = 200

新结果：
在这里插入图片描述

7-7 异或集的L2_loss

构建异或数据集模拟样本，使用多层神经网络将其分类，并使用正则化技术来改善过拟合情况。

程序：

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap
# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder


def onehot(y, start, end):
    """One-hot encode integer labels taken from the range ``[start, end)``.

    The encoder is fitted on every integer from ``start`` to ``end - 1`` so
    that the output has one column per admissible label, whether or not the
    label occurs in ``y``.

    Args:
        y: Labels to encode; passed straight to ``OneHotEncoder.transform``
            (assumed to be an ``(n, 1)`` integer array -- confirm at callers).
        start: Smallest label value.
        end: One past the largest label value.

    Returns:
        Dense array produced by calling ``toarray()`` on the encoded result.
    """
    # Column vector holding every admissible label exactly once.
    categories = np.linspace(start, end - 1, end - start).astype(np.int32).reshape(-1, 1)
    encoder = OneHotEncoder()
    encoder.fit(categories)
    return encoder.transform(y).toarray()


def generate(sample_size, num_classes, diff, regression=False):
    """Draw one 2-D Gaussian cluster per class and return them shuffled.

    Class 0 is centred on a fixed pseudo-random ``mean``; class ``i + 1`` is
    centred on ``mean + diff[i]``. Every class uses the identity covariance.

    Args:
        sample_size: Total number of samples requested; each class receives
            ``int(sample_size / num_classes)`` of them.
        num_classes: Number of classes (expected to equal ``len(diff) + 1``).
        diff: Sequence of offsets added to ``mean``, one per additional class.
        regression: When truthy, labels are returned as class indices;
            otherwise they are one-hot encoded with ``onehot``.

    Returns:
        Tuple ``(X, Y)`` of features and labels, shuffled together.
    """
    # Take the class-0 centre from a private generator with a fixed seed, so
    # every call uses the same cluster centres WITHOUT reseeding NumPy's
    # global RNG. Reseeding the global generator here made each call replay
    # the same random stream, so a loop that asks for a new batch on every
    # step kept receiving the same points.
    mean = np.random.RandomState(10).randn(2)
    cov = np.eye(2)

    samples_per_class = int(sample_size / num_classes)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)

    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean + d, cov, samples_per_class)
        Y1 = (ci + 1) * np.ones(samples_per_class)

        X0 = np.concatenate((X0, X1))
        Y0 = np.concatenate((Y0, Y1))

    if not regression:
        # The encoder expects an integer column vector.
        Y0 = np.reshape(Y0, [-1, 1])
        Y0 = onehot(Y0.astype(np.int32), 0, num_classes)
    X, Y = shuffle(X0, Y0)
    return X, Y


# Seed the global generator so repeated runs produce the same data.
np.random.seed(10)

input_dim = 2
num_classes = 4
X, Y = generate(120, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
# Fold the four cluster labels into two classes by parity.
Y = Y % 2

# Class 0 is drawn in red, class 1 in blue.
colors = np.where(Y == 0.0, 'r', 'b').tolist()
plt.scatter(X[:, 0], X[:, 1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show()

# Labels become a column vector to match the label placeholder.
Y = np.reshape(Y, [-1, 1])

# Hyper-parameters; the hidden layer is deliberately wide (200 units)
learning_rate = 1e-4
n_input = 2
n_label = 1
n_hidden = 200

# Placeholders for a batch of inputs and the matching targets
x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_label])

# Learnable parameters: 'h1' belongs to the hidden layer, 'h2' to the output layer
weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
}
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
}

# Hidden layer with ReLU activation
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))

# Output layer with a leaky ReLU: max(z, 0.01 * z)
layer2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['h2'])
y_pred = tf.maximum(layer2, 0.01 * layer2)

reg = 0.01  # weight of the L2 penalty
# Mean squared error plus an L2 penalty on both weight matrices (biases are
# not penalised)
loss = tf.reduce_mean((y_pred - y) ** 2) + tf.nn.l2_loss(weights['h1']) * reg + tf.nn.l2_loss(weights['h2']) * reg
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Open a session and initialise every variable
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

# 20000 updates; each step requests 1000 XOR points from the generator.
for i in range(20000):
    X, Y = generate(1000, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
    # Parity folds the four clusters into two classes; the labels are then
    # shaped as a column vector for the placeholder.
    Y = np.reshape(Y % 2, [-1, 1])

    step_feed = {x: X, y: Y}
    _, loss_val = sess.run([train_step, loss], feed_dict=step_feed)

    if not i % 1000:
        print("Step:", i, "Current loss:", loss_val)

# Scatter the batch used in the last training step, coloured by class.
colors = ['r' if l == 0.0 else 'b' for l in Y[:]]
plt.scatter(X[:, 0], X[:, 1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")

nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid
# Evaluate the network on every grid point in ONE session call instead of
# one call per point; reshaping back to the grid shape keeps entry [i, j]
# aligned with the point (xx[i, j], yy[i, j]).
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
grid_pred = sess.run(y_pred, feed_dict={x: grid_points})
# Targets are 0 / 1, so the class boundary is at 0.5. Truncating with int()
# put the boundary at 1.0 and mapped every prediction below it to class 0.
classification_plane = (grid_pred.reshape(xx.shape) >= 0.5).astype(np.float64)

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()

# Evaluate the trained model on a small, separate sample of 12 points.
xTrain, yTrain = generate(12, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
yTrain = yTrain % 2
colors = ['r' if l == 0.0 else 'b' for l in yTrain[:]]
plt.scatter(xTrain[:, 0], xTrain[:, 1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
# plt.show()
yTrain = np.reshape(yTrain, [-1, 1])
print("loss:\n", sess.run(loss, feed_dict={x: xTrain, y: yTrain}))

nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid
# Evaluate the network on every grid point in ONE session call instead of
# one call per point; reshaping back to the grid shape keeps entry [i, j]
# aligned with the point (xx[i, j], yy[i, j]).
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
grid_pred = sess.run(y_pred, feed_dict={x: grid_points})
# Targets are 0 / 1, so the class boundary is at 0.5. Truncating with int()
# put the boundary at 1.0 and mapped every prediction below it to class 0.
classification_plane = (grid_pred.reshape(xx.shape) >= 0.5).astype(np.float64)

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()

结果：
在这里插入图片描述

7-8 异或集dropout

使用dropout配合退化学习率的技术来改善过拟合

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap
# 对于上面的fit可以这么扩展变成动态的
from sklearn.preprocessing import OneHotEncoder

def onehot(y, start, end):
    """One-hot encode labels against the integer categories start .. end - 1.

    Args:
        y: Labels to encode, handed unchanged to ``OneHotEncoder.transform``
            (the caller in this file passes an int32 column of shape (n, 1)).
        start: First category value.
        end: One past the last category value.

    Returns:
        The dense array produced by calling ``.toarray()`` on the encoder's
        output for ``y``.
    """
    # Column of every category value, so the encoder is fitted on the full
    # range rather than only on the labels that happen to occur in ``y``.
    categories = np.linspace(start, end - 1, end - start).reshape(-1, 1).astype(np.int32)
    encoder = OneHotEncoder().fit(categories)
    return encoder.transform(y).toarray()

def generate(sample_size, num_classes, diff, regression=False):
    """Generate shuffled 2-D Gaussian clusters with one label per cluster.

    Cluster 0 is centred on a randomly drawn mean; cluster ``k`` (k >= 1) is
    centred on that mean shifted by ``diff[k - 1]``. All clusters use the
    identity covariance.

    Args:
        sample_size: Total sample budget; each cluster receives
            ``int(sample_size / num_classes)`` points.
        num_classes: Divisor for the per-cluster sample count and the number
            of categories used for one-hot encoding. The number of clusters
            actually produced is ``len(diff) + 1``.
        diff: Iterable of offsets added to the base mean, one per extra cluster.
        regression: When equal to ``False`` the labels are one-hot encoded via
            ``onehot``; otherwise they stay as a 1-D array of class ids.

    Returns:
        Tuple ``(X, Y)`` of features and labels after ``shuffle``.
    """
    # The NumPy RNG is reseeded with a fixed value on every call.
    np.random.seed(10)
    mean = np.random.randn(2)
    cov = np.eye(2)

    samples_per_class = int(sample_size / num_classes)

    # Collect the clusters first and concatenate once at the end; the draw
    # order (base cluster, then each offset in turn) is significant.
    feature_parts = [np.random.multivariate_normal(mean, cov, samples_per_class)]
    label_parts = [np.zeros(samples_per_class)]
    for class_id, offset in enumerate(diff, start=1):
        feature_parts.append(
            np.random.multivariate_normal(mean + offset, cov, samples_per_class))
        label_parts.append(class_id * np.ones(samples_per_class))

    features = np.concatenate(feature_parts)
    labels = np.concatenate(label_parts)

    # Deliberately an equality test: values such as None do not trigger the
    # one-hot branch.
    if regression == False:
        label_column = labels.reshape(-1, 1).astype(np.int32)
        labels = onehot(label_column, 0, num_classes)

    X, Y = shuffle(features, labels)
    return X, Y


# Ensure we always get the same amount of randomness
np.random.seed(10)

input_dim = 2
num_classes = 4
# Four Gaussian clusters; taking the class id modulo 2 folds them into the
# two XOR-style labels.
X, Y = generate(120, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
Y = Y % 2
# Split the points by label with a boolean mask. Unlike building Python lists
# row by row, this keeps a (0, 2) shape when a label has no samples, so the
# column indexing below cannot fail on an empty class.
is_red = np.ravel(Y) == 0.0
xr = X[is_red]
xb = X[~is_red]
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')
plt.show()
# The label placeholder is fed a column vector, one label per row.
Y = np.reshape(Y, [-1, 1])

# Hyper-parameters of the single-hidden-layer network
learning_rate = 0.01  # 1e-4
n_input = 2
n_label = 1
n_hidden = 200

# Graph inputs: a batch of feature rows and the matching label column
x = tf.placeholder(tf.float32, shape=[None, n_input])
y = tf.placeholder(tf.float32, shape=[None, n_label])

# Trainable parameters, keyed by layer
weights = dict(
    h1=tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    h2=tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1)),
)
biases = dict(
    h1=tf.Variable(tf.zeros([n_hidden])),
    h2=tf.Variable(tf.zeros([n_label])),
)

# Hidden layer with ReLU activation
layer_1 = tf.nn.relu(tf.matmul(x, weights['h1']) + biases['h1'])

# Dropout on the hidden activations; keep_prob is fed at run time
keep_prob = tf.placeholder(tf.float32)
layer_1_drop = tf.nn.dropout(layer_1, keep_prob)

# Output layer with a leaky-ReLU activation written out as max(z, 0.01 * z)
layer2 = tf.matmul(layer_1_drop, weights['h2']) + biases['h2']
y_pred = tf.maximum(layer2, 0.01 * layer2)

# reg is only referenced by a disabled L2-regularised variant of the loss
# (mean squared error plus reg * l2_loss of both weight matrices).
reg = 0.01
# Active loss: plain mean squared error
loss = tf.reduce_mean((y_pred - y) ** 2)

global_step = tf.Variable(0, trainable=False)
# Training runs for 20000 steps in total; the learning rate decays by a
# factor of 0.9 per 1000 steps.
decaylearning_rate = tf.train.exponential_decay(
    learning_rate, global_step, decay_steps=1000, decay_rate=0.9)
# Passing global_step makes the optimizer advance it on every update, which
# drives the decay schedule. (A constant-rate Adam optimizer was used before.)
train_step = tf.train.AdamOptimizer(decaylearning_rate).minimize(loss, global_step=global_step)

# Start an interactive session and initialise all graph variables
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

for step in range(20000):
    # NOTE(review): generate() reseeds NumPy with a fixed seed on each call,
    # so this presumably produces the same batch on every step - confirm
    # whether fresh data per step was intended.
    X, Y = generate(1000, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
    # Fold the class ids into two labels and shape them as a column
    Y = np.reshape(Y % 2, [-1, 1])

    # Dropout is active during training: keep_prob is fed as 0.6
    _, loss_val = sess.run([train_step, loss], feed_dict={x: X, y: Y, keep_prob: 0.6})

    # Report the loss once every 1000 steps
    if step % 1000 == 0:
        print("Step:", step, "Current loss:", loss_val)

# Scatter the last training batch, one marker style per label.
# Y is a column vector at this point, so flatten it before comparing. The
# boolean mask keeps a (0, 2) shape when a label has no samples, so the
# column indexing below cannot fail on an empty class.
is_red = np.ravel(Y) == 0.0
xr = X[is_red]
xb = X[~is_red]
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')

# Build a regular nb_of_xs x nb_of_xs grid covering [-1, 8] on both axes.
nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid

# Evaluate the network on the whole grid with ONE batched sess.run call
# instead of one call per grid point (nb_of_xs ** 2 separate runs).
# keep_prob is fed as 1.0 so that no units are dropped at prediction time.
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
grid_pred = sess.run(y_pred, feed_dict={x: grid_points, keep_prob: 1.0})
# Truncate towards zero (what int() did per element) to turn the raw network
# output into the value used for colouring; float64 matches the np.zeros
# buffer that the per-point loop used to fill.
# NOTE(review): truncation maps every output below 1.0 to 0 - a 0.5 threshold
# may have been intended; confirm before changing.
classification_plane = np.trunc(grid_pred.astype(np.float64)).reshape(xx.shape)

# Two-colour map (translucent red / translucent blue) for the filled regions
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Draw the classification plane as filled contours over the grid
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()

# Draw a small 12-sample set and fold the class ids into two labels (id % 2).
xTrain, yTrain = generate(12, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
yTrain = yTrain % 2

# Split the points by label with a boolean mask. Unlike building Python lists
# row by row, this keeps a (0, 2) shape when a label has no samples, so the
# column indexing below cannot fail on an empty class.
is_red = np.ravel(yTrain) == 0.0
xr = xTrain[is_red]
xb = xTrain[~is_red]
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')

# No plt.show() here: the figure is shown after the decision regions are
# drawn on top of these points.
# The label placeholder is fed a column vector, one label per row.
yTrain = np.reshape(yTrain, [-1, 1])
# keep_prob is fed as 1.0 so that no units are dropped during evaluation.
print("loss:\n", sess.run(loss, feed_dict={x: xTrain, y: yTrain, keep_prob: 1.0}))

# Build a regular nb_of_xs x nb_of_xs grid covering [-1, 8] on both axes.
nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid

# Evaluate the network on the whole grid with ONE batched sess.run call
# instead of one call per grid point (nb_of_xs ** 2 separate runs).
# keep_prob is fed as 1.0 so that no units are dropped at prediction time.
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
grid_pred = sess.run(y_pred, feed_dict={x: grid_points, keep_prob: 1.0})
# Truncate towards zero (what int() did per element) to turn the raw network
# output into the value used for colouring; float64 matches the np.zeros
# buffer that the per-point loop used to fill.
# NOTE(review): truncation maps every output below 1.0 to 0 - a 0.5 threshold
# may have been intended; confirm before changing.
classification_plane = np.trunc(grid_pred.astype(np.float64)).reshape(xx.shape)

# Two-colour map (translucent red / translucent blue) for the filled regions
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Draw the classification plane as filled contours over the grid
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()

结果：

在这里插入图片描述