Chapter 7: Multilayer Neural Networks

7.1 Using Single-Output Linear Logistic Regression to Classify Tumors as Benign or Malignant
Code:


import numpy as np
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import tensorflow as tf

def generate(sample_size,mean,cov,diff,regression):
    num_classes = 2
    samples_per_class = int(sample_size/2)
    X0 = np.random.multivariate_normal(mean,cov,samples_per_class)
    Y0 = np.zeros(samples_per_class)

    for ci,d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean+d,cov,samples_per_class)
        Y1 = (ci + 1)*np.ones(samples_per_class)

        X0 = np.concatenate((X0,X1))
        Y0 = np.concatenate((Y0,Y1))

    if regression == False:  # one-hot encode: label 0 becomes the vector [1, 0]
        class_ind = [Y0 == class_number for class_number in range(num_classes)]
        Y0 = np.asarray(np.column_stack(class_ind), dtype=np.float32)
    X,Y = shuffle(X0,Y0)
    return X,Y

input_dim = 2
np.random.seed(10)
num_classes = 2
mean = np.random.randn(num_classes)
cov = np.eye(num_classes)
X,Y = generate(1000,mean,cov,[3.0],True)
colors = ['r' if l==0 else 'b' for l in Y[:]]
plt.scatter(X[:,0],X[:,1],c = colors)
plt.xlabel("Scaled  age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show()
lab_dim = 1
print('-------------------------------------------- ')

#2 Build the network structure
# tf Graph Input
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_lables = tf.placeholder(tf.float32, [None, lab_dim])
# Set model weights
W = tf.Variable(tf.random_normal([input_dim, lab_dim]), name="weight")
b = tf.Variable(tf.zeros([lab_dim]), name="bias")

output = tf.nn.sigmoid(tf.matmul(input_features, W) + b)
cross_entropy = -(input_lables * tf.log(output) + (1 - input_lables) * tf.log(1 - output))
ser = tf.square(input_lables - output)
loss = tf.reduce_mean(cross_entropy)
err = tf.reduce_mean(ser)
optimizer = tf.train.AdamOptimizer(0.04)  # prefer Adam: it converges quickly and adapts the gradient step per parameter
train = optimizer.minimize(loss)  # let the optimizer train
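# For reference, the standard Adam update (Kingma & Ba, 2015); these comments
# are an illustration, not part of the original listing. With gradient g_t,
# learning rate lr, and decay rates beta1, beta2:
#   m_t = beta1*m_{t-1} + (1-beta1)*g_t         # running mean of gradients
#   v_t = beta2*v_{t-1} + (1-beta2)*g_t**2      # running mean of squared gradients
#   theta_t = theta_{t-1} - lr*m_hat_t/(sqrt(v_hat_t)+eps)
# where m_hat_t, v_hat_t are bias-corrected estimates; this per-parameter
# scaling is the "dynamic gradient adjustment" mentioned above.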
print('-------------------------------------------- ')

#3 Set the training parameters
maxEpochs = 50  # number of training epochs
minibatchSize = 25

# Start the session and launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Feed the data into the model
    for epoch in range(maxEpochs):
        sumerr = 0
        for i in range(np.int32(len(Y) / minibatchSize)):
            x1 = X[i * minibatchSize:(i + 1) * minibatchSize, :]
            y1 = np.reshape(Y[i * minibatchSize:(i + 1) * minibatchSize], [-1, 1])
            _, lossval, outputval, errval = sess.run([train, loss, output, err],
                                                     feed_dict={input_features: x1, input_lables: y1})
            sumerr = sumerr + errval

        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(lossval), "err=", sumerr / minibatchSize)
    print('-------------------------------------------- ')

#4 Visualize the data
    # Graphic display
    train_X, train_Y = generate(100, mean, cov, [3.0], True)
    colors = ['r' if l == 0 else 'b' for l in train_Y[:]]
    plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)
    # plt.scatter(train_X[:, 0], train_X[:, 1], c=train_Y)
    # plt.colorbar()

    # Decision boundary: z = x1*w1 + x2*w2 + b = 0
    # Solving for x2:    x2 = -x1*(w1/w2) - b/w2    (plot x = x1, y = x2)
    x = np.linspace(-1, 8, 200)
    y = -x * (sess.run(W)[0] / sess.run(W)[1]) - sess.run(b) / sess.run(W)[1]
    plt.plot(x, y, label='Fitted line')
    plt.legend()
    plt.show()

Results:

Generated data: [figure: scatter plot of the two generated classes]

Epoch: 0001 cost= 0.402290761 err= 0.4220936095714569
Epoch: 0002 cost= 0.286549389 err= 0.21382913008332252
Epoch: 0003 cost= 0.205276847 err= 0.12938791811466216
Epoch: 0004 cost= 0.156711429 err= 0.08419329896569253
Epoch: 0005 cost= 0.127587527 err= 0.06144444540143013
Epoch: 0006 cost= 0.108569272 err= 0.04909050062298775
Epoch: 0007 cost= 0.095232576 err= 0.041673598643392325
Epoch: 0008 cost= 0.085361034 err= 0.03683308662846684
Epoch: 0009 cost= 0.077743709 err= 0.03346456308849156
Epoch: 0010 cost= 0.071672536 err= 0.03100297726690769
Epoch: 0011 cost= 0.066708282 err= 0.029135056268423797
Epoch: 0012 cost= 0.062564246 err= 0.027675016946159304
Epoch: 0013 cost= 0.059045564 err= 0.02650638537481427
Epoch: 0014 cost= 0.056014940 err= 0.02555271005257964
Epoch: 0015 cost= 0.053373095 err= 0.02476184282451868
Epoch: 0016 cost= 0.051046442 err= 0.024097083020024
Epoch: 0017 cost= 0.048978850 err= 0.02353185025509447
Epoch: 0018 cost= 0.047127292 err= 0.02304648211225867
Epoch: 0019 cost= 0.045457859 err= 0.0226260887295939
Epoch: 0020 cost= 0.043943625 err= 0.022259229691699148
Epoch: 0021 cost= 0.042562634 err= 0.02193694586632773
Epoch: 0022 cost= 0.041297209 err= 0.021652155232150107
Epoch: 0023 cost= 0.040132686 err= 0.021399166567716746
Epoch: 0024 cost= 0.039056841 err= 0.021173356383806095
Epoch: 0025 cost= 0.038059343 err= 0.02097094610799104
Epoch: 0026 cost= 0.037131526 err= 0.020788811224047094
Epoch: 0027 cost= 0.036265917 err= 0.020624336611945182
Epoch: 0028 cost= 0.035456408 err= 0.020475327033782378
Epoch: 0029 cost= 0.034697119 err= 0.020339942692080514
Epoch: 0030 cost= 0.033983592 err= 0.020216585372108967
Epoch: 0031 cost= 0.033311397 err= 0.0201039208896691
Epoch: 0032 cost= 0.032676920 err= 0.020000769441248848
Epoch: 0033 cost= 0.032077044 err= 0.019906134667107837
Epoch: 0034 cost= 0.031508893 err= 0.01981913345050998
Epoch: 0035 cost= 0.030969737 err= 0.019739002887508832
Epoch: 0036 cost= 0.030457499 err= 0.019665055116056464
Epoch: 0037 cost= 0.029970061 err= 0.019596726023009978
Epoch: 0038 cost= 0.029505625 err= 0.019533473667688667
Epoch: 0039 cost= 0.029062405 err= 0.019474825173965656
Epoch: 0040 cost= 0.028639056 err= 0.01942038996203337
Epoch: 0041 cost= 0.028234275 err= 0.019369786763272712
Epoch: 0042 cost= 0.027846718 err= 0.019322688550164457
Epoch: 0043 cost= 0.027475249 err= 0.019278797387960366
Epoch: 0044 cost= 0.027118854 err= 0.019237840565328953
Epoch: 0045 cost= 0.026776681 err= 0.019199586801696568
Epoch: 0046 cost= 0.026447790 err= 0.01916381584072951
Epoch: 0047 cost= 0.026131434 err= 0.019130324691941494
Epoch: 0048 cost= 0.025826823 err= 0.01909893815696705
Epoch: 0049 cost= 0.025533410 err= 0.019069498645840212
Epoch: 0050 cost= 0.025250457 err= 0.019041849277273287

[figure: samples with the fitted decision boundary]
7.2 Using Linear Logistic Regression for Multiclass Problems

Build a network model that separates three classes of samples: first generate simulated data for the three classes, then construct a neural network whose outputs are computed with softmax classification, and use it to separate the classes.
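For reference, the model below computes logits z = XW + b, and softmax turns each row of logits into class probabilities; training then minimizes the mean cross-entropy, which is what tf.nn.softmax_cross_entropy_with_logits wrapped in tf.reduce_mean computes:

$$p_{ik} = \frac{e^{z_{ik}}}{\sum_{j=1}^{3} e^{z_{ij}}}, \qquad \text{loss} = -\frac{1}{N}\sum_{i=1}^{N}\sum_{k=1}^{3} y_{ik}\,\log p_{ik}$$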

Code:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap
# OneHotEncoder makes the one-hot encoding adapt dynamically to the label range
from sklearn.preprocessing import OneHotEncoder

def onehot(y, start, end):
    ohe = OneHotEncoder()
    a = np.linspace(start, end - 1, end - start)
    b = np.reshape(a, [-1, 1]).astype(np.int32)
    ohe.fit(b)
    c = ohe.transform(y).toarray()
    return c
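# A quick sanity check of onehot (an illustration, not part of the original listing):
#   onehot(np.array([[0], [2], [1]]), 0, 3)
#   -> [[1., 0., 0.],
#       [0., 0., 1.],
#       [0., 1., 0.]]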


#

def generate(sample_size, num_classes, diff, regression=False):
    np.random.seed(10)
    mean = np.random.randn(2)
    cov = np.eye(2)

    # len(diff)
    samples_per_class = int(sample_size / num_classes)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)

    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean + d, cov, samples_per_class)
        Y1 = (ci + 1) * np.ones(samples_per_class)

        X0 = np.concatenate((X0, X1))
        Y0 = np.concatenate((Y0, Y1))
        # print(X0, Y0)

    if regression == False:  # one-hot encode: label 0 becomes the vector [1, 0, ...]
        Y0 = np.reshape(Y0, [-1, 1])
        # print(Y0.astype(np.int32))
        Y0 = onehot(Y0.astype(np.int32), 0, num_classes)
        # print(Y0)
    X, Y = shuffle(X0, Y0)
    # print(X, Y)
    return X, Y


#1 Generate the sample set
# Fix the seed so every run draws the same random data
np.random.seed(10)

input_dim = 2
num_classes = 3
X, Y = generate(2000, num_classes, [[3.0], [3.0, 0]], False)
aa = [np.argmax(l) for l in Y]
colors = ['r' if l == 0 else 'b' if l == 1 else 'y' for l in aa[:]]
# plot each point, colored by its class
plt.scatter(X[:, 0], X[:, 1], c=colors)
plt.xlabel("Scaled age (in yrs)")
plt.ylabel("Tumor size (in cm)")
plt.show()
print('----------------------------------')


#2 Build the network structure
lab_dim = num_classes
# Define placeholders (tf Graph input)
input_features = tf.placeholder(tf.float32, [None, input_dim])
input_lables = tf.placeholder(tf.float32, [None, lab_dim])
# Set model weights
W = tf.Variable(tf.random_normal([input_dim, lab_dim]), name="weight")
b = tf.Variable(tf.zeros([lab_dim]), name="bias")
output = tf.matmul(input_features, W) + b

z = tf.nn.softmax(output)

a1 = tf.argmax(tf.nn.softmax(output), axis=1)  # index of the largest value in each row (predicted class)
b1 = tf.argmax(input_lables, axis=1)
err = tf.count_nonzero(a1 - b1)  # subtracting the two arrays: nonzero entries are errors

cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=input_lables, logits=output)
loss = tf.reduce_mean(cross_entropy)  # taking the mean of the cross-entropy is essential

optimizer = tf.train.AdamOptimizer(0.04)  # prefer the Adam optimizer: it converges quickly and adapts the gradient step
train = optimizer.minimize(loss)  # let the optimizer train
print('----------------------------------')


#3 Set the training parameters
maxEpochs = 50
minibatchSize = 25

# Start the session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(maxEpochs):
        sumerr = 0
        for i in range(np.int32(len(Y) / minibatchSize)):
            x1 = X[i * minibatchSize:(i + 1) * minibatchSize, :]
            y1 = Y[i * minibatchSize:(i + 1) * minibatchSize, :]

            _, lossval, outputval, errval = sess.run([train, loss, output, err],
                                                     feed_dict={input_features: x1, input_lables: y1})
            sumerr = sumerr + (errval / minibatchSize)

        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(lossval), "err=", sumerr / minibatchSize)
    print('----------------------------------')

    #4 Visualize the data
    train_X, train_Y = generate(200, num_classes, [[3.0], [3.0, 0]], False)
    aa = [np.argmax(l) for l in train_Y]
    colors = ['r' if l == 0 else 'b' if l == 1 else 'y' for l in aa[:]]
    plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)

    x = np.linspace(-1, 8, 200)

    y = -x * (sess.run(W)[0][0] / sess.run(W)[1][0]) - sess.run(b)[0] / sess.run(W)[1][0]
    plt.plot(x, y, label='first line', lw=3)

    y = -x * (sess.run(W)[0][1] / sess.run(W)[1][1]) - sess.run(b)[1] / sess.run(W)[1][1]
    plt.plot(x, y, label='second line', lw=2)

    y = -x * (sess.run(W)[0][2] / sess.run(W)[1][2]) - sess.run(b)[2] / sess.run(W)[1][2]
    plt.plot(x, y, label='third line', lw=1)

    plt.legend()
    plt.show()
    print(sess.run(W), sess.run(b))
    print('----------------------------------')

    #5 Visualize the model
    train_X, train_Y = generate(200, num_classes, [[3.0], [3.0, 0]], False)
    aa = [np.argmax(l) for l in train_Y]
    colors = ['r' if l == 0 else 'b' if l == 1 else 'y' for l in aa[:]]
    plt.scatter(train_X[:, 0], train_X[:, 1], c=colors)

    nb_of_xs = 200
    xs1 = np.linspace(-1, 8, num=nb_of_xs)
    xs2 = np.linspace(-1, 8, num=nb_of_xs)
    xx, yy = np.meshgrid(xs1, xs2)  # create the grid
    # Initialize and fill the classification plane
    classification_plane = np.zeros((nb_of_xs, nb_of_xs))
    for i in range(nb_of_xs):
        for j in range(nb_of_xs):
            # classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
            classification_plane[i, j] = sess.run(a1, feed_dict={input_features: [[xx[i, j], yy[i, j]]]})

    # Create a color map to show the classification colors of each grid point
    cmap = ListedColormap([
        colorConverter.to_rgba('r', alpha=0.30),
        colorConverter.to_rgba('b', alpha=0.30),
        colorConverter.to_rgba('y', alpha=0.30)])
    # Plot the classification plane with decision boundaries and the input samples
    plt.contourf(xx, yy, classification_plane, cmap=cmap)
    plt.show()

Results:

[figure: the three generated classes]
[figure: the three fitted class-boundary lines]

Epoch: 0001 cost= 0.408615470 err= 0.7823999999999991
Epoch: 0002 cost= 0.340759307 err= 0.3488000000000002
Epoch: 0003 cost= 0.329538494 err= 0.33600000000000024
Epoch: 0004 cost= 0.331317455 err= 0.3200000000000001
Epoch: 0005 cost= 0.337001711 err= 0.3056000000000002
Epoch: 0006 cost= 0.343782663 err= 0.3040000000000002
Epoch: 0007 cost= 0.350635111 err= 0.2992000000000002
Epoch: 0008 cost= 0.357164919 err= 0.29280000000000017
Epoch: 0009 cost= 0.363229364 err= 0.2912000000000002
Epoch: 0010 cost= 0.368793219 err= 0.2880000000000002
Epoch: 0011 cost= 0.373867065 err= 0.2864000000000002
Epoch: 0012 cost= 0.378480762 err= 0.28480000000000016
Epoch: 0013 cost= 0.382670939 err= 0.2832000000000002
Epoch: 0014 cost= 0.386475146 err= 0.28480000000000016
Epoch: 0015 cost= 0.389929056 err= 0.2832000000000002
Epoch: 0016 cost= 0.393066227 err= 0.2832000000000002
Epoch: 0017 cost= 0.395916939 err= 0.2800000000000002
Epoch: 0018 cost= 0.398508519 err= 0.2784000000000002
Epoch: 0019 cost= 0.400865823 err= 0.2784000000000002
Epoch: 0020 cost= 0.403011084 err= 0.27680000000000016
Epoch: 0021 cost= 0.404963940 err= 0.27680000000000016
Epoch: 0022 cost= 0.406742632 err= 0.27680000000000016
Epoch: 0023 cost= 0.408363074 err= 0.27680000000000016
Epoch: 0024 cost= 0.409840047 err= 0.27680000000000016
Epoch: 0025 cost= 0.411186486 err= 0.27680000000000016
Epoch: 0026 cost= 0.412414044 err= 0.27680000000000016
Epoch: 0027 cost= 0.413533986 err= 0.27680000000000016
Epoch: 0028 cost= 0.414555728 err= 0.27520000000000017
Epoch: 0029 cost= 0.415487975 err= 0.27520000000000017
Epoch: 0030 cost= 0.416338772 err= 0.27520000000000017
Epoch: 0031 cost= 0.417115360 err= 0.27520000000000017
Epoch: 0032 cost= 0.417824358 err= 0.27520000000000017
Epoch: 0033 cost= 0.418471694 err= 0.27520000000000017
Epoch: 0034 cost= 0.419062734 err= 0.27520000000000017
Epoch: 0035 cost= 0.419602484 err= 0.27520000000000017
Epoch: 0036 cost= 0.420095444 err= 0.27520000000000017
Epoch: 0037 cost= 0.420545697 err= 0.27520000000000017
Epoch: 0038 cost= 0.420956999 err= 0.27520000000000017
Epoch: 0039 cost= 0.421332628 err= 0.27680000000000016
Epoch: 0040 cost= 0.421675920 err= 0.2784000000000002
Epoch: 0041 cost= 0.421989441 err= 0.2784000000000002
Epoch: 0042 cost= 0.422275901 err= 0.2784000000000002
Epoch: 0043 cost= 0.422537714 err= 0.2784000000000002
Epoch: 0044 cost= 0.422776937 err= 0.2784000000000002
Epoch: 0045 cost= 0.422995329 err= 0.2784000000000002
Epoch: 0046 cost= 0.423195034 err= 0.2784000000000002
Epoch: 0047 cost= 0.423377633 err= 0.2784000000000002
Epoch: 0048 cost= 0.423544437 err= 0.2784000000000002
Epoch: 0049 cost= 0.423696935 err= 0.2784000000000002
Epoch: 0050 cost= 0.423836201 err= 0.2784000000000002

[figure: classification regions from the contour plot]
7.3 Fitting the XOR Operation with a Hidden-Layer Neural Network
A dataset following the XOR rule is constructed as simulated samples, and a simple multilayer neural network is built to fit its features and complete the classification task.
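For reference, the XOR target the network must reproduce (this matches X and Y in the code below):

x1  x2 | y
 0   0 | 0
 0   1 | 1
 1   0 | 1
 1   1 | 0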
Code:

import tensorflow as tf
import numpy as np

learning_rate = 1e-4
n_input = 2       # number of input-layer nodes
n_label = 1
n_hidden = 2      # number of hidden-layer nodes

x = tf.placeholder(tf.float32,[None,n_input])
y = tf.placeholder(tf.float32,[None,n_label])

weights = {
    'h1':tf.Variable(tf.truncated_normal([n_input,n_hidden],stddev=0.1)),
    'h2':tf.Variable(tf.truncated_normal([n_hidden,n_label],stddev=0.1))
}
biases = {
    'h1':tf.Variable(tf.zeros([n_hidden])),
    'h2':tf.Variable(tf.zeros([n_label]))
}
# Define the model
layer_1 = tf.nn.relu(tf.add(tf.matmul(x,weights['h1']),biases['h1']))
y_pred = tf.nn.tanh(tf.add(tf.matmul(layer_1,weights['h2']),biases['h2']))

loss = tf.reduce_mean((y_pred-y)**2)  # mean squared error
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Build the simulated XOR data
X = [[0,0],[0,1],[1,0],[1,1]]
Y = [[0],[1],[1],[0]]
X = np.array(X).astype('float32')
Y = np.array(Y).astype('int16')

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
for i in range(10000):
    sess.run(train_step,feed_dict={x:X,y:Y})
print(sess.run(y_pred,feed_dict={x:X}))
print(sess.run(layer_1,feed_dict={x:X}))


Results:

[[0.36163944]
 [0.80520314]
 [0.36163944]
 [0.36163944]]
[[-0.         -0.        ]
 [ 0.84885484 -0.        ]
 [-0.         -0.        ]
 [-0.         -0.        ]]
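This particular run is stuck in a poor local optimum: the layer_1 printout shows that the second hidden unit never activates (a "dead" ReLU), so the output can separate only one of the four XOR cases. Rerunning with a different random initialization, or using the Leaky ReLU variant commented out in Sections 7.6 and 7.7, can avoid this.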

7.5 Multilayer Classification on MNIST
Place the MNIST dataset files in the designated directory (MNIST_data/), located in the same directory as the code.
Code:

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)


# Define training parameters
learning_rate  = 0.001
training_epochs=25
batch_size = 100
display_step = 1

# Set the network model parameters
n_hidden_1 = 256
n_hidden_2 = 256
n_input = 784
n_classes = 10

# Define placeholders
x = tf.placeholder("float",[None,n_input])
y = tf.placeholder("float",[None,n_classes])

# Create the model
def multilayer_perceptron(x,weights,biases):
    # first hidden layer
    layer_1 = tf.add(tf.matmul(x,weights['h1']),biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # second hidden layer
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # output layer
    out_layer = tf.matmul(layer_2,weights['out'])+biases['out']
    return out_layer

# Learnable parameters
weights = {
    'h1':tf.Variable(tf.random_normal([n_input,n_hidden_1])),
    'h2':tf.Variable(tf.random_normal([n_hidden_1,n_hidden_2])),
    'out':tf.Variable(tf.random_normal([n_hidden_2,n_classes]))
}
biases = {
    'b1':tf.Variable(tf.random_normal([n_hidden_1])),
    "b2":tf.Variable(tf.random_normal([n_hidden_2])),
    'out':tf.Variable(tf.random_normal([n_classes]))
}


# Model output
pred = multilayer_perceptron(x,weights,biases)

# Define the loss and the optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = pred,
                                                               labels = y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initialize the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    # Start the training loop
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/ batch_size)
        # Loop over all batches in the dataset
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run the optimizer
            _, c = sess.run([optimizer,cost],feed_dict={x:batch_x,y:batch_y})

            # Accumulate the average loss
            avg_cost += c / total_batch
        # Display training progress
        if epoch % display_step == 0:
            print("Epoch:",'%04d' %(epoch+ 1),"cost=",
                      "{:.9f}".format(avg_cost))
    print("Finished!")


    # Test the model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Compute the accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))


Results:

Epoch: 0001 cost= 160.221606081
Epoch: 0002 cost= 40.583966269
Epoch: 0003 cost= 25.711453710
Epoch: 0004 cost= 17.989222194
Epoch: 0005 cost= 13.130539027
Epoch: 0006 cost= 9.799690339
Epoch: 0007 cost= 7.529750408
Epoch: 0008 cost= 5.616319189
Epoch: 0009 cost= 4.424415947
Epoch: 0010 cost= 3.358077639
Epoch: 0011 cost= 2.523357921
Epoch: 0012 cost= 2.039513816
Epoch: 0013 cost= 1.590141108
Epoch: 0014 cost= 1.235403902
Epoch: 0015 cost= 0.961221460
Epoch: 0016 cost= 0.805200391
Epoch: 0017 cost= 0.712103010
Epoch: 0018 cost= 0.598735175
Epoch: 0019 cost= 0.561253314
Epoch: 0020 cost= 0.446038284
Epoch: 0021 cost= 0.490823834
Epoch: 0022 cost= 0.377785401
Epoch: 0023 cost= 0.333711553
Epoch: 0024 cost= 0.414948347
Epoch: 0025 cost= 0.360114482
Finished!
Accuracy: 0.952

7.6 Overfitting on the XOR-Style Dataset
Code:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap
# OneHotEncoder makes the one-hot encoding adapt dynamically to the label range
from sklearn.preprocessing import OneHotEncoder

def onehot(y, start, end):
    ohe = OneHotEncoder()
    a = np.linspace(start, end - 1, end - start)
    b = np.reshape(a, [-1, 1]).astype(np.int32)
    ohe.fit(b)
    c = ohe.transform(y).toarray()
    return c

def generate(sample_size, num_classes, diff, regression=False):
    np.random.seed(10)
    mean = np.random.randn(2)
    cov = np.eye(2)

    # len(diff)
    samples_per_class = int(sample_size / num_classes)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)

    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean + d, cov, samples_per_class)
        Y1 = (ci + 1) * np.ones(samples_per_class)

        X0 = np.concatenate((X0, X1))
        Y0 = np.concatenate((Y0, Y1))

    if regression == False:  # one-hot encode: label 0 becomes the vector [1, 0, ...]
        Y0 = np.reshape(Y0, [-1, 1])
        # print(Y0.astype(np.int32))
        Y0 = onehot(Y0.astype(np.int32), 0, num_classes)
        # print(Y0)
    X, Y = shuffle(X0, Y0)
    # print(X, Y)
    return X, Y

np.random.seed(10)
input_dim = 2
num_classes = 4
X, Y = generate(320,num_classes,[[3.0,0],[3.0,3.0],[0,3.0]],True)
Y = Y % 2
xr = []
xb = []
for (l,k) in zip(Y[:],X[:]):
    if l == 0.0:
        xr.append([k[0],k[1]])
    else:
        xb.append([k[0],k[1]])
xr = np.array(xr)
xb = np.array(xb)
plt.scatter(xr[:,0],xr[:,1],c = 'r',marker = '+')
plt.scatter(xb[:,0],xb[:,1],c = 'b',marker='o')
plt.show()



Y = np.reshape(Y, [-1, 1])
print('-------------------------------------------')

# Define variables
learning_rate = 1e-4
n_input = 2
n_label = 1
# n_hidden = 2  # with only 2 hidden nodes the model underfits
n_hidden = 200  # a wide hidden layer gives enough capacity to overfit

x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_label])

# Define the learnable parameters
weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
}
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
}

# Define the network model
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))
y_pred = tf.nn.tanh(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))
# y_pred = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))  # tends to get stuck in a local optimum

# y_pred = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))

# Leaky ReLU variant: converges after about 40000 steps
# layer2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['h2'])
# y_pred = tf.maximum(layer2, 0.01 * layer2)

loss = tf.reduce_mean((y_pred - y) ** 2)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Start the session
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

for i in range(20000):

    _, loss_val = sess.run([train_step, loss], feed_dict={x: X, y: Y})

    if i % 1000 == 0:
        print("Step:", i, "Current loss:", loss_val)

print('------------------------------------------')

# colors = ['r' if l == 0.0 else 'b' for l in Y[:]]
# plt.scatter(X[:,0], X[:,1], c=colors)

# Visualization: generate 120 new points, run them through the model,
# and plot them in the coordinate plane
xTrain,yTrain = generate(120, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
yTrain = yTrain % 2
xr = []
xb = []
for (l, k) in zip(yTrain[:], xTrain[:]):
    if l == 0.0:
        xr.append([k[0], k[1]])
    else:
        xb.append([k[0], k[1]])
xr = np.array(xr)
xb = np.array(xb)
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')
yTrain=np.reshape(yTrain,[-1,1])
print("loss:\n",sess.run(loss,feed_dict={x:xTrain,y:yTrain}))



nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
    for j in range(nb_of_xs):
        # classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
        classification_plane[i, j] = sess.run(y_pred, feed_dict={x: [[xx[i, j], yy[i, j]]]})
        classification_plane[i, j] = int(classification_plane[i, j])

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()
print('---------------------------------------------------------------')

# Demonstrate overfitting: evaluate on only 12 fresh points
xTrain, yTrain = generate(12, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
yTrain = yTrain % 2

xr = []
xb = []
for (l, k) in zip(yTrain[:], xTrain[:]):
    if l == 0.0:
        xr.append([k[0], k[1]])
    else:
        xb.append([k[0], k[1]])
xr = np.array(xr)
xb = np.array(xb)
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')

# plt.show()
yTrain = np.reshape(yTrain, [-1, 1])
print("loss:\n", sess.run(loss, feed_dict={x: xTrain, y: yTrain}))

nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
    for j in range(nb_of_xs):
        # classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
        classification_plane[i, j] = sess.run(y_pred, feed_dict={x: [[xx[i, j], yy[i, j]]]})
        classification_plane[i, j] = int(classification_plane[i, j])

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()

Results:
[figure: the 320 generated XOR-style samples]
[figure: decision regions with the 120 validation points]
[figure: decision regions with the 12 validation points]

Step: 0 Current loss: 0.525204
Step: 1000 Current loss: 0.36390907
Step: 2000 Current loss: 0.26005936
Step: 3000 Current loss: 0.24341969
Step: 4000 Current loss: 0.22977524
Step: 5000 Current loss: 0.21489516
Step: 6000 Current loss: 0.1992037
Step: 7000 Current loss: 0.17430758
Step: 8000 Current loss: 0.14982383
Step: 9000 Current loss: 0.13814351
Step: 10000 Current loss: 0.13208419
Step: 11000 Current loss: 0.12838805
Step: 12000 Current loss: 0.1236871
Step: 13000 Current loss: 0.12113049
Step: 14000 Current loss: 0.1201177
Step: 15000 Current loss: 0.119730495
Step: 16000 Current loss: 0.11938534
Step: 17000 Current loss: 0.119005516
Step: 18000 Current loss: 0.118972085
Step: 19000 Current loss: 0.11897151
------------------------------------------
loss:
 0.09773064
---------------------------------------------------------------
loss:
 0.1479112

7.7 Regularizing the XOR-Style Dataset
Regularization adds an extra term to the loss that the network computes. The perturbed error keeps the model from fitting the samples perfectly, which prevents overfitting.
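Concretely, in the code below the penalty is the halved squared L2 norm of each weight matrix (tf.nn.l2_loss(W) computes $\tfrac{1}{2}\lVert W\rVert_2^2$), scaled by reg = 0.01 in the role of $\lambda$:

$$\text{loss} = \frac{1}{N}\sum_{i=1}^{N}\bigl(y^{\text{pred}}_i - y_i\bigr)^2 + \lambda\Bigl(\tfrac{1}{2}\lVert W_1\rVert_2^2 + \tfrac{1}{2}\lVert W_2\rVert_2^2\Bigr)$$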
Code:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from matplotlib.colors import colorConverter, ListedColormap
# OneHotEncoder makes the one-hot encoding adapt dynamically to the label range
from sklearn.preprocessing import OneHotEncoder

def onehot(y, start, end):
    ohe = OneHotEncoder()
    a = np.linspace(start, end - 1, end - start)
    b = np.reshape(a, [-1, 1]).astype(np.int32)
    ohe.fit(b)
    c = ohe.transform(y).toarray()
    return c

def generate(sample_size, num_classes, diff, regression=False):
    np.random.seed(10)
    mean = np.random.randn(2)
    cov = np.eye(2)

    # len(diff)
    samples_per_class = int(sample_size / num_classes)

    X0 = np.random.multivariate_normal(mean, cov, samples_per_class)
    Y0 = np.zeros(samples_per_class)

    for ci, d in enumerate(diff):
        X1 = np.random.multivariate_normal(mean + d, cov, samples_per_class)
        Y1 = (ci + 1) * np.ones(samples_per_class)

        X0 = np.concatenate((X0, X1))
        Y0 = np.concatenate((Y0, Y1))

    if regression == False:  # one-hot encode: label 0 becomes the vector [1, 0, ...]
        Y0 = np.reshape(Y0, [-1, 1])
        # print(Y0.astype(np.int32))
        Y0 = onehot(Y0.astype(np.int32), 0, num_classes)
        # print(Y0)
    X, Y = shuffle(X0, Y0)
    # print(X, Y)
    return X, Y

np.random.seed(10)
input_dim = 2
num_classes = 4
X, Y = generate(320,num_classes,[[3.0,0],[3.0,3.0],[0,3.0]],True)
Y = Y % 2
xr = []
xb = []
for (l,k) in zip(Y[:],X[:]):
    if l == 0.0:
        xr.append([k[0],k[1]])
    else:
        xb.append([k[0],k[1]])
xr = np.array(xr)
xb = np.array(xb)
plt.scatter(xr[:,0],xr[:,1],c = 'r',marker = '+')
plt.scatter(xb[:,0],xb[:,1],c = 'b',marker='o')
plt.show()



Y = np.reshape(Y, [-1, 1])
print('-------------------------------------------')

# Define variables
learning_rate = 1e-4
n_input = 2
n_label = 1
# n_hidden = 2  # with only 2 hidden nodes the model underfits
n_hidden = 200  # the same wide hidden layer as in Section 7.6

x = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_label])

# Define the learnable parameters
weights = {
    'h1': tf.Variable(tf.truncated_normal([n_input, n_hidden], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden, n_label], stddev=0.1))
}
biases = {
    'h1': tf.Variable(tf.zeros([n_hidden])),
    'h2': tf.Variable(tf.zeros([n_label]))
}

# Define the network model
layer_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['h1']))
y_pred = tf.nn.tanh(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))
# y_pred = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))  # tends to get stuck in a local optimum

# y_pred = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['h2']),biases['h2']))

# Leaky ReLU variant: converges after about 40000 steps
# layer2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['h2'])
# y_pred = tf.maximum(layer2, 0.01 * layer2)
reg = 0.01  # L2 regularization strength
loss = tf.reduce_mean((y_pred - y) ** 2) + tf.nn.l2_loss(weights['h1'])*reg+tf.nn.l2_loss(weights['h2'])*reg
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Start the session
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

for i in range(20000):

    _, loss_val = sess.run([train_step, loss], feed_dict={x: X, y: Y})

    if i % 1000 == 0:
        print("Step:", i, "Current loss:", loss_val)

print('------------------------------------------')

# colors = ['r' if l == 0.0 else 'b' for l in Y[:]]
# plt.scatter(X[:,0], X[:,1], c=colors)

# Visualization: generate 120 new points, run them through the model,
# and plot them in the coordinate plane
xTrain,yTrain = generate(120, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
yTrain = yTrain % 2
xr = []
xb = []
for (l, k) in zip(yTrain[:], xTrain[:]):
    if l == 0.0:
        xr.append([k[0], k[1]])
    else:
        xb.append([k[0], k[1]])
xr = np.array(xr)
xb = np.array(xb)
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')
yTrain=np.reshape(yTrain,[-1,1])
print("loss:\n",sess.run(loss,feed_dict={x:xTrain,y:yTrain}))



nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
    for j in range(nb_of_xs):
        # classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
        classification_plane[i, j] = sess.run(y_pred, feed_dict={x: [[xx[i, j], yy[i, j]]]})
        classification_plane[i, j] = int(classification_plane[i, j])

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()
print('---------------------------------------------------------------')

# Check for overfitting: evaluate on only 12 fresh points
xTrain, yTrain = generate(12, num_classes, [[3.0, 0], [3.0, 3.0], [0, 3.0]], True)
yTrain = yTrain % 2

xr = []
xb = []
for (l, k) in zip(yTrain[:], xTrain[:]):
    if l == 0.0:
        xr.append([k[0], k[1]])
    else:
        xb.append([k[0], k[1]])
xr = np.array(xr)
xb = np.array(xb)
plt.scatter(xr[:, 0], xr[:, 1], c='r', marker='+')
plt.scatter(xb[:, 0], xb[:, 1], c='b', marker='o')

# plt.show()
yTrain = np.reshape(yTrain, [-1, 1])
print("loss:\n", sess.run(loss, feed_dict={x: xTrain, y: yTrain}))

nb_of_xs = 200
xs1 = np.linspace(-1, 8, num=nb_of_xs)
xs2 = np.linspace(-1, 8, num=nb_of_xs)
xx, yy = np.meshgrid(xs1, xs2)  # create the grid
# Initialize and fill the classification plane
classification_plane = np.zeros((nb_of_xs, nb_of_xs))
for i in range(nb_of_xs):
    for j in range(nb_of_xs):
        # classification_plane[i,j] = nn_predict(xx[i,j], yy[i,j])
        classification_plane[i, j] = sess.run(y_pred, feed_dict={x: [[xx[i, j], yy[i, j]]]})
        classification_plane[i, j] = int(classification_plane[i, j])

# Create a color map to show the classification colors of each grid point
cmap = ListedColormap([
    colorConverter.to_rgba('r', alpha=0.30),
    colorConverter.to_rgba('b', alpha=0.30)])
# Plot the classification plane with decision boundary and input samples
plt.contourf(xx, yy, classification_plane, cmap=cmap)
plt.show()

Results:
[figure: the 320 generated XOR-style samples]
[figure: decision regions with the 120 validation points]
[figure: decision regions with the 12 validation points]

Step: 0 Current loss: 0.5002848
Step: 1000 Current loss: 0.3300084
Step: 2000 Current loss: 0.26237753
Step: 3000 Current loss: 0.2573841
Step: 4000 Current loss: 0.25614178
Step: 5000 Current loss: 0.25478172
Step: 6000 Current loss: 0.25330725
Step: 7000 Current loss: 0.25196472
Step: 8000 Current loss: 0.251057
Step: 9000 Current loss: 0.25065008
Step: 10000 Current loss: 0.25047454
Step: 11000 Current loss: 0.25032705
Step: 12000 Current loss: 0.25019154
Step: 13000 Current loss: 0.2500856
Step: 14000 Current loss: 0.25001946
Step: 15000 Current loss: 0.24999328
Step: 16000 Current loss: 0.2499901
Step: 17000 Current loss: 0.24999003
Step: 18000 Current loss: 0.24998999
Step: 19000 Current loss: 0.24999002
------------------------------------------
loss:
 0.25002107
---------------------------------------------------------------
loss:
 0.24952938