用 TensorFlow 建立卷积神经网络，识别 Kaggle MNIST 手写数字数据集

最新推荐文章于 2022-12-18 15:07:17 发布

sysu_zzd

最新推荐文章于 2022-12-18 15:07:17 发布

阅读量336

点赞数 1

分类专栏： python ai 机器学习

本文链接：https://blog.csdn.net/weixin_41116447/article/details/85036843

版权

python ai 机器学习专栏收录该内容

3 篇文章 0 订阅

订阅专栏

#################################################

# -*- coding=utf-8 -*-

# 这是加入多层神经网络tensorflow算法用于识别kaggle digits项目，利用到了自己写的工具小包  
# kaggle digits 数据集下载地址为：https://www.kaggle.com/c/digit-recognizer/data  需要注册为kaggle用户

import tensorflow as tf
import numpy as np
# import input_data   #官方这里这么写有问题，或者经历过代码更新
# import tensorflow.examples.tutorials.mnist.input_data as input_data
# mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)

import num_recog_tools_forTensorFlow as nrtTF
# Load the Kaggle digit-recognizer CSV files through the helper module.
# flag='MNIST' selects the helper's one-hot label matrix and 1/255-scaled pixels.
train_data, train_label = nrtTF.open_train_csv(flag='MNIST')
test_data = nrtTF.open_test_csv()

# An InteractiveSession installs itself as the default session, which is what
# allows the later .eval() / .run() calls to work without naming a session.
sess = tf.InteractiveSession()

# Graph inputs: x takes rows of 784 pixel values, y_ takes rows of 10 label values.
x = tf.placeholder('float', shape=[None, 784])
y_ = tf.placeholder('float', shape=[None, 10])

# NOTE(review): W, b and y below build a single-layer softmax model, but no
# later line of this script refers to them - looks like a leftover from the
# basic softmax tutorial; confirm before removing.
W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))

# sess.run(tf.global_variables_initializer())   # initialise the parameters

y = tf.nn.softmax(tf.matmul(x,W) + b)

# 权重初始化
def weight_variable(shape):
    """Return a new tf.Variable of the given shape for use as a weight tensor.

    The initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a new tf.Variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

# 卷积和池化
def conv2d(x, W):
    """Convolve ``x`` with the filter ``W`` using unit strides and 'SAME' padding.

    Notes on the arguments handed to ``tf.nn.conv2d``:

    * ``strides`` is a length-4 list with ``strides[0] == strides[3] == 1``;
      ``strides[1]`` and ``strides[2]`` are the step sizes of the kernel along
      the image height and width.
    * The kernel size and stride do not always tile the image exactly (for
      example a 5x5 image, a 3x3 kernel and a stride of 3), so the border has
      to be handled somehow; ``padding`` selects how.
    * 'SAME' pads the border with zeros, 'VALID' adds no padding and simply
      drops the part that does not fit.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Apply max pooling over 2x2 windows with a stride of 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# First convolutional layer
print('第一层卷积')
W_conv1 = weight_variable([5, 5, 1, 32])   # 5x5 receptive field, 1 input channel, 32 filters (convolution kernels)
b_conv1 = bias_variable([32])   # one bias value per filter, 32 in total

# Turn every flattened 784-value row back into a square 28x28 image with one channel (28*28 = 784).
# The new shape has to be compatible with the old one; a -1 entry means that
# dimension is computed automatically from the remaining ones.
x_image = tf.reshape(x, [-1, 28, 28, 1])

h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer: 5x5 filters mapping the 32 channels above to 64
print('第二层卷积')
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Densely connected layer
W_fc1 = weight_variable([7 * 7 *64, 1024])    # the 7*7 comes from the 28x28 image passing through two 2x2 max-pools (28 -> 14 -> 7); 64 is the channel count of the second conv layer
b_fc1 = bias_variable([1024])

# Flatten the pooled feature maps so they can be fed to a matrix multiplication.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout, used against over-fitting. keep_prob is a placeholder so the value
# (between 0 and 1) can be chosen per run: the training step feeds a value
# below 1, while evaluation and prediction feed 1.0.
keep_prob = tf.placeholder('float')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer: 10 softmax probabilities per input row, one per digit class
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

# Train and evaluate the model
print('开始训练')
# Cross-entropy between the one-hot labels and the softmax output. The
# probabilities are clipped away from 0 first: tf.log(0) is -inf, and
# 0 * -inf is NaN, which would poison the loss and then every weight.
cross_entropy = -tf.reduce_sum(
    y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# A row counts as correct when the most probable class matches the label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
sess.run(tf.global_variables_initializer())

start = 0
for i in range(2000):
    # data_feed returns the next mini-batch together with the start index to
    # pass in on the following iteration.
    feed_images, feed_labels, start = nrtTF.data_feed(
        train_data, train_label, start, batch=50)
    if i % 100 == 0:
        # Report accuracy on the current batch with dropout disabled.
        train_accuracy = accuracy.eval(feed_dict={
            x: feed_images, y_: feed_labels, keep_prob: 1.0})
        print('step %d, training accuracy %f' % (i, train_accuracy))
    train_step.run(
        feed_dict={x: feed_images, y_: feed_labels, keep_prob: 0.9})

# Class probabilities for the first 100 test images (dropout disabled).
predict_label = y_conv.eval(feed_dict={x: test_data[0:100], keep_prob: 1.0})

# Row 0 of predict_label1 holds the predicted digit for every test image.
predict_label1 = np.zeros([1, np.shape(predict_label)[0]])
predict_label1[0, :] = np.argmax(predict_label, axis=1)

# Plot the first 100 test images with their predicted digit as the title.
# NOTE(review): the predictions are passed as both the "label" and the
# "test_label" argument (the test CSV is read without a label column), so
# digits_plot can never mark a title red here.
nrtTF.digits_plot(test_data[0:100], 10, predict_label1[0], predict_label1[0])

# Release the resources held by the interactive session.
sess.close()

以下代码段是为辅助上面的数字识别程序而写的工具模块，需保存为 num_recog_tools_forTensorFlow.py（即上文中以 nrtTF 为别名导入的模块）
###################################################
# -*- coding=utf-8 -*-

# 这个自己写的工具包为将kaggle digits项目中的数据读取及作出相关图像的工具，现在将其改造成适用于tensorflow使用的版本


import csv
import os
import time
import random
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.decomposition import PCA
from numpy import *
from tensorflow.python.util.deprecation import deprecated

# Both CSV files are expected in a "kaggle_data" folder below the current
# working directory. os.path.join inserts the separator of the running
# platform; hard-coded backslashes would only resolve on Windows.
cur_dir = os.getcwd()
train_data_dir = os.path.join(cur_dir, 'kaggle_data', 'train.csv')   # path of the training data
test_data_dir = os.path.join(cur_dir, 'kaggle_data', 'test.csv')     # path of the test data

def open_train_csv(flag='kaggle'):
    """Read the training CSV at ``train_data_dir`` into NumPy arrays.

    The first line of the file is treated as a header. On every other line the
    first field is the label and the remaining fields are the pixel values.
    Blank lines are skipped.

    Args:
        flag: ``'kaggle'`` returns the raw pixel values and a 1-D integer
            label vector. ``'MNIST'`` returns the pixel values divided by 255
            and a (rows, 10) one-hot label matrix.

    Returns:
        A ``(data, labels)`` tuple as described for ``flag``.

    Raises:
        ValueError: if ``flag`` is neither ``'kaggle'`` nor ``'MNIST'``.
    """
    # Reject an unknown flag before doing any file I/O, instead of falling off
    # the end of the function and handing the caller None.
    if flag not in ('kaggle', 'MNIST'):
        raise ValueError(
            "flag must be 'kaggle' or 'MNIST', got %r" % (flag,))

    with open(train_data_dir, 'r') as f:
        header = f.readline()
        # A trailing empty line must not be parsed as a record.
        records = [line for line in f if line.strip()]

    rows = len(records)
    columns = len(header.split(',')) - 1   # every column except the label
    label = zeros(rows)
    data = zeros([rows, columns])
    for i, line in enumerate(records):
        fields = line.split(',')
        label[i] = float(fields[0])
        data[i, :] = fields[1:]   # NumPy converts the numeric strings
    label_return = label.astype(int)

    if flag == 'kaggle':
        return data, label_return

    # flag == 'MNIST': set column label_return[i] of row i to 1.
    label_matrix = zeros([rows, 10])
    label_matrix[arange(rows), label_return] = 1
    return data / 255, label_matrix

def open_test_csv():
    """Read the test CSV at ``test_data_dir`` into a NumPy array.

    The first line of the file is treated as a header; every field of the
    other lines is a pixel value (there is no label column). Blank lines are
    skipped.

    Returns:
        A (rows, columns) float array holding the pixel values divided by 255.
    """
    with open(test_data_dir, 'r') as f:
        header = f.readline()
        # A trailing empty line must not be parsed as a record.
        records = [line for line in f if line.strip()]

    columns = len(header.split(','))
    data = zeros([len(records), columns])
    for i, line in enumerate(records):
        data[i, :] = line.split(',')   # NumPy converts the numeric strings
    return data / 255


def digits_plot(train_data, axis_num, label, test_label):
    """Show the first ``axis_num * axis_num`` images of ``train_data`` in a grid.

    Each row of ``train_data`` is a flattened square image. Every subplot is
    titled with the pair ``(label[i], test_label[i])``; the title is black when
    the two values are equal and red otherwise. The figure title reports how
    many pairs differ.

    Args:
        train_data: 2-D array, one flattened image per row.
        axis_num: number of subplots per grid row and per grid column.
        label: sequence compared against ``test_label`` (first title value).
        test_label: sequence compared against ``label`` (second title value).
    """
    columns = len(train_data[0])
    fig1 = plt.gcf()
    fig1.set_size_inches(10, 10)   # figure size in inches
    sqrt_pixel = int(sqrt(columns))
    total_num = axis_num * axis_num

    # Un-flatten every row into a sqrt_pixel x sqrt_pixel image.
    images = zeros([total_num, sqrt_pixel, sqrt_pixel])
    for i in range(total_num):
        flat = train_data[i, :sqrt_pixel * sqrt_pixel]
        images[i] = flat.reshape(sqrt_pixel, sqrt_pixel)

    error_num = 0
    for i in range(total_num):
        ax = fig1.add_subplot(axis_num, axis_num, i + 1, xticks=[], yticks=[])
        ax.imshow(images[i], cmap=plt.cm.binary, interpolation='nearest')
        # NOTE(review): this second call draws on top of the first one with the
        # default colormap, so it decides what is actually displayed. One of
        # the two calls is probably a leftover - confirm which look is wanted.
        ax.imshow(images[i])
        if label[i] == test_label[i]:
            color = 'black'
        else:
            color = 'red'   # mismatching pair
            error_num += 1
        ax.set_title((label[i], test_label[i]), fontsize='small', color=color)

    # Built after the loop so the name also exists when the grid is empty.
    supertitle = 'Total error counts are: %d' % error_num
    fig1.suptitle(supertitle)

    # Spread the subplots over the figure with as little margin as possible,
    # then leave room at the top so the figure title is not covered.
    plt.tight_layout()
    plt.subplots_adjust(top=0.91)

    plt.show()

def plot_pca(train_data, train_label, n_components):
    """Fit a PCA on ``train_data`` and scatter-plot its first two components.

    The points are coloured by ``train_label`` and a colour bar is added.
    """
    projected = PCA(n_components).fit_transform(train_data)
    first_component = projected[:, 0]
    second_component = projected[:, 1]
    plt.scatter(first_component, second_component, c=train_label)
    plt.colorbar()
    plt.show()

def plot_tsne(train_data, train_label, n_components):
    """Embed ``train_data`` with t-SNE and scatter-plot it, one colour per label.

    Args:
        train_data: 2-D array of samples.
        train_label: 1-D sequence holding the label of every sample.
        n_components: 2 draws a flat scatter plot, 3 draws a 3-D one. For any
            other value no points are drawn.
    """
    print('准备进行TSNE分析，请稍候……')
    from sklearn.manifold import TSNE

    labels = asarray(train_label)
    # Sorted distinct labels; each one is matched by value below, so the plot
    # does not depend on the labels being exactly 0..k-1.
    label_names = unique(labels)

    tsne = TSNE(n_components=n_components, random_state=0)
    embedded = tsne.fit_transform(train_data)

    print('已完成TSNE分析，正在作图……')
    colorlist = ['r', 'coral', 'g', 'springgreen', 'b', 'blueviolet', 'c', 'steelblue', 'm', 'y', 'k', 'w']
    if n_components == 2:
        fig1 = plt.gcf()
        fig1.set_size_inches(15, 15)
        # scatter() is a method of the axes, not of the figure.
        ax = fig1.gca()
        for c, label in zip(colorlist, label_names):
            mask = labels == label
            ax.scatter(embedded[mask, 0], embedded[mask, 1], c=c, label=label)
    elif n_components == 3:
        ax = plt.subplot(111, projection='3d')
        ax.view_init(elev=10., azim=11)
        for c, label in zip(colorlist, label_names):
            mask = labels == label
            ax.scatter(embedded[mask, 0], embedded[mask, 1], embedded[mask, 2],
                       c=c, label=label)

    plt.legend()
    plt.show()

def gaussian_nb(train_data, train_label, test_data):
    """Fit a Gaussian naive Bayes classifier and return its predictions for ``test_data``."""
    model = GaussianNB()
    model.fit(train_data, train_label)
    return model.predict(test_data)

def clf_svm(train_data, train_label, test_data):
    """Fit an SVC (gamma=0.01, C=10000) and return its predictions for ``test_data``."""
    model = svm.SVC(gamma=0.01, C=10000)
    model.fit(train_data, train_label)
    return model.predict(test_data)

def show_pca_mean(pca, shapepixels):
    """Display ``pca.mean_`` as a ``shapepixels`` x ``shapepixels`` image."""
    mean_image = pca.mean_.reshape((shapepixels, shapepixels))
    plt.imshow(mean_image, cmap=plt.cm.bone)
    plt.show()

def show_pca(pca, shapepixels):
    """Display up to the first 30 PCA components as images in a 3 x 10 grid.

    Args:
        pca: fitted object exposing ``components_``, one flattened image per row.
        shapepixels: side length used to reshape every component into a
            ``shapepixels`` x ``shapepixels`` image.
    """
    fig = plt.figure(figsize=(16, 6))
    # A PCA fitted with fewer than 30 components must not be indexed past its
    # last row, so the panel count is capped at what is available.
    available = len(pca.components_)
    shown = available if available < 30 else 30
    for i in range(shown):
        ax = fig.add_subplot(3, 10, i + 1, xticks=[], yticks=[])
        ax.imshow(pca.components_[i].reshape((shapepixels, shapepixels)),
                  cmap=plt.cm.bone)
    plt.show()

def data_feed(data, labels, start, batch):
    """Return one mini-batch and the start index to use for the next call.

    Consecutive calls walk through ``data`` / ``labels`` in steps of ``batch``
    rows. When the end of the data is reached the returned index wraps to 0,
    so the caller can keep feeding the value back in for as many steps as it
    likes. The last batch before a wrap may hold fewer than ``batch`` rows.

    Args:
        data: 2-D array, one sample per row.
        labels: 2-D array with one row per sample of ``data``.
        start: row index of the first sample of this batch.
        batch: number of rows requested.

    Returns:
        ``(feed_images, feed_labels, next_start)``.
    """
    num_example = shape(data)[0]
    # Start over from the top once the previous pass has used up the data.
    if start >= num_example:
        start = 0
    end = start + batch
    if end > num_example:
        end = num_example
    feed_images = data[start:end, :]
    feed_labels = labels[start:end, :]
    # Advance by exactly what was consumed; wrap when the data is exhausted.
    next_start = end if end < num_example else 0
    return feed_images, feed_labels, next_start