1. Introduction
Handwritten digit recognition with generalized logistic regression (multinomial logistic / softmax regression) on the MNIST dataset.
Model:
$$P(Y=k \mid x) = \frac{\exp(w_k \cdot x)}{1 + \sum_{j=1}^{K-1} \exp(w_j \cdot x)}, \qquad k = 1, 2, \ldots, K-1$$
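For intuition, here is a minimal NumPy sketch of this model (the function name class_probabilities, the weight matrix W, and the sizes are illustrative, not part of the script below); class K acts as the reference class, so K classes need only K-1 weight vectors:

import numpy as np

def class_probabilities(W, x):
    # W: (K-1)-by-d weight matrix, x: d-dimensional feature vector
    scores = np.exp(W.dot(x))             # exp(w_k . x), k = 1, ..., K-1
    denom = 1.0 + scores.sum()            # 1 + sum_j exp(w_j . x)
    probs = scores / denom                # P(Y=k|x), k = 1, ..., K-1
    return np.append(probs, 1.0 / denom)  # last entry is P(Y=K|x)

W = 0.01 * np.random.randn(9, 784)  # K=10 digit classes -> 9 weight vectors
x = np.random.rand(784)             # one flattened 28*28 image
assert np.isclose(class_probabilities(W, x).sum(), 1.0)

The TensorFlow script below uses the equivalent over-parameterized softmax form, with one weight vector per class (a full 784-by-10 matrix).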
2. Source code
#coding:utf-8
import tensorflow as tf
import numpy as np
import input_data
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # use GPU device 0
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.01), name="weight_w")
    # stddev: standard deviation of the normal distribution
def model(X, w):  # computes X*w; X: N-by-784 (28*28), w: 784-by-10
return tf.matmul(X, w)
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) #load DataSet
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
print("trX.shape: %d-by-%d"%trX.shape)
print("trY.shape: %d-by-%d"%trY.shape)
print("teX.shape: %d-by-%d"%teX.shape)
print("teY.shape: %d-by-%d"%teY.shape)
log_dir = "mnist_logs"
# Summary helper (optional; you could also call tf.summary directly): record whatever statistics you want to inspect, with any extra computation you need.
def variable_summaries(var):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
with tf.name_scope('summaries'):
mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)  # arg1: tag for the visualized data; arg2: the tensor to visualize
with tf.name_scope('stddev'):
stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
tf.summary.scalar('stddev', stddev)
tf.summary.scalar('max', tf.reduce_max(var))
tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)  # record the variable's histogram with tf.summary.histogram
graph = tf.Graph()
with graph.as_default():
    input_X = tf.placeholder("float", [None, 784], name="input_X")  # 28*28=784; None leaves the number of samples unconstrained; each sample is a 784-dim vector
input_Y = tf.placeholder("float", [None, 10], name="input_Y")
global_step = tf.Variable(0, trainable=False)
with tf.name_scope('input_reshape'):
image_shaped_input = tf.reshape(input_X, [-1, 28, 28, 1])
tf.summary.image('input', image_shaped_input, 10)
    w = init_weights([784, 10])  # initialize the weight matrix w
print("w.shape:")
    print(w.get_shape().as_list())
variable_summaries(w)
py_x = model(input_X, w)
tf.summary.tensor_summary("predict", py_x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=input_Y))  # py_x: predicted logits, input_Y: true labels
    # tf.reduce_mean() averages the cross-entropy over the whole mini-batch
tf.summary.scalar("cost", cost)
    # instantiate a tf.train.GradientDescentOptimizer
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(cost, global_step=global_step)  # SGD with learning rate 0.05, minimizing cost
    predict_op = tf.argmax(py_x, 1)  # index of the largest logit per row, i.e. the predicted class (0-9)
    merged = tf.summary.merge_all()  # merge all summary ops so one run produces every summary
with tf.device("/cpu:0"):
saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
with tf.Session(graph=graph) as sess:  # launch the graph
    train_writer = tf.summary.FileWriter(log_dir + '/train', sess.graph)  # writes summary data to the event log
    tf.global_variables_initializer().run()  # initialize all variables
for i in range(100):
        # zip yields (start, end) index pairs (0,128), (128,256), ...; mini-batch size = 128
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX)+1, 128)):
summary, _, step_id = sess.run([merged, optimizer, global_step],
feed_dict={input_X: trX[start:end],
input_Y: trY[start:end]})
train_writer.add_summary(summary, step_id)
        predict_results = sess.run(predict_op, feed_dict={input_X: teX})  # predict on the test set
        accuracy = np.mean(np.argmax(teY, axis=1) == predict_results)  # classification accuracy on the test set this epoch
        print("Epoch %d, accuracy: %f" % (i + 1, accuracy))
        print("global_step: %d" % step_id)
        saver.save(sess, 'run/checkpoint', global_step=global_step)  # saving the whole model with tf.train.Saver lets TensorBoard visualize every 2-D Variable in it
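Once training finishes, the summaries can be inspected with tensorboard --logdir=mnist_logs/train. The saved model can also be restored in a fresh session; a minimal sketch, assuming the checkpoint files were written under run/ by the loop above:

with tf.Session(graph=graph) as sess:
    # load the most recent checkpoint written by the training loop
    saver.restore(sess, tf.train.latest_checkpoint('run'))
    print(sess.run(predict_op, feed_dict={input_X: teX[:5]}))  # predicted digits for 5 test images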
# input_data.py
#encoding:utf-8
"""Functions for downloading and reading MNIST data."""
import gzip
import os
from six.moves.urllib.request import urlretrieve
import numpy
SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
def maybe_download(filename, work_directory):
    """Download the data from Yann's website, unless it's already here."""
    # docstrings can be inspected via functionname.__doc__
if not os.path.exists(work_directory):
os.mkdir(work_directory)
filepath = os.path.join(work_directory, filename)
if not os.path.exists(filepath):
filepath, _ = urlretrieve(SOURCE_URL + filename, filepath)
statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
return filepath
def _read32(bytestream):
dt = numpy.dtype(numpy.uint32).newbyteorder('>')
return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]
def extract_images(filename):
    """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
print('Extracting', filename)
with gzip.open(filename) as bytestream:
magic = _read32(bytestream)
if magic != 2051:
raise ValueError(
'Invalid magic number %d in MNIST image file: %s' %
(magic, filename))
num_images = _read32(bytestream)
rows = _read32(bytestream)
cols = _read32(bytestream)
buf = bytestream.read(rows * cols * num_images)
data = numpy.frombuffer(buf, dtype=numpy.uint8)
data = data.reshape(num_images, rows, cols, 1)
return data
def dense_to_one_hot(labels_dense, num_classes=10):
"""Convert class labels from scalars to one-hot vectors."""
num_labels = labels_dense.shape[0]
index_offset = numpy.arange(num_labels) * num_classes
labels_one_hot = numpy.zeros((num_labels, num_classes))
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
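# A worked example of the flat-index trick above (values are illustrative):
# dense_to_one_hot(numpy.array([0, 2]), num_classes=3)
# num_labels=2, index_offset=[0, 3], flat indices [0, 5], giving
# [[1., 0., 0.],
#  [0., 0., 1.]]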
def extract_labels(filename, one_hot=False):
    """Extract the labels into a 1D uint8 numpy array [index]."""
print('Extracting', filename)
with gzip.open(filename) as bytestream:
magic = _read32(bytestream)
        if magic != 2049:  # raise throws an exception; statements after it in this branch never execute
raise ValueError(
'Invalid magic number %d in MNIST label file: %s' %
(magic, filename))
num_items = _read32(bytestream)
buf = bytestream.read(num_items)
labels = numpy.frombuffer(buf, dtype=numpy.uint8)
if one_hot:
return dense_to_one_hot(labels)
return labels
class DataSet(object):  # new-style classes in Python 2 must inherit from object
    def __init__(self, images, labels, fake_data=False):  # constructor, a magic method invoked automatically on instantiation
if fake_data:
            self._num_examples = 10000  # self refers to the instance, like this in C++, but must be written explicitly as the first parameter
            # _num_examples is an instance attribute
else:
            assert images.shape[0] == labels.shape[0], (  # assert raises AssertionError if the condition is false
"images.shape: %s labels.shape: %s" % (images.shape,
labels.shape))
self._num_examples = images.shape[0]
# Convert shape from [num examples, rows, columns, depth]
# to [num examples, rows*columns] (assuming depth == 1)
assert images.shape[3] == 1
images = images.reshape(images.shape[0],
images.shape[1] * images.shape[2])
# Convert from [0, 255] -> [0.0, 1.0].
images = images.astype(numpy.float32)
images = numpy.multiply(images, 1.0 / 255.0)
        self._images = images  # instance attribute
        self._labels = labels  # instance attribute
self._epochs_completed = 0
self._index_in_epoch = 0
    @property  # expose the method as a read-only attribute
def images(self):
return self._images
@property
def labels(self):
return self._labels
@property
def num_examples(self):
return self._num_examples
@property
def epochs_completed(self):
return self._epochs_completed
def next_batch(self, batch_size, fake_data=False):
"""Return the next `batch_size` examples from this data set."""
if fake_data:
            fake_image = [1.0 for _ in range(784)]
            fake_label = 0
            return ([fake_image for _ in range(batch_size)],
                    [fake_label for _ in range(batch_size)])
start = self._index_in_epoch
self._index_in_epoch += batch_size
if self._index_in_epoch > self._num_examples:
# Finished epoch
self._epochs_completed += 1
# Shuffle the data
perm = numpy.arange(self._num_examples)
numpy.random.shuffle(perm)
self._images = self._images[perm]
self._labels = self._labels[perm]
# Start next epoch
start = 0
self._index_in_epoch = batch_size
assert batch_size <= self._num_examples
end = self._index_in_epoch
return self._images[start:end], self._labels[start:end]
def read_data_sets(train_dir, fake_data=False, one_hot=False):
class DataSets(object):
        pass  # pass is a no-op placeholder statement
data_sets = DataSets()
if fake_data:
        data_sets.train = DataSet([], [], fake_data=True)  # attach member objects directly on the instance
data_sets.validation = DataSet([], [], fake_data=True)
data_sets.test = DataSet([], [], fake_data=True)
return data_sets
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
VALIDATION_SIZE = 5000
local_file = maybe_download(TRAIN_IMAGES, train_dir)
train_images = extract_images(local_file)
local_file = maybe_download(TRAIN_LABELS, train_dir)
train_labels = extract_labels(local_file, one_hot=one_hot)
local_file = maybe_download(TEST_IMAGES, train_dir)
test_images = extract_images(local_file)
local_file = maybe_download(TEST_LABELS, train_dir)
test_labels = extract_labels(local_file, one_hot=one_hot)
validation_images = train_images[:VALIDATION_SIZE]
validation_labels = train_labels[:VALIDATION_SIZE]
train_images = train_images[VALIDATION_SIZE:]
train_labels = train_labels[VALIDATION_SIZE:]
data_sets.train = DataSet(train_images, train_labels)
data_sets.validation = DataSet(validation_images, validation_labels)
data_sets.test = DataSet(test_images, test_labels)
return data_sets
3. Note
tf.nn.softmax_cross_entropy_with_logits(logits, labels, name=None)
Apart from name, which names the op, the function takes two arguments:
logits: the output of the network's last layer; with a batch its shape is [batch_size, num_classes], for a single sample it is [num_classes].
labels: the ground-truth labels, with the same shape as logits.
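The fused op is the numerically stable equivalent of applying tf.nn.softmax and then computing the cross-entropy by hand; a minimal TF 1.x sketch with arbitrary example values:

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1]])
labels = tf.constant([[1.0, 0.0, 0.0]])
fused = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
manual = -tf.reduce_sum(labels * tf.log(tf.nn.softmax(logits)), axis=1)
with tf.Session() as sess:
    print(sess.run([fused, manual]))  # both approx. [0.417]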