1. Introduction
Handwritten digit recognition with generalized logistic regression (multinomial logistic / softmax regression) on the MNIST dataset.
Model:
$$P(Y=k \mid x) = \frac{\exp(w_k \cdot x)}{1 + \sum_{j=1}^{K-1} \exp(w_j \cdot x)}, \qquad k = 1, 2, \ldots, K-1$$
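For intuition, here is a minimal NumPy sketch of this model (the function name class_probabilities, the weight matrix W, and the sizes are illustrative, not part of the script below); class K acts as the reference class, so K classes need only K-1 weight vectors:

import numpy as np

def class_probabilities(W, x):
    # W: (K-1)-by-d weight matrix, x: d-dimensional feature vector
    scores = np.exp(W.dot(x))             # exp(w_k . x), k = 1, ..., K-1
    denom = 1.0 + scores.sum()            # 1 + sum_j exp(w_j . x)
    probs = scores / denom                # P(Y=k|x), k = 1, ..., K-1
    return np.append(probs, 1.0 / denom)  # last entry is P(Y=K|x)

W = 0.01 * np.random.randn(9, 784)  # K=10 digit classes -> 9 weight vectors
x = np.random.rand(784)             # one flattened 28*28 image
assert np.isclose(class_probabilities(W, x).sum(), 1.0)

The TensorFlow script below uses the equivalent over-parameterized softmax form, with one weight vector per class (a full 784-by-10 matrix).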
2. Source code
#coding:utf-8
import tensorflow as tf
import numpy as np
import input_data
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # use GPU device 0
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.01), name="weight_w")
    # stddev: standard deviation of the normal distribution
def model(X, w):  # computes X*w; X: N-by-784 (28*28), w: 784-by-10
return tf.matmul(X, w)
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) #load DataSet
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
print("trX.shape: %d-by-%d"%trX.shape)
print("trY.shape: %d-by-%d"%trY.shape)
print("teX.shape: %d-by-%d"%teX.shape)
print("teY.shape: %d-by-%d"%teY.shape)
log_dir = "mnist_logs"
# Summary helper (optional; you could also call tf.summary directly): record whatever statistics you want to inspect, with any extra computation you need.
def variable_summaries(var):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
with tf.name_scope('summaries'):
mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)  # arg1: tag for the visualized data; arg2: the tensor to visualize
with tf.name_scope('stddev'):
stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
tf.summary.scalar('stddev', stddev)
tf.summary.scalar('max', tf.reduce_max(var))
tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)  # record the variable's histogram with tf.summary.histogram
graph = tf.Graph()
with graph.as_default():
    input_X = tf.placeholder("float", [None, 784], name="input_X")  # 28*28=784; None leaves the number of samples unconstrained; each sample is a 784-dim vector
input_Y = tf.placeholder("float", [None, 10], name="input_Y")
global_step = tf.Variable(0, trainable=False)
with tf.name_scope('input_reshape'):
image_shaped_input = tf.reshape(input_X, [-1, 28, 28, 1])
tf.summary.image('input', image_shaped_input, 10)
    w = init_weights([784, 10])  # initialize the weight matrix w
print("w.shape:")
    print(w.get_shape().as_list())
variable_summaries(w)
py_x = model(input_X, w)
tf.summary.tensor_summary("predict", py_x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=input_Y))  # py_x: predicted logits, input_Y: true labels
    # tf.reduce_mean() averages the cross-entropy over the whole mini-batch
tf.summary.scalar("cost", cost)
    # instantiate a tf.train.GradientDescentOptimizer
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(cost, global_step=global_step)  # SGD with learning rate 0.05, minimizing cost
    predict_op = tf.argmax(py_x, 1)  # index of the largest logit per row, i.e. the predicted class (0-9)
    merged = tf.summary.merge_all()  # merge all summary ops so one run produces every summary
with tf.device("/cpu:0"):
saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
with tf.Session(graph=graph) as sess:  # launch the graph
    train_writer = tf.summary.FileWriter(log_dir + '/train', sess.graph)  # writes summary data to the event log
    tf.global_variables_initializer().run()  # initialize all variables
for i in range(100):
        # zip yields (start, end) index pairs (0,128), (128,256), ...; mini-batch size = 128
        for start, end in zip(range(0, len(trX), 128), range(128, len(trX)+1, 128)):
summary, _, step_id = sess.run([merged, optimizer, global_step],
feed_dict={input_X: trX[start:end],
input_Y: trY[start:end]})
train_writer.add_summary(summary, step_id)
        predict_results = sess.run(predict_op, feed_dict={input_X: teX})  # predict on the test set
        accuracy = np.mean(np.argmax(teY, axis=1) == predict_results)  # classification accuracy on the test set this epoch
        print("Epoch %d, accuracy: %f" % (i + 1, accuracy))
        print("global_step: %d" % step_id)
        saver.save(sess, 'run/checkpoint', global_step=global_step)  # saving the whole model with tf.train.Saver lets TensorBoard visualize every 2-D Variable in it
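Once training finishes, the summaries can be inspected with tensorboard --logdir=mnist_logs/train. The saved model can also be restored in a fresh session; a minimal sketch, assuming the checkpoint files were written under run/ by the loop above:

with tf.Session(graph=graph) as sess:
    # load the most recent checkpoint written by the training loop
    saver.restore(sess, tf.train.latest_checkpoint('run'))
    print(sess.run(predict_op, feed_dict={input_X: teX[:5]}))  # predicted digits for 5 test images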
# input_data.py
#encoding:utf-8
"""Functions for downloading and reading MNIST data."""
import gzip
import os
from six.moves.urllib.request import urlretrieve
import numpy
SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
def maybe_download(filename, work_directory):
    """Download the data from Yann's website, unless it's already here."""
    # docstrings can be inspected via functionname.__doc__
if not os.path.exists(work_directory):
os.mkdir(work_directory)
filepath = os.path.join(work_directory, filename)
if not os.path.exists(filepath):
filepath, _ = urlretrieve(SOURCE_URL + filename, filepath)
statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
return filepath
def _read32(bytestream):
dt = numpy.dtype(numpy.uint32).newbyteorder('>')
return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]
def extract_images(filename):
    """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
print('Extracting', filename)
with gzip.open(filename) as bytestream:
magic = _read32(bytestream)
if magic != 2051:
raise ValueError(
'Invalid magic number %d in MNIST image file: %s' %
(magic, filename))
num_images = _read32(bytestream)
rows = _read32(bytestream)
cols = _read32(bytestream)
buf = bytestream.read(rows * cols * num_images)
data = numpy.frombuffer(buf, dtype=numpy.uint8)
data = data.reshape(num_images, rows, cols, 1)
return data
def dense_to_one_hot(labels_dense, num_classes=10):
"""Convert class labels from scalars to one-hot vectors."""
num_labels = labels_dense.shape[0]
index_offset = numpy.arange(num_labels) * num_classes
labels_one_hot = numpy.zeros((num_labels, num_classes))
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
return labels_one_hot
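# A worked example of the flat-index trick above (values are illustrative):
# dense_to_one_hot(numpy.array([0, 2]), num_classes=3)
# num_labels=2, index_offset=[0, 3], flat indices [0, 5], giving
# [[1., 0., 0.],
#  [0., 0., 1.]]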
def extract_labels(filename, one_hot=False):
    """Extract the labels into a 1D uint8 numpy array [index]."""
print('Extracting', filename)
with gzip.open(filename) as bytestream:
magic = _read32(bytestream)
        if magic != 2049:  # raise throws an exception; statements after it in this branch never execute
raise ValueError(
'Invalid magic number %d in MNIST label file: %s' %
(magic, filename))
num_items = _read32(bytestream)
buf = bytestream.read(num_items)
labels = numpy.frombuffer(buf, dtype=numpy.uint8)
if one_hot:
return dense_to_one_hot(labels)
return labels
class DataSet(object):  # new-style classes in Python 2 must inherit from object
    def __init__(self, images, labels, fake_data=False):  # constructor, a magic method invoked automatically on instantiation
if fake_data:
            self._num_examples = 10000  # self refers to the instance, like this in C++, but must be written explicitly as the first parameter
            # _num_examples is an instance attribute
else:
            assert images.shape[0] == labels.shape[0], (  # assert raises AssertionError if the condition is false
"images.shape: %s labels.shape: %s" % (images.shape,
labels.shape))
self._num_examples = images.shape[0]
# Convert shape from [num examples, rows, columns, depth]
# to [num examples, rows*columns] (assuming depth == 1)
assert images.shape[3] == 1
images = images.reshape(images.shape[0],
images.shape[1] * images.shape[2])
# Convert from [0, 255] -> [0.0, 1.0].
images = images.astype(numpy.float32)
images = numpy.multiply(images, 1.0 / 255.0)
        self._images = images  # instance attribute
        self._labels = labels  # instance attribute
self._epochs_completed = 0
self._index_in_epoch = 0
    @property  # expose the method as a read-only attribute
def images(self):
return self._images
@property
def labels(self):
return self._labels
@property
def num_examples(self):
return self._num_examples
@property
def epochs_completed(self):
return self._epochs_completed
def next_batch(self, batch_size, fake_data=False):
"""Return the next `batch_size` examples from this data set."""
if fake_data:
            fake_image = [1.0 for _ in range(784)]
            fake_label = 0
            return ([fake_image for _ in range(batch_size)],
                    [fake_label for _ in range(batch_size)])
start = self._index_in_epoch
self._index_in_epoch += batch_size
if self._index_in_epoch > self._num_examples:
# Finished epoch
self._epochs_completed += 1
# Shuffle the data
perm = numpy.arange(self._num_examples)
numpy.random.shuffle(perm)
self._images = self._images[perm]
self._labels = self._labels[perm]
# Start next epoch
start = 0
self._index_in_epoch = batch_size
assert batch_size <= self._num_examples
end = self._index_in_epoch
return self._images[start:end], self._labels[start:end]
def read_data_sets(train_dir, fake_data=False, one_hot=False):
class DataSets(object):
        pass  # pass is a no-op placeholder statement
data_sets = DataSets()
if fake_data:
        data_sets.train = DataSet([], [], fake_data=True)  # attach member objects directly on the instance
data_sets.validation = DataSet([], [], fake_data=True)
data_sets.test = DataSet([], [], fake_data=True)
return data_sets
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
VALIDATION_SIZE = 5000
local_file = maybe_download(TRAIN_IMAGES, train_dir)
train_images = extract_images(local_file)
local_file = maybe_download(TRAIN_LABELS, train_dir)
train_labels = extract_labels(local_file, one_hot=one_hot)
local_file = maybe_download(TEST_IMAGES, train_dir)
test_images = extract_images(local_file)
local_file = maybe_download(TEST_LABELS, train_dir)
test_labels = extract_labels(local_file, one_hot=one_hot)
validation_images = train_images[:VALIDATION_SIZE]
validation_labels = train_labels[:VALIDATION_SIZE]
train_images = train_images[VALIDATION_SIZE:]
train_labels = train_labels[VALIDATION_SIZE:]
data_sets.train = DataSet(train_images, train_labels)
data_sets.validation = DataSet(validation_images, validation_labels)
data_sets.test = DataSet(test_images, test_labels)
return data_sets
3. Note
tf.nn.softmax_cross_entropy_with_logits(logits, labels, name=None)
Apart from name, which names the op, the function takes two arguments:
logits: the output of the network's last layer; with a batch its shape is [batch_size, num_classes], for a single sample it is [num_classes].
labels: the ground-truth labels, with the same shape as logits.
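The fused op is the numerically stable equivalent of applying tf.nn.softmax and then computing the cross-entropy by hand; a minimal TF 1.x sketch with arbitrary example values:

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1]])
labels = tf.constant([[1.0, 0.0, 0.0]])
fused = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
manual = -tf.reduce_sum(labels * tf.log(tf.nn.softmax(logits)), axis=1)
with tf.Session() as sess:
    print(sess.run([fused, manual]))  # both approx. [0.417]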