数据集简介:MNIST数据集,60000个样本用于训练,10000个样本用于测试,每一个样本都是28*28点阵图,共784个像素点(pixel),每个像素点的值从0-255。 每个样本的784个像素点的值就是特征值(feature),每个样本的数字就是标签(label)。
第一种方法,一步步构建神经网络,不使用Tensorflow和Keras。
使用csv格式的数据文件,每一行代表一个样本,train用的共60000行,每一行由一个label+784个features组成,也就是共785列。
代码如下:
# DumpLocalMnistData.py
# Preprocess the raw MNIST CSV files and serialize them to one pickle file.
import numpy as np
import pickle

# Each MNIST image is a 28x28 grid, i.e. 784 pixel features per sample.
image_size = 28
image_pixels = image_size * image_size
data_path = 'mnistdata/'
# Each CSV row is one sample: the label followed by 784 pixel values (0-255).
train_data = np.loadtxt(data_path + 'mnist_train.csv',
                        delimiter=',')
test_data = np.loadtxt(data_path + 'mnist_test.csv',
                       delimiter=',')

# Rescale pixel values from [0, 255] into [0.01, 1.0] so no feature is
# exactly zero (a zero input would make its weight update vanish).
frac = 0.99 / 255
# NOTE: np.asfarray was removed in NumPy 2.0 and np.float in NumPy 1.24;
# np.asarray(..., dtype=float) / builtin float are the supported spellings.
train_imgs = np.asarray(train_data[:, 1:], dtype=float) * frac + 0.01
test_imgs = np.asarray(test_data[:, 1:], dtype=float) * frac + 0.01
train_labels = np.asarray(train_data[:, :1], dtype=float)  # first column only
test_labels = np.asarray(test_data[:, :1], dtype=float)

lr = np.arange(10)  # the digit values 0-9
# Convert labels to one-hot representation, e.g. 0 => [1 0 0 0 0 0 0 0 0 0].
train_labels_one_hot = (lr == train_labels).astype(float)
test_labels_one_hot = (lr == test_labels).astype(float)
# Squash targets into (0.01, 0.99): a sigmoid output can never reach exactly
# 0 or 1, so exact 0/1 targets would push the weights toward infinity.
train_labels_one_hot[train_labels_one_hot == 0] = 0.01
train_labels_one_hot[train_labels_one_hot == 1] = 0.99
test_labels_one_hot[test_labels_one_hot == 0] = 0.01
test_labels_one_hot[test_labels_one_hot == 1] = 0.99

# Persist everything with pickle as a single binary file so later runs
# can reload the preprocessed data very quickly.
with open('mnistdata/pickled_mnist.pkl', 'wb') as fh:
    data = (train_imgs,
            test_imgs,
            train_labels,
            test_labels,
            train_labels_one_hot,
            test_labels_one_hot)
    pickle.dump(data, fh)
#Ann_HandWritten.py
import numpy as np
import pickle
# Sigmoid activation: maps any real input into (0, 1).
# np.exp is already element-wise on ndarrays, so the original
# @np.vectorize wrapper (which runs a Python-level loop over every
# element) is unnecessary and only slowed each forward/backward pass.
def sigmoid(x):
    """Return 1 / (1 + e**-x), element-wise for array inputs."""
    return 1 / (1 + np.exp(-x))

activation_function = sigmoid
# scipy.stats.truncnorm expects its cut-off points a/b expressed in units
# of the standard deviation; this wrapper lets callers pass plain bounds.
from scipy.stats import truncnorm

def truncated_normal(mean = 0,sd = 1, low = 0, upp = 10):
    """Return a frozen truncated-normal distribution supported on [low, upp]."""
    a = (low - mean) / sd
    b = (upp - mean) / sd
    return truncnorm(a, b, loc = mean, scale = sd)
# Definition of the artificial neural network (ANN) with one hidden layer.
class NeuralNetwork:
    """A minimal fully-connected network: input -> hidden -> output.

    Trained one sample at a time by gradient descent, using the global
    sigmoid ``activation_function`` on both layers.
    """

    def __init__(self,
                 no_of_in_nodes,
                 no_of_out_nodes,
                 no_of_hidden_nodes,
                 learning_rate):
        # Record the topology and the gradient-descent step size.
        self.no_of_in_nodes = no_of_in_nodes
        self.no_of_out_nodes = no_of_out_nodes
        self.no_of_hidden_nodes = no_of_hidden_nodes
        self.learning_rate = learning_rate
        # Initialize both weight matrices as part of construction.
        self.create_weight_matrices()

    def create_weight_matrices(self):
        """Draw initial weights from a truncated normal distribution."""
        # Bound the initial weights by 1 / sqrt(fan-in of the layer).
        rad = 1 / np.sqrt(self.no_of_in_nodes)
        X = truncated_normal(mean = 0,
                             sd = 1,
                             low = -rad,
                             upp = rad)
        # Weight matrix between input layer and hidden layer.
        self.wih = X.rvs((self.no_of_hidden_nodes, self.no_of_in_nodes))
        # Weight matrix between hidden layer and output layer.
        rad = 1 / np.sqrt(self.no_of_hidden_nodes)
        X = truncated_normal(mean = 0,
                             sd = 1,
                             low = -rad,
                             upp = rad)
        self.who = X.rvs((self.no_of_out_nodes, self.no_of_hidden_nodes))

    # Training: one forward propagation followed by one back propagation.
    def train(self, input_vector, target_vector):
        """Run one stochastic-gradient-descent step on a single sample."""
        # Turn both vectors into column vectors so the dot products below work.
        input_vector = np.array(input_vector, ndmin = 2).T
        target_vector = np.array(target_vector, ndmin = 2).T
        # Forward pass, input -> hidden: dot product then activation.
        output_vector1 = np.dot(self.wih, input_vector)
        output_hidden = activation_function(output_vector1)
        # Forward pass, hidden -> output.
        output_vector2 = np.dot(self.who, output_hidden)
        output_network = activation_function(output_vector2)
        # Error signal: target minus network output.
        output_errors = target_vector - output_network
        # Back propagation: update the weights by gradient descent.
        # output * (1 - output) is the derivative of the sigmoid.
        tmp = output_errors * output_network * \
              (1.0 - output_network )
        tmp = self.learning_rate * np.dot(tmp, output_hidden.T)
        # New hidden-to-output weight matrix who.
        self.who += tmp
        # Propagate the output error back to the hidden layer.
        # NOTE(review): this uses the *already updated* who; textbook
        # backprop uses the pre-update weights here — kept as in the
        # original tutorial, verify intent before changing.
        hidden_errors = np.dot(self.who.T, output_errors)
        # Update the input-to-hidden weight matrix wih.
        tmp = hidden_errors * output_hidden * (1.0 - output_hidden)
        self.wih += self.learning_rate * np.dot(tmp, input_vector.T)

    # Prediction: forward pass only, no weight updates.
    def run(self, input_vector):
        """Return the network's output vector for one input sample."""
        input_vector = np.array(input_vector, ndmin=2).T
        output_vector = np.dot(self.wih, input_vector)
        output_vector = activation_function(output_vector)
        output_vector = np.dot(self.who, output_vector)
        output_vector = activation_function(output_vector)
        return output_vector

    def confusion_matrix(self, data_array, labels):
        """Build a 10x10 confusion matrix.

        Rows are indexed by the predicted digit (argmax of run()) and
        columns by the true label, per cm[res_max, int(target)].
        """
        cm = np.zeros((10, 10), int)
        for i in range(len(data_array)):
            res = self.run(data_array[i])
            res_max = res.argmax()
            target = labels[i][0]
            cm[res_max, int(target)] += 1
        return cm

    def precision(self, label, confusion_matrix):
        """Diagonal entry divided by the column sum for `label`.

        NOTE(review): with rows = predicted and columns = true labels,
        the column sum counts all samples whose TRUE label is `label`,
        which is conventionally the *recall* — verify the naming.
        """
        col = confusion_matrix[:, label]
        return confusion_matrix[label,label] / col.sum()

    def recall(self, label,confusion_matrix):
        """Diagonal entry divided by the row sum for `label`.

        NOTE(review): the row sum counts all samples PREDICTED as
        `label`, which is conventionally the *precision* — verify.
        """
        row = confusion_matrix[label, :]
        return confusion_matrix[label,label] / row.sum()

    def evaluate(self, data, labels):
        """Count correct and wrong classifications over a dataset."""
        corrects, wrongs = 0, 0
        for i in range(len(data)):
            res = self.run(data[i])
            res_max = res.argmax()
            if res_max == labels[i]:
                corrects += 1
            else:
                wrongs += 1
        return corrects, wrongs
# Restore the preprocessed data from the pickle file written by
# DumpLocalMnistData.py.
with open('mnistdata/pickled_mnist.pkl', 'br') as fh:
    data = pickle.load(fh)
train_imgs = data[0]
test_imgs = data[1]
train_labels = data[2]
test_labels = data[3]
train_labels_one_hot = data[4]
test_labels_one_hot = data[5]

image_size = 28
no_of_different_labels = 10
image_pixels = image_size * image_size

# Build the network: 784 inputs, 100 hidden nodes, 10 outputs.
ANN = NeuralNetwork(no_of_in_nodes = image_pixels,
                    no_of_out_nodes = 10,
                    no_of_hidden_nodes = 100,
                    learning_rate = 0.1)

# Train for a single epoch (one pass over the training set).
# (Fixed typo in the printed label: 'train_imags' -> 'train_imgs'.)
print('len(train_imgs) before learning', len(train_imgs))
for i in range(len(train_imgs)):
    ANN.train(train_imgs[i], train_labels_one_hot[i])

# Spot-check predictions on the first 20 test samples.
for i in range(20):
    res = ANN.run(test_imgs[i])
    print('test_labels[i], argmax, max, i: ', test_labels[i], np.argmax(res), np.max(res), i)

# Overall accuracy on the training and test sets.
corrects, wrongs = ANN.evaluate(train_imgs, train_labels)
print('accuracy train:', corrects / (corrects + wrongs))
corrects, wrongs = ANN.evaluate(test_imgs, test_labels)
print('accuracy test:', corrects / (corrects + wrongs))

# Confusion matrix plus per-digit precision/recall.
cm = ANN.confusion_matrix(train_imgs, train_labels)
print(cm)
for i in range(10):
    print('digit:', i, 'precision', ANN.precision(i, cm), 'recall:', ANN.recall(i, cm))
运行结果:
第二种方法,使用Tensorflow和Keras的深度学习框架,使代码简单明了。
代码如下:
# Ann_KerasTf.py
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import utils
import matplotlib.pyplot as plt

# Load the MNIST dataset shipped with Keras.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
# Flatten the 28*28 images into 784-element vectors.
num_pixels = X_train.shape[1] * X_train.shape[2]
X_train = X_train.reshape((X_train.shape[0], num_pixels)).astype('float32')
X_test = X_test.reshape((X_test.shape[0], num_pixels)).astype('float32')
# Normalize the inputs from 0-255 to 0-1.
X_train = X_train / 255.0
X_test = X_test /255.0
# One-hot encode the labels.
y_train = utils.to_categorical(y_train)
y_test = utils.to_categorical(y_test)
num_classes = y_test.shape[1]
def baseline_model():
    """Build and compile the reference feed-forward MNIST classifier."""
    # Two dense layers: a ReLU hidden layer and a softmax output layer.
    hidden_layer = Dense(100, input_dim = num_pixels, kernel_initializer = 'normal', activation = 'relu')
    output_layer = Dense(num_classes, kernel_initializer = 'normal', activation = 'softmax')
    model = Sequential()
    model.add(hidden_layer)
    model.add(output_layer)
    # Compile with cross-entropy loss and the Adam optimizer, tracking accuracy.
    # Other available optimizers: SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl.
    model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])
    return model
# Build the model.
model = baseline_model()
# Fit the model: 10 epochs, mini-batches of 100, one progress line per epoch.
model.fit(X_train, y_train, validation_data = (X_test, y_test), epochs = 10, batch_size = 100, verbose = 2)
# Final evaluation of the model; scores is [loss, accuracy] on the test set.
scores = model.evaluate(X_test, y_test, verbose = 0)
print('scores', scores)
print('Baseline Error: %.3f%%' % (100-scores[1]*100))
运行结果:
注:代码来自于B站up主ironwire关于ANN的讲解视频。