# Softmax regression
# Part 1: loading and inspecting the multi-class image-classification dataset (Fashion-MNIST)
from t import my_ones_packages as mop  # project-local helper module (plotting + data utilities)
from mxnet.gluon import data as gdata
import sys
import time
# Download (on first use) and load the two splits of Fashion-MNIST.
minist_train = gdata.vision.FashionMNIST(train=True)  # training split (train=True)
minist_test = gdata.vision.FashionMNIST(train=False)  # test split (train=False)
# l1 = len(minist_train)
# l2 = len(minist_test)
# print('l1_lenth',end='')
# print(l1)
# print('l2_lenth',end='')
# print(l2)
# Observed sizes: 60000 training samples, 10000 test samples:
# l1_lenth60000
# l2_lenth10000
feature, label = minist_train[0]  # first sample: (image, label) pair
# print(feature.shape)
# print(feature.dtype)
# # (28, 28, 1)
# # <class 'numpy.uint8'>
# print(label)
# print(type(label))
# print(label.dtype)
# # 2
# # <class 'numpy.int32'>
# # int32
def get_fashion_minist(labels):
    """Translate numeric Fashion-MNIST labels into human-readable class names."""
    class_names = ('t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot')
    return [class_names[int(label)] for label in labels]
def show_fashion_minist(images, labels):
    """Plot one row of Fashion-MNIST images with their text labels as titles."""
    mop.use_svg_display()
    # One row of sub-plots, one per image; the returned Figure object is unused.
    _, axes = mop.plt.subplots(1, len(images), figsize=(12, 12))
    for ax, image, title in zip(axes, images, labels):
        ax.imshow(image.reshape((28, 28)).asnumpy())
        ax.set_title(title)
        # Hide both axis tick bars -- we only want the pictures.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
# X, y = minist_train[0:9]
# show_fashion_minist(X, get_fashion_minist(y))
# mop.plt.show()  # display the images
# Read the data in mini-batches.
batch_size = 256
# ToTensor converts raw images into tensors suitable for training -- see MXNet docs.
transformer = gdata.vision.transforms.ToTensor()
if sys.platform.startswith('win'):
    num_workers = 0  # NOTE(review): no extra data-loading worker processes on Windows
else:
    num_workers = 4
train_iter = gdata.DataLoader(minist_train.transform_first(transformer),
                              batch_size, shuffle=True, num_workers=num_workers)
test_iter = gdata.DataLoader(minist_test.transform_first(transformer),
                             batch_size, shuffle=False, num_workers=num_workers)
# # Time one full pass over the training data:
# start = time.time()
# for X, y in train_iter:
#     continue
# print('%.2f sec'%(time.time() - start))
# # 6.39 sec
# Part 2: softmax regression implemented from scratch (detailed version)
import t.my_ones_packages as mop
from mxnet import nd,autograd
'''获取数据'''
batch_size = 256  # mini-batch size: 256 samples per batch
train_iter, test_iter = mop.load_data_fashion_mnist(batch_size)  # Fashion-MNIST data iterators
# print(train_iter)
'''<mxnet.gluon.data.dataloader.DataLoader object at 0x000001C7B8159308>'''
# print(len(train_iter))
'''最后显示数据集中有60000个样本
实际是小批量样本大小batch_size 256 * 234 + 96 得到的 最后一个不足256没有丢弃 证明DataLoader函数的参数drop_last默认为False
235'''
# print(test_iter)
'''<mxnet.gluon.data.dataloader.DataLoader object at 0x000001E77E932408>'''
# print(len(test_iter))
'''40'''
# Initialize the model parameters.
num_inputs = 784  # each image is 28*28 pixels, flattened to a 784-vector
num_outputs = 10  # ten clothing classes (see the text-label list above)
# W drawn from a normal distribution (mean 0, std 0.01); b starts at zero.
W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))  # weights, shape (784, 10)
b = nd.zeros(num_outputs)  # bias vector, length 10
# Allocate gradient storage so autograd can write into W.grad / b.grad.
W.attach_grad()
b.attach_grad()
# X = nd.array([[1,2,3], [4,5,6]])
#
# print(X.sum(axis=0, keepdims=True))
# # [[5. 7. 9.]]
# # <NDArray 1x3 @cpu(0)>
# print(X.sum(axis=1, keepdims=True))
# # [[ 6.]
# # [15.]]
# # <NDArray 2x1 @cpu(0)>
def softmax(X):
    """Row-wise softmax: turn each row of X into a probability distribution.

    NOTE(review): no max-subtraction is done, so very large inputs can
    overflow exp() -- same limitation as the original.
    """
    exps = X.exp()                                  # element-wise exponential
    row_totals = exps.sum(axis=1, keepdims=True)    # per-row normalizer
    return exps / row_totals                        # broadcasts over rows
# X = nd.random.normal(shape=(2,5))
# X_prob = softmax(X)
# print(X_prob)
# # [[0.6264712 0.126293 0.01826552 0.10885343 0.12011679]
# # [0.25569436 0.2917251 0.07546549 0.3024068 0.07470828]]
# # <NDArray 2x5 @cpu(0)>
# print(X_prob.sum(axis=1))
# # [0.99999994 1. ]
# # <NDArray 2 @cpu(0)>
# Define the model.
def net(X):
    """Softmax-regression model: flatten each image, apply the affine map W,b, then softmax."""
    flattened = X.reshape((-1, num_inputs))  # (batch, 784)
    return softmax(nd.dot(flattened, W) + b)
# Toy example: predicted class probabilities for two samples over three classes.
y_hat = nd.array([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
# [[0.1 0.3 0.6]
# [0.3 0.2 0.5]]
# <NDArray 2x3 @cpu(0)>
y = nd.array([0, 2], dtype='int32')  # true class labels for the two samples
# [0 2]
# <NDArray 2 @cpu(0)>
# Define the loss function.
def cross_entropy(y_hat, y):
    """Cross-entropy loss: negative log of the probability predicted for the true class."""
    # nd.pick selects, per sample, the probability at the index given by y.
    picked = nd.pick(y_hat, y)
    return -picked.log()
# #计算分类准确率
# def accuracy(y_hat, y):
# return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()
#
# #分类准确率为0.5
# # print(accuracy(y_hat,y))#0.5
def evaluate_accuracy(data_iter, net):
    """Return the fraction of samples in data_iter that net classifies correctly."""
    correct, total = 0.0, 0
    for X, y in data_iter:
        # Cast labels to float32 so they compare cleanly with argmax output.
        labels = y.astype('float32')
        # Count batch samples whose highest-scoring class matches the label.
        correct += (net(X).argmax(axis=1) == labels).sum().asscalar()
        total += labels.size  # batch size (last batch may be smaller)
    return correct / total
# print(evaluate_accuracy(test_iter, net))  # ~0.0856 with random init (chance level is 0.1)
num_epochs = 5  # number of passes over the training data
lr = 0.1  # learning rate for mini-batch SGD
# train_ch3 signature: (net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr, trainer)
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params = None, lr = None, trainer = None):
    """Train `net` for `num_epochs` epochs, printing loss/accuracy after each epoch.

    Updates either via manual mini-batch SGD (when `trainer` is None, using
    `params` and `lr` through mop.sgd) or via the supplied Gluon `trainer`.
    """
    for epochs in range(num_epochs):
        # Per-epoch running totals: summed loss, correct predictions, samples seen.
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with autograd.record():  # record the forward pass for autodiff
                y_hat = net(X)
                l = loss(y_hat, y).sum()  # total loss over this mini-batch
            l.backward()  # back-propagate to populate parameter gradients
            if trainer is None:
                mop.sgd(params, lr, batch_size)  # manual mini-batch SGD step
            else:
                trainer.step(batch_size)  # Gluon optimizer step, scaled by the batch size
            y = y.astype('float32')  # cast labels so they compare with argmax output
            train_l_sum += l.asscalar()  # asscalar(): size-1 NDArray -> Python scalar
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net)  # accuracy on the held-out test set
        print('epoch %d, loss %f, train acc %f, test acc %f'%(epochs + 1, train_l_sum / n, train_acc_sum / n, test_acc))
# With 5 epochs the test accuracy settles around 84.0%-84.5%;
# with 25 epochs around 85.4%-85.6%.
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)
# Grab the first mini-batch of the test set for visualization.
for X, y in test_iter:
    break
true_labels = mop.get_fashion_mnist_labels(y.asnumpy())
pred_labels = mop.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy())
# Title each image with "true-label\npredicted-label".
titles = [true + '\n' + pred for true,pred in zip(true_labels, pred_labels)]
# Notes on how zip() behaves:
'''zip(x)函数只有一个参数时, 从迭代器x依次选取一个元组,组成一个元组
a = [1, 2, 3, 4]
z = zip(a)
print(z)
print(list(z))
# <zip object at 0x000001D6EFFD57C8>
# [(1,), (2,), (3,), (4,)]
'''
'''当zip()函数有两个参数时,zip(a,b)函数分别从a和b中取一个元素组成元组,再次将组成的元组组合成一个列表。
a与b的维数相同时,正常组合对应位置的元素。
a = [1, 2, 3, 4]
b = [9, 7, 8, 5]
z = zip(a,b)
print(z)
print(list(z))
# <zip object at 0x000002C3F4217908>
# [(1, 9), (2, 7), (3, 8), (4, 5)]
m = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
n = [[1, 1, 1], [2, 2, 3], [3, 3, 3]]
p = [[1, 1, 1], [2, 2, 2]]
z1 = zip(m,n)
print(z1)
print(list(z1))
# <zip object at 0x000001CC4B457BC8>
# [([1, 2, 3], [1, 1, 1]), ([4, 5, 6], [2, 2, 3]), ([7, 8, 9], [3, 3, 3])]
当a与b行或列数不同时,取两者中的最小的行列数。
z2 = zip(m,p)
print(z2)
print(list(z2))
# <zip object at 0x000001CC4B457CC8>
# [([1, 2, 3], [1, 1, 1]), ([4, 5, 6], [2, 2, 2])]
'''
# Show the first 9 test images with true/predicted labels.
mop.show_fashion_mnist(X[0:9], titles[0:9])
mop.plt.show()
# Part 3: concise implementation of softmax regression (using Gluon)
import t.my_ones_packages as mop
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn
'''小批量样本大小为256'''
batch_size = 256
'''获取数据集的数据'''
train_iter, test_iter = mop.load_data_fashion_mnist(batch_size)
'''
len(train_iter) = 256
len(test_iter) = 40
'''
'''Sequential是一个容器 , 当给定输入数据时,将第一个数据的输出作为下一个的输入'''
net = nn.Sequential()  # sequential container: each layer's output feeds the next layer
'''添加一个输出层 输出单元个数为 10'''
net.add(nn.Dense(10))  # single dense output layer: 10 units, one per class
net.initialize(init.Normal(sigma=0.01))  # weights drawn from Normal(0, 0.01)
'''softmax和交叉熵损失函数'''
'''Gluon提供了一个包括softmax运算和交叉熵损失计算的函数。它的数值稳定性更好'''
loss = gloss.SoftmaxCrossEntropyLoss()  # fused softmax + cross-entropy, numerically more stable
'''优化算法'''
'''
gluon.Trainer(net.collect.params , optimizer , optimizer_parameter)是注册优化器函数
net.collect.params是一个类似字典的类型
optiizer是优化器的名字
optimizer_parameter是字典类型,用于设置优化器参数
'''
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})  # SGD, lr = 0.1
'''训练模型'''
num_epoch = 25  # number of training epochs
mop.train_ch3(net, train_iter, test_iter, loss, num_epoch, batch_size, None, None, trainer)
# train_ch3(net, train_iter, test_iter, loss, num_epoch, batch_size, params, lr, trainer)