文章目录
一.波士顿房价预测
最终测试集上的RMSE为3.0702387288867223
- 下载数据集:boston 房价数据集已从新版 scikit-learn(1.2 起)中移除,所以这里改为从网上下载的文本文件读取
import numpy as np
import pandas as pd
import sklearn
import torch
import torch.utils.data as Data
import random
# Load the dataset (whitespace-separated text file).
# FIX: use a raw string for the regex separator — '\s+' is an invalid
# escape sequence and triggers a SyntaxWarning on newer Python versions.
dataset = pd.read_table('boston.txt', sep=r'\s+')
dataset = dataset.values
# Split into train/test with an 8:2 ratio.
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = Data.random_split(dataset, [train_size, test_size])
# random_split returns Subset views; materialize them as arrays, then
# convert to float32 tensors for training.
train_dataset = np.array(train_dataset)
test_dataset = np.array(test_dataset)
train_dataset = torch.tensor(train_dataset, dtype=torch.float32)
test_dataset = torch.tensor(test_dataset, dtype=torch.float32)
print(train_size, test_size)
- 数据归一化:我用的Z-score标准化
# Z-score standardization of the features.
# The first 13 columns are features, column 13 is the target (house price).
train_features=train_dataset[:,:13]
train_labels=train_dataset[:,13]
test_features=test_dataset[:,:13]
test_labels=test_dataset[:,13]
train_mean=train_features.mean(axis=0).reshape(1,13)
train_std=train_features.std(axis=0).reshape(1,13)
train_features=(train_features-train_mean)/train_std
# FIX: standardize the test set with the *training* statistics.
# Normalizing the test set with its own mean/std leaks test-distribution
# information and feeds the model inputs on a different scale than it
# was trained on.
test_features=(test_features-train_mean)/train_std
- 定义所需的函数:数据迭代器,线性回归模型,均方差损失函数,随机梯度下降函数
def data_iter(batch_size, features, labels):
    """Yield shuffled minibatches of (features, labels)."""
    total = len(features)
    order = list(range(total))
    random.shuffle(order)
    # Slicing clamps at the end, so the final batch may be smaller.
    for start in range(0, total, batch_size):
        chosen = torch.tensor(order[start:start + batch_size])
        yield features[chosen], labels[chosen]
def LinearRegression(X, w, b):
    """Affine model: return X·w + b (the linear-regression prediction)."""
    return X @ w + b
def squared_loss(y_hat, y):
    """Elementwise halved squared error: (y_hat - y)^2 / 2."""
    diff = y_hat - y.reshape(y_hat.shape)
    return diff ** 2 / 2
def sgd(params, lr, batch_size):
    """In-place minibatch SGD step; gradients are averaged over the batch."""
    with torch.no_grad():
        for p in params:
            p -= lr * p.grad / batch_size
            # Clear the gradient so the next backward() starts fresh.
            p.grad.zero_()
- 设置基本参数并直接开始优化w,b:
lr=0.01 #learning rate
num_epochs=3 #number of full passes over the training set
batch_size=10 #minibatch size
net=LinearRegression
loss=squared_loss #squared-error loss for the linear regression model
#initialize w (random Gaussian) and b (zeros)
w=torch.normal(0,1,(13,1),requires_grad=True)
b=torch.zeros(1,requires_grad=True)
loss_y=[]#records the per-epoch training loss for plotting
for epoch in range(num_epochs):
    for X,y in data_iter(batch_size,train_features,train_labels):
        y_hat=net(X,w,b)
        l=loss(y_hat,y)
        # sum the per-element losses so backward() receives a scalar
        l.sum().backward()
        sgd((w,b),lr,batch_size)
    with torch.no_grad():#after each full pass, evaluate the loss over the whole training set
        train_l = loss(net(train_features, w, b),train_labels)
        loss_y.append(train_l.detach().sum())
        print(f'epoch {epoch + 1}, loss {float(train_l.sum())}')
jupyter notebook 上的输出
- 画损失函数曲线图并计算在测试集上的RMSE
# Plot the recorded per-epoch training loss.
import matplotlib.pyplot as plt
plt.plot(range(len(loss_y)),loss_y,label='the sum of loss on train_dataset')
plt.legend()
plt.xlabel('epoch')
plt.show()
在测试集上RMSE为3.0702387288867223
import math
# FIX: squared_loss already divides the squared error by 2, so
# sqrt(mean(loss)) understates the true RMSE by a factor of sqrt(2).
# Undo the halving first: RMSE = sqrt(mean((y_hat - y)^2)).
RMSE=math.sqrt((2 * loss(net(test_features, w, b),test_labels)).mean())
print(RMSE)
二.mnist手写数字数据集单层
accuracy一直在0.9056左右
- 预处理
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
import matplotlib.pyplot as plt
# Download MNIST (train and test splits). ToTensor converts each image
# to a float tensor scaled into [0, 1].
trans=transforms.ToTensor()
mnist_train=torchvision.datasets.MNIST(
    root='./data',train=True,transform=trans,download=True
)
mnist_test=torchvision.datasets.MNIST(
    root='./data',train=False,transform=trans,download=True
)
#根据label获取名称的函数,对这个数字数据集有点多此一举但是还是写上
def get_fashion_mnist_labels(labels):
    """Map numeric class indices to their text labels ('0'..'9')."""
    names = [str(d) for d in range(10)]
    return [names[int(label)] for label in labels]
#画图的函数,用于直观看预测值的和真实的区别,把预测值和真实值的列表加入titles参数中
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    """Plot a grid of images (tensors or PIL images) with optional titles.

    NOTE(review): this function is redefined later in the file with an
    extra subplots_adjust call; the later definition wins at runtime.
    """
    figsize = (num_cols * scale, num_rows * scale)
    fig, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # tensor image
            ax.imshow(img.numpy())
        else:
            # PIL image
            ax.imshow(img)
        # hide the tick marks — these are pictures, not charts
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes
def get_dataloader_workers():
    """Number of worker processes used by the DataLoaders."""
    return 4
#获取训练集和测试集的迭代器
def load_data_fashion_mnist(batch_size, resize=None):
"""下载Fashion-MNIST数据集,然后将其加载到内存中"""
trans = [transforms.ToTensor()]
if resize:
trans.insert(0, transforms.Resize(resize))
trans = transforms.Compose(trans)
return (data.DataLoader(mnist_train, batch_size, shuffle=False,
num_workers=get_dataloader_workers()),
data.DataLoader(mnist_test, batch_size, shuffle=False,
num_workers=get_dataloader_workers()))
#定义softmax函数模型
def softmax(Z):
Z=Z-Z.max(dim=1,keepdims=True).values
Z_exp=torch.exp(Z)
partition=Z_exp.sum(dim=1,keepdims=True)
return Z_exp/partition
def net(X):
    """Softmax regression: flatten X to (batch, num_inputs), apply X·W + b, softmax."""
    flat = X.reshape(-1, W.shape[0])
    return softmax(torch.matmul(flat, W) + b)
#定义交叉熵损失函数
def cross_entropy(y_hat,y):
return -torch.log(y_hat[range(len(y_hat)),y])
def sgd(params, lr, batch_size):
    """One SGD step: param -= lr * grad / batch_size, then reset grads."""
    with torch.no_grad():
        for parameter in params:
            parameter.sub_(lr * parameter.grad / batch_size)
            parameter.grad.zero_()
def updater(batch_size,lr):
    """Apply one SGD step to the global parameters W and b."""
    return sgd([W,b],lr,batch_size)
#定义计算预测精度的函数,获取预测对样本的数量
def accuracy(y_hat,y):
y_hat=y_hat.argmax(axis=1)
cmp=y_hat.type(y.dtype).reshape(y.shape) == y
return float(cmp.type(y.dtype).sum())
# Whole training set as one tensor, used for full-dataset accuracy checks.
# FIX: divide by 255 so these raw uint8 pixels match the [0, 1] scale that
# ToTensor produces in the DataLoader pipeline the model trains on —
# evaluating on a different input scale than training is inconsistent.
train_features=mnist_train.data.type(torch.float32)/255.0
train_labels=mnist_train.targets
num_inputs = 784#28*28
num_outputs = 10
# Initialize W with small Gaussian noise and b with zeros.
W = torch.normal(0, 0.01, size=(num_inputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)
- 开搞
batch_size=256
lr=0.1#learning rate
loss=cross_entropy
# Plotting arrays: per-batch loss plus accuracy on the train and test sets.
loss_show=[]
ac_train_show=[]
ac_test_show=[]
# FIX: scale the raw uint8 pixels into [0, 1] so evaluation inputs match
# the ToTensor pipeline the model is trained on.
test_features=mnist_test.data.type(torch.float32)/255.0
test_labels=mnist_test.targets
num_epoch=10
for epoch in range(num_epoch):
    # Rebuild the DataLoaders each epoch.
    train_iter,test_iter=load_data_fashion_mnist(batch_size)
    with torch.no_grad():
        # accuracy over the whole training set before this epoch's updates
        ac_train_show.append(accuracy(net(train_features),train_labels)/len(train_labels))
    for X,y in train_iter:
        y_hat=net(X)
        l=loss(y_hat,y)
        loss_show.append(l.detach().sum())
        # sum per-sample losses into a scalar before backprop
        l.sum().backward()
        updater(batch_size,lr)
    with torch.no_grad():
        # accuracy over the whole test set after the epoch
        ac_test_show.append(accuracy(net(test_features),test_labels)/len(test_labels))
- 画图看看
# Two stacked panels: per-batch loss on top, accuracy curves below.
fig,axes=plt.subplots(2,1,figsize=(10,10))
ax=axes.flatten()
ax[0].plot(range(len(loss_show)),loss_show,label='mean loss')
ax[0].set_xlabel('iter')
ax[0].legend()
ax[1].plot(range(len(ac_train_show)),ac_train_show,'b--',label='accuracy on train_dataset')
ax[1].plot(range(len(ac_test_show)),ac_test_show,'r-',label='accuracy on test_dataset')
ax[1].set_xlabel('epoch')
ax[1].legend()
- 直观地通过图像查看
import math
# Fresh iterators so prediction uses a clean first batch.
train_iter,test_iter=load_data_fashion_mnist(batch_size)
def predict_ch3(net, test_iter, n=20): #@save
    """Show the first n test images with their true and predicted labels."""
    X, y = next(iter(test_iter))
    trues = get_fashion_mnist_labels(y)
    preds = get_fashion_mnist_labels(net(X).argmax(axis=1))
    titles = ['true:' + t + '\n' + 'pred:' + p for t, p in zip(trues, preds)]
    show_images(
        X[0:n].reshape((n, 28, 28)), math.ceil(n / 6), 6, titles=titles[0:n])
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5): #@save
    """Plot a grid of images (tensors or PIL images) with optional titles.

    NOTE(review): redefines the earlier show_images; the only difference
    is the subplots_adjust spacing tweak at the end.
    """
    figsize = (num_cols * scale, num_rows * scale)
    fig, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # tensor image
            ax.imshow(img.numpy())
        else:
            # PIL image
            ax.imshow(img)
        # hide the tick marks — these are pictures, not charts
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    # extra vertical space so the two-line titles don't overlap the rows
    fig.subplots_adjust(hspace=0.5)
    return axes
# Visualize predictions for the first 20 test images.
predict_ch3(net, test_iter,20)
在测试集前二十张图中,只有一张"5"被识别成了6
三.用PyTorch中的torch.nn模块构建模型来简化
最后测试集上accuracy为0.9045,和不用torch.nn差不多
- cross_entropy函数和sgd都直接用torch自带的,用nn.Sequential构建网络
import torch
from torch import nn
# Single-layer softmax regression built with torch.nn.
net=nn.Sequential(nn.Flatten(),nn.Linear(784,10))
def init_weights(m):
    """Initialize every Linear layer's weights from N(0, 0.01^2)."""
    # FIX: isinstance is the idiomatic type check (type(m) == nn.Linear
    # would also miss subclasses).
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight,std=0.01)
net.apply(init_weights)
# reduction='none' keeps per-sample losses; we reduce explicitly in the loop.
loss=nn.CrossEntropyLoss(reduction='none')
trainer=torch.optim.SGD(net.parameters(),lr=0.1)
batch_size=256
num_epoch=10
updater=trainer
# Plotting arrays: per-batch loss and accuracy curves.
loss_show=[]
ac_train_show=[]
ac_test_show=[]
for epoch in range(num_epoch):
    train_iter,test_iter=load_data_fashion_mnist(batch_size)
    for X,y in train_iter:
        y_hat=net(X)
        l=loss(y_hat,y)
        loss_show.append(l.detach().mean())
        if isinstance(updater, torch.optim.Optimizer):
            # Built-in optimizer path: zero grads, backprop the mean loss, step.
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # Hand-rolled updater: backprop the sum; sgd divides by batch_size.
            l.sum().backward()
            updater(batch_size)
        with torch.no_grad():
            ac_train_show.append(accuracy(y_hat,y)/len(y))
    # FIX: the test-accuracy curve previously appended accuracy(y_hat, y) —
    # the last *training* batch — so both curves were identical and the test
    # set was never evaluated. Score the real test set once per epoch.
    with torch.no_grad():
        correct = 0.0
        total = 0
        for X_test, y_test in test_iter:
            correct += accuracy(net(X_test), y_test)
            total += len(y_test)
        ac_test_show.append(correct / total)
- 画图看变化趋势
# Two stacked panels: per-batch mean loss on top, accuracy curves below.
fig,axes=plt.subplots(2,1,figsize=(10,10))
ax=axes.flatten()
ax[0].plot(range(len(loss_show)),loss_show,label='mean loss')
ax[0].set_xlabel('iter')
ax[0].legend()
ax[1].plot(range(len(ac_train_show)),ac_train_show,'b--',label='accuracy on train_dataset')
ax[1].plot(range(len(ac_test_show)),ac_test_show,'r-',label='accuracy on test_dataset')
ax[1].set_xlabel('epoch')
ax[1].legend()
四.多层神经网络实现mnist数据集分类
加入256个神经元的dense层,测试集上的accuracy高达0.95,明显优于单层神经网络
import torch
from torch import nn
# Two-layer MLP: 784 inputs -> 256 hidden units (ReLU) -> 10 classes.
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
def init_weights(m):
    """Initialize each Linear layer's weights from N(0, 0.01^2)."""
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
net.apply(init_weights)
batch_size, lr, num_epochs = 256, 0.1, 10
# Per-sample losses; reductions happen explicitly in the training loop.
loss = nn.CrossEntropyLoss(reduction='none')
updater = torch.optim.SGD(net.parameters(), lr=lr)
# Plotting arrays: per-batch loss and accuracy curves.
loss_show = []
ac_train_show = []
ac_test_show = []
# FIX: this section defines `num_epochs` above, but the loop iterated over
# the stale `num_epoch` left behind by an earlier section — it only worked
# by accident when the previous cells had already run in the notebook.
for epoch in range(num_epochs):
    train_iter,test_iter=load_data_fashion_mnist(batch_size)
    for X,y in train_iter:
        y_hat=net(X)
        l=loss(y_hat,y)
        loss_show.append(l.detach().sum())
        if isinstance(updater, torch.optim.Optimizer):
            # Built-in optimizer path: zero grads, backprop the mean loss, step.
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # Hand-rolled updater: backprop the sum; sgd divides by batch_size.
            l.sum().backward()
            updater(batch_size)
    with torch.no_grad():
        # Whole-dataset accuracies once per epoch.
        # NOTE(review): train_features/test_features may hold raw 0-255
        # pixels while the model trains on ToTensor's [0, 1] inputs —
        # confirm the scales match before trusting these curves.
        ac_train_show.append(accuracy(net(train_features),train_labels)/len(train_labels))
        ac_test_show.append(accuracy(net(test_features),test_labels)/len(test_labels))
- 变化趋势
五.keras实现多层神经网络mnist数据集分类
加入64,32两个dense隐藏层,测试集上准确率高达0.97,更优于只加一层隐藏层的神经网络
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import layers,models
from keras.models import Sequential
from keras.layers import Dense,Activation,Flatten
from keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
import pandas as pd
# Load MNIST and flatten each 28x28 image into a 784-vector.
(train_features,train_labels),(test_features,test_labels)=mnist.load_data()
train_features = train_features.astype('float32').reshape(-1,28*28)
test_features = test_features.astype('float32').reshape(-1,28*28)
# One-hot encode the labels to match categorical_crossentropy.
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)
# MLP: 784 -> 64 (ReLU) -> 32 (ReLU) -> 10 (softmax).
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(28 * 28,)))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# NOTE(review): the pixels are left in 0-255 here — consider dividing by
# 255 before fitting; Adam copes, but scaled inputs usually train better.
history = model.fit(train_features, train_labels, epochs=12, batch_size=64)
笔者水平有限,没能写得非常详细,望谅解