用PyTorch实现linear模型
模拟数据集
模拟数据集这块与上节课一致
# Synthetic regression data: labels = samples @ true_w + true_b + gaussian noise.
num_inputs = 2        # number of features per sample
num_examples = 1000   # number of training samples
true_w = torch.tensor([[2], [-3.4]])   # ground-truth weights, shape (2, 1)
true_b = torch.tensor(4.2)             # ground-truth bias
samples = torch.normal(0, 1, (num_examples, num_inputs))
noise = torch.normal(0, 0.01, (num_examples, 1))  # small observation noise
labels = samples @ true_w + true_b + noise        # shape (1000, 1)
定义模型
class LinearNet(nn.Module):
    """Single-layer linear regression model: one fully connected layer, one output."""

    def __init__(self, in_features):
        super().__init__()
        # fix: the in_features argument was ignored and 2 was hard-coded,
        # so LinearNet(n) for n != 2 silently built a mismatched layer.
        self.fc = nn.Linear(in_features=in_features, out_features=1)

    def forward(self, t):
        # t: (batch, in_features) -> (batch, 1)
        t = self.fc(t)
        return t
加载数据集
import torch.utils.data as Data
# TensorDataset works like zip(): packs the feature and label tensors so that
# indexing yields one (sample, label) pair.
dataset = Data.TensorDataset(samples,labels)# like zip: packs the two tensors together
# Mini-batches of 100, reshuffled every epoch.
data_loader = Data.DataLoader(dataset,batch_size=100,shuffle=True)
optimizer
# Build the model (2 input features) and a plain SGD optimizer over its parameters.
network = LinearNet(2)
# fix: 'paramters' was a typo — nn.Module has no such attribute, so the
# original line raised AttributeError; the method is parameters().
optimizer = optim.SGD(network.parameters(), lr=0.05)
模型训练
# Training loop: 10 epochs of mini-batch SGD on MSE loss.
for epoch in range(10):
    total_loss = 0
    for data, label in data_loader:
        predict = network(data)            # forward pass
        loss = F.mse_loss(predict, label)
        total_loss += loss.item()          # accumulate scalar loss for reporting
        optimizer.zero_grad()              # clear stale gradients before backward
        loss.backward()
        optimizer.step()
    print(
        'epoch', epoch,
        'loss', total_loss,
        # fix: LinearNet itself has no .weight/.bias attributes; the learned
        # parameters live on its inner nn.Linear layer `fc`.
        'weight', network.fc.weight,
        'bias', network.fc.bias
    )
softmax回归模型
softmax主要用于分类任务。regression最终得到的是一个scalar,根据input中的feature线性相加得到一个output。分类任务的结果是一个类别,是离散的。
假设现在有一批图片是2 * 2大小的灰度图片,这样图片中的每个像素用一个标量表示就行了。这批图片一共分为三类小动物,第一类是小狗,第二类是小猫,第三类是小兔子。
每张图片总共4个像素点,我们可以看作是4个feature,假设这三类小动物的图片线性可分,每一类对应一组weight和一个bias。
$$\delta_1 = w_{11}x_1 + w_{12}x_2 + w_{13}x_3 + w_{14}x_4 + b_1$$
$$\delta_2 = w_{21}x_1 + w_{22}x_2 + w_{23}x_3 + w_{24}x_4 + b_2$$
$$\delta_3 = w_{31}x_1 + w_{32}x_2 + w_{33}x_3 + w_{34}x_4 + b_3$$
$$\begin{bmatrix}\delta_1 \\ \delta_2 \\ \delta_3\end{bmatrix} = \begin{bmatrix}w_{11} & w_{12} & w_{13} & w_{14} \\ w_{21} & w_{22} & w_{23} & w_{24} \\ w_{31} & w_{32} & w_{33} & w_{34}\end{bmatrix} \begin{bmatrix}x_1 \\ x_2 \\ x_3 \\ x_4\end{bmatrix} + \begin{bmatrix}b_1 \\ b_2 \\ b_3\end{bmatrix}$$
可以根据输出值较大的来决定哪一类,可这样有个问题,首先输出值没有明确的意义,且可能是实数范围。其次,不好衡量输出值与真实值之间的差距。所以采用softmax操作,将三个输出值转化成概率值,这样输出结果满足概率分布。label采用one-hot编码,相当于对应类别的概率是1,这样就可以用cross_entropy来计算loss。
Fashion-MNIST
本次学习softmax模型采用torchvision.datasets中的Fashion-MNIST。
import torchvision
import torchvision.transforms as transforms
# ToTensor(): HxWxC PIL/uint8 image in [0, 255] -> CxHxW float32 tensor in [0.0, 1.0]
train_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor()
)
# fix: test_set is used below (len(test_set), test_loader) but was never
# defined in this section — load the evaluation split (train=False) here.
test_set = torchvision.datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor()
)
transforms.ToTensor()将尺寸为(H x W x C)且数据位于(0,255)的PIL图片或者数据类型为np.uint8的NumPy数组转换为尺寸为C x H x W且数据类型为torch.float32且位于(0.0,1.0)的Tensor
len(train_set),len(test_set)
> (60000,10000)
展示一下数据集中的图片
import matplotlib.pyplot as plt
# Preview the first ten training images with their class names.
plt.figure(figsize=(10, 10))
for idx, (img, label) in zip(range(1, 11), train_set):
    plt.subplot(1, 10, idx)
    plt.imshow(img.squeeze())            # drop the channel axis: (1, 28, 28) -> (28, 28)
    plt.title(train_set.classes[label])
    plt.axis('off')
plt.show()
# Training loader: reshuffle every epoch; 4 worker processes to overlap loading.
train_loader = torch.utils.data.DataLoader(train_set,batch_size=100,shuffle=True,num_workers=4)
# Evaluation loader: deterministic order, no shuffling needed.
# NOTE(review): test_set is defined in a later section — confirm it exists before this runs.
test_loader = torch.utils.data.DataLoader(test_set,batch_size=100,shuffle=False,num_workers=1)
cross_entropy
def cross_entropy(predict, labels):
    """Mean cross-entropy loss.

    predict: (batch, num_classes) tensor of class probabilities.
    labels:  sequence/tensor of integer class indices, length batch.
    Returns the scalar mean of -log(probability of the true class).
    """
    labels = torch.LongTensor(labels).view(-1, 1)
    # gather picks, in each row, the predicted probability of the true class.
    # fix: the original line was missing the closing parenthesis (SyntaxError);
    # the leading minus sign is essential for cross entropy.
    loss = -torch.log(predict.gather(1, labels))
    return loss.mean()
多分类问题的交叉熵函数如下:
$$loss = -\frac{1}{n}\sum_{i=1}^{n}\log p^{i}$$
i表示label对应的种类,pi为真实种类的预测概率,log是以e为底的对数
这里gather函数的作用,就是在predict上取到对应label的概率值,注意负号不能丢,pytorch中的cross_entropy对输入先进行一次softmax操作,以保证输入都是正的。
模型的实现
def net(samples, w, b):
    """Softmax regression: flatten -> linear layer -> softmax.

    samples: (batch, C, H, W) image batch; flattened to (batch, C*H*W).
    w: (num_features, num_classes) weight matrix; b: (num_classes,) bias.
    Returns (batch, num_classes) probabilities, each row summing to 1.
    """
    # collapse the C, H, W axes into one feature axis (28 * 28 for FashionMNIST)
    samples = samples.flatten(start_dim=1)
    # fix: the original applied exp/normalize to the raw inputs and THEN the
    # linear layer, so its output was neither probabilities nor bounded.
    # Softmax regression computes the linear scores first ...
    logits = samples.matmul(w) + b
    # ... then exponentiates and normalizes row-wise (broadcasting the sums).
    exp = torch.exp(logits)
    return exp / exp.sum(dim=1, keepdim=True)
利用PyTorch简易实现softmax
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.init as init
class SoftmaxNet(nn.Module):
    """Linear classifier: flatten the image, then one fully connected layer.

    Outputs raw logits; pair with F.cross_entropy, which applies softmax itself.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.fc = nn.Linear(in_features=in_features, out_features=out_features)

    def forward(self, t):
        flat = t.flatten(start_dim=1)   # (batch, C, H, W) -> (batch, C*H*W)
        return self.fc(flat)
# fix: Windows paths in plain string literals contain invalid escape
# sequences ('\p', '\j', '\d') — a SyntaxWarning on modern Python and a
# latent bug if any segment ever starts with a valid escape (e.g. '\t').
# Raw strings keep the intended path verbatim.
train_set = torchvision.datasets.FashionMNIST(
    root=r'E:\project\python\jupyterbook\data',
    train=True,
    download=True,
    transform=transforms.ToTensor()
)
test_set = torchvision.datasets.FashionMNIST(
    root=r'E:\project\python\jupyterbook\data',
    train=False,
    download=True,
    transform=transforms.ToTensor()
)
train_loader = Data.DataLoader(
    train_set,
    batch_size=100,
    shuffle=True,    # reshuffle training data every epoch
    #num_workers=2
)
test_loader = Data.DataLoader(
    test_set,
    batch_size=100,
    shuffle=False,   # evaluation order is irrelevant; keep it deterministic
    #num_workers=2
)
@torch.no_grad()
def get_correct_nums(predict, labels):
    """Count how many rows of predict have their argmax equal to the label."""
    hits = predict.argmax(dim=1) == labels
    return hits.sum().item()
@torch.no_grad()
def evaluate(test_loader, net, total_num):
    """Return net's accuracy over all batches of test_loader (hits / total_num)."""
    hits = 0
    for images, targets in test_loader:
        hits += get_correct_nums(net(images), targets)
    return hits / total_num
# fix: SoftmaxNet.__init__ requires (in_features, out_features); calling it
# with no arguments raises TypeError.  FashionMNIST images are 1x28x28
# -> 784 features, and there are 10 classes.
network = SoftmaxNet(28 * 28, 10)
optimizer = optim.SGD(network.parameters(), lr=0.05)
# Training loop: 10 epochs of mini-batch SGD on cross-entropy loss.
for epoch in range(10):
    total_loss = 0
    total_correct = 0
    for image, label in train_loader:
        predict = network(image)
        # F.cross_entropy applies softmax internally, so it takes raw logits
        loss = F.cross_entropy(predict, label)
        total_loss += loss.item()
        total_correct += get_correct_nums(predict, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(
        'epoch', epoch,
        'loss', total_loss,
        'train_acc', total_correct / len(train_set),
        'test_acc', evaluate(test_loader, network, len(test_set))
    )