Learning goals:
- PyTorch
- Deep Learning (Goodfellow et al., the "flower book")
Learning content:
- Basic usage of PyTorch
- The first two chapters of the Deep Learning book
Study period:
9.18-9.25
Learning output:
I. PyTorch
PyTorch is a Python-based scientific computing package with two main uses:
- a NumPy-like library that can use the GPU for acceleration;
- a flexible and fast research platform for deep learning.
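As a quick illustration of the first point, here is a minimal sketch (my own addition, not from the original notes) showing NumPy interoperability and moving a tensor to the GPU when one is available:
import numpy as np
import torch

a = np.ones((2, 3), dtype=np.float32)
t = torch.from_numpy(a)   # shares memory with the NumPy array
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
t = t.to(device)          # move the tensor to the GPU if one is available
print(t * 2)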
This week I studied the principles and implementation of linear regression, gradient descent, and backpropagation, and used PyTorch to implement linear regression, logistic regression, models with multi-dimensional feature inputs, Dataset/DataLoader usage, and a convolutional neural network.
1. Implementing linear regression
Computational graph:
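The original figure is not reproduced here; for this model the graph chains x through the linear layer (parameters w and b) to y_pred, and then through the MSE loss against y. A minimal sketch (my own illustration, not from the original notes) of what backward() does on such a graph, using scalar parameters:
import torch

x = torch.tensor([2.0])
y = torch.tensor([4.0])
w = torch.tensor([1.0], requires_grad=True)  # scalar weight tracked by autograd
b = torch.tensor([0.0], requires_grad=True)  # scalar bias tracked by autograd
y_pred = w * x + b        # forward pass builds the computational graph
loss = (y_pred - y) ** 2  # squared error
loss.backward()           # backpropagate through the graph
print(w.grad, b.grad)     # d(loss)/dw = 2*(y_pred-y)*x = -8, d(loss)/db = -4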
Code:
import torch
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt

# 1. Using PyTorch to fit y = w * x + b
'''
Steps:
1. Prepare the dataset
2. Build the model
3. Construct the loss function and the optimizer
4. Train
'''
'''
The model class should inherit from nn.Module, the base class of all neural network modules.
It must implement the member methods __init__() and forward().
nn.Linear holds two member tensors: the weight and the bias.
nn.Linear also implements the magic method __call__(), so an instance of the class can be called like a function; calling it invokes forward().
'''
x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[2.0], [4.0], [6.0]])
epoch_list = []
mse_list = []
class LinearModel(torch.nn.Module):
    def __init__(self):  # constructor
        super(LinearModel, self).__init__()
        # (1, 1) gives the feature dimensions of the input x and the output y; both are 1-dimensional here
        # the layer's learnable parameters are w and b, accessible as linear.weight / linear.bias;
        # w is the weight applied to the data x, and the scalar b is the bias
        self.linear = torch.nn.Linear(1, 1)

    # forward pass
    def forward(self, x):
        y_pred = self.linear(x)
        return y_pred
# Instantiate the model so it can be called directly
model = LinearModel()
# Loss function (size_average=False is deprecated; reduction='sum' gives the same summed loss)
criterion = torch.nn.MSELoss(reduction='sum')
# Define the optimizer (stochastic gradient descent); torch.optim is the optimization module, and
# model.parameters() tells the optimizer which parameters to update via gradient descent
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)  # lr is the learning rate
# Training
for epoch in range(1000):
    y_pred = model(x_data)
    loss_val = criterion(y_pred, y_data)
    print('epoch:', epoch, 'loss:', loss_val.item())
    optimizer.zero_grad()  # clear the gradients
    loss_val.backward()    # backpropagate to compute the gradients
    optimizer.step()       # update the parameters using the gradients and the learning rate
    epoch_list.append(epoch)
    mse_list.append(loss_val.item())
print('w=', model.linear.weight.item())
print('b=', model.linear.bias.item())
x_test = torch.Tensor([[4.0]])  # 1x1 input matrix
y_test = model(x_test)          # 1x1 output matrix
print('y_pred=', y_test.data)
plt.plot(epoch_list, mse_list)
plt.xlabel('Epoch')
plt.ylabel('Cost')
plt.grid()
plt.show()
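One extra note (not in the original code): at inference time it is common to disable gradient tracking. Assuming the trained model above, the prediction step could also be written as:
with torch.no_grad():  # no computational graph is built, so inference is cheaper
    y_test = model(torch.Tensor([[4.0]]))
print('y_pred=', y_test.item())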
2. Loading a dataset
Practice with the diabetes dataset:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
epoch_list = []
bce_list = []
class DiabetesDataset(Dataset):
    def __init__(self, filepath):
        xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32)
        self.len = xy.shape[0]
        self.x_data = torch.from_numpy(xy[:, :-1])   # x_data: every column except the last (the eight features)
        self.y_data = torch.from_numpy(xy[:, [-1]])  # y_data: only the last column (the label)

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]  # return one sample's x and y as a tuple

    def __len__(self):
        return self.len
dataset = DiabetesDataset('./data/diabetes.csv.gz')
# Pass the dataset to the DataLoader: mini-batch size 32, with the samples shuffled each epoch;
# num_workers is the number of worker processes used to load the data (it is unrelated to the GPU)
# dataset: the dataset, batch_size: mini-batch size, shuffle: whether to shuffle, num_workers: number of worker processes
train_loader = DataLoader(dataset=dataset,
                          batch_size=32,
                          shuffle=True,
                          num_workers=0)
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        # each linear layer is followed by a sigmoid activation, which squashes the result into (0, 1)
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x
model = Model()
# Define the loss criterion and the optimizer
# Linear regression used MSE; logistic regression uses BCE (binary cross-entropy)
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
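# Added note (not in the original script): with reduction='mean', BCELoss computes
#   loss = -(1/N) * sum_i [ y_i * log(p_i) + (1 - y_i) * log(1 - p_i) ]
# where p_i is the model's sigmoid output in (0, 1) and y_i is the 0/1 label,
# whereas the linear regression above used the mean squared error instead.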
if __name__ == '__main__':
    for epoch in range(1000):
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data                 # inputs is x_data, labels is y_data
            y_pred = model(inputs)                # calls forward()
            loss_val = criterion(y_pred, labels)
            print(epoch, i, loss_val.item())
            bce_list.append(loss_val.item())      # reduction='mean' already averages over the mini-batch
            epoch_list.append(epoch)
            optimizer.zero_grad()  # clear the gradients
            loss_val.backward()    # backpropagation
            optimizer.step()       # update the parameters
plt.plot(epoch_list, bce_list)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid()
plt.show()
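To see what the DataLoader actually yields, one can inspect a single mini-batch; a minimal sketch (my own addition), assuming the dataset and train_loader defined above:
inputs, labels = next(iter(train_loader))
print(inputs.shape)   # torch.Size([32, 8])  -> 32 samples, 8 features each
print(labels.shape)   # torch.Size([32, 1])  -> one 0/1 label per sample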
3. Convolutional neural network
GoogLeNet (Inception)
import torch
import numpy as np
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
# GoogLeNet-style network (Inception blocks)
batch_size = 16
# transforms.Compose takes a list of transforms and applies them in order
# transforms.ToTensor converts a PIL Image or numpy.ndarray (shape H x W x C, values in [0, 255])
# into a torch.FloatTensor of shape (C x H x W) with values in the range [0.0, 1.0]
# transforms.Normalize standardizes the data with mean 0.1307 and std 0.3081: output = (input - mean) / std
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = datasets.MNIST(root='./data/mnist', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='./data/mnist', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=True, batch_size=batch_size)
# A reusable Inception block
class InceptionA(torch.nn.Module):
    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        # The Inception block has four branches
        # Branch 1: average pooling, then a 1x1 convolution to 24 channels
        self.branch_pool = torch.nn.Conv2d(in_channels, 24, kernel_size=1)
        # Branch 2: a single 1x1 convolution to 16 channels
        self.branch1x1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)
        # Branch 3: a 1x1 convolution to 16 channels, then a 5x5 convolution to 24 channels
        self.branch5x5_1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = torch.nn.Conv2d(16, 24, kernel_size=5, padding=2)
        # Branch 4: a 1x1 convolution to 16 channels, then two 3x3 convolutions to 24 channels each
        self.branch3x3_1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3x3_2 = torch.nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3_3 = torch.nn.Conv2d(24, 24, kernel_size=3, padding=1)

    def forward(self, x):
        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)
        branch1x1 = self.branch1x1(x)
        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)
        branch3x3 = self.branch3x3_1(x)
        branch3x3 = self.branch3x3_2(branch3x3)
        branch3x3 = self.branch3x3_3(branch3x3)
        # Collect the four branch outputs in a list and concatenate them along the channel dimension;
        # the total channel count is 16 + 24 + 24 + 24 = 88
        outputs = [branch1x1, branch5x5, branch3x3, branch_pool]
        return torch.cat(outputs, dim=1)  # tensors are (batch_size, C, W, H), so dim=1 is the channel dimension
# Define the model
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(88, 20, kernel_size=5)
        self.inception1 = InceptionA(in_channels=10)
        self.inception2 = InceptionA(in_channels=20)
        self.mp = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(1408, 10)  # the final linear layer maps the flattened features to 10 class scores

    def forward(self, x):
        # x.size(): (batch_size, channel, width, height)
        batch_size = x.size(0)
        # channels: 1 -> 10
        x = F.relu(self.mp(self.conv1(x)))
        # channels: 10 -> 88
        x = self.inception1(x)
        # channels: 88 -> 20
        x = F.relu(self.mp(self.conv2(x)))
        # channels: 20 -> 88
        x = self.inception2(x)
        # reshape from (batch_size, C, H, W) to (batch_size, -1), i.e. (batch_size, 88*4*4) = (batch_size, 1408)
        x = x.view(batch_size, -1)
        x = self.fc(x)
        # no activation on the last layer: CrossEntropyLoss expects raw logits and applies log-softmax internally
        return x
model = Net()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')  # use the GPU if one is available
model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
def train(epoch):
    running_loss = 0.0
    for batch_index, data in enumerate(train_loader):
        inputs, target = data
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_index % 300 == 299:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_index + 1, running_loss / 300))
            running_loss = 0.0
accuracy = []
def test():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            output = model(images)
            _, predict = torch.max(output.data, dim=1)
            total += labels.size(0)
            correct += (predict == labels).sum().item()
    print('Accuracy on test set:%d %%' % (100 * correct / total))
    accuracy.append(100 * correct / total)
    return correct / total
if __name__ == '__main__':
    epoch_list = []
    acc_list = []
    for epoch in range(10):
        train(epoch)
        acc = test()
        epoch_list.append(epoch)
        acc_list.append(acc)
    print(accuracy)
    plt.plot(epoch_list, acc_list)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.show()
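As a sanity check on the channel and shape bookkeeping above (my own addition, not in the original notes), a dummy forward pass confirms that InceptionA outputs 16 + 24 + 24 + 24 = 88 channels and that a 28x28 MNIST image is reduced to an 88 x 4 x 4 = 1408-dimensional feature vector before the final linear layer:
block = InceptionA(in_channels=10)
print(block(torch.randn(1, 10, 12, 12)).shape)   # torch.Size([1, 88, 12, 12])
net = Net()
print(net(torch.randn(1, 1, 28, 28)).shape)      # torch.Size([1, 10])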
II. Deep learning
I read Chapters 1, 2, and 3.
Chapter 1 covers the history of deep learning, from early cybernetics through connectionism to today's deep learning.
Chapter 2 reviews linear algebra; I went back over some points I had forgotten.
Chapter 3 introduces probability theory.