文章目录
一、反向传播y=w1 x^2+w2 x+b
# Train y = w1*x^2 + w2*x + b with autograd and a manual SGD update.
import numpy as np
import torch
import random
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

# Learnable parameters, all initialised to 1.0; requires_grad enables autograd.
w_1 = torch.tensor(1.0, requires_grad=True)
w_2 = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(1.0, requires_grad=True)


def forward(x):
    """Quadratic model prediction for input x."""
    return x ** 2 * w_1 + x * w_2 + b


def loss(x, y):
    """Squared error of a single sample."""
    y_pred = forward(x)
    return (y - y_pred) ** 2


print('Predict (before training)', 4, round(forward(4).item(), 2))

epochs = []
costs = []
for epoch in range(100):
    epochs.append(epoch)
    for x, y in zip(x_data, y_data):
        l = loss(x, y)
        l.backward()  # accumulate gradients into each parameter's .grad
        print('\tgrad: ', x, y, round(w_1.grad.item(), 2),
              round(w_2.grad.item(), 2), round(b.grad.item(), 2))
        # SGD step (lr = 0.01); updating .data keeps the step out of the graph.
        w_1.data -= 0.01 * w_1.grad.item()
        w_2.data -= 0.01 * w_2.grad.item()
        b.data -= 0.01 * b.grad.item()
        # Zero the gradients so they do not accumulate across samples.
        w_1.grad.data.zero_()
        w_2.grad.data.zero_()
        b.grad.data.zero_()
    costs.append(l.item())  # loss of the last sample seen this epoch
    print('progress:', epoch, l.item())

print('Predict (after training)', 4, round(forward(4).item(), 2))
plt.plot(epochs, costs)
plt.ylabel('Cost')
plt.xlabel('Epoch')
plt.show()
二、线性回归
1.Pytorch实现线性回归
使用Pytorch构建训练模型的一般步骤:
1.准备数据集
2.使用相关类构建模型,用以计算预测值
3.使用Pytorch的应用接口来构建损失函数和优化器
4.编写循环迭代的训练过程——前向计算,反向传播和梯度更新
# Linear regression with torch.nn: dataset -> model -> loss/optimizer -> training loop.
import numpy as np
import torch
import random
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

x_data = torch.tensor([[1.0], [2.0], [3.0]])
y_data = torch.tensor([[2.0], [4.0], [6.0]])


class LinearModel(torch.nn.Module):
    """y = w*x + b with a single input/output feature."""

    def __init__(self):
        super(LinearModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1)  # input and output dims are both 1

    def forward(self, x):
        y_pred = self.linear(x)
        return y_pred


model = LinearModel()
# size_average was deprecated and removed from PyTorch;
# reduction='sum' is the exact equivalent of size_average=False.
criterion = torch.nn.MSELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

epochs = []
costs = []
for epoch in range(100):
    epochs.append(epoch)
    y_pred = model(x_data)          # forward pass on the whole dataset
    loss = criterion(y_pred, y_data)
    costs.append(loss.item())
    print(epoch, loss)
    optimizer.zero_grad()           # clear accumulated gradients
    loss.backward()                 # backward pass
    optimizer.step()                # weight update

print('w=', model.linear.weight.item())
print('b=', model.linear.bias.item())
x_test = torch.Tensor([[4.0]])
y_test = model(x_test)
print('y_pred=', y_test.data)

plt.plot(epochs, costs)
plt.ylabel('Cost')
plt.xlabel('Epoch')
plt.show()
总结
1.nn.Linear类中对__call__()方法进行了实现,且其内部有对函数forward()的调用,故在定义模型时需要对forward()函数进行实现。
2.nn.Linear类相当于是对线性计算单元的封装,里面包含两个张量类型的成员:权重和偏置项
2.比较不同优化器下的线性回归并可视化
# Compare linear-regression convergence under different optimizers and plot all curves.
import numpy as np
import torch
import random
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D

x_data = torch.tensor([[1.0], [2.0], [3.0]])
y_data = torch.tensor([[2.0], [4.0], [6.0]])


class LinearModel(torch.nn.Module):
    """y = w*x + b with a single input/output feature."""

    def __init__(self):
        super(LinearModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1)  # input and output dims are both 1

    def forward(self, x):
        y_pred = self.linear(x)
        return y_pred


# One independent model per optimizer so the training runs do not interfere.
model1 = LinearModel()
model2 = LinearModel()
model3 = LinearModel()
model4 = LinearModel()
model5 = LinearModel()
#model6 = LinearModel()
model7 = LinearModel()
model8 = LinearModel()
models = [model1, model2, model3, model4, model5, model7, model8]

# size_average was removed from PyTorch; reduction='sum' is the equivalent.
criterion = torch.nn.MSELoss(reduction='sum')
op1 = torch.optim.SGD(model1.parameters(), lr=0.01)  # try several optimizers below
op2 = torch.optim.Adagrad(model2.parameters(), lr=0.01)
op3 = torch.optim.Adam(model3.parameters(), lr=0.01)
op4 = torch.optim.Adamax(model4.parameters(), lr=0.01)
op5 = torch.optim.ASGD(model5.parameters(), lr=0.01)
#op6 = torch.optim.LBFGS(model6.parameters(), lr=0.01)
op7 = torch.optim.RMSprop(model7.parameters(), lr=0.01)
op8 = torch.optim.Rprop(model8.parameters(), lr=0.01)
ops = [op1, op2, op3, op4, op5, op7, op8]
# Fixed 'RNSprop' typo so the label matches torch.optim.RMSprop.
titles = ['SGD', 'Adagrad', 'Adam', 'Adamax', 'ASGD', 'RMSprop', 'Rprop']

index = 0
for op, model in zip(ops, models):
    epochs = []
    costs = []
    for epoch in range(100):
        epochs.append(epoch)
        y_pred = model(x_data)
        loss = criterion(y_pred, y_data)
        costs.append(loss.item())
        print(epoch, loss)
        op.zero_grad()
        loss.backward()
        op.step()
    print('w=', model.linear.weight.item())
    print('b=', model.linear.bias.item())
    x_test = torch.Tensor([[4.0]])
    y_test = model(x_test)
    print('y_pred=', y_test.data)
    # Label each curve with its optimizer name; titles/index were previously
    # defined but never used, leaving the plot's curves indistinguishable.
    plt.plot(epochs, costs, label=titles[index])
    plt.ylabel('Cost')
    plt.xlabel('Epoch')
    index += 1
plt.legend()
plt.show()
三、多维输入的逻辑斯蒂回归(sigmoid)
主要解决二分类问题
# Multi-layer logistic regression (sigmoid) for a binary-classification task.
import numpy as np
import torch
import matplotlib.pyplot as plt

x_data = torch.from_numpy(np.loadtxt('diabetes_data.csv.gz', delimiter=' ', dtype=np.float32))
# np.loadtxt yields a 1-D target vector; reshape to (N, 1) so it matches the
# model's (N, 1) output — BCELoss mis-broadcasts/errors on mismatched shapes.
# NOTE(review): BCELoss requires targets in [0, 1] — confirm this file holds 0/1 labels.
y_data = torch.from_numpy(np.loadtxt('diabetes_target.csv.gz', dtype=np.float32)).view(-1, 1)


class Model(torch.nn.Module):
    """10 -> 8 -> 6 -> 4 -> 1 fully-connected net with sigmoid activations."""

    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(10, 8)
        self.linear2 = torch.nn.Linear(8, 6)
        self.linear3 = torch.nn.Linear(6, 4)
        self.linear4 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        x = self.sigmoid(self.linear4(x))
        return x


model = Model()
# size_average was removed from PyTorch; reduction='mean' is the equivalent.
# Binary cross-entropy is the natural loss for logistic regression.
criterion = torch.nn.BCELoss(reduction='mean')
op = torch.optim.SGD(model.parameters(), lr=0.1)

epochs = []
costs = []
for epoch in range(1000):
    epochs.append(epoch)
    # forward pass
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    costs.append(loss.item())
    print(epoch, loss.item())
    # backward pass
    op.zero_grad()
    loss.backward()
    # weight update
    op.step()

# visualize the training curve
plt.ylabel('Cost')
plt.xlabel('Epoch')
plt.plot(epochs, costs)
plt.show()
四、数据集加载
在神经网络训练过程中采用的工具类,诸如Dataset和DataLoader
Dataset主要用于构造数据集,该数据集应该能够支持索引结构;
DataLoader主要用于加载数据集,支持训练时的Mini-Batch形式
概念辨析: Epoch:所有数据样本都参与一次训练,称为一次Epoch
Batch-size:进行一轮训练时所投入的训练样本规模
Iteration:一次Epoch内参数更新的次数,即训练样本总数/Batch-size(不整除时向上取整)
使用Dataset和DataLoader进行糖尿病分类
# Diabetes classification using Dataset/DataLoader for mini-batch training.
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt


class DiabetesDataset(Dataset):
    """Index-able dataset wrapping the diabetes feature/target files."""

    def __init__(self):
        self.x_data = torch.from_numpy(
            np.loadtxt('diabetes_data.csv.gz', delimiter=' ', dtype=np.float32))
        # np.loadtxt yields a 1-D target; store it as (N, 1) so each label
        # matches the model's (batch, 1) output — BCELoss rejects/mis-broadcasts
        # mismatched shapes.
        # NOTE(review): BCELoss requires targets in [0, 1] — confirm this file
        # holds 0/1 labels.
        self.y_data = torch.from_numpy(
            np.loadtxt('diabetes_target.csv.gz', dtype=np.float32)).view(-1, 1)
        self.len = self.y_data.shape[0]

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len


dataset = DiabetesDataset()
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)


class Model(torch.nn.Module):
    """10 -> 8 -> 6 -> 4 -> 1 fully-connected net with sigmoid activations."""

    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(10, 8)
        self.linear2 = torch.nn.Linear(8, 6)
        self.linear3 = torch.nn.Linear(6, 4)
        self.linear4 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        x = self.sigmoid(self.linear4(x))
        return x


model = Model()
# size_average was removed from PyTorch; reduction='mean' is the equivalent.
criterion = torch.nn.BCELoss(reduction='mean')
op = torch.optim.SGD(model.parameters(), lr=0.01)

epochs = []
costs = []
for epoch in range(100):
    epochs.append(epoch)
    loss_sum = 0.0
    # enumerate(..., 0) yields the batch index i starting at 0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        loss_sum += loss.item()
        print(epoch, i, loss.item())
        op.zero_grad()
        loss.backward()
        op.step()
    costs.append(loss_sum / (i + 1))  # mean batch loss for this epoch

plt.ylabel('Cost')
plt.xlabel('Epoch')
plt.plot(epochs, costs)
plt.show()
五、多分类问题
#MNIST手写数字分类
# MNIST handwritten-digit classification with a fully-connected network.
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import matplotlib.pyplot as plt

batch_size = 64
transform = transforms.Compose([
    transforms.ToTensor(),
    # mean/std must be one-element sequences (one value per channel);
    # (0.1307) is just the float 0.1307, which classic torchvision rejects.
    transforms.Normalize((0.1307,), (0.3081,))
])
train_dataset = datasets.MNIST(root="../dataset/mnist",
                               train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root="../dataset/mnist", train=False,
                              download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=True, batch_size=batch_size)


class Net(torch.nn.Module):
    """784 -> 512 -> 256 -> 128 -> 64 -> 10 fully-connected classifier."""

    def __init__(self):
        super(Net, self).__init__()
        self.linear1 = torch.nn.Linear(784, 512)
        self.linear2 = torch.nn.Linear(512, 256)
        self.linear3 = torch.nn.Linear(256, 128)
        self.linear4 = torch.nn.Linear(128, 64)
        self.linear5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 784)  # flatten 1x28x28 images into 784-vectors
        x = F.relu(self.linear1(x))
        x = F.relu(self.linear2(x))
        x = F.relu(self.linear3(x))
        x = F.relu(self.linear4(x))
        return self.linear5(x)  # raw logits: CrossEntropyLoss applies log-softmax


model = Net()
criterion = torch.nn.CrossEntropyLoss()
op = torch.optim.SGD(model.parameters(), lr=0.01)


def train(epoch):
    """Run one training epoch, printing the mean loss every 300 batches."""
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        op.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        op.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[%d,%5d] loss: %3f' %(epoch+1,batch_idx+1,running_loss/300))
            running_loss = 0.0


def vali():
    """Report classification accuracy on the test set."""
    correct = 0
    total = 0
    with torch.no_grad():  # evaluation needs no gradients
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            # predicted class = index of the largest logit in each row
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set:%d %%'%(100*correct/total))


if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        vali()
六、CNN
1.使用CNN训练MNIST手写数字识别问题
# MNIST handwritten-digit recognition with a CNN.
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import matplotlib.pyplot as plt

# hyper-parameters
BATCH_SIZE = 512  # NOTE(review): unused — the loaders below use batch_size = 64
EPOCHS = 20       # NOTE(review): unused — the main loop runs 10 epochs
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# prepare the data: convert to tensors and normalize
batch_size = 64
transform = transforms.Compose([
    transforms.ToTensor(),
    # mean/std must be one-element sequences (one value per channel),
    # not bare floats.
    transforms.Normalize((0.1307,), (0.3081,))
])
train_dataset = datasets.MNIST(root="../dataset/mnist",
                               train=True, download=False, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root="../dataset/mnist", train=False,
                              download=False, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=True, batch_size=batch_size)


class Model(torch.nn.Module):
    """Three conv+pool stages followed by two fully-connected layers.

    Spatial sizes: 28 -> conv1(5) 24 -> pool 12 -> conv2(3,p1) 12 -> pool 6
    -> conv3(3,p1) 6 -> pool 3, giving 20*3*3 = 180 flattened features.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=3, padding=1)
        self.conv3 = torch.nn.Conv2d(20, 20, kernel_size=3, padding=1)
        self.pooling = torch.nn.MaxPool2d(2)
        self.l1 = torch.nn.Linear(180, 16)
        self.l2 = torch.nn.Linear(16, 10)

    def forward(self, x):
        batch_size = x.size(0)
        x = self.pooling(F.relu(self.conv1(x)))
        x = self.pooling(F.relu(self.conv2(x)))
        x = self.pooling(F.relu(self.conv3(x)))
        x = x.view(batch_size, -1)  # flatten before the FC layers
        x = self.l1(x)
        return self.l2(x)


model = Model().to(DEVICE)
# loss and optimizer
criterion = torch.nn.CrossEntropyLoss()  # multi-class cross-entropy
op = torch.optim.SGD(model.parameters(), lr=0.01)  # plain SGD


def train(epoch):
    """Run one training epoch (forward + backward + update)."""
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        # The model lives on DEVICE, so every batch must be moved there too —
        # otherwise training crashes whenever CUDA is available.
        inputs, target = inputs.to(DEVICE), target.to(DEVICE)
        op.zero_grad()
        # forward pass
        outputs = model(inputs)
        loss = criterion(outputs, target)
        # backward pass and weight update
        loss.backward()
        op.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:  # report every 300 batches
            print('[%d,%5d] loss: %3f' % (epoch+1,batch_idx+1,running_loss / 300))
            running_loss = 0.0


def vali():
    """Report classification accuracy on the test set."""
    correct = 0
    total = 0
    with torch.no_grad():  # no backward pass in testing, so no gradients needed
        for data in test_loader:
            images, labels = data
            images, labels = images.to(DEVICE), labels.to(DEVICE)  # match model device
            outputs = model(images)
            # data arrives in batches, so predictions come back as a batch too
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)  # labels is an N-element tensor per batch
            correct += (predicted == labels).sum().item()
    print('Accuracy on test set: %d %%'%(100 * correct / total))


# main entry
if __name__ == '__main__':
    for epoch in range(10):  # train for 10 epochs in total
        train(epoch)
        vali()
2.CNN高级 实现GoogLeNet
1.Inception
# Reusable GoogLeNet Inception block: four parallel branches whose outputs are
# concatenated along the channel dimension. Other networks can be assembled by
# stacking instances of this module.
class InceptionA(nn.Module):
    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        # plain 1x1 branch
        self.branch1x1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        # 1x1 -> 5x5 branch (padding=2 keeps the spatial size)
        self.branch5x5_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2)
        # 1x1 -> 3x3 -> 3x3 branch (padding=1 keeps the spatial size)
        self.branch3x3_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3x3_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1)
        # average-pool -> 1x1 branch
        self.branch_pool = nn.Conv2d(in_channels, 24, kernel_size=1)

    def forward(self, x):
        out_pool = self.branch_pool(
            F.avg_pool2d(x, kernel_size=3, stride=1, padding=1))
        out_1x1 = self.branch1x1(x)
        out_5x5 = self.branch5x5_2(self.branch5x5_1(x))
        out_3x3 = self.branch3x3_3(self.branch3x3_2(self.branch3x3_1(x)))
        # stack the branch outputs along the channel axis: 16+24+24+24 = 88 channels
        return torch.cat([out_1x1, out_5x5, out_3x3, out_pool], dim=1)
2.ResNet
# Residual block: two 3x3 convolutions plus an identity skip connection.
class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super(ResidualBlock, self).__init__()
        self.channels = channels
        # Both convolutions preserve channels and spatial size (kernel 3,
        # padding 1) so the input can be added to the branch output before
        # the final activation.
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)

    def forward(self, x):
        residual = self.conv2(F.relu(self.conv1(x)))
        return F.relu(x + residual)
# Classification network assembled from the residual blocks above.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # No padding: 28x28 -> conv(5) 24x24 -> pool 12x12 -> conv(5) 8x8 -> pool 4x4.
        # With padding=2 the spatial size would stay 28/14/7 and flattening
        # would yield 32*7*7 = 1568 features, which does not match the
        # fc layer's 512 inputs (32*4*4) and crashes at runtime.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)
        self.mp = nn.MaxPool2d(2)
        self.rblock1 = ResidualBlock(16)
        self.rblock2 = ResidualBlock(32)
        self.fc = nn.Linear(512, 10)  # 32 channels * 4 * 4 = 512

    def forward(self, x):
        in_size = x.size(0)
        x = self.mp(F.relu(self.conv1(x)))
        x = self.rblock1(x)
        x = self.mp(F.relu(self.conv2(x)))
        x = self.rblock2(x)
        x = x.view(in_size, -1)  # flatten before the fully-connected layer
        x = self.fc(x)
        return x