刘二大人《PyTorch 深度学习实践》课程学习笔记。

Pytorch深度学习实践

学习 B 站刘二大人《PyTorch 深度学习实践》课程视频的笔记。

1、线性模型

import numpy as np
import matplotlib.pylab as plt

# Exhaustive search: try every candidate weight and record the loss curve.

x_data = [1.0,2.0,3.0]   # training inputs
y_data = [2.0,4.0,6.0]   # targets; the true relation is y = 2x

def forward(x):
    """Linear model prediction y_hat = w * x using the global weight w."""
    return w * x

def loss(x,y):
    """Squared error between the prediction for x and the target y."""
    residual = forward(x) - y
    return residual * residual

# Sweep w and record the MSE for each candidate value.
w_list = []
mse_list = []
for w in np.arange(0.0,4.1,0.1):   # np.arange excludes the stop, so w runs 0.0 .. 4.0 in steps of 0.1
    print('w=',w)
    l_sum = 0   # accumulated squared error over the samples
    for x_val,y_val in zip(x_data,y_data):
        y_pred_val = forward(x_val)
        loss_val = loss(x_val,y_val)
        l_sum += loss_val
        print('\t',x_val,y_val,y_pred_val,loss_val)
    print('MSE=',l_sum / 3)   # mean over the 3 samples
    w_list.append(w)
    mse_list.append(l_sum / 3)

# Plot the loss as a function of the weight.
plt.plot(w_list,mse_list)
plt.ylabel('Loss')
plt.xlabel('w')
plt.show()

2、梯度下降算法

import matplotlib.pyplot as plt

# Batch gradient descent: every update uses the gradient over the whole dataset.

x_data = [1.0,2.0,3.0]
y_data = [2.0,4.0,6.0]

w = 1.0   # initial guess for the weight

def forward(x):
    """Predict y = w * x with the current global weight."""
    return w * x

# Mean squared error of the current global weight over the full dataset.
def cost(xs,ys):
    """Return the MSE of the model y = w * x on the samples (xs, ys)."""
    total = 0
    for xv, yv in zip(xs, ys):
        total += (forward(xv) - yv) ** 2
    return total / len(ys)

def gradient(xs,ys):
    """Analytic gradient of the MSE with respect to w, averaged over the dataset."""
    acc = 0
    for xv, yv in zip(xs, ys):
        # d/dw (x*w - y)^2 = 2 * x * (x*w - y)
        acc += 2 * xv * (xv * w - yv)
    return acc / len(xs)

epoch_list = []
cost_list = []
print('Predict (before training)',4,forward(4))
# Train for 100 epochs of full-batch gradient descent.
for epoch in range(100):
    cost_val = cost(x_data,y_data)
    grad_val = gradient(x_data,y_data)
    w -= 0.01 * grad_val       # 0.01 is the learning rate
    print('Epoch:',epoch,'w=',w,'loss=',cost_val)
    epoch_list.append(epoch)
    cost_list.append(cost_val)
print('Predict (after training)',4,forward(4))

# Plot the training curve.
plt.plot(epoch_list,cost_list)
plt.ylabel('cost')
plt.xlabel('epoch')
plt.show()
import matplotlib.pyplot as plt

# Stochastic gradient descent: one weight update per sample.

x_data = [1.0,2.0,3.0]
y_data = [2.0,4.0,6.0]

w = 1.0   # initial weight

def forward(x):
    """Model prediction y_hat = w * x using the global weight."""
    return w * x

def loss(x,y):
    """Squared error for a single sample."""
    diff = forward(x) - y
    return diff ** 2

def gradient(x,y):
    """Gradient of the single-sample squared error with respect to w."""
    return (x * w - y) * 2 * x

epoch_list = []
loss_list = []
print('Predict (before training)',4,forward(4))
# Train for 100 epochs; each epoch updates w once per sample (SGD).
for epoch in range(100):
    for x,y in zip(x_data,y_data):
        grad = gradient(x,y)
        w = w - 0.01 * grad   # 0.01 is the learning rate
        # print("\tgrad:",x,y,grad)
        l = loss(x,y)
    print("第", epoch+1,"次训练:", "w=", w, "loss=", l)
    epoch_list.append(epoch)
    loss_list.append(l)   # loss of the last sample in this epoch
print('Predict (after training)',4,forward(4))

plt.plot(epoch_list,loss_list)
plt.ylabel('loss')
plt.xlabel('epoch')
plt.show()

3、反向传播

# Backpropagation with autograd.

# w is a Tensor; a Tensor carries both data and grad (themselves Tensors).
# grad starts as None; after l.backward() it becomes a Tensor, so the update
# must go through w.grad.data to avoid building a new computation graph.
# Any tensor computed from a requires_grad tensor also requires grad.

import torch

x_data = [1.0,2.0,3.0]
y_data = [2.0,4.0,6.0]

w = torch.tensor([1.0])
w.requires_grad=True   # track operations on w so gradients can be computed

def forward(x):
    """Predict y for input x using the global weight tensor w."""
    return w * x

def loss(x,y):
    """Squared error between prediction and target (part of the autograd graph)."""
    diff = forward(x) - y
    return diff ** 2

print('Predict (before training)',4,forward(4).item())
for epoch in range(100):
    for x,y in zip(x_data,y_data):
        l = loss(x,y)
        l.backward()   # backprop: fills w.grad, then frees the graph
        print('\tgrad',x,y,w.grad.item())   # item() extracts a Python scalar
        w.data = w.data - 0.01 * w.grad.data # update via .data so no graph is built
        w.grad.data.zero_()   # reset the gradient; backward() accumulates otherwise
    print('progress:', epoch, l.item())   # use l.item(), not l, to avoid keeping the graph alive
print('Predict (after training)',4,forward(4).item())

4、实现线性回归

import torch

# Linear regression with torch.nn.

# x and y are 3x1 matrices: three samples with one feature each.
x_data = torch.Tensor([[1.0],[2.0],[3.0]])
y_data = torch.Tensor([[2.0],[4.0],[6.0]])

# torch.nn.Module implements __call__(), which dispatches to forward(),
# so subclasses override forward() to define the computation.
class LinearModel(torch.nn.Module):
    """y = w * x + b with a single input and a single output feature."""

    def __init__(self):
        super(LinearModel, self).__init__()
        # Linear(1, 1): one input feature, one output feature.
        self.linear = torch.nn.Linear(1,1)

    def forward(self,x):
        # Apply the affine map; shape (N, 1) -> (N, 1).
        return self.linear(x)
model = LinearModel()

# size_average=False sums the squared errors instead of averaging them.
# NOTE(review): size_average is deprecated; reduction='sum' is the modern spelling.
criterion = torch.nn.MSELoss(size_average=False)
optimizer = torch.optim.SGD(model.parameters(),lr=0.01)

for epoch in range(1000):
    # The forward pass: model(x) dispatches to LinearModel.forward.
    y_pred = model(x_data)   # predictions for all samples
    loss = criterion(y_pred,y_data)   # scalar training loss
    print('progress : ',epoch,loss.item())

    optimizer.zero_grad()
    loss.backward()   # backprop: compute gradients
    optimizer.step() # update w and b

print('w = ',model.linear.weight.item())
print('b = ',model.linear.bias.item())

# Predict for an unseen input.
x_test = torch.Tensor([[4.0]])
y_test = model(x_test)
print('y_pred = ',y_test.data)

5、逻辑斯蒂回归

# 逻辑斯蒂回归

import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F

# 1. Prepare dataset: x = hours studied, y = pass (1) / fail (0).
x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[0], [0], [1]])

# 2. Define the model: a linear layer followed by a sigmoid that maps the
# raw score to a probability in (0, 1).
class LogisticRegressionModel(torch.nn.Module):
    """Single-feature logistic regression: P(y=1|x) = sigmoid(w*x + b)."""

    def __init__(self):
        super(LogisticRegressionModel, self).__init__()
        self.linear = torch.nn.Linear(1,1)

    def forward(self,x):
        # torch.sigmoid replaces the deprecated F.sigmoid.
        y_pred = torch.sigmoid(self.linear(x))
        return y_pred
model = LogisticRegressionModel()

# 3. Loss and optimizer.
# BCELoss averages over elements by default; size_average=False sums instead.
# NOTE(review): size_average is deprecated; reduction='sum' is the modern spelling.
criterion = torch.nn.BCELoss(size_average=False)
optimizer = torch.optim.SGD(model.parameters(),lr=0.01)

# 4. Training cycle.
for epoch in range(1000):
    y_pred = model(x_data)
    loss = criterion(y_pred,y_data)
    print('progress:', epoch, loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Plot P(pass) over 0..10 study hours.
x = np.linspace(0,10,200)   # 200 evenly spaced points in [0, 10]
x_t = torch.Tensor(x).view((200,1))   # reshape into a 200x1 matrix
y_t = model(x_t)
y = y_t.data.numpy()
plt.plot(x,y)
# plt.plot([0.10],[0.5,0.5],c='r')
plt.xlabel('Hours')
plt.ylabel('Probability of Pass')
plt.grid()   # show grid lines
plt.show()

6、处理多维特征的输入

# Multi-dimensional inputs: 8 features per sample.

import numpy as np
import torch
import matplotlib.pyplot as plt

xy = np.loadtxt('../dataset01/diabetes.csv.gz',delimiter=',',dtype=np.float32) # comma-separated values
x_data = torch.from_numpy(xy[:, :-1])   # all rows, every column except the last (the features)
print("input data.shape",x_data.shape)
y_data = torch.from_numpy(xy[:, [-1]])   # [-1] keeps the last column as a 2-D matrix (the labels)

class Model(torch.nn.Module):
    """8-feature binary classifier: three linear layers, each followed by a sigmoid."""

    def __init__(self):
        super(Model, self).__init__()
        # Shrink the feature dimension 8 -> 6 -> 4 -> 1.
        self.linear1 = torch.nn.Linear(8,6)
        self.linear2 = torch.nn.Linear(6,4)
        self.linear3 = torch.nn.Linear(4,1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self,x):
        # Chain the layers, applying sigmoid after each one; the final
        # sigmoid yields a probability in (0, 1).
        for layer in (self.linear1, self.linear2, self.linear3):
            x = self.sigmoid(layer(x))
        return x
model = Model()

criterion = torch.nn.BCELoss(reduction='mean') # average the loss over elements
optimizer = torch.optim.SGD(model.parameters(),lr=0.1)

epoch_list = []
loss_list = []
# Full-batch training: the whole dataset forms one batch per epoch.
for epoch in range(100):
    y_pred = model(x_data)
    loss = criterion(y_pred,y_data)
    print(epoch,loss.item())
    epoch_list.append(epoch)
    loss_list.append(loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


plt.plot(epoch_list,loss_list)
plt.ylabel('loss')
plt.xlabel('epoch')
plt.show()

7、加载数据集

import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# 加载数据集

# 1.prepare dataset
# init,getitem,len魔法函数。分别是加载数据集,获取数据索引,获取数据总量。
# NOTE(review): the class name is a typo for "Dataset"; kept for compatibility.
class DiabetesDatset(Dataset):
    """Diabetes CSV dataset: 8 feature columns followed by one label column.

    Implements the Dataset protocol: __init__ loads the file, __getitem__
    returns one (features, label) pair, __len__ returns the sample count.
    """

    def __init__(self,filepath):
        # Load the whole table once; float32 matches torch's default dtype.
        table = np.loadtxt(filepath,delimiter=',',dtype=np.float32)
        self.len = table.shape[0]                       # number of rows (samples)
        self.x_data = torch.from_numpy(table[:, :-1])   # every column but the last
        self.y_data = torch.from_numpy(table[:, [-1]])  # last column, kept 2-D

    def __getitem__(self, index):
        # One (features, label) sample for DataLoader indexing.
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        # Total number of samples.
        return self.len
dataset = DiabetesDatset('../dataset01/diabetes.csv.gz')
# Shuffle each epoch; num_workers=2 loads batches in parallel worker processes.
train_loader = DataLoader(dataset=dataset,batch_size=32,shuffle=True,num_workers=2)

# 2. Design the model using a class.
class Model(torch.nn.Module):
    """Binary classifier for 8-feature samples: 8 -> 6 -> 4 -> 1 with sigmoids."""

    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(8,6)
        self.linear2 = torch.nn.Linear(6,4)
        self.linear3 = torch.nn.Linear(4,1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self,x):
        # Sigmoid after every layer; the last one yields a probability.
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        return self.sigmoid(self.linear3(x))
model = Model()

# 3. Construct the loss and optimizer; reduction='mean' averages the loss.
criterion = torch.nn.BCELoss(reduction='mean')   # reduction = 'mean' returns the average loss
optimizer = torch.optim.SGD(model.parameters(),lr=0.01)

# 4. Training cycle: forward, backward, update.
# The __main__ guard matters because num_workers > 0 spawns worker processes.
if __name__ == '__main__':
    for epoch in range(100):
        for i,data in enumerate(train_loader,0): # data is one mini-batch of (x, y)
            inputs,labels = data   # inputs = feature batch, labels = target batch
            y_pred = model(inputs)
            loss = criterion(y_pred,labels)
            print(epoch,i,loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

8、多分类问题

# Multi-class classification on MNIST.

# softmax: exponentiation turns any score (even negative) positive, and the
# result is normalized so the class probabilities sum to 1.
# CrossEntropyLoss <==> LogSoftmax + NLLLoss

import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

batch_size = 64
# ToTensor converts the PIL image to a CxHxW float tensor scaled to [0, 1];
# Normalize then standardizes with the MNIST mean and std.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1307,),(0.3081,))])  # (mean,), (std,)

train_dataset = datasets.MNIST(root='../dataset02/mnist/',train=True,download=True,transform=transform)
train_loader = DataLoader(train_dataset,shuffle=True,batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset02/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class Net(torch.nn.Module):
    """Fully-connected MNIST classifier: 784 -> 512 -> 256 -> 128 -> 64 -> 10."""

    def __init__(self):
        super(Net, self).__init__()
        self.l1 = torch.nn.Linear(784,512)
        self.l2 = torch.nn.Linear(512,256)
        self.l3 = torch.nn.Linear(256,128)
        self.l4 = torch.nn.Linear(128,64)
        self.l5 = torch.nn.Linear(64,10)

    def forward(self,x):
        # Flatten images to (N, 784); -1 lets torch infer the batch size.
        x = x.view(-1,784)
        # ReLU after every hidden layer.
        for hidden in (self.l1, self.l2, self.l3, self.l4):
            x = F.relu(hidden(x))
        # No activation on the output: CrossEntropyLoss expects raw logits.
        return self.l5(x)
model = Net()

criterion = torch.nn.CrossEntropyLoss()   # cross-entropy on raw logits
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5) # momentum smooths the updates

def train(epoch):
    """Run one training epoch over train_loader, logging the mean loss every 300 batches."""
    running_loss = 0.0
    for batch_idx, batch in enumerate(train_loader, 0):
        # One batch: image tensors and their digit labels.
        inputs, target = batch
        optimizer.zero_grad()

        loss = criterion(model(inputs), target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % 300 == 299:  # report every 300 mini-batches
            print('[%d,%5d] loss:%.3f'%(epoch+1,batch_idx+1,running_loss/300))
            running_loss = 0.0

def kuang():
    """Report classification accuracy on the held-out test set."""
    correct = 0
    total = 0
    with torch.no_grad():  # inference only: no gradient bookkeeping
        for images, labels in test_loader:
            scores = model(images)
            # argmax over dim=1 (the class dimension) gives the predicted digit.
            _, predicted = torch.max(scores.data, dim=1)
            total += labels.size(0)  # labels is (N,); size(0) is the batch size
            correct += (predicted == labels).sum().item()  # elementwise compare: 1 if equal
    print('accuracy on test set: %d %% ' % (100 * correct / total))

if __name__ == '__main__':
    # Alternate one training epoch with one test-set evaluation, 10 times.
    for epoch in range(10):
        train(epoch)
        kuang()

9、卷积神经网络

# Convolutional neural network (basic).
# Each kernel has as many channels as the input; the number of kernels
# equals the number of output channels.

import torch
from  torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

batch_size = 64
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))])

train_dataset = datasets.MNIST(root='../dataset02/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset02/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class Kuang(torch.nn.Module):
    """Two-layer CNN for MNIST: conv(1->10) -> pool -> conv(10->20) -> pool -> fc."""

    def __init__(self):
        super(Kuang, self).__init__()
        self.conv1 = torch.nn.Conv2d(1,10,kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10,20,kernel_size=5)
        self.pooling = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(320,10)

    def forward(self,x):
        n = x.size(0)  # batch size
        x = F.relu(self.pooling(self.conv1(x)))  # (n,1,28,28) -> (n,10,12,12)
        x = F.relu(self.pooling(self.conv2(x)))  # -> (n,20,4,4)
        # Flatten to (n, 320) = 20 channels * 4 * 4 for the linear head.
        return self.fc(x.view(n, -1))
model = Kuang()
# Uncomment the two lines below to run on a GPU when available:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

def train(epoch):
    """One training epoch over train_loader; prints the running loss every 300 batches."""
    running_loss = 0.0
    for batch_idx, batch in enumerate(train_loader, 0):
        inputs, target = batch
        # inputs, target = inputs.to(device), target.to(device)  # GPU variant
        loss = criterion(model(inputs), target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % 300 == 299:  # every 300 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

def shen():
    """Measure CNN accuracy on the test set with gradients disabled."""
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            # images, labels = images.to(device), labels.to(device)  # GPU variant
            logits = model(images)
            _, predicted = torch.max(logits.data, dim=1)  # most likely class per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100 * correct / total))

if __name__ == '__main__':
    # Alternate training and evaluation for 10 epochs.
    for epoch in range(10):
        train(epoch)
        shen()
# Convolutional neural network 2: GoogLeNet-style Inception blocks.
# 1x1 convolutions change the channel count cheaply and sharply reduce the
# computation done by the following larger convolutions.
# The 4 branches are concatenated on dim=1 (channels): 24+16+24+24 = 88.
# The 1408 figure below can be found by printing x.shape after x.view(in_size, -1).

import torch
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

batch_size = 64
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))])

train_dataset = datasets.MNIST(root='../dataset02/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset02/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class InceptionA(nn.Module):
    """Inception block: four parallel branches concatenated on the channel axis.

    Output channels: 16 (1x1) + 24 (5x5) + 24 (3x3) + 24 (pool) = 88.
    """

    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        self.branch1x1 = nn.Conv2d(in_channels, 16, kernel_size=1)

        self.branch5x5_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2)

        self.branch3x3_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3x3_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1)

        self.branch_pool = nn.Conv2d(in_channels, 24, kernel_size=1)

    def forward(self, x):
        # Every branch preserves the spatial size (padding matches kernel size).
        b1 = self.branch1x1(x)
        b2 = self.branch5x5_2(self.branch5x5_1(x))
        b3 = self.branch3x3_3(self.branch3x3_2(self.branch3x3_1(x)))
        b4 = self.branch_pool(F.avg_pool2d(x, kernel_size=3, stride=1, padding=1))
        # Concatenate along dim=1, the channel axis of (b, c, h, w).
        return torch.cat([b1, b2, b3, b4], dim=1)


class Net(nn.Module):
    """MNIST classifier alternating plain convolutions with InceptionA blocks."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(88, 20, kernel_size=5)  # 88 channels come out of InceptionA

        self.incep1 = InceptionA(in_channels=10)  # matches conv1's output channels
        self.incep2 = InceptionA(in_channels=20)  # matches conv2's output channels

        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(1408, 10)  # 88 channels * 4 * 4 spatial = 1408

    def forward(self, x):
        n = x.size(0)  # batch size
        x = self.incep1(F.relu(self.mp(self.conv1(x))))
        x = self.incep2(F.relu(self.mp(self.conv2(x))))
        # Flatten for the linear classifier head.
        return self.fc(x.view(n, -1))

model = Net()

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)   # SGD with momentum


def train(epoch):
    """Run one epoch over train_loader and print the mean loss every 300 batches."""
    running_loss = 0.0
    for step, batch in enumerate(train_loader, 0):
        images, digits = batch
        loss = criterion(model(images), digits)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if step % 300 == 299:  # every 300 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, step + 1, running_loss / 300))
            running_loss = 0.0

def kuang():
    """Report classification accuracy on the test set (no gradients)."""
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            logits = model(images)
            _, predicted = torch.max(logits.data, dim=1)  # most likely class per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100 * correct / total))


if __name__ == '__main__':
    # Alternate training and evaluation for 10 epochs.
    for epoch in range(10):
        train(epoch)
        kuang()
# Convolutional neural network 3: residual connections (ResNet-style).
# Problem addressed: vanishing gradients in deep networks.
# Skip connection: H(x) = F(x) + x. The two tensors must have the same shape,
# the activation is applied after the addition, and no pooling happens inside
# the block (pooling would change the tensor's shape).

import torch
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

batch_size = 64
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))])

train_dataset = datasets.MNIST(root='../dataset02/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset02/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class ResidualBlock(nn.Module):
    """Residual unit: relu(x + conv2(relu(conv1(x)))); channel count unchanged."""

    def __init__(self, channels):
        super(ResidualBlock, self).__init__()
        self.channels = channels
        # 3x3 convs with padding=1 keep the spatial size, so x + y is well defined.
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)

    def forward(self, x):
        residual = self.conv2(F.relu(self.conv1(x)))
        # Add the skip connection first, then activate.
        return F.relu(x + residual)

class Net(nn.Module):
    """MNIST CNN with a residual block after each conv + pool stage."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)

        self.rblock1 = ResidualBlock(16)  # matches conv1's output channels
        self.rblock2 = ResidualBlock(32)  # matches conv2's output channels

        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(512, 10)  # 32 channels * 4 * 4 spatial = 512

    def forward(self, x):
        n = x.size(0)  # batch size
        x = self.rblock1(self.mp(F.relu(self.conv1(x))))
        x = self.rblock2(self.mp(F.relu(self.conv2(x))))
        # Flatten for the classifier head.
        return self.fc(x.view(n, -1))

model = Net()

criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)   # SGD with momentum


def train(epoch):
    """One training epoch over train_loader; prints the running loss every 300 batches."""
    running_loss = 0.0
    for step, batch in enumerate(train_loader, 0):
        images, digits = batch
        loss = criterion(model(images), digits)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if step % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, step + 1, running_loss / 300))
            running_loss = 0.0

def kuang():
    """Evaluate test-set accuracy with gradient tracking disabled."""
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            logits = model(images)
            _, predicted = torch.max(logits.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100 * correct / total))


if __name__ == '__main__':
    # Alternate training and evaluation for 10 epochs.
    for epoch in range(10):
        train(epoch)
        kuang()

10、循环神经网络

# Recurrent neural network: stepping an RNNCell manually over a sequence.

import torch

batch_size = 1
seq_len = 3      # number of time steps
input_size = 4   # features per time step
hidden_size = 2  # hidden state size

cell = torch.nn.RNNCell(input_size=input_size,hidden_size=hidden_size)

# Input layout: (seq, batch, features).
dataset = torch.randn(seq_len,batch_size,input_size)   # one random sequence
hidden = torch.zeros(batch_size,hidden_size)   # initial hidden state h0

for idx,input in enumerate(dataset):
    print('=' * 20,idx,'=' * 20)
    print('Input size:',input.shape)

    # One step: new hidden state from current input and previous hidden state.
    hidden = cell(input,hidden)

    print('output size:',hidden.shape)
    print(hidden)
# Recurrent neural network 2: torch.nn.RNN over a whole sequence at once.

import torch

batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1   # stacked RNN layers

# num_layers: number of stacked RNN layers; each layer keeps its own hidden state.
# With batch_first=True the input layout would be (batch, seq, features) instead.
cell = torch.nn.RNN(input_size=input_size,hidden_size=hidden_size,num_layers=num_layers)

# (seqLen, batchSize, inputSize)
inputs = torch.randn(seq_len,batch_size,input_size)
hidden = torch.zeros(num_layers,batch_size,hidden_size)   # h0 for every layer

# out holds the hidden state at every step; hidden is the final state per layer.
out,hidden = cell(inputs,hidden)

print('Output size:',out.shape)
print('Output:',out)
print('Hidden size:',hidden.shape)
print('Hidden:',hidden)
# Recurrent neural network 3: an RNNCell trained to map "hello" -> "ohlol",
# using one-hot encoded characters and a manual loop over the time steps.
import torch

input_size = 4    # one-hot vector length (4 distinct characters)
hidden_size = 4
batch_size = 1

idx2char = ['e','h','l','o'] # index -> character lookup table
x_data = [1,0,2,3,3] # input indices; NOTE(review): these spell 'heloo' — 'hello' would be [1,0,2,2,3]
y_data = [3,1,2,3,2] # target indices, spelling 'ohlol'

# One-hot row for each index 0..3.
one_hot_lookup = [[1,0,0,0],
                  [0,1,0,0],
                  [0,0,1,0],
                  [0,0,0,1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]   # (seqLen, inputSize)
# Reshape the inputs to (seqLen, batchSize, inputSize).
inputs = torch.Tensor(x_one_hot).view(-1,batch_size,input_size)
# Reshape the labels to (seqLen, 1).
labels = torch.LongTensor(y_data).view(-1,1)
print(inputs.shape,labels.shape)   # torch.Size([5, 1, 4]) torch.Size([5, 1])

class Model(torch.nn.Module):
    """Character-level RNN built from a single RNNCell stepped externally."""

    def __init__(self,input_size,hidden_size,batch_size):
        super(Model, self).__init__()
        # batch_size is only needed to shape the initial hidden state.
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(input_size=self.input_size,hidden_size=self.hidden_size)

    def forward(self,inputs,hidden):
        # One time step: combine the current input with the previous hidden state.
        return self.rnncell(inputs,hidden)

    def init_hidden(self):
        # All-zero h0 of shape (batch_size, hidden_size).
        return torch.zeros(self.batch_size,self.hidden_size)
net = Model(input_size,hidden_size,batch_size)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(),lr=0.1)   # Adam optimizer

for epoch in range(15):
    loss = 0
    optimizer.zero_grad()
    hidden = net.init_hidden()   # fresh zero h0 at the start of each epoch
    print('Predicted string:', end='')
    # inputs: (seqLen, batchSize, inputSize) -> each input: (batchSize, inputSize)
    # labels: (seqLen, 1)                   -> each label: (1,)
    for input,label in zip(inputs,labels):
        hidden = net(input,hidden)
        # hidden is (1, 4) and label is (1,), matching CrossEntropyLoss's contract.
        loss += criterion(hidden,label)  # accumulate the graph; the sum is the sequence loss
        _,idx = hidden.max(dim = 1)  # index of the most likely character
        print(idx2char[idx.item()],end='')

    loss.backward()
    optimizer.step()
    print(', Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))
# Recurrent neural network 4: torch.nn.RNN trained to map "hello" -> "ohlol".

import torch

input_size = 4
hidden_size = 4
num_layers = 1
batch_size = 1
seq_len = 5

idx2char = ['e', 'h', 'l', 'o']
x_data = [1, 0, 2, 2, 3]  # hello
y_data = [3, 1, 2, 3, 2]  # ohlol

one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]  # one-hot rows for indices 0..3
x_one_hot = [one_hot_lookup[x] for x in x_data]  # one-hot encode the sequence
print('x_one_hot:', x_one_hot)

# Build the input and label tensors.
inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size)
labels = torch.LongTensor(y_data)  # shape (seqLen * batch_size,)

class Model(torch.nn.Module):
    """Character-level RNN using torch.nn.RNN over the whole sequence at once."""

    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        super(Model, self).__init__()
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(input_size=self.input_size,
                                hidden_size=self.hidden_size,
                                num_layers=self.num_layers)

    def forward(self, input):
        # Zero h0 of shape (num_layers, batch, hidden); RNN returns all time steps.
        h0 = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
        out, _ = self.rnn(input, h0)
        # Flatten to (seq_len * batch, hidden_size) so CrossEntropyLoss can consume it.
        return out.view(-1, self.hidden_size)
net = Model(input_size, hidden_size, batch_size, num_layers)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

for epoch in range(15):
    optimizer.zero_grad()
    # inputs: (seqLen, batch, input_size); labels: (seqLen * batch,)
    # outputs: (seqLen * batch, hidden_size)
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()   # predicted character index per step
    print('Predicted: ', ''.join([idx2char[x] for x in idx]), end='')
    # NOTE(review): the label says /20 but the loop runs 15 epochs.
    print(',Epoch [%d/20] loss=%.3f' % (epoch + 1, loss.item()))
# Recurrent neural network 5: Embedding lookup instead of one-hot input.
# An embedding maps sparse high-dimensional tokens into a dense,
# lower-dimensional space (dimensionality reduction of the input).

import torch

input_size = 4       # vocabulary size
num_class = 4        # number of output classes
hidden_size = 8
embedding_size = 10  # dense vector length per token
batch_size = 1
num_layers = 2
seq_len = 5

idx2char_1 = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]]
y_data = [3, 1, 2, 2, 3]

# inputs shape: (batchSize, seqLen) of token indices.
inputs = torch.LongTensor(x_data)
# labels shape: (batchSize * seqLen,).
labels = torch.LongTensor(y_data)

class Model(torch.nn.Module):
    """Embedding -> multi-layer RNN (batch_first) -> per-step linear classifier.

    Relies on the module-level constants input_size, embedding_size,
    hidden_size, num_layers and num_class.
    """

    def __init__(self):
        super(Model, self).__init__()
        # Lookup table of shape (input_size, embedding_size).
        self.emb = torch.nn.Embedding(input_size, embedding_size)
        # batch_first=True: input (batch, seq, emb), output (batch, seq, hidden).
        self.rnn = torch.nn.RNN(input_size=embedding_size,
                                hidden_size=hidden_size,
                                num_layers=num_layers,
                                batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, num_class)  # hidden -> class scores

    def forward(self, x):
        h0 = torch.zeros(num_layers, x.size(0), hidden_size)
        dense = self.emb(x)  # long indices -> dense embedding vectors
        out, _ = self.rnn(dense, h0)
        scores = self.fc(out)
        # Flatten to (batch * seq, num_class) for CrossEntropyLoss.
        return scores.view(-1, num_class)
net = Model()

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)

    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()   # predicted class per position
    print('Predicted string: ', ''.join([idx2char_1[x] for x in idx]), end='')
    print(", Epoch [%d/15] loss = %.3f" % (epoch + 1, loss.item()))
# Recurrent neural network 6 (advanced): classify a name by country.

# Task summary (English translation of the docstring below): names are mapped
# character by character to ASCII codes and zero-padded to a common length;
# there are 18 country classes indexed 0-17. In both CSV files, column 1 is
# the name and column 2 the country.
'''
根据名字识别他所在的国家.
人名字符长短不一,最长的10个字符,所以处理成10维输入张量,都是英文字母刚好可以映射到ASCII码上.
Maclean ->  ['M', 'a', 'c', 'l', 'e', 'a', 'n'] ->  [ 77 97 99 108 101 97 110]  ->  [ 77 97 99 108 101 97 110 0 0 0]
共有18个国家,设置索引为0-17.
训练集和测试集的表格文件都是第一列人名,第二列国家.
'''

import torch
import time
import csv
import gzip
from torch.utils.data import DataLoader
import datetime
import matplotlib.pyplot as plt
import numpy as np

HIDDEN_SIZE = 100   # GRU hidden state dimension
BATCH_SIZE = 256
N_LAYER = 2   # number of stacked GRU layers
N_EPOCHS = 100   # training epochs
N_CHARS = 128   # ASCII alphabet size (embedding vocabulary)
USE_GPU = False   # move tensors to CUDA when True

# 处理数据集
class NameDataset():
    def __init__(self, is_train_set=True):
        filename = '../dataset01/names_train.csv.gz' if is_train_set else '../dataset01/names_test.csv.gz'
        with gzip.open(filename, 'rt') as f:  # 打开压缩文件并将变量名设为为f
            reader = csv.reader(f)  # 读取表格文件
            rows = list(reader)   # 一个元组
        self.names = [row[0] for row in rows]  # 取出人名
        self.len = len(self.names)  # 人名数量
        self.countries = [row[1] for row in rows]  # 取出国家名
        # countrys是所有国家名,set(countrys)把所有国家明元素设为集合(去除重复项),sorted()函数是将集合排序。
        self.country_list = list(sorted(set(self.countries)))  # 国家名集合,18个国家名的集合。
        self.country_dict = self.getCountryDict()  # 转变成词典
        self.country_num = len(self.country_list)  # 得到国家集合的长度18

    def __getitem__(self, index):   # 返回名字,和国家的索引。
        return self.names[index], self.country_dict[self.countries[index]]

    def __len__(self):
        return self.len

    def getCountryDict(self):
        country_dict = dict()  # 创建空字典
        for idx, country_name in enumerate(self.country_list, 0):  # 取出序号和对应国家名
            country_dict[country_name] = idx  # 把对应的国家名和序号存入字典
        return country_dict

    def idx2country(self, index):  # 返回索引对应国家名
        return self.country_list(index)

    def getCountrysNum(self):  # 返回国家数量
        return self.country_num

trainset = NameDataset(is_train_set=True)
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
testset = NameDataset(is_train_set=False)
testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)

N_COUNTRY = trainset.getCountrysNum()  # number of classes = model output size

# Move a tensor to the GPU when USE_GPU is set; otherwise return it unchanged.
def create_tensor(tensor):
    if not USE_GPU:
        return tensor
    return tensor.to(torch.device("cuda:0"))

class RNNClassifier(torch.nn.Module):
    """Name -> country classifier: ASCII embedding, (bi)GRU, linear head.

    Typical sizes here: input_size=128 (ASCII), hidden_size=100, output_size=18.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = 2 if bidirectional else 1  # 2 when the GRU is bidirectional
        # Maps ASCII codes to hidden_size-dimensional embeddings.
        self.embedding = torch.nn.Embedding(input_size,hidden_size)
        # Embedded inputs already have hidden_size features, hence both args.
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers, bidirectional=bidirectional)
        # Both directions' final states get concatenated before the linear head,
        # which outputs one score per country.
        self.fc = torch.nn.Linear(hidden_size * self.n_directions, output_size)

    def forward(self, input, seq_lengths):
        # Transpose (batch, seq) -> (seq, batch) as embedding/GRU expect.
        input = input.t()
        hidden = self._init_hidden(input.size(1))
        embedded = self.embedding(input)

        # pack_padded_sequence needs CPU lengths (GPU tensors raise an error)
        # and lengths sorted in descending order; packing keeps the padded
        # zeros out of the GRU, which also speeds it up.
        packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, seq_lengths.cpu())
        _, hidden = self.gru(packed, hidden)

        # For a bidirectional GRU the last layer's two direction states are
        # hidden[-1] and hidden[-2]; concatenate them for the head.
        if self.n_directions == 2:
            summary = torch.cat([hidden[-1], hidden[-2]], dim=1)
        else:
            summary = hidden[-1]
        return self.fc(summary)

    def _init_hidden(self, batch_size):
        # Zero state of shape (n_layers * n_directions, batch, hidden).
        hidden = torch.zeros(self.n_layers * self.n_directions, batch_size, self.hidden_size)
        return create_tensor(hidden)
classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRY, N_LAYER)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

# Convert a name into its list of ASCII codes plus its length.
def name2list(name):
    codes = [ord(ch) for ch in name]
    return codes, len(codes)

# Turn a batch of (names, countries) into padded, length-sorted tensors.
def make_tensors(names, countries):
    """Convert a batch of names into tensors ready for the classifier.

    Returns a 3-tuple, each element wrapped by create_tensor (defined
    elsewhere in this file):
      - zero-padded LongTensor of character codes, shape (batch, max_len),
        rows sorted by descending name length;
      - the descending sequence lengths;
      - the country labels permuted to match the new row order.
    """
    pairs = [name2list(name) for name in names]
    sequences = [codes for codes, _ in pairs]
    seq_lengths = torch.LongTensor([length for _, length in pairs])
    countries = countries.long()

    # Zero-padded matrix: one row per name, width equal to the longest name.
    seq_tensor = torch.zeros(len(sequences), seq_lengths.max()).long()
    for row, (codes, length) in enumerate(zip(sequences, seq_lengths)):
        seq_tensor[row, :length] = torch.LongTensor(codes)

    # pack_padded_sequence requires descending lengths: sort, then apply the
    # same permutation to the padded names and the labels.
    seq_lengths, perm_idx = seq_lengths.sort(dim=0, descending=True)
    seq_tensor = seq_tensor[perm_idx]
    countries = countries[perm_idx]

    return create_tensor(seq_tensor), create_tensor(seq_lengths), create_tensor(countries)

def trainModel():
    """Run one training epoch over trainloader; return the summed batch losses.

    Relies on module-level globals: trainloader, trainset, BATCH_SIZE,
    classifier, criterion, optimizer.
    """
    total_loss = 0
    for i, (names, countries) in enumerate(trainloader, 1):
        optimizer.zero_grad()
        # Length-sorted, padded inputs plus matching labels.
        inputs, seq_lengths, target = make_tensors(names, countries)
        loss = criterion(classifier(inputs, seq_lengths), target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        # Report once per epoch, on the last full batch ("//" is floor division).
        if i == len(trainset) // BATCH_SIZE:
            print(f'loss={total_loss / (i * len(inputs))}')
    return total_loss

def testModel():
    """Evaluate the classifier on the test set.

    Prints the accuracy and returns it as a fraction in [0, 1].
    Relies on module-level globals: testloader, testset, classifier.
    """
    correct = 0
    total = len(testset)

    with torch.no_grad():  # inference only — no gradients needed
        for names, countries in testloader:
            inputs, seq_lengths, target = make_tensors(names, countries)
            scores = classifier(inputs, seq_lengths)
            # Index of the highest score per row is the predicted country.
            pred = scores.max(dim=1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

        percent = '%.2f' % (100 * correct / total)
        print(f'Test set: Accuracy {correct}/{total} {percent}%')
    return correct / total

# Train for N_EPOCHS epochs, evaluating test accuracy after each one.
acc_list = []
for epoch in range(1, N_EPOCHS + 1):
    print('%d / %d:' % (epoch, N_EPOCHS))
    trainModel()
    acc_list.append(testModel())

# Plot accuracy versus epoch.
epoch = np.arange(1, len(acc_list) + 1, 1)
acc_list = np.array(acc_list)

plt.plot(epoch, acc_list)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.grid()
plt.show()

# if __name__ == '__main__':
#     print("Train for %d epochs..." % N_EPOCHS)
#     start = time.time()
#     classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRY, N_LAYER)
#     if USE_GPU:
#         device = torch.device('cuda:0')
#         classifier.to(device)
#
#     criterion = torch.nn.CrossEntropyLoss()  # 计算损失
#     optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)  # 更新
#
#     acc_list = []
#     for epoch in range(1, N_EPOCHS + 1):
#         # 训练
#         print('%d / %d:' % (epoch, N_EPOCHS))
#         trainModel()
#         acc = testModel()
#         acc_list.append(acc)
#     end = time.time()
#     print(datetime.timedelta(seconds=(end - start) // 1))
#
#     epoch = np.arange(1, len(acc_list) + 1, 1)
#     acc_list = np.array(acc_list)
#     plt.plot(epoch, acc_list)
#     plt.xlabel('Epoch')
#     plt.ylabel('Accuracy')
#     plt.grid()
#     plt.show()
  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值