PyTorch in Practice


1. Linear Regression

# a tensor stores its own value and the gradient of the loss with respect to it
import torch

x_data = [1, 2, 3]
y_data = [2, 4, 6]
w = torch.Tensor([1.0])
w.requires_grad = True  # the gradient of the loss with respect to w is needed


# forward pass: compute the predicted y
def forward(x):
    return x * w


# loss function
def loss(x, y):
    y_prediction = forward(x)
    return (y_prediction - y) ** 2


for epoch in range(100):
    # quasi-stochastic gradient descent: one inner loop visits every sample in turn
    for x, y in zip(x_data, y_data):
        l = loss(x, y)
        l.backward()  # compute every gradient along the computation chain and store it in the tensors
        print("\tgrad:", x, y, w.grad.item())
        # after backward() the computation graph is released and rebuilt on the next forward pass
        w.data = w.data - 0.01 * w.grad.data
        # updating through .data does not build a computation graph
        w.grad.data.zero_()
    print("progress:", epoch, l.item())
    # l.item() extracts the scalar value of l (l.data is the corresponding detached tensor)
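
After training, w should have converged close to 2.0. A minimal check (a sketch that assumes the loop above has already run):

# verify the learned weight and predict for an unseen input
print("w =", w.item())               # expected to be close to 2.0
print("f(4) =", forward(4).item())   # expected to be close to 8.0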

2. Linear Regression with torch.nn

import torch

x_data = torch.Tensor([[1.0], [2.0], [3.0]])  # 3 rows, 1 column
y_data = torch.Tensor([[2.0], [4.0], [6.0]])  # 3 rows, 1 column


# define the model as a subclass of torch.nn.Module
class LinearModel(torch.nn.Module):
    def __init__(self):
        super(LinearModel, self).__init__()  # call the parent constructor
        self.linear = torch.nn.Linear(1, 1)  # one linear layer, single input and single output

    def forward(self, x):
        # how the forward pass is assembled
        y_prediction = self.linear(x)
        return y_prediction


model = LinearModel()
# sum the squared errors instead of averaging (still reduces to a single scalar)
# the loss takes y_hat and y as inputs
criterion = torch.nn.MSELoss(reduction='sum')  # size_average/reduce are deprecated in newer PyTorch
# optimizer: SGD is stochastic gradient descent; model.parameters() collects the parameters to optimize
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(1000):
    y_pred = model(x_data)  # y_hat
    loss = criterion(y_pred, y_data)  # compute the loss
    print(epoch, loss.item())
    optimizer.zero_grad()  # reset the gradients to zero
    loss.backward()  # backpropagation
    optimizer.step()  # update the parameters

print("w=", model.linear.weight.item())
print("b=", model.linear.bias.item())
x_test = torch.Tensor([4.0])
y_test = model(x_test)
print("y_pred=", y_test.data)

3. Binary Classification (Logistic Regression)

import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
x_data = torch.Tensor([[1.0], [2.0], [3.0]])  # 3 rows, 1 column
y_data = torch.Tensor([[0], [0], [1]])  # 3 rows, 1 column; binary labels 0/1


# define the model as a subclass of torch.nn.Module
class LogisticRegressionModel(torch.nn.Module):
    def __init__(self):
        super(LogisticRegressionModel, self).__init__()  # call the parent constructor
        self.linear = torch.nn.Linear(1, 1)  # one linear layer, single input and single output

    def forward(self, x):
        # how the forward pass is assembled
        y_prediction = torch.sigmoid(self.linear(x))  # apply sigmoid (torch.sigmoid; F.sigmoid is deprecated)
        return y_prediction


model = LogisticRegressionModel()
# sum instead of average (still reduces to a single scalar)
# the loss takes y_hat and y as inputs
criterion = torch.nn.BCELoss(reduction='sum')  # binary cross-entropy loss; size_average/reduce are deprecated
# optimizer: SGD is stochastic gradient descent; model.parameters() collects the parameters to optimize
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(1000):
    y_pred = model(x_data)  # y_hat
    loss = criterion(y_pred, y_data)  # compute the loss
    print(epoch, loss.item())
    optimizer.zero_grad()  # reset the gradients to zero
    loss.backward()  # backpropagation
    optimizer.step()  # update the parameters

x = np.linspace(0, 10, 200)
x_t = torch.Tensor(x).view((200, 1))  # 200 rows, 1 column
y_t = model(x_t)
y = y_t.data.numpy()
plt.plot(x, y)
plt.show()
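
The plotted curve crosses 0.5 at the decision boundary, which for a single input is x = -b/w. A quick check (a sketch assuming the model above has been trained):

w = model.linear.weight.item()
b = model.linear.bias.item()
print("decision boundary at x =", -b / w)  # inputs above this value are classified as 1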

4. Binary Classification with Multiple Input Features

import numpy as np
import torch

# load the data with numpy as float32; in general only expensive GPUs handle double well
xy = np.loadtxt("diabetes.csv", delimiter=",", dtype=np.float32)
# convert to torch tensors
x_data = torch.from_numpy(xy[:, :-1])
y_data = torch.from_numpy(xy[:, -1]).view((759, 1))


# define the model as a subclass of torch.nn.Module
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()  # call the parent constructor
        self.linear1 = torch.nn.Linear(8, 6)  # first linear layer
        self.linear2 = torch.nn.Linear(6, 4)  # second linear layer
        self.linear3 = torch.nn.Linear(4, 1)  # third linear layer
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x1):
        # how the forward pass is assembled
        x2 = self.sigmoid(self.linear1(x1))
        x3 = self.sigmoid(self.linear2(x2))
        y_prediction = self.sigmoid(self.linear3(x3))
        return y_prediction


model = Model()
# the loss takes y_hat and y as inputs
criterion = torch.nn.BCELoss(reduction='sum')  # binary cross-entropy, summed; the old arguments would trigger a warning
# optimizer: SGD is stochastic gradient descent; model.parameters() collects the parameters to optimize
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

for epoch in range(100):
    y_pred = model(x_data)  # y_hat
    loss = criterion(y_pred, y_data)  # compute the loss
    print(epoch, loss.item())
    optimizer.zero_grad()  # reset the gradients to zero
    loss.backward()  # backpropagation
    optimizer.step()  # update the parameters

5. Mini-batches: Dataset and DataLoader

Dataset    builds the dataset and supports access by index
DataLoader provides mini-batches for training

Epoch      one pass over all the data
Batch-Size number of samples used in one training step
Iteration  number of inner iterations per epoch (total samples / Batch-Size), as in the small sketch below
DataLoader needs the dataset to support indexing and to know its length
shuffle    whether to shuffle the data

To generate mini-batches with DataLoader,
the Dataset must satisfy at least two conditions:
it can be accessed by index, and its total length is known.
Each batch can then be taken with a for ... in loop.
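
For example, with the diabetes data used later (759 samples) and Batch-Size 32, one epoch takes 24 iterations (a small sketch, assuming those numbers):

import math
n_samples, batch_size = 759, 32
print(math.ceil(n_samples / batch_size))  # 24 mini-batches per epoch (the last one is smaller)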
Standard skeleton:
# the class DiabetesDataset inherits from the abstract class Dataset
class DiabetesDataset(Dataset):
    def __init__(self, filepath):
        pass

    # magic method: access an element by index
    def __getitem__(self, index):
        pass

    # magic method: number of data items
    def __len__(self):
        pass


# dataset is an instance of DiabetesDataset
dataset = DiabetesDataset("diabetes.csv")
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True, num_workers=2)


for epoch in range(100):
    for i, data in enumerate(train_loader, 0):
        pass

__init__     reads and preprocesses the data
__getitem__  takes an index and returns the item; when several values are returned, unpacking yields a tuple
__len__      total number of samples (how much data one epoch uses, split into several batches)
import torch
import numpy as np
from torch.utils.data import Dataset
# Dataset is an abstract class: it cannot be instantiated, only subclassed
from torch.utils.data import DataLoader


# DataLoader can be instantiated and handles the data
class DiabetesDataset(Dataset):
    def __init__(self, filepath):
        # load the data with numpy as float32; only high-end GPUs handle double well
        xy = np.loadtxt(filepath, delimiter=",", dtype=np.float32)  # n rows, 9 columns
        print(xy.shape)
        # number of rows (samples)
        self.len = xy.shape[0]
        # convert to torch tensors
        self.x_data = torch.from_numpy(xy[:, :-1])
        print(self.x_data.shape)  # 759, 8
        self.y_data = torch.from_numpy(xy[:, -1]).view((759, 1))
        print(self.y_data.shape)  # 759, 1

    # magic method: access an element by index
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    # magic method: number of data items
    def __len__(self):
        return self.len


# define the model as a subclass of torch.nn.Module
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()  # call the parent constructor
        self.linear1 = torch.nn.Linear(8, 6)  # first linear layer
        self.linear2 = torch.nn.Linear(6, 4)  # second linear layer
        self.linear3 = torch.nn.Linear(4, 1)  # third linear layer
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x1):
        # how the forward pass is assembled
        x2 = self.sigmoid(self.linear1(x1))
        x3 = self.sigmoid(self.linear2(x2))
        y_prediction = self.sigmoid(self.linear3(x3))
        return y_prediction


model = Model()
# the loss takes y_hat and y as inputs
criterion = torch.nn.BCELoss(reduction='mean')  # binary cross-entropy, averaged; size_average=True would trigger a warning
# optimizer: SGD is stochastic gradient descent; model.parameters() collects the parameters to optimize
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
dataset = DiabetesDataset("diabetes.csv")
# dataset, batch size, shuffle, number of worker threads
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True, num_workers=2)
# run over all the data 100 times
for epoch in range(100):
    # enumerate starting from 0
    for i, data in enumerate(train_loader, 0):  # the 0 only makes i start at 0 for printing; it does not change the batches
        print(type(train_loader))  # torch.utils.data.dataloader.DataLoader
        # each batch is read as tensors
        print(i)
        inputs, labels = data
        # print(inputs.shape)  # 32,8
        # print(labels.shape)  # 32,1
        y_pred = model(inputs)
        loss = criterion(y_pred, labels)
        print(epoch, i, loss.item())
        optimizer.zero_grad()  # reset the gradients to zero
        loss.backward()  # backpropagation
        optimizer.step()  # update the parameters
Special case: when __init__ stores the data in a dict whose values are nested lists and every value has the same length along the first dimension,
__getitem__ can pull out one slice of that first dimension at a time:
import torch.utils.data as data
import torch
class build_dataset(data.Dataset):
    def __init__(self):
        self.data = {
            'a' : [[1,2],[3,4],[5,6],[7,8],[9,10],[11,12],[13,14],[15,16],[17,18],[19,20],[21,22],[23,24]],
            'b' : [[24,25,26],[27,28,29],[30,31,32],[33,34,35],[36,37,38],[39,40,41],[42,43,44],[45,46,47],[48,49,50],[51,52,53],[54,55,56],[57,58,59]]
        }
    def __getitem__(self, index):
        ret = {
            'c':torch.FloatTensor(self.data['a'][index]),
            'd':torch.FloatTensor(self.data['b'][index])
        }
        return ret
    def __len__(self):
        return 12

def build_data_loader():
    data_loaders = data.DataLoader(dataset=build_dataset(),batch_size = 2)
    return data_loaders

train_gen = build_data_loader()
for batch_idx, dic in enumerate(train_gen):
    print(dic['c'])
    print(dic['d'])
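# Each batch is a dict of stacked tensors: dic['c'] has shape torch.Size([2, 2]) and dic['d'] has shape
# torch.Size([2, 3]); the default collate function stacks per-sample tensors along a new batch dimension of size 2.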

6. Multi-class Classification: Ten Digits (MNIST)

Key point: the input to a fully connected layer
must have shape (batch, input_dim);
torch.nn.Linear(input_dim, output_dim)
then produces an output of shape (batch, output_dim).
# with one-hot targets, cross entropy keeps only one term: the terms multiplied by 0 drop out
# e.g. y_hat = [0.38, 0.34, 0.28], y = [1, 0, 0]
# the result is -1*ln(0.38) - 0*ln(0.34) - 0*ln(0.28)
# the Negative Log Likelihood Loss takes the log of every entry of y_hat plus the class labels 0, 1, 2
# torch.nn.CrossEntropyLoss() adds the softmax and the log on top of that, so it is attached directly to the network output
# y must be a LongTensor

import numpy as np
import torch
from torchvision import transforms
# torchvision image preprocessing utilities
from torchvision import datasets
from torch.utils.data import DataLoader
# datasets and DataLoader handle the data as mini-batches
import torch.nn.functional as F
# ReLU activation for the fully connected layers
import torch.optim as optim

# optimizer

# # computing the loss by hand
# y = np.array([1, 0, 0])
# z = np.array([0.2, 0.1, -0.1])
# y_pred = np.exp(z) / np.exp(z).sum()
# loss = (-y*np.log(y_pred)).sum()  # picks out the single non-zero term
# print(loss)

# # using torch.nn.CrossEntropyLoss()
# y = torch.LongTensor([0])  # a batch of class indices
# z = torch.Tensor([[0.2, 0.1, -0.1]])  # a batch of score vectors
# criterion = torch.nn.CrossEntropyLoss()
# loss = criterion(z, y)
# print(loss)

# criterion = torch.nn.CrossEntropyLoss()
# Y = torch.LongTensor([2, 0, 1])
# Y_pred1 = torch.Tensor([[0.1, 0.2, 0.9], [1.1, 0.1, 0.2], [0.2, 2.1, 0.1]])
# Y_pred2 = torch.Tensor([[0.8, 0.2, 0.3], [0.2, 0.3, 0.5], [0.2, 0.2, 0.5]])
# l1 = criterion(Y_pred1, Y)
# l2 = criterion(Y_pred2, Y)
# print(l1)
# print(l2)
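
# # CrossEntropyLoss is softmax + log + NLLLoss, so the two formulations above should agree
# # (a hedged check, kept commented out like the examples above)
# y = torch.LongTensor([2, 0, 1])
# z = torch.Tensor([[0.1, 0.2, 0.9], [1.1, 0.1, 0.2], [0.2, 2.1, 0.1]])
# print(torch.nn.CrossEntropyLoss()(z, y))
# print(torch.nn.NLLLoss()(F.log_softmax(z, dim=1), y))  # prints the same value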

batch_size = 64
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
# ToTensor converts an image read as H*W*C into a C*H*W tensor; here 28*28 becomes 1*28*28
# Normalize uses the known dataset mean 0.1307 and std 0.3081; networks prefer inputs close to a zero-mean, unit-variance distribution
train_dataset = datasets.MNIST(root="../dataset/mnist/", train=True, download=True, transform=transform)
# root path, train or test split, download if not already present
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
# dataset, batch size, shuffle, number of worker threads
test_dataset = datasets.MNIST(root="../dataset/mnist/", train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


class Net(torch.nn.Module):
    # subclass of the neural network Module
    def __init__(self):
        # initialization
        # super(Net, self).__init__()
        super().__init__()
        self.l1 = torch.nn.Linear(784, 512)
        self.l2 = torch.nn.Linear(512, 256)
        self.l3 = torch.nn.Linear(256, 128)
        self.l4 = torch.nn.Linear(128, 64)
        self.l5 = torch.nn.Linear(64, 10)

    def forward(self, a):
        # assemble the layers and add nonlinearity: ReLU on the hidden layers, none on the output layer
        b = a.view(-1, 784)
        c = F.relu(self.l1(b))
        d = F.relu(self.l2(c))
        e = F.relu(self.l3(d))
        f = F.relu(self.l4(e))
        g = self.l5(f)
        return g


model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
# for larger data use an optimizer with momentum 0.5; momentum acts like inertia


def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print("[%d, %5d] loss: %.3f" % (epoch+1, batch_idx+1, running_loss/300))
            running_loss = 0.0


def test():
    correct = 0
    total = 0
    with torch.no_grad():
        # no computation graph is built here; test_loader also yields batches
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            # outputs has one row per sample with 10 scores; the largest score marks the predicted class
            _, predicted = torch.max(outputs.data, dim=1)
            # keep the index of the maximum of each row, searching along dimension 1 (the columns)
            total += labels.size(0)
            # labels has batch_size rows; size(0) returns that row count
            # labels holds the ground truth and predicted the model output, both digits 0-9
            correct += (predicted == labels).sum().item()
    print("Accuracy on test set: %d %%" % (100*correct/total))


if __name__ == "__main__":
    for epoch in range(10):
        train(epoch)
        test()

7. CNN Basics

import torch
from torchvision import transforms
# torchvision image preprocessing utilities
from torchvision import datasets
from torch.utils.data import DataLoader
# datasets and DataLoader handle the data as mini-batches
import torch.nn.functional as F
# ReLU activation for the fully connected layers
import torch.optim as optim


# # convolution
# in_channels, out_channels = 5, 10
# width, height = 100, 100
# kernel_size = 3
# batch_size = 1
# input = torch.randn(batch_size, in_channels, width, height)  # a mini-batch of 5-channel 100*100 inputs
# conv_layer = torch.nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size)  # the kernel is randomly initialized
# output = conv_layer(input)
# print(input.shape)
# print(output.shape)
# print(conv_layer.weight.shape)
# print(conv_layer)

# # padding
# input = [3, 4, 6, 5, 7, 2, 4, 6, 8, 2, 1, 6, 7, 8, 4, 9, 7, 4, 6, 2, 3, 7, 5, 4, 1]
# input = torch.Tensor(input).view(1, 1, 5, 5)  # B*C*H*W
# print(input)
# conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False)  # the kernel is randomly initialized (replaced below)
# kernel = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]).view(1, 1, 3, 3)
# conv_layer.weight.data = kernel.data
# output = conv_layer(input)
# print(output)

# # stride of 2
# input = [3, 4, 6, 5, 7, 2, 4, 6, 8, 2, 1, 6, 7, 8, 4, 9, 7, 4, 6, 2, 3, 7, 5, 4, 1]
# input = torch.Tensor(input).view(1, 1, 5, 5)  # B*C*H*W
# print(input)
# conv_layer = torch.nn.Conv2d(1, 1, kernel_size=3, stride=2, bias=False)  # the kernel is randomly initialized (replaced below)
# kernel = torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]).view(1, 1, 3, 3)
# conv_layer.weight.data = kernel.data
# output = conv_layer(input)
# print(output)

# # max pooling; the stride defaults to the kernel size
# image_input = [3, 4, 6, 5, 2, 4, 6, 8, 1, 6, 7, 8, 9, 7, 4, 6]
# image_input = torch.Tensor(image_input).view(1, 1, 4, 4)
# # max_pooling_layer = torch.nn.MaxPool2d(kernel_size=2)
# # output = max_pooling_layer(image_input)
# output = torch.max_pool2d(input=image_input, kernel_size=2)
# # bug UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change.
# # Please do not use them for anything important until they are released as stable.
# # (Triggered internally at  /pytorch/c10/core/TensorImpl.h:1156.)
# #   output = torch.max_pool2d(input=image_input, kernel_size=2)
# print(output)


batch_size = 64
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
# ToTensor converts an image read as H*W*C into a C*H*W tensor; here 28*28 becomes 1*28*28
# Normalize uses the known dataset mean 0.1307 and std 0.3081; networks prefer inputs close to a zero-mean, unit-variance distribution
train_dataset = datasets.MNIST(root="../dataset/mnist/", train=True, download=True, transform=transform)
# root path, train or test split, download if not already present
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
# dataset, batch size, shuffle, number of worker threads
test_dataset = datasets.MNIST(root="../dataset/mnist/", train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


# define the model as a subclass of torch.nn.Module
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()  # call the parent constructor
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)  # first convolutional layer
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)  # second convolutional layer
        self.pooling = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x1):
        # how the forward pass is assembled
        # input: batch*1*28*28
        x2 = F.relu(self.pooling(self.conv1(x1)))
        # after conv: batch*10*24*24, after pooling: batch*10*12*12
        x3 = F.relu(self.pooling(self.conv2(x2)))
        # after conv: batch*20*8*8, after pooling: batch*20*4*4
        batch_size_in = x1.size(0)
        # flatten to batch*320
        x4 = x3.view(batch_size_in, -1)
        y_prediction = self.fc(x4)
        return y_prediction


model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
# for larger data use an optimizer with momentum 0.5; momentum acts like inertia
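
# A quick sanity check of the shape arithmetic in the forward comments (a sketch, not part of the original script):
# dummy = torch.zeros(1, 1, 28, 28)          # one fake 28x28 image
# print(model(dummy).shape)                  # torch.Size([1, 10]); the flattened x4 has 20*4*4 = 320 features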


def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print("[%d, %5d] loss: %.3f" % (epoch+1, batch_idx+1, running_loss/300))
            running_loss = 0.0


def test():
    correct = 0
    total = 0
    with torch.no_grad():
        # no computation graph is built here; test_loader also yields batches
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            # outputs has one row per sample with 10 scores; the largest score marks the predicted class
            _, predicted = torch.max(outputs.data, dim=1)
            # keep the index of the maximum of each row, searching along dimension 1 (the columns)
            total += labels.size(0)
            # labels has batch_size rows; size(0) returns that row count
            # labels holds the ground truth and predicted the model output, both digits 0-9
            correct += (predicted == labels).sum().item()
    print("Accuracy on test set: %d %%" % (100*correct/total))


if __name__ == "__main__":
    for epoch in range(10):
        train(epoch)
        test()

8. CNN Advanced: Inception Blocks

import torch
from torchvision import transforms
# torchvision image preprocessing utilities
from torchvision import datasets
from torch.utils.data import DataLoader
# datasets and DataLoader handle the data as mini-batches
import torch.nn.functional as F
# ReLU activation for the fully connected layers
import torch.optim as optim


batch_size = 64
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
# ToTensor converts an image read as H*W*C into a C*H*W tensor; here 28*28 becomes 1*28*28
# Normalize uses the known dataset mean 0.1307 and std 0.3081; networks prefer inputs close to a zero-mean, unit-variance distribution
train_dataset = datasets.MNIST(root="../dataset/mnist/", train=True, download=True, transform=transform)
# root path, train or test split, download if not already present
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
# dataset, batch size, shuffle, number of worker threads
test_dataset = datasets.MNIST(root="../dataset/mnist/", train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


# the output has 24*3 + 16 = 88 channels
class InceptionA(torch.nn.Module):
    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        self.branch1x1_1 = torch.nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=1)

        self.branch5x5_1 = torch.nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=1)
        self.branch5x5_2 = torch.nn.Conv2d(in_channels=16, out_channels=24, kernel_size=5, padding=2)

        self.branch3x3_1 = torch.nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=1)
        self.branch3x3_2 = torch.nn.Conv2d(in_channels=16, out_channels=24, kernel_size=3, padding=1)
        self.branch3x3_3 = torch.nn.Conv2d(in_channels=24, out_channels=24, kernel_size=3, padding=1)

        self.branch_pool_1 = torch.nn.Conv2d(in_channels=in_channels, out_channels=24, kernel_size=1)

    def forward(self, x):
        branch_1x1 = self.branch1x1_1(x)

        branch_5x5_a = self.branch5x5_1(x)
        branch_5x5 = self.branch5x5_2(branch_5x5_a)

        branch_3x3_a = self.branch3x3_1(x)
        branch_3x3_b = self.branch3x3_2(branch_3x3_a)
        branch_3x3 = self.branch3x3_3(branch_3x3_b)

        branch_pool_a = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool_1(branch_pool_a)

        outputs = [branch_1x1, branch_5x5, branch_3x3, branch_pool]
        return torch.cat(outputs, dim=1)
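
# A quick sanity check of the 88-channel output noted above (a sketch, not part of the original script):
# block = InceptionA(in_channels=10)
# print(block(torch.zeros(1, 10, 12, 12)).shape)  # torch.Size([1, 88, 12, 12]); the spatial size is preserved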


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(88, 20, kernel_size=5)
        self.incep1 = InceptionA(in_channels=10)
        self.incep2 = InceptionA(in_channels=20)
        self.mp = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(1408, 10)
        # 88*4*4=1408
        # 28-4=24 24/2=12 12-4=8 8/2=4

    def forward(self, x):
        in_size = x.size(0)
        a = F.relu(self.mp(self.conv1(x)))
        b = self.incep1(a)
        c = F.relu(self.mp(self.conv2(b)))
        d = self.incep2(c)
        e = d.view(in_size, -1)
        # keep the batch dimension and flatten each sample's feature maps into one vector
        f = self.fc(e)
        return f


model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
# for larger data use an optimizer with momentum 0.5; momentum acts like inertia


def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print("[%d, %5d] loss: %.3f" % (epoch+1, batch_idx+1, running_loss/300))
            running_loss = 0.0


def test():
    correct = 0
    total = 0
    with torch.no_grad():
        # no computation graph is built here; test_loader also yields batches
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            # outputs has one row per sample with 10 scores; the largest score marks the predicted class
            _, predicted = torch.max(outputs.data, dim=1)
            # keep the index of the maximum of each row, searching along dimension 1 (the columns)
            total += labels.size(0)
            # labels has batch_size rows; size(0) returns that row count
            # labels holds the ground truth and predicted the model output, both digits 0-9
            correct += (predicted == labels).sum().item()
    print("Accuracy on test set: %d %%" % (100*correct/total))


if __name__ == "__main__":
    for epoch in range(10):
        train(epoch)
        test()

9. RNN Basics

# for sequential, connected data
# an RNN is essentially a linear layer that maps a vector into a space of a different dimension
# batchSize=1 seqLen=3 inputSize=4 hiddenSize=2
import torch

# batch_size = 1
# seq_len = 3  # sequence length 3
# input_size = 4  # x is a 4-dimensional vector
# hidden_size = 2  # the hidden state is a 2-dimensional vector
# num_layers = 1
#
# # loop over the sequence with RNNCell
# cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)
# dataset = torch.randn(seq_len, batch_size, input_size)
# hidden = torch.zeros(batch_size, hidden_size)
# # at every step the hidden input is the previous output; only one step is computed at a time
# for idx, input in enumerate(dataset):
#     print("="*20, idx, "="*20)
#     # unpacking along the outermost dimension splits the 3*1*4 tensor into three 1*4 tensors
#     print(input)
#     hidden = cell(input, hidden)
#     print("outputs size", hidden.shape)
#     print(hidden)


# # using RNN directly
# # inputs: (seqSize, batch, input_size)
# # hidden: (numLayers, batch, hidden_size)
# # every layer needs a hidden input, hence numLayers
# # out: (seqSize, batch, hidden_size)
# # hidden: (numLayers, batch, hidden_size)
# # out and input are both sequences; only the dimensions differ
# cell = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
# inputs = torch.randn(seq_len, batch_size, input_size)
# hidden = torch.zeros(num_layers, batch_size, hidden_size)
# out, hidden = cell(inputs, hidden)
# print("Output size:", out.shape)
# print("Output:", out)
# print("Hidden size", hidden.shape)
# print("Hidden:", hidden)


# learn the transformation from "hello" to "ohlol"
# dictionary: e=0 h=1 l=2 o=3
# hello -> 1 0 2 2 3, one-hot encoded as a 5*4 matrix: one row per letter, one row for each of the five letters
input_size = 4  # 4-dimensional input
hidden_size = 4  # 4-dimensional output
batch_size = 1
num_layers = 1
seq_len = 5
idx2char = ["e", "h", "l", "o"]
x_data = [1, 0, 2, 2, 3]
y_data = [3, 1, 2, 3, 2]
one_hot_lookup = [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]
# lookup table for the conversion
x_one_hot = [one_hot_lookup[x] for x in x_data]
# convert every element of x into its one-hot vector
inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size)  # 5*1*4
labels = torch.LongTensor(y_data).view(seq_len, 1)  # 5*1; must be a LongTensor


# # implementation with RNNCell
# class Model(torch.nn.Module):
#     def __init__(self, input_size, hidden_size, batch_size):
#         super(Model, self).__init__()
#         self.batch_size = batch_size
#         self.hidden_size = hidden_size
#         self.input_size = input_size
#         self.rnncell = torch.nn.RNNCell(input_size=self.input_size, hidden_size=hidden_size)
#
#     def forward(self, input, hidden):
#         hidden = self.rnncell(input, hidden)
#         return hidden
#
#     def init_hidden(self):
#         return torch.zeros(self.batch_size, self.hidden_size)
#
#
# net = Model(input_size, hidden_size, batch_size)
# criterion = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(net.parameters(), lr=0.1)
#
#
# for epoch in range(15):
#     loss = 0
#     optimizer.zero_grad()
#     hidden = net.init_hidden()
#     print("Predicted string:", end=" ")
#     for input, label in zip(inputs, labels):
#         hidden = net(input, hidden)
#         loss += criterion(hidden, label)  # label is a class index, hidden a vector of scores
#         _, idx = hidden.max(dim=1)
#         print(idx2char[idx.item()], end=" ")
#     loss.backward()
#     optimizer.step()
#     print(",Epoch[%d/15] loss=%.4f" % (epoch+1, loss.item()))


# # implementation with RNN
# class Model(torch.nn.Module):
#     def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
#         super(Model, self).__init__()
#         self.num_layers = num_layers
#         self.batch_size = batch_size
#         self.hidden_size = hidden_size
#         self.input_size = input_size
#         self.rnn = torch.nn.RNN(input_size=self.input_size, hidden_size=hidden_size, num_layers=num_layers)
#
#     def forward(self, input):
#         hidden = torch.zeros(self.num_layers, self.batch_size, self.hidden_size)
#         out, _ = self.rnn(input, hidden)
#         return out.view(-1, self.hidden_size)  # originally (seqLen, batchSize, hiddenSize)
#
#
# net = Model(input_size, hidden_size, batch_size, num_layers)


# representation learning: embedding avoids the dimensional explosion of sparse one-hot vectors; the embedding layer takes LongTensor input
# every RNN output is fed through a linear layer, so the final output is seq*4
# Embedding arguments: the one-hot input dimension and the reduced (embedding) dimension
# an input of shape (seq, batch) produces an output of shape (seq, batch, embedding_dim)
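
# A minimal shape check of the embedding layer described above (a sketch, not part of the original script):
# emb = torch.nn.Embedding(4, 10)             # dictionary size 4, embedding dimension 10
# demo = torch.LongTensor([[1, 0, 2, 2, 3]])  # (batch, seq_len)
# print(emb(demo).shape)                      # torch.Size([1, 5, 10]) = (batch, seq_len, embedding_dim)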


num_class = 4  # 4 classes
input_size = 4  # input dimension 4
hidden_size = 8  # 8-dimensional output
embedding_size = 10  # map 4 dimensions to 10
num_layers = 2  # two stacked RNN layers
batch_size = 1  # one batch
seq_len = 5  # "hello" has five letters
inputs = torch.LongTensor(x_data).view(batch_size, seq_len)  # with batch_first the batch dimension comes first
labels = torch.LongTensor(y_data)


class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.emb = torch.nn.Embedding(input_size, embedding_size)  # embed 4-dimensional indices into 10 dimensions
        self.rnn = torch.nn.RNN(input_size=embedding_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        # batch_first=True puts the batch dimension first: (batch_size, seq_len, input_size)
        self.fc = torch.nn.Linear(hidden_size, num_class)

    def forward(self, x):
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)  # (num_layers, batch_size, hidden_size)
        x = self.emb(x)  # batch_size*seq_len*embedding_size
        x, _ = self.rnn(x, hidden)  # five 8-dimensional vectors
        x = self.fc(x)  # all five outputs: batch*seqLen*num_class
        return x.view(-1, num_class)  # reshape to (batch*seqLen, num_class)


net = Model()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)
for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()
    print("Predicted:", "".join([idx2char[x] for x in idx]), end="")
    print(",Epoch[%d/15] loss=%.4f" % (epoch+1, loss.item()))

10. RNN Advanced: Name Classification with a GRU

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import gzip
import csv
import time
import math

class NameDataset(Dataset):
    def __init__(self, is_train_set):
        filename = './names_train.csv.gz' if is_train_set else './names_test.csv.gz'
        with gzip.open(filename, 'rt') as f:  # 'r' means read-only from the start of the file, 't' means text mode
            reader = csv.reader(f)
            rows = list(reader)
        self.names = [row[0] for row in rows]  # read the names into a list
        self.len = len(self.names)  # number of names, 13374
        self.countries = [row[1] for row in rows]  # read the countries into a list, possibly with duplicates
        self.country_list = list(sorted(set(self.countries)))  # deduplicate, sort, store as a list
        self.country_dict = self.getCountryDict()  # dict with country names as keys and indices as values
        self.country_num = len(self.country_list)  # number of countries

    def __getitem__(self, index):  # indexing returns the name and the country index (the country comes back as a number)
        return self.names[index], self.country_dict[self.countries[index]]

    def __len__(self):
        return self.len

    def getCountryDict(self):
        country_dict = dict()
        for idx, country_name in enumerate(self.country_list, 0):
            country_dict[country_name] = idx
            # build the dict: country names are keys, indices are values
        return country_dict

    def idx2country(self, index):
        return self.country_list[index]  # map a country index back to its name

    def getCountriesNum(self):  # number of countries
        return self.country_num


HIDDEN_SIZE = 100  # hidden size 100
BATCH_SIZE = 256  # batch size 256
N_LAYER = 2  # two GRU layers
N_EPOCHS = 100  # 100 training epochs
N_CHARS = 128  # 128-dimensional character input (ASCII)

trainSet = NameDataset(is_train_set=True)
trainLoader = DataLoader(trainSet, batch_size=BATCH_SIZE, shuffle=True)
testSet = NameDataset(is_train_set=False)
testLoader = DataLoader(testSet, batch_size=BATCH_SIZE, shuffle=False)

N_COUNTRY = trainSet.getCountriesNum()


class RNNClassifier(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
        # one layer by default, but configurable
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size  # number of units per layer
        self.n_layers = n_layers  # number of stacked GRU layers
        self.n_directions = 2 if bidirectional else 1  # default True: bidirectional GRU

        # embedding layer: (seqLen, batchSize) --> (seqLen, batchSize, hidden_size)
        self.embedding = torch.nn.Embedding(input_size, hidden_size)  # from the input dimension to the embedding dimension
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers, bidirectional=bidirectional)
        # each cell concatenates its forward and backward hidden vectors; input and output sizes are both hidden_size, so the real output size is hidden_size*2
        self.fc = torch.nn.Linear(hidden_size * self.n_directions, output_size)

    def _init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers * self.n_directions, batch_size, self.hidden_size)
        # initial hidden state for both directions
        return hidden

    def forward(self, input, seq_lengths):
        # input shape: B x S -> S x B (batch * seq)
        input = input.t()  # transpose
        batch_size = input.size(1)
        hidden = self._init_hidden(batch_size)  # initial hidden state for both directions
        embedding = self.embedding(input)  # embed the input

        # pack them up
        gru_input = torch.nn.utils.rnn.pack_padded_sequence(embedding, seq_lengths)
        output, hidden = self.gru(gru_input, hidden)
        # hidden: 4*256*100
        # only the last two hidden states (256*100 each) are concatenated; the outputs of earlier layers are ignored
        if self.n_directions == 2:
            hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim=1)  # concatenated to 256*200
        else:
            hidden_cat = hidden[-1]
        fc_output = self.fc(hidden_cat)
        return fc_output


def name2list(name):
    arr = [ord(c) for c in name]
    return arr, len(arr)


def make_tensors(names, countries):
    sequences_and_lengths = [name2list(name) for name in names]  # map names to character codes; returns (code sequence, length)
    name_sequences = [s1[0] for s1 in sequences_and_lengths]  # the code sequences
    seq_lengths = torch.LongTensor([s1[1] for s1 in sequences_and_lengths])  # the lengths
    countries = countries.long()  # convert to LongTensor

    # make tensor of name, BatchSize * seqLen
    # padding: first create an all-zero tensor, then copy each name into the front of its row
    seq_tensor = torch.zeros(len(name_sequences), seq_lengths.max()).long()
    # print("seq_lengths.max:", seq_lengths.max())
    for idx, (seq, seq_len) in enumerate(zip(name_sequences, seq_lengths), 0):
        seq_tensor[idx, :seq_len] = torch.LongTensor(seq)

    # sort by length to use pack_padded_sequence
    # sort the lengths in descending order; perm_idx holds the indices of the sorted entries in the original tensor
    seq_lengths, perm_idx = seq_lengths.sort(dim=0, descending=True)
    # indexing a tensor with an index tensor, much like list slicing, reorders its rows, which effectively sorts it
    seq_tensor = seq_tensor[perm_idx]
    countries = countries[perm_idx]

    # return the sorted name tensor, the sorted length tensor, and the sorted country tensor
    return seq_tensor, seq_lengths, countries


classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRY, N_LAYER)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)


def trainModel():
    def time_since(since):
        s = time.time() - since
        m = math.floor(s / 60)
        s -= m * 60
        return '%dm %ds' % (m, s)

    total_loss = 0
    for i, (names, countries) in enumerate(trainLoader, 1):
        # enumerate starting from i=1
        # print(type(names), type(countries))
        # print(len(names), countries.shape)
        # each iteration yields 256 names and 256 country indices

        inputs, seq_lengths, target = make_tensors(names, countries)
        # the sorted name tensor, the corresponding length tensor, and the country labels
        output = classifier(inputs, seq_lengths)
        # print("Shape:", output.shape, target.shape)
        # note the shapes of output and target: torch.Size([256, 18]) and torch.Size([256])
        loss = criterion(output, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        if i % 10 == 0:
            print(f'[{time_since(start)}] Epoch {epoch} ', end='')
            print(f'[{i * len(inputs)}/{len(trainSet)}] ', end='')
            print(f'loss={total_loss / (i * len(inputs))}')
    return total_loss


def testModel():
    correct = 0
    total = len(testSet)
    print("evaluating trained model ... ")
    with torch.no_grad():
        for i, (names, countries) in enumerate(testLoader):
            inputs, seq_lengths, target = make_tensors(names, countries)
            output = classifier(inputs, seq_lengths)
            # note keepdim=True so that pred lines up with target for the comparison below
            pred = output.max(dim=1, keepdim=True)[1]
            # note the use of view_as and eq
            correct += pred.eq(target.view_as(pred)).sum().item()

        percent = '%.2f' % (100 * correct / total)
        print(f'Test set: Accuracy {correct}/{total} {percent}%')

    return correct / total


N_EPOCHS = 50
start = time.time()
print("Training for %d epochs..." % N_EPOCHS)
acc_list = []
for epoch in range(1, N_EPOCHS + 1):
    # Train cycle
    trainModel()
    acc = testModel()
    acc_list.append(acc)

11. LSTM

import torch
import torch.nn as nn  # neural network module

# the input vector x has dimension 10, the LSTM hidden layer has dimension 20, and this model stacks 2 LSTM layers (a value of 1 could be omitted; the default is 1)
# input size, hidden size, number of LSTM layers
rnn = nn.LSTM(10, 20, 2)

# the input has sequence length seq_len=5 and mini-batch size batch_size=3, with data dimension 10 (the dimension of x); each call processes 3 sentences of 5 tokens, every token a 10-dimensional vector
# sequence length, batch_size, input size
input = torch.randn(5, 3, 10)


# the initial hidden state and cell state; their dimensions are usually the same
# 2 LSTM layers, batch_size=3, hidden size 20
h0 = torch.randn(2, 3, 20)
c0 = torch.randn(2, 3, 20)

# with 2 LSTM layers, output holds the last layer's hidden state for every token; its size depends on the sequence length, not on the number of layers
# hn, cn are the final hidden state and cell state of every layer

output, (hn, cn) = rnn(input, (h0, c0))

# the model has three inputs and three outputs, as described above

print(output.size(), hn.size(), cn.size())
# prints: torch.Size([5, 3, 20]) torch.Size([2, 3, 20]) torch.Size([2, 3, 20])
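
# With batch_first=True the same LSTM takes input of shape (batch, seq_len, input_size)
# (a minimal sketch for comparison; h0 and c0 default to zeros when omitted):
rnn_bf = nn.LSTM(10, 20, 2, batch_first=True)
out_bf, (hn_bf, cn_bf) = rnn_bf(torch.randn(3, 5, 10))  # 3 sentences of 5 tokens, each 10-dimensional
print(out_bf.size(), hn_bf.size(), cn_bf.size())
# torch.Size([3, 5, 20]) torch.Size([2, 3, 20]) torch.Size([2, 3, 20])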

12. Bidirectional GRU

import torch
from torch import nn
batch = 4
input_size = 32
hidden_size = 128
G = nn.GRUCell(input_size=input_size,hidden_size=hidden_size)
inputs = torch.ones([batch,input_size])
hidden = torch.ones([batch,hidden_size])
print(inputs.shape)  # [batch,input_size]
print(hidden.shape)  # [batch,hidden_size]
output = G(input=inputs,hx=hidden)
print(output.shape)  # [batch,hidden_size]
import torch
from torch import nn
batch = 10
seq_len = 32
input_size = 64
hidden_size = 256
num_layers = 4
G = nn.GRU(input_size=input_size,hidden_size=hidden_size,num_layers=num_layers,bidirectional=True,batch_first=True)
inputs = torch.ones(batch, seq_len, input_size)  # input sequence [batch, seq_len, input_size]
hid = torch.ones(num_layers*2, batch, hidden_size)  # [num_layers*2, batch, hidden_size]
pri = G(input=inputs, hx=hid)
print(pri[0].shape)  # output sequence (both directions concatenated): torch.Size([10, 32, 512]) = [batch, seq_len, hidden_size*2]
print(pri[1].shape)  # torch.Size([8, 10, 256]) = [num_layers*2, batch, hidden_size]


13. Saving and Loading a Network

model = LogisticRegressionModel()
# ... train the model ...
net3 = LogisticRegressionModel()
torch.save(model, 'net.pkl')  # save the entire network
# keep only the parameters, not the computation graph
torch.save(model.state_dict(), 'nek_params.pkl')  # save the parameters only
net2 = torch.load('net.pkl')
net3.load_state_dict(torch.load('nek_params.pkl'))
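
# A minimal inference sketch after loading (assumes the trained LogisticRegressionModel from section 3):
net3.eval()  # switch to evaluation mode before inference
with torch.no_grad():
    print(net3(torch.Tensor([[4.0]])))  # predicted probability of class 1 for input 4.0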