3. 手势识别(LeNet、Vgg16、ResNet50)

Show me your code

1. 模型 models.py(后文统一通过 `from models import ...` 导入)

import torch
from torch import nn


class ConvBlock(nn.Module):
    """
        Basic convolution unit, applied in this order:
            - Conv2d
            - BatchNorm2d
            - ReLU

        Args:
            in_channels: number of channels of the input feature map.
            out_channels: number of channels produced by the convolution.
            kernel_size, stride, padding: forwarded unchanged to ``nn.Conv2d``.
    """
    def __init__(self, in_channels, out_channels,
                 kernel_size=3, stride=1, padding=1):
        super().__init__()
        # Gather the geometry once so the Conv2d call stays on one line.
        conv_kwargs = dict(kernel_size=kernel_size, stride=stride, padding=padding)
        self.conv = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        # conv -> batch norm -> activation
        return self.relu(self.bn(self.conv(x)))

class LeNet(nn.Module):
    """
        LeNet-style classifier: two (ConvBlock 5x5 -> MaxPool 2x2) stages
        followed by three fully connected layers.

        The first linear layer takes 400 features (16 channels x 5 x 5), which
        is what the feature extractor produces for 3 x 32 x 32 inputs.

        Args:
            num_classes: size of the final output layer.
    """
    def __init__(self, num_classes=10):
        super().__init__()

        # 1) feature extraction
        conv1 = ConvBlock(in_channels=3, out_channels=6,
                          kernel_size=5, stride=1, padding=0)
        pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        conv2 = ConvBlock(in_channels=6, out_channels=16,
                          kernel_size=5, stride=1, padding=0)
        pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.feature_extractor = nn.Sequential(conv1, pool1, conv2, pool2)

        # 2) classification head: 400 -> 120 -> 84 -> num_classes
        head = [
            nn.Flatten(),
            nn.Linear(in_features=400, out_features=120),
            nn.ReLU(),
            nn.Linear(in_features=120, out_features=84),
            nn.ReLU(),
            nn.Linear(in_features=84, out_features=num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        # extract features, then classify
        features = self.feature_extractor(x)
        return self.classifier(features)
    
class Vgg16(nn.Module):
    """
        VGG16-style classifier assembled from ConvBlock units.

        Five stages of 3x3 ConvBlocks (2, 2, 3, 3, 3 blocks with 64, 128, 256,
        512, 512 output channels), each closed by a 2x2 max-pool, followed by
        three fully connected layers.

        The first linear layer takes 7 * 7 * 512 features, which is what the
        five stride-2 pools produce for 224 x 224 inputs.

        Args:
            n_classes: size of the final output layer.
    """
    def __init__(self, n_classes=1000):
        super().__init__()

        # (output channels, number of conv blocks) for stage 1 .. stage 5
        stage_plan = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

        # 1) feature extraction: layers are created in the same order as a
        #    hand-written list, so Sequential indices are unchanged.
        layers = []
        channels = 3
        for width, n_convs in stage_plan:
            for _ in range(n_convs):
                layers.append(ConvBlock(in_channels=channels,
                                        out_channels=width,
                                        kernel_size=3,
                                        stride=1,
                                        padding=1))
                channels = width
            # every stage ends with a pooling layer
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2, padding=0))
        self.feature_extractor = nn.Sequential(*layers)

        # 2) classification head
        head = [
            nn.Flatten(),
            nn.Linear(in_features=7 * 7 * 512, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Linear(in_features=4096, out_features=n_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        # extract features, then classify
        features = self.feature_extractor(x)
        return self.classifier(features)


"""
    ResBlock
"""

class ResConvBlock(nn.Module):
    """
        Bottleneck residual block with a projection shortcut (the "dashed"
        block that opens each repeated group of blocks):

            y = ReLU(F(x) + Conv(x))

        Args:
            in_channels: channels of the input feature map.
            out_channels: indexable of three ints, the output channels of the
                1x1, 3x3 and final 1x1 convolutions.
            stride: stride of the first 1x1 convolution and of the shortcut
                convolution.
    """
    def __init__(self, in_channels, out_channels, stride):
        super().__init__()
        c_reduce, c_mid, c_out = out_channels[0], out_channels[1], out_channels[2]

        # 1) main path: 1x1 (strided) -> 3x3 -> 1x1, each followed by BN;
        #    no ReLU after the last BN, the activation comes after the add.
        main_path = [
            nn.Conv2d(in_channels, c_reduce, kernel_size=1,
                      stride=stride, padding=0, bias=False),
            nn.BatchNorm2d(c_reduce),
            nn.ReLU(),
            nn.Conv2d(c_reduce, c_mid, kernel_size=3,
                      stride=1, padding=1, bias=False),
            nn.BatchNorm2d(c_mid),
            nn.ReLU(),
            nn.Conv2d(c_mid, c_out, kernel_size=1,
                      stride=1, padding=0, bias=False),
            nn.BatchNorm2d(c_out),
        ]
        self.stage = nn.Sequential(*main_path)

        # 2) shortcut: 1x1 convolution so channels and stride match the main path
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_channels, c_out, kernel_size=1,
                      stride=stride, padding=0, bias=False),
            nn.BatchNorm2d(c_out),
        )

        # 3) final activation
        self.relu = nn.ReLU()

    def forward(self, x):
        # shortcut branch first, then the main branch, then add + activate
        projected = self.shortcut(x)
        transformed = self.stage(x)
        return self.relu(transformed + projected)
    
class IdentityBlock(nn.Module):
    """
        Bottleneck residual block with an identity shortcut (the "solid" block):

            y = ReLU(F(x) + x)

        Args:
            in_channels: channels of the input feature map.
            out_channels: indexable of three ints, the output channels of the
                1x1, 3x3 and final 1x1 convolutions. The input is added to the
                main-path output without any projection.
    """
    def __init__(self, in_channels, out_channels):
        super().__init__()
        c_reduce, c_mid, c_out = out_channels[0], out_channels[1], out_channels[2]

        # main path: 1x1 -> 3x3 -> 1x1, all stride 1, each followed by BN
        main_path = [
            nn.Conv2d(in_channels, c_reduce, kernel_size=1,
                      stride=1, padding=0, bias=False),
            nn.BatchNorm2d(c_reduce),
            nn.ReLU(),
            nn.Conv2d(c_reduce, c_mid, kernel_size=3,
                      stride=1, padding=1, bias=False),
            nn.BatchNorm2d(c_mid),
            nn.ReLU(),
            nn.Conv2d(c_mid, c_out, kernel_size=1,
                      stride=1, padding=0, bias=False),
            nn.BatchNorm2d(c_out),
        ]
        self.stage = nn.Sequential(*main_path)

        self.relu = nn.ReLU()

    def forward(self, x):
        # add the untouched input to the main-path output, then activate
        residual = self.stage(x)
        return self.relu(x + residual)
    
class ResNet50(nn.Module):
    """
        Hand-written ResNet50.

        stage1 is a 7x7 stride-2 convolution + BN + ReLU + 3x3 max-pool.
        stage2..stage5 each start with one ResConvBlock followed by 2, 3, 5
        and 2 IdentityBlocks respectively. The result is average-pooled to
        1 x 1, flattened to 2048 features and fed to one linear layer.

        Args:
            n_classes: size of the final output layer.
    """
    def __init__(self, n_classes=1000):
        super(ResNet50, self).__init__()

        # stem
        self.stage1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )

        # residual stages: (input channels, bottleneck widths, stride, #identity blocks)
        self.stage2 = self._make_stage(64, (64, 64, 256), stride=1, n_identity=2)
        self.stage3 = self._make_stage(256, (128, 128, 512), stride=2, n_identity=3)
        self.stage4 = self._make_stage(512, (256, 256, 1024), stride=2, n_identity=5)
        self.stage5 = self._make_stage(1024, (512, 512, 2048), stride=2, n_identity=2)

        # head
        self.pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.fc = nn.Linear(in_features=2048, out_features=n_classes)

    @staticmethod
    def _make_stage(in_channels, out_channels, stride, n_identity):
        # One projection block, then n_identity identity blocks that keep
        # the stage's output channel count (out_channels[2]).
        blocks = [ResConvBlock(in_channels=in_channels,
                               out_channels=out_channels,
                               stride=stride)]
        for _ in range(n_identity):
            blocks.append(IdentityBlock(in_channels=out_channels[2],
                                        out_channels=out_channels))
        return nn.Sequential(*blocks)

    def forward(self, x):
        h = x
        for stage in (self.stage1, self.stage2, self.stage3,
                      self.stage4, self.stage5):
            h = stage(h)
        h = self.pool(h)
        # [N, 2048, 1, 1] -> [N, 2048]
        h = h.view(h.size(0), -1)
        return self.fc(h)

if __name__ == "__main__":
    # Smoke test: build LeNet and push one random batch through it.
    model = LeNet()
    print(model)
    # LeNet's first ConvBlock is built with in_channels=3, so the dummy batch
    # must be [N, 3, 32, 32]; a 1-channel tensor fails in the first Conv2d.
    x = torch.randn(1, 3, 32, 32)
    y = model(x)
    print(y.shape)

2. LeNet 实现手势识别(详细)

2.1 数据打包

"""
    1,数据打包
        - [N, C, H, W]
        - 读取原始图像信息(每个图像的路径及其类别)
        - 批量化打包(Dataset,Dataloader)
"""
import os
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import numpy as np
from PIL import Image
import torch
from torch import nn
import time
from matplotlib import pyplot as plt
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

# Collect basic image information: one (path, label) pair per image file.
# Each sub-directory of <root>/train and <root>/test is treated as one class,
# and the directory name is used as the label.
root = "gestures"

# Training set
train_root = os.path.join(root, 'train')
img_train = []
label_train = []
# os.listdir() returns entries in arbitrary order; sort for reproducibility.
for label in sorted(os.listdir(train_root)):
    label_root = os.path.join(train_root, label)
    # Skip stray files (e.g. .DS_Store): listing them would raise.
    if not os.path.isdir(label_root):
        continue
    for img in sorted(os.listdir(label_root)):
        img_path = os.path.join(label_root, img)
        img_train.append(img_path)
        label_train.append(label)

# Test set
test_root = os.path.join(root, 'test')
img_test = []
label_test = []
for label in sorted(os.listdir(test_root)):
    label_root = os.path.join(test_root, label)
    if not os.path.isdir(label_root):
        continue
    for img in sorted(os.listdir(label_root)):
        img_path = os.path.join(label_root, img)
        img_test.append(img_path)
        label_test.append(label)

# Label dictionaries, built from the training labels only.
# Sorted so that the label -> index mapping is stable between runs.
label_list = sorted(set(label_train))
label2idx = {label: idx for idx, label in enumerate(label_list)}
idx2label = {idx: label for label, idx in label2idx.items()}


class GesturesDataset(Dataset):
    """
        Dataset of gesture images addressed by file path.

        Args:
            X: sequence of image file paths.
            y: sequence of label names, aligned with ``X``.

        Each item is ``(img, label)``: ``img`` is a float32 tensor of shape
        [3, 32, 32] scaled to [-1, 1]; ``label`` is a long tensor holding the
        class index looked up in the module-level ``label2idx``.
    """
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        img_path = self.X[idx]
        img_label = self.y[idx]

        # 1) image
        # Open inside a context manager so the file handle is released, and
        # force RGB: grayscale / RGBA / palette files would otherwise yield a
        # 2-D or 4-channel array and break the permute or the 3-channel model.
        with Image.open(fp=img_path) as raw:
            img = raw.convert("RGB").resize((32, 32))
        img = np.array(img)
        # scale [0, 255] -> [0, 1] -> [-1, 1]
        img = img / 255
        img = (img - 0.5) / 0.5
        img = torch.tensor(data=img, dtype=torch.float32)
        # [H, W, C] -> [C, H, W]
        img = img.permute(dims=(2, 0, 1))

        # 2) label
        # Index directly so an unknown label raises a KeyError naming it,
        # instead of passing None on to torch.tensor().
        label = label2idx[img_label]
        label = torch.tensor(data=label, dtype=torch.long)

        return img, label

# Training loader: shuffled, batches of 8
train_dataset = GesturesDataset(X=img_train, y=label_train)
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# Test loader: fixed order, batches of 32
test_dataset = GesturesDataset(X=img_test, y=label_test)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

2.2 搭建模型

from models import LeNet
# The dataset resizes images to 32 x 32; ten output classes (LeNet's default)
model = LeNet(num_classes=10)

2.3 训练模型

# Training setup: device, model placement, hyper-parameters, loss, optimizer
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
model.to(device=device)
epochs, lr = 80, 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

# Accuracy computation
def get_acc(data_loader):
    """
        Return the accuracy of the global ``model`` over ``data_loader``.

        Correct predictions are counted per sample. Averaging per-batch
        accuracies would over-weight a smaller final batch.

        Args:
            data_loader: iterable of ``(X, y)`` batches.

        Returns:
            Fraction of correctly classified samples, rounded to 5 digits;
            0.0 when the loader yields no samples.
    """
    n_correct = 0
    n_total = 0
    # evaluation mode; gradients are not needed
    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            X = X.to(device=device)
            y = y.to(device=device)
            y_pred = model(X).argmax(dim=-1)
            n_correct += (y_pred == y).sum().item()
            n_total += y.numel()
    if n_total == 0:
        # empty loader: avoid a ZeroDivisionError
        return 0.0
    return round(number=n_correct / n_total, ndigits=5)

# Training loop
def train():
    """
        Train the global ``model`` for ``epochs`` epochs.

        Accuracy on the train and test loaders is measured once before
        training and once after every epoch. After each epoch the weights are
        written to "lenet_last.pt", and also to "lenet_best.pt" whenever the
        test accuracy exceeds the best value seen so far.

        Returns:
            (train_accs, test_accs): accuracy histories; index 0 is the
            pre-training measurement.
    """
    # baseline accuracy before any update
    acc_train = get_acc(data_loader=train_dataloader)
    acc_test = get_acc(data_loader=test_dataloader)
    train_accs = [acc_train]
    test_accs = [acc_test]
    best_test_acc = 0

    print(f"训练之前:train_acc: {acc_train},test_acc: {acc_test}")

    for epoch in range(epochs):
        # get_acc() leaves the model in eval mode, so switch back every epoch
        model.train()
        tic = time.time()
        for X, y in train_dataloader:
            X, y = X.to(device=device), y.to(device=device)
            # forward pass + loss
            loss = loss_fn(model(X), y)
            # backward pass, parameter update, then clear the gradients
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        toc = time.time()
        elapsed = round(number=toc - tic, ndigits=3)

        # evaluate after the epoch
        acc_train = get_acc(data_loader=train_dataloader)
        acc_test = get_acc(data_loader=test_dataloader)
        train_accs.append(acc_train)
        test_accs.append(acc_test)

        # checkpointing: best-so-far and most recent
        if acc_test > best_test_acc:
            best_test_acc = acc_test
            torch.save(obj=model.state_dict(), f="lenet_best.pt")
        torch.save(obj=model.state_dict(), f="lenet_last.pt")

        print(f"""当前是第 {epoch + 1} 轮:
                --> train_acc: {acc_train},
                --> test_acc: {acc_test},
                --> elapsed_time: {elapsed}秒""")
    return train_accs, test_accs


train_accs, test_accs = train()

当前是第 497 轮:
–> train_acc: 0.99457,
–> test_acc: 0.95536,
–> elapsed_time: 1.341秒
当前是第 498 轮:
–> train_acc: 0.99459,
–> test_acc: 0.95759,
–> elapsed_time: 1.347秒
当前是第 499 轮:
–> train_acc: 0.99339,
–> test_acc: 0.95759,
–> elapsed_time: 1.329秒
当前是第 500 轮:
–> train_acc: 0.99219,
–> test_acc: 0.95536,
–> elapsed_time: 1.308秒

2.4 结果分析

# Accuracy curves returned by train(); index 0 is the pre-training accuracy.
plt.plot(train_accs, label="train_acc")
# The test curve needs its own legend entry so the two lines can be told apart.
plt.plot(test_accs, label="test_acc")
plt.legend()
plt.grid()
plt.xlabel(xlabel='epoch')
plt.ylabel(ylabel="acc")
plt.title(label="LeNet Training Process")

在这里插入图片描述

2.5 推理过程

# Pick the inference device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build a fresh model and move it to the device
m1 = LeNet()
m1.to(device=device)
# Load the best checkpoint. strict=True makes a missing or unexpected key
# raise instead of silently leaving layers at their random initialisation.
m1.load_state_dict(state_dict=torch.load(f="lenet_best.pt", map_location=device),
                   strict=True)
# Inference only: put BatchNorm layers in evaluation mode.
m1.eval()

def infer(img_path):
    """
        Predict the gesture class of one image file with the global ``m1``.

        Args:
            img_path: path of the image file.

        Returns:
            The label name looked up in ``idx2label`` for the predicted index.

        Raises:
            FileNotFoundError: ``img_path`` does not exist.
            ValueError: no global ``m1`` of type LeNet is defined.
    """
    # 1) the file must exist
    if not os.path.exists(img_path):
        raise FileNotFoundError("文件没找到")

    # 2) a LeNet instance named m1 must exist among the module globals
    if "m1" not in globals() or not isinstance(globals()["m1"], LeNet):
        raise ValueError("m1模型不存在")

    # 3) read the image; force RGB so grayscale / RGBA files also give
    #    an [H, W, 3] array, and release the file handle afterwards
    with Image.open(fp=img_path) as raw:
        img = raw.convert("RGB").resize((32, 32))

    # 4) same preprocessing as the dataset: scale to [-1, 1]
    img = np.array(img)
    img = img / 255
    img = (img - 0.5) / 0.5

    # 5) to tensor, [H, W, C] -> [C, H, W], then add the batch dimension
    img = torch.tensor(data=img, dtype=torch.float32)
    img = img.permute(dims=(2, 0, 1))
    img = img.unsqueeze(dim=0)

    # 6) move to the inference device
    img = img.to(device=device)

    # 7) evaluation mode on the model that is actually used for prediction
    #    (m1, not the training-time ``model``)
    m1.eval()

    # 8) forward pass without gradients, then decode the prediction
    with torch.no_grad():
        y_pred = m1(img)
        y_pred = y_pred.argmax(dim=-1).item()

    return idx2label.get(y_pred)

‘G5’

3. Vgg16 手势识别

  • VGG 系列最深只做到 19 层;继续单纯堆叠层数时,梯度在反向传播中难以回传(梯度消失),网络反而更难训练。这个问题后来由 ResNet 的残差连接解决。

请添加图片描述

"""
    1,数据打包
        - [N, C, H, W]
        - 读取原始图像信息(每个图像的路径及其类别)
        - 批量化打包(Dataset,Dataloader)
"""
import os
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import numpy as np
from PIL import Image
import torch
from torch import nn
import time
from matplotlib import pyplot as plt
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

# 1) Collect basic image information: one (path, label) pair per image file.
# Each sub-directory of <root>/train and <root>/test is treated as one class,
# and the directory name is used as the label.
root = "gestures"

# Training set
train_root = os.path.join(root, 'train')
img_train = []
label_train = []
# os.listdir() returns entries in arbitrary order; sort for reproducibility.
for label in sorted(os.listdir(train_root)):
    label_root = os.path.join(train_root, label)
    # Skip stray files (e.g. .DS_Store): listing them would raise.
    if not os.path.isdir(label_root):
        continue
    for img in sorted(os.listdir(label_root)):
        img_path = os.path.join(label_root, img)
        img_train.append(img_path)
        label_train.append(label)

# Test set
test_root = os.path.join(root, 'test')
img_test = []
label_test = []
for label in sorted(os.listdir(test_root)):
    label_root = os.path.join(test_root, label)
    if not os.path.isdir(label_root):
        continue
    for img in sorted(os.listdir(label_root)):
        img_path = os.path.join(label_root, img)
        img_test.append(img_path)
        label_test.append(label)

# 2) Label dictionaries, built from the training labels only.
# Sorted so that the label -> index mapping is stable between runs.
label_list = sorted(set(label_train))
label2idx = {label: idx for idx, label in enumerate(label_list)}
idx2label = {idx: label for label, idx in label2idx.items()}

# 3) Custom dataset
class GesturesDataset(Dataset):
    """
        Dataset of gesture images addressed by file path.

        Args:
            X: sequence of image file paths.
            y: sequence of label names, aligned with ``X``.

        Each item is ``(img, label)``: ``img`` is a float32 tensor of shape
        [3, 224, 224] scaled to [-1, 1]; ``label`` is a long tensor holding
        the class index looked up in the module-level ``label2idx``.
    """
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        img_path = self.X[idx]
        img_label = self.y[idx]

        # 1) image
        # Open inside a context manager so the file handle is released, and
        # force RGB: grayscale / RGBA / palette files would otherwise yield a
        # 2-D or 4-channel array and break the permute or the 3-channel model.
        # Resize to 224 x 224.
        with Image.open(fp=img_path) as raw:
            img = raw.convert("RGB").resize((224, 224))
        img = np.array(img)
        # scale [0, 255] -> [0, 1] -> [-1, 1]
        img = img / 255
        img = (img - 0.5) / 0.5
        img = torch.tensor(data=img, dtype=torch.float32)
        # [H, W, C] -> [C, H, W]
        img = img.permute(dims=(2, 0, 1))

        # 2) label
        # Index directly so an unknown label raises a KeyError naming it,
        # instead of passing None on to torch.tensor().
        label = label2idx[img_label]
        label = torch.tensor(data=label, dtype=torch.long)

        return img, label

# Training loader: shuffled, batches of 12
train_dataset = GesturesDataset(X=img_train, y=label_train)
train_dataloader = DataLoader(train_dataset, batch_size=12, shuffle=True)

# Test loader: fixed order, batches of 12
test_dataset = GesturesDataset(X=img_test, y=label_test)
test_dataloader = DataLoader(test_dataset, batch_size=12, shuffle=False)
                              
from models import Vgg16
# The dataset resizes images to 224 x 224; ten output classes
model = Vgg16(n_classes=10)

# Training setup: device, model placement, hyper-parameters, loss, optimizer
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
model.to(device=device)
epochs, lr = 10, 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)


# Accuracy computation
def get_acc(data_loader):
    """
        Return the accuracy of the global ``model`` over ``data_loader``.

        Correct predictions are counted per sample. Averaging per-batch
        accuracies would over-weight a smaller final batch.

        Args:
            data_loader: iterable of ``(X, y)`` batches.

        Returns:
            Fraction of correctly classified samples, rounded to 5 digits;
            0.0 when the loader yields no samples.
    """
    n_correct = 0
    n_total = 0
    # evaluation mode; gradients are not needed
    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            X = X.to(device=device)
            y = y.to(device=device)
            y_pred = model(X).argmax(dim=-1)
            n_correct += (y_pred == y).sum().item()
            n_total += y.numel()
    if n_total == 0:
        # empty loader: avoid a ZeroDivisionError
        return 0.0
    return round(number=n_correct / n_total, ndigits=5)

# Training loop
def train():
    """
        Train the global ``model`` for ``epochs`` epochs.

        Accuracy on the train and test loaders is measured once before
        training and once after every epoch. After each epoch the weights are
        written to "vgg16_last.pt", and also to "vgg16_best.pt" whenever the
        test accuracy exceeds the best value seen so far.

        Returns:
            (train_accs, test_accs): accuracy histories; index 0 is the
            pre-training measurement.
    """
    # baseline accuracy before any update
    acc_train = get_acc(data_loader=train_dataloader)
    acc_test = get_acc(data_loader=test_dataloader)
    train_accs = [acc_train]
    test_accs = [acc_test]
    best_test_acc = 0

    print(f"训练之前:train_acc: {acc_train},test_acc: {acc_test}")

    for epoch in range(epochs):
        # get_acc() leaves the model in eval mode, so switch back every epoch
        model.train()
        tic = time.time()
        for X, y in train_dataloader:
            X, y = X.to(device=device), y.to(device=device)
            # forward pass + loss
            loss = loss_fn(model(X), y)
            # backward pass, parameter update, then clear the gradients
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        toc = time.time()
        elapsed = round(number=toc - tic, ndigits=3)

        # evaluate after the epoch
        acc_train = get_acc(data_loader=train_dataloader)
        acc_test = get_acc(data_loader=test_dataloader)
        train_accs.append(acc_train)
        test_accs.append(acc_test)

        # checkpointing: best-so-far and most recent
        if acc_test > best_test_acc:
            best_test_acc = acc_test
            torch.save(obj=model.state_dict(), f="vgg16_best.pt")
        torch.save(obj=model.state_dict(), f="vgg16_last.pt")

        print(f"""当前是第 {epoch + 1} 轮:
                --> train_acc: {acc_train},
                --> test_acc: {acc_test},
                --> elapsed_time: {elapsed}秒""")
    return train_accs, test_accs

train_accs, test_accs = train()

训练之前:train_acc: 0.09832,test_acc: 0.11765
当前是第 1 轮:
–> train_acc: 0.66367,
–> test_acc: 0.65686,
–> elapsed_time: 18.589秒
当前是第 2 轮:
–> train_acc: 0.78897,
–> test_acc: 0.75735,
–> elapsed_time: 18.167秒
当前是第 3 轮:
–> train_acc: 0.92206,
–> test_acc: 0.89461,
–> elapsed_time: 18.214秒
当前是第 4 轮:
–> train_acc: 0.96163,
–> test_acc: 0.92892,
–> elapsed_time: 18.928秒
当前是第 5 轮:
–> train_acc: 0.97782,
–> test_acc: 0.94363,
–> elapsed_time: 18.666秒
当前是第 6 轮:
–> train_acc: 0.97242,
–> test_acc: 0.93137,
–> elapsed_time: 18.443秒
当前是第 7 轮:
–> train_acc: 0.994,
–> test_acc: 0.95343,
–> elapsed_time: 18.449秒
当前是第 8 轮:
–> train_acc: 0.9982,
–> test_acc: 0.97059,
–> elapsed_time: 18.66秒
当前是第 9 轮:
–> train_acc: 0.9982,
–> test_acc: 0.94853,
–> elapsed_time: 18.459秒
当前是第 10 轮:
–> train_acc: 0.9994,
–> test_acc: 0.96569,
–> elapsed_time: 18.508秒

# Accuracy curves returned by train(); index 0 is the pre-training accuracy.
plt.plot(train_accs, label="train_acc")
# The test curve needs its own legend entry so the two lines can be told apart.
plt.plot(test_accs, label="test_acc")
plt.legend()
plt.grid()
plt.xlabel(xlabel='epoch')
plt.ylabel(ylabel="acc")
plt.title(label="Vgg16 Training Process")

在这里插入图片描述

4. ResNet50 手势识别

"""
    1,数据打包
        - [N, C, H, W]
        - 读取原始图像信息(每个图像的路径及其类别)
        - 批量化打包(Dataset,Dataloader)
"""
import os
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import numpy as np
from PIL import Image
import torch
from torch import nn
import time
from matplotlib import pyplot as plt
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

# 1) Collect basic image information: one (path, label) pair per image file.
# Each sub-directory of <root>/train and <root>/test is treated as one class,
# and the directory name is used as the label.
root = "gestures"

# Training set
train_root = os.path.join(root, 'train')
img_train = []
label_train = []
# os.listdir() returns entries in arbitrary order; sort for reproducibility.
for label in sorted(os.listdir(train_root)):
    label_root = os.path.join(train_root, label)
    # Skip stray files (e.g. .DS_Store): listing them would raise.
    if not os.path.isdir(label_root):
        continue
    for img in sorted(os.listdir(label_root)):
        img_path = os.path.join(label_root, img)
        img_train.append(img_path)
        label_train.append(label)

# Test set
test_root = os.path.join(root, 'test')
img_test = []
label_test = []
for label in sorted(os.listdir(test_root)):
    label_root = os.path.join(test_root, label)
    if not os.path.isdir(label_root):
        continue
    for img in sorted(os.listdir(label_root)):
        img_path = os.path.join(label_root, img)
        img_test.append(img_path)
        label_test.append(label)

# 2) Label dictionaries, built from the training labels only.
# Sorted so that the label -> index mapping is stable between runs.
label_list = sorted(set(label_train))
label2idx = {label: idx for idx, label in enumerate(label_list)}
idx2label = {idx: label for label, idx in label2idx.items()}


# 3) Custom dataset
class GesturesDataset(Dataset):
    """
        Dataset of gesture images addressed by file path.

        Args:
            X: sequence of image file paths.
            y: sequence of label names, aligned with ``X``.

        Each item is ``(img, label)``: ``img`` is a float32 tensor of shape
        [3, 224, 224] scaled to [-1, 1]; ``label`` is a long tensor holding
        the class index looked up in the module-level ``label2idx``.
    """
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        img_path = self.X[idx]
        img_label = self.y[idx]

        # 1) image
        # Open inside a context manager so the file handle is released, and
        # force RGB: grayscale / RGBA / palette files would otherwise yield a
        # 2-D or 4-channel array and break the permute or the 3-channel model.
        # Resize to 224 x 224.
        with Image.open(fp=img_path) as raw:
            img = raw.convert("RGB").resize((224, 224))
        img = np.array(img)
        # scale [0, 255] -> [0, 1] -> [-1, 1]
        img = img / 255
        img = (img - 0.5) / 0.5
        img = torch.tensor(data=img, dtype=torch.float32)
        # [H, W, C] -> [C, H, W]
        img = img.permute(dims=(2, 0, 1))

        # 2) label
        # Index directly so an unknown label raises a KeyError naming it,
        # instead of passing None on to torch.tensor().
        label = label2idx[img_label]
        label = torch.tensor(data=label, dtype=torch.long)

        return img, label


# Training loader: shuffled, batches of 16
train_dataset = GesturesDataset(X=img_train, y=label_train)
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
# Test loader: fixed order, batches of 16
test_dataset = GesturesDataset(X=img_test, y=label_test)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)

from models import ResNet50
# The dataset resizes images to 224 x 224; ten output classes
model = ResNet50(n_classes=10)

# Training setup: device, model placement, hyper-parameters, loss, optimizer
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
model.to(device=device)
epochs, lr = 10, 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

# Accuracy computation
def get_acc(data_loader):
    """
        Return the accuracy of the global ``model`` over ``data_loader``.

        Correct predictions are counted per sample. Averaging per-batch
        accuracies would over-weight a smaller final batch.

        Args:
            data_loader: iterable of ``(X, y)`` batches.

        Returns:
            Fraction of correctly classified samples, rounded to 5 digits;
            0.0 when the loader yields no samples.
    """
    n_correct = 0
    n_total = 0
    # evaluation mode; gradients are not needed
    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            X = X.to(device=device)
            y = y.to(device=device)
            y_pred = model(X).argmax(dim=-1)
            n_correct += (y_pred == y).sum().item()
            n_total += y.numel()
    if n_total == 0:
        # empty loader: avoid a ZeroDivisionError
        return 0.0
    return round(number=n_correct / n_total, ndigits=5)


# Training loop
def train():
    """
        Train the global ``model`` for ``epochs`` epochs.

        Accuracy on the train and test loaders is measured once before
        training and once after every epoch. After each epoch the weights are
        written to "resnet50_last.pt", and also to "resnet50_best.pt" whenever
        the test accuracy exceeds the best value seen so far.

        Returns:
            (train_accs, test_accs): accuracy histories; index 0 is the
            pre-training measurement.
    """
    # baseline accuracy before any update
    acc_train = get_acc(data_loader=train_dataloader)
    acc_test = get_acc(data_loader=test_dataloader)
    train_accs = [acc_train]
    test_accs = [acc_test]
    best_test_acc = 0

    print(f"训练之前:train_acc: {acc_train},test_acc: {acc_test}")

    for epoch in range(epochs):
        # get_acc() leaves the model in eval mode, so switch back every epoch
        model.train()
        tic = time.time()
        for X, y in train_dataloader:
            X, y = X.to(device=device), y.to(device=device)
            # forward pass + loss
            loss = loss_fn(model(X), y)
            # backward pass, parameter update, then clear the gradients
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        toc = time.time()
        elapsed = round(number=toc - tic, ndigits=3)

        # evaluate after the epoch
        acc_train = get_acc(data_loader=train_dataloader)
        acc_test = get_acc(data_loader=test_dataloader)
        train_accs.append(acc_train)
        test_accs.append(acc_test)

        # checkpointing: best-so-far and most recent
        if acc_test > best_test_acc:
            best_test_acc = acc_test
            torch.save(obj=model.state_dict(), f="resnet50_best.pt")
        torch.save(obj=model.state_dict(), f="resnet50_last.pt")

        print(f"""当前是第 {epoch + 1} 轮:
                --> train_acc: {acc_train},
                --> test_acc: {acc_test},
                --> elapsed_time: {elapsed}秒""")
    return train_accs, test_accs

train_accs, test_accs = train()
  • 没有训练很多轮,没耐心等他了。

训练之前:train_acc: 0.10072,test_acc: 0.09804
当前是第 1 轮:
–> train_acc: 0.20564,
–> test_acc: 0.19608,
–> elapsed_time: 8.413秒
当前是第 2 轮:
–> train_acc: 0.22542,
–> test_acc: 0.25,
–> elapsed_time: 8.083秒
当前是第 3 轮:
–> train_acc: 0.30276,
–> test_acc: 0.27451,
–> elapsed_time: 8.087秒
当前是第 4 轮:
–> train_acc: 0.43106,
–> test_acc: 0.45098,
–> elapsed_time: 8.054秒
当前是第 5 轮:
–> train_acc: 0.48681,
–> test_acc: 0.47304,
–> elapsed_time: 8.034秒
当前是第 6 轮:
–> train_acc: 0.53177,
–> test_acc: 0.52696,
–> elapsed_time: 8.025秒
当前是第 7 轮:
–> train_acc: 0.68165,
–> test_acc: 0.63971,
–> elapsed_time: 8.23秒
当前是第 8 轮:
–> train_acc: 0.71403,
–> test_acc: 0.67647,
–> elapsed_time: 8.168秒
当前是第 9 轮:
–> train_acc: 0.83153,
–> test_acc: 0.78922,
–> elapsed_time: 8.169秒
当前是第 10 轮:
–> train_acc: 0.85012,
–> test_acc: 0.7598,
–> elapsed_time: 8.161秒

  • 结果分析
# Accuracy curves returned by train(); index 0 is the pre-training accuracy.
plt.plot(train_accs, label="train_acc")
# The test curve needs its own legend entry so the two lines can be told apart.
plt.plot(test_accs, label="test_acc")
plt.legend()
plt.grid()
plt.xlabel(xlabel='epoch')
plt.ylabel(ylabel="acc")
plt.title(label="ResNet50 Training Process")

# Total number of elements across all model parameters
n_params = sum(tensor.numel() for tensor in model.parameters())
n_params   # : 23528522

在这里插入图片描述

  • 10
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

MechMaster

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值