PyTorch implementation of AlexNet + partial ImageNet data

Paper: "ImageNet Classification with Deep Convolutional Neural Networks"

Personally, I think AlexNet is what truly made deep learning take off. At the time there was a very large dataset available (the 1000-class ImageNet), and hardware performance had also improved dramatically (e.g. GPUs); you could call it the right time, place, and people. AlexNet lived up to expectations, finishing far ahead of the runner-up in that year's ImageNet classification competition.

Architecturally, AlexNet mainly introduced the ReLU activation and dropout layers, together with data augmentation. All of these target overfitting on large datasets, and they made training on millions of images feasible. The augmentation mainly consists of random crops, resizing, rotation, and brightness/saturation adjustments, increasing the effective amount of training data as much as possible. As the saying goes in deep learning, "data is king": without data, even the best algorithm is useless.
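A minimal torchvision sketch of such an augmentation pipeline (the specific parameters here are illustrative assumptions, not taken from this post or the paper):

from torchvision import transforms

# random crop + resize, mirror, small rotation, and color jitter,
# in the spirit of the augmentations described above
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(227),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.4, saturation=0.4),
    transforms.ToTensor(),  # HWC uint8 -> CHW float in [0, 1]
])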

As for ReLU, the function is f(x) = max(0, x). Before it, node activations were mainly computed with sigmoid, which carries the risk of vanishing and exploding gradients. ReLU largely mitigates this, and there are now many refinements of it, such as PReLU and Leaky ReLU, mainly targeting the x < 0 case.
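For illustration, a minimal sketch of these activations in PyTorch (the negative slope below is just a typical default, not a tuned value):

from torch import nn

relu  = nn.ReLU()           # f(x) = max(0, x)
lrelu = nn.LeakyReLU(0.01)  # Leaky ReLU: small fixed slope for x < 0
prelu = nn.PReLU()          # PReLU: the slope for x < 0 is a learned parameter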

As for dropout, the network drops a percentage of its nodes by setting them to zero, with the goal of making the activations sparse and preventing overfitting.
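A quick sketch of the behaviour (not from the original code): nn.Dropout zeroes each activation with probability p during training and is a no-op in eval mode; surviving activations are scaled by 1/(1-p) so the expected value is unchanged.

import torch
from torch import nn

drop = nn.Dropout(p=0.5)
x = torch.ones(8)
drop.train()
print(drop(x))  # about half the entries are zeroed, the rest scaled to 2.0
drop.eval()
print(drop(x))  # identity at inference time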

The AlexNet network is as follows (the architecture figure from the original post is omitted):
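For reference, with the 227×227×3 input used by the code below, the feature-map sizes work out to:

- conv1 (11×11, stride 4): 55×55×96, then 3×3/2 max-pool: 27×27×96
- conv2 (5×5, pad 2): 27×27×256, then 3×3/2 max-pool: 13×13×256
- conv3 (3×3, pad 1): 13×13×384
- conv4 (3×3, pad 1): 13×13×384
- conv5 (3×3, pad 1): 13×13×256, then 3×3/2 max-pool: 6×6×256 = 9216
- fc: 9216 → 4096 → 4096 → 50 (this post uses 50 classes instead of the full 1000)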
Now about PyTorch: it has been quite popular lately, and many experts are pushing it hard. After studying it for a while, my network code is roughly as follows:



import os
import numpy as np
import cv2
import torch
import torch.backends.cudnn as cudnn
from torch.utils.data import Dataset, DataLoader

root = "../alexnet/"

# -----------------ready the dataset--------------------------
def opencvLoad(imgPath, resizeH, resizeW):
    image = cv2.imread(imgPath)                     # BGR, HWC, uint8
    # cv2.resize expects (width, height), not (height, width)
    image = cv2.resize(image, (resizeW, resizeH), interpolation=cv2.INTER_CUBIC)
    image = image.astype(np.float32)
    image = np.transpose(image, (2, 0, 1))          # HWC -> CHW for PyTorch
    image = torch.from_numpy(image)
    return image
    
class LoadPartDataset(Dataset):
    def __init__(self, txt):
        imgs = []
        with open(txt, 'r') as fh:
            for line in fh:
                words = line.strip().split()
                if not words:
                    continue
                path, label = words[0], int(words[1])
                imgs.append((path, label))
        self.imgs = imgs
            
    def __getitem__(self, item):
        path, label = self.imgs[item]
        img = opencvLoad(path, 227, 227)
        return img, label

    def __len__(self):
        return len(self.imgs)
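# The annotation file is expected to contain one sample per line:
#     <image_path> <integer_label>
# e.g. (hypothetical path)  images/cat_0001.jpg 3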
        
def loadTrainData(txt=None):
    # Eagerly loads every image into memory; kept for reference,
    # the Dataset class above is what is actually used.
    imgs = []
    with open(txt, 'r') as fh:
        for line in fh:
            words = line.strip().split()
            if not words:
                continue
            label = int(words[1])
            image = cv2.imread(words[0])
            image = cv2.resize(image, (227, 227), interpolation=cv2.INTER_CUBIC)
            image = image.astype(np.float32)
            image = np.transpose(image, (2, 0, 1))  # HWC -> CHW
            imgs.append((torch.from_numpy(image), label))
    return imgs

# trainSet = loadTrainData(txt=root + 'train.txt')
# test_data = loadTrainData(txt=root + 'train.txt')
trainSet = LoadPartDataset(txt=root + 'train.txt')
# NOTE: the test set reuses train.txt here, so "test" accuracy is not held out
test_data = LoadPartDataset(txt=root + 'train.txt')
train_loader = DataLoader(dataset=trainSet, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=64)


#-----------------create the Net and training------------------------

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Sequential(
            torch.nn.Conv2d(3, 96, 11, 4, 0),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(3,2)
        )
        self.conv2 = torch.nn.Sequential(
            torch.nn.Conv2d(96, 256, 5, 1, 2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(3,2)
        )
        self.conv3 = torch.nn.Sequential(
            torch.nn.Conv2d(256,384, 3, 1, 1),
            torch.nn.ReLU(),
        )
        self.conv4 = torch.nn.Sequential(
            torch.nn.Conv2d(384,384, 3, 1, 1),
            torch.nn.ReLU(),
        )
        self.conv5 = torch.nn.Sequential(
            torch.nn.Conv2d(384,256, 3, 1, 1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(3,2)
        )
        self.dense = torch.nn.Sequential(
            torch.nn.Linear(9216, 4096),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(4096, 4096),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(4096, 50)
        )

    def forward(self, x):
        conv1_out = self.conv1(x)
        conv2_out = self.conv2(conv1_out)
        conv3_out = self.conv3(conv2_out)
        conv4_out = self.conv4(conv3_out)
        conv5_out = self.conv5(conv4_out)
        res = conv5_out.view(conv5_out.size(0), -1)
        out = self.dense(res)
        return out
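# Quick shape sanity check (illustrative, not in the original post):
# a 227x227 input gives a conv5 output of 256 x 6 x 6 = 9216 features,
# which matches the first Linear layer above.
# x = torch.randn(1, 3, 227, 227)
# assert Net()(x).shape == (1, 50)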


model = Net()

finetune = None
finetune = r'./model/_iter_99.pth'  # set back to None to train from scratch

# Resume from a checkpoint if one exists
if finetune is not None and os.path.exists(finetune):
    print('[0] Load Model {}'.format(finetune))

    model_dict = model.state_dict()
    finetune_dict = torch.load(finetune)

    # keep only the checkpoint weights whose names match the current model
    finetune_dict = {k: v for k, v in finetune_dict.items() if k in model_dict}
    model_dict.update(finetune_dict)
    model.load_state_dict(model_dict)

model = torch.nn.DataParallel(model, device_ids=[0, 1])  # assumes two GPUs
model.cuda()
cudnn.benchmark = True
print(model)


# optimizer = torch.optim.Adam(model.parameters())  # Adam is an alternative

# update the net
lr = 1e-5
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

for epoch in range(10000):
    print('epoch {}'.format(epoch + 1))
    # training-----------------------------
    train_loss = 0.
    train_acc = 0.
    for trainData, trainLabel in train_loader:
        trainData, trainLabel = trainData.cuda(), trainLabel.cuda()
        out = model(trainData)
        loss = loss_func(out, trainLabel)
        train_loss += loss.item()
        pred = torch.max(out, 1)[1]
        train_acc += (pred == trainLabel).sum().item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # average per-batch loss and per-sample accuracy
    print('Train Loss: {:.6f}, Acc: {:.6f}'.format(
        train_loss / len(train_loader), train_acc / len(trainSet)))

    # save a checkpoint every 10 epochs
    if (epoch + 1) % 10 == 0:
        os.makedirs('./model', exist_ok=True)
        sodir = './model/_iter_{}.pth'.format(epoch)
        print('[5] Model save {}'.format(sodir))
        torch.save(model.module.state_dict(), sodir)

    # decay the learning rate every 100 epochs
    if (epoch + 1) % 100 == 0:
        lr = lr / 10
        optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    # evaluation--------------------------------
    # model.eval()
    # eval_loss = 0.
    # eval_acc = 0.
    # with torch.no_grad():
    #     for testData, testLabel in test_loader:
    #         testData, testLabel = testData.cuda(), testLabel.cuda()
    #         out = model(testData)
    #         loss = loss_func(out, testLabel)
    #         eval_loss += loss.item()
    #         pred = torch.max(out, 1)[1]
    #         eval_acc += (pred == testLabel).sum().item()
    # print('Test Loss: {:.6f}, Acc: {:.6f}'.format(
    #     eval_loss / len(test_loader), eval_acc / len(test_data)))
    # model.train()
       

The image set is too large, so I won't upload it here.

Code download: https://download.csdn.net/download/eatapples/10394160
