Reproducing AlexNet: performance on mini-ImageNet and FashionMNIST

There are plenty of AlexNet-reproduction posts online, and they report very high accuracy on mini-ImageNet, some casually hitting 80%. When I reproduced the network myself, however, I ran into severe overfitting: without transfer learning I got only 46% accuracy.

Testing on FashionMNIST

FashionMNIST has 60,000 training samples and 10,000 eval samples.

Relevant code
#===============the AlexNet===============
class CAlexNet(nn.Module):
    def __init__(self):
        super().__init__()
        #conv layer1
        self.conv1 = nn.Conv2d(in_channels=3,out_channels=96,kernel_size=11,stride=4,padding=0)
        self.pool1 = nn.MaxPool2d(kernel_size=3,stride=2)
        #conv layer2
        self.conv2 = nn.Conv2d(in_channels=96,out_channels=256,kernel_size=5,stride=1,padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3,stride=2)
        #conv layer3
        self.conv3 = nn.Conv2d(in_channels=256,out_channels=384,kernel_size=3,stride=1,padding=1)
        #conv layer4
        self.conv4 = nn.Conv2d(in_channels=384,out_channels=384,kernel_size=3,stride=1,padding=1)
        #conv layer5
        self.conv5 = nn.Conv2d(in_channels=384,out_channels=256,kernel_size=3,stride=1,padding=1)
        self.pool5 = nn.MaxPool2d(kernel_size=3,stride=2)
        self.relu = nn.ReLU()
        
        self.feature = nn.Sequential(
            self.conv1,
            self.relu,
            self.pool1,
            self.conv2,
            self.relu,
            self.pool2,
            self.conv3,
            self.relu,
            self.conv4,
            self.relu,
            self.conv5,
            self.relu,
            self.pool5,
        )
        
        self.fc1 = nn.Linear(in_features=9216,out_features=4096)
        self.fc2 = nn.Linear(in_features=4096,out_features=4096)
        self.fc3 = nn.Linear(in_features=4096,out_features=10)
        self.flat = nn.Flatten()
        self.linear_relu_stack= nn.Sequential(
            nn.Dropout(0.1),
            self.fc1,
            nn.ReLU(),
            nn.Dropout(0.1),
            self.fc2,
            nn.ReLU(),
            self.fc3,                
        )
        
    
    def forward(self,x):
        x = self.feature(x)
        x = self.flat(x)
        x = self.linear_relu_stack(x)
        return x
    

def train_epoch(model,loss,optimizer,dataloader,epoch):
    model.train()
    total_loss = 0
    total_accu = 0
    for batch_num,(imgs,labels) in enumerate(dataloader):
            imgs = imgs.expand(-1,3,-1,-1)
            imgs = imgs.to(device)
            labels = labels.to(device)
            predicts = model(imgs)
            loss_value = loss(predicts,labels)           
            total_loss += loss_value.item()
            correct_num = (predicts.argmax(dim=1)==labels).sum().item()
            total_accu+= (correct_num/labels.shape[0])
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            print(f"epoch {epoch}  batch {batch_num}  loss: {loss_value.item():.4f}")
    avg_loss = total_loss/(batch_num+1)
    avg_accu = total_accu/(batch_num+1)
    torch.save(model.state_dict(),"myAlexNet"+str(epoch)+"loss"+str(avg_loss)[0:6]+"__accu"+str(avg_accu)[0:6]+"epoch0.01.pth")
    return avg_loss,avg_accu

def eval_epoch(model,loss,dataloader):
    model.eval()
    with torch.no_grad():
        total_loss = 0
        total_accu = 0
        for batch_num,(imgs,labels) in enumerate(dataloader):
                imgs = imgs.expand(-1,3,-1,-1)
                imgs = imgs.to(device)
                labels = labels.to(device)
                predicts = model(imgs)
                loss_value = loss(predicts,labels)
                correct_num = (predicts.argmax(dim=1)==labels).sum().item()
                total_accu += (correct_num/predicts.shape[0])
                total_loss += loss_value.item()
        avg_loss = total_loss/(batch_num+1)
        avg_accu = total_accu/(batch_num+1)
        return avg_loss,avg_accu

def train_process(model,loss,optimizer,train_loader,eval_loader,epochs,lr,writer):
    for epoch in range(epochs):
        avg_loss_train,avg_accu_train = train_epoch(model,loss,optimizer,train_loader,epoch)
        avg_loss_eval,avg_accu_eval = eval_epoch(model,loss,eval_loader)
        writer.add_scalars(main_tag ="loss train vs eval", tag_scalar_dict={"train":torch.tensor(avg_loss_train),"eval":torch.tensor(avg_loss_eval)},global_step=epoch)
        writer.add_scalars(main_tag ="accuracy train vs eval", tag_scalar_dict={"train":torch.tensor(avg_accu_train),"eval":torch.tensor(avg_accu_eval)},global_step=epoch)
       

The code above defines the model and one epoch of training and evaluation, including the loss and accuracy bookkeeping.
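As a quick sanity check of in_features=9216 on fc1 (256 channels × 6 × 6 left after the last pooling layer for a 227×227 input), here is a minimal sketch that pushes a dummy batch through the feature extractor; it assumes only torch and the CAlexNet class defined above:

#===============sanity check: the flatten size===============
import torch

net = CAlexNet()
dummy = torch.zeros(1, 3, 227, 227)   # one fake 227x227 RGB image
feats = net.feature(dummy)            # run the conv/pool stack only
print(feats.shape)                    # torch.Size([1, 256, 6, 6])
print(feats.flatten(1).shape[1])      # 9216 = 256 * 6 * 6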

 #download the Fashion MNIST data
    transforms_MNIST = transforms.Compose([
        transforms.Resize(227),
        transforms.ToTensor(),
        # transforms.RandomCrop(227),
    ])
    train_data_MNIST = datasets.FashionMNIST(
        root="./../data",
        train= True,
        download=True,
        transform=transforms_MNIST,
    )

    test_data_MNIST = datasets.FashionMNIST(
        root="./../data",
        train= False,
        download=False,
        transform=transforms_MNIST,
    )
    
    train_loader_MNIST = DataLoader(
        dataset=train_data_MNIST,
        batch_size=100,
        shuffle=True,
        num_workers=5,
    ) 
    imgs,labels = next(iter(train_loader_MNIST))
        
    test_loader_MNIST = DataLoader(
        dataset=test_data_MNIST,
        batch_size=100,
        shuffle=True,
        num_workers=5,
    ) 
    
    myAlexNet = CAlexNet().to(device)  # instantiate the model defined above
    writer = SummaryWriter('MNISTlog')
    lr = 0.03
    epochs = 20
    loss = nn.CrossEntropyLoss().to(device=device)
    optimizer = torch.optim.SGD(params= myAlexNet.parameters(),lr= lr,weight_decay=0.001)
    train_process(myAlexNet,loss,optimizer,train_loader_MNIST,test_loader_MNIST,epochs,lr,writer)
    writer.close()

The code above loads the data and runs the full training.
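A side note on the imgs.expand(-1,3,-1,-1) calls in train_epoch and eval_epoch: they replicate FashionMNIST's single gray channel into the 3 channels AlexNet expects. An alternative sketch (my variation, not the code above) moves this into the transform pipeline with torchvision's transforms.Grayscale, which turns the expand calls into no-ops:

#alternative: expand 1 channel to 3 inside the transform pipeline
transforms_MNIST = transforms.Compose([
    transforms.Resize(227),
    transforms.Grayscale(num_output_channels=3),  # replicate the gray channel 3x
    transforms.ToTensor(),                        # tensors are now 3x227x227
])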

Visualizing loss and accuracy during training

The loss and accuracy curves below show that around step 10 the eval loss stops decreasing while the train loss keeps falling, which is where overfitting starts to set in. Eval accuracy sits at about 90%.

(Figure: TensorBoard curves of train vs. eval loss and accuracy on FashionMNIST.)
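Since the overfitting signal here is the eval loss flattening while the train loss keeps dropping, one cheap response is early stopping. Below is a minimal sketch that reuses train_epoch and eval_epoch from above; the patience value of 3 is a guess, not something I tuned:

#minimal early-stopping sketch around the epoch loop
best_eval_loss = float("inf")
patience, bad_epochs = 3, 0                    # stop after 3 non-improving epochs

for epoch in range(epochs):
    train_epoch(myAlexNet, loss, optimizer, train_loader_MNIST, epoch)
    avg_loss_eval, avg_accu_eval = eval_epoch(myAlexNet, loss, test_loader_MNIST)
    if avg_loss_eval < best_eval_loss:
        best_eval_loss, bad_epochs = avg_loss_eval, 0
    else:
        bad_epochs += 1
        if bad_epochs >= patience:
            print(f"early stopping at epoch {epoch}")
            break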

Training and testing on mini-ImageNet

The mini-ImageNet I used has 100 classes at the original resolution (no downsampling), with 600 samples per class. For the runs below I use 95% of it as the training set and 5% as the eval set.

Relevant code

import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchvision import transforms
from torchvision.datasets import FashionMNIST
from torch.utils.tensorboard import SummaryWriter

import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # work around duplicate OpenMP runtimes on Windows
from torchvision.io import read_image
from torchvision.io import ImageReadMode
import matplotlib.pyplot as plt
from torchvision.transforms import v2

from torch import nn

from torchvision.models import AlexNet, AlexNet_Weights
from torchvision import models  # used below for models.alexnet in the transfer-learning run

import random
random.seed(0)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.benchmark = True

index_list = list(range(0,60000))
random.shuffle(index_list)


#================TinyImgNet=================
class CTinyImgNetTrain(Dataset):
    def __init__(self,data_dir,index_list):
        #the data root dir
        self.data_dir = data_dir
        self.index_list = index_list
        # the dict mapping from the class name to class index
        self.class_name2class_index = {}
        self.class_names = []
        wnids_file = open(data_dir+"/classnames.txt")
        class_name_lines = wnids_file.readlines()
        for index,class_name_line in enumerate(class_name_lines):
            self.class_name2class_index[class_name_line.split("\n")[0]]=index
            self.class_names.append(class_name_line.split("\n")[0])
        
        # the dict from the class name to words
        self.class_name2words = {}
        words_file = open(data_dir+"/words.txt")
        words_lines = words_file.readlines()
        for words_line in words_lines:
            words_info = words_line.split("\n")[0].split("\t")
            self.class_name2words[words_info[0]] = words_info[1]
        self.train_img_dirs = []
        self.train_img_labels = []
        self.train_dir = self.data_dir + "/train"
        for class_name_dir in os.listdir(path=self.train_dir):
            cur_class_dir = self.train_dir+"/"+class_name_dir
            for dir_tmp in os.listdir(cur_class_dir):
                if dir_tmp.endswith(".txt"):
                    cur_class_annotations_file = open(cur_class_dir+"/"+dir_tmp) 
                    cur_class_anno_infos = cur_class_annotations_file.readlines()
            
                    for line_info in cur_class_anno_infos:
                        img_name = line_info.split("\n")[0].split("\t")[0]
                        img_dir = cur_class_dir+"/images/"+img_name
                        #the img dir
                        self.train_img_dirs.append(img_dir)
                        #the img label
                        self.train_img_labels.append(self.class_name2class_index[class_name_dir])
    
        #keep the first 95% of the shared shuffled index_list as the training split;
        #the remaining 5% becomes the eval split (see CTinyImgNetEval), so the two splits are disjoint
        split_rate = 0.95
        selected_img_dirs = []
        selected_img_labels = []
        for i,index in enumerate(self.index_list):
            if i< split_rate*len(self.index_list):
                # if index >= len(self.train_img_dirs):
                    # print(f"index:{index} len:{len(self.train_img_dirs)}")
                selected_img_dirs.append(self.train_img_dirs[index])
                selected_img_labels.append(self.train_img_labels[index])
                
        self.train_img_dirs = selected_img_dirs
        self.train_img_labels = selected_img_labels
    
    
    def label2ClassName(self,label):
        return self.class_names[label]
        
    def className2Words(self,class_name):
        return self.class_name2words[class_name]
    
    def label2words(self,label):
        return self.className2Words(self.label2ClassName(label))            
    
    def __len__(self):
        return len(self.train_img_dirs)
    
    
    def __getitem__(self,index):
        img = read_image(self.train_img_dirs[index],ImageReadMode.RGB) 
        
        img_target_size = 227
        # resize_scale = img_target_size/min(img.shape[1],img.shape[2])
        # resize_width = resize_scale*max(img.shape[1],img.shape[2])
        resize_scale = img_target_size/max(img.shape[1],img.shape[2])
        height_size = int(img.shape[1]*resize_scale)
        width_size = int(img.shape[2]*resize_scale)
        
        # converting the input image to float32 and resizing are crucial;
        # the scale computed above preserves the aspect ratio, and RandomCrop pads up to the 227x227 target
        transforms = v2.Compose([
            v2.Resize((height_size,width_size)),
            v2.RandomCrop(size=img_target_size,padding= 1, pad_if_needed=True,fill=(255,0,255),padding_mode="constant"),
            v2.RandomHorizontalFlip(),
            v2.ToDtype(dtype=torch.float32,scale=True),
            v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])
        label = self.train_img_labels[index]
        img = transforms(img)
        return img,label

class CTinyImgNetEval(Dataset):
    def __init__(self,data_dir,index_list):
        self.index_list =index_list
        #the data root dir
        self.data_dir = data_dir
        # the dict mapping from the class name to class index
        self.class_name2class_index = {}
        self.class_names = []
        wnids_file = open(data_dir+"/classnames.txt")
        class_name_lines = wnids_file.readlines()
        for index,class_name_line in enumerate(class_name_lines):
            self.class_name2class_index[class_name_line.split("\n")[0]]=index
            self.class_names.append(class_name_line.split("\n")[0])
        
        # the dict from the class name to words
        self.class_name2words = {}
        words_file = open(data_dir+"/words.txt")
        words_lines = words_file.readlines()
        for words_line in words_lines:
            words_info = words_line.split("\n")[0].split("\t")
            self.class_name2words[words_info[0]] = words_info[1]
        
        self.train_img_dirs = []
        self.train_img_labels = []
        self.train_dir = self.data_dir + "/train"
        for class_name_dir in os.listdir(path=self.train_dir):
            cur_class_dir = self.train_dir+"/"+class_name_dir
            for dir_tmp in os.listdir(cur_class_dir):
                if dir_tmp.endswith(".txt"):
                    cur_class_annotations_file = open(cur_class_dir+"/"+dir_tmp) 
                    cur_class_anno_infos = cur_class_annotations_file.readlines()
            
                    for line_info in cur_class_anno_infos:
                        img_name = line_info.split("\n")[0].split("\t")[0]
                        img_dir = cur_class_dir+"/images/"+img_name
                        #the img dir
                        self.train_img_dirs.append(img_dir)
                        #the img label
                        self.train_img_labels.append(self.class_name2class_index[class_name_dir])
    
        #keep the last 5% of the shared shuffled index_list as the eval split,
        #disjoint from the training split built in CTinyImgNetTrain
        split_rate = 0.95
        selected_img_dirs = []
        selected_img_labels = []
        for i,index in enumerate(self.index_list):
            if i>= split_rate*len(self.index_list):
                selected_img_dirs.append(self.train_img_dirs[index])
                selected_img_labels.append(self.train_img_labels[index])
        self.train_img_dirs = selected_img_dirs
        self.train_img_labels = selected_img_labels
    
    
    def label2ClassName(self,label):
        return self.class_names[label]
        
    def className2Words(self,class_name):
        return self.class_name2words[class_name]
    
    def label2words(self,label):
        return self.className2Words(self.label2ClassName(label))            
    
    def __len__(self):
        return len(self.train_img_dirs)
    
    
    def __getitem__(self,index):
        img = read_image(self.train_img_dirs[index],ImageReadMode.RGB) 
        
        img_target_size = 227
        resize_scale = img_target_size/max(img.shape[1],img.shape[2])
        height_size = int(img.shape[1]*resize_scale)
        width_size = int(img.shape[2]*resize_scale)
        # converting the input image to float32 and resizing are crucial;
        # the scale computed above preserves the aspect ratio, and RandomCrop pads up to the 227x227 target
        transforms = v2.Compose([
            v2.Resize((height_size,width_size)),
            # v2.CenterCrop(img_target_size),
            # v2.RandomCrop(img_target_size,1,True),
            v2.RandomCrop(size=img_target_size,padding= 1, pad_if_needed=True,fill=(255,0,255),padding_mode="constant"),
            v2.RandomHorizontalFlip(),
            v2.ToDtype(dtype=torch.float32,scale=True),
            v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])
        label = self.train_img_labels[index]
        img = transforms(img)
        return img,label
        
    


#===============the AlexNet===============
class CAlexNet(nn.Module):
    def __init__(self):
        super().__init__()
        #conv layer1
        self.conv1 = nn.Conv2d(in_channels=3,out_channels=96,kernel_size=11,stride=4,padding=0)
        self.pool1 = nn.MaxPool2d(kernel_size=3,stride=2)
        #conv layer2
        self.conv2 = nn.Conv2d(in_channels=96,out_channels=256,kernel_size=5,stride=1,padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3,stride=2)
        #conv layer3
        self.conv3 = nn.Conv2d(in_channels=256,out_channels=384,kernel_size=3,stride=1,padding=1)
        #conv layer4
        self.conv4 = nn.Conv2d(in_channels=384,out_channels=384,kernel_size=3,stride=1,padding=1)
        #conv layer5
        self.conv5 = nn.Conv2d(in_channels=384,out_channels=256,kernel_size=3,stride=1,padding=1)
        self.pool5 = nn.MaxPool2d(kernel_size=3,stride=2)
        self.relu = nn.ReLU()
        
        self.feature = nn.Sequential(
            self.conv1,
            self.relu,
            self.pool1,
            self.conv2,
            self.relu,
            self.pool2,
            self.conv3,
            self.relu,
            self.conv4,
            self.relu,
            self.conv5,
            self.relu,
            self.pool5,
        )
        
        self.fc1 = nn.Linear(in_features=9216,out_features=4096)
        self.fc2 = nn.Linear(in_features=4096,out_features=4096)
        self.fc3 = nn.Linear(in_features=4096,out_features=100)
        self.flat = nn.Flatten()
        self.linear_relu_stack= nn.Sequential(
            nn.Dropout(0.5),
            self.fc1,
            nn.ReLU(),
            nn.Dropout(0.5),
            self.fc2,
            nn.ReLU(),
            self.fc3,                
        )
        
    
    def forward(self,x):
        x = self.feature(x)
        x = self.flat(x)
        x = self.linear_relu_stack(x)
        return x
    

def train_epoch(model,loss,optimizer,dataloader,epoch):
    model.train()
    total_loss = 0
    total_accu = 0
    for batch_num,(imgs,labels) in enumerate(dataloader):
            imgs = imgs.to(device)
            labels = labels.to(device)
            predicts = model(imgs)
            loss_value = loss(predicts,labels)           
            total_loss += loss_value.item()
            correct_num = (predicts.argmax(dim=1)==labels).sum().item()
            total_accu+= (correct_num/labels.shape[0])
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            print(f"epoch {epoch}  batch {batch_num}  loss: {loss_value.item():.4f}")
    avg_loss = total_loss/(batch_num+1)
    avg_accu = total_accu/(batch_num+1)
    torch.save(model.state_dict(),"myAlexNet"+str(epoch)+"loss"+str(avg_loss)[0:6]+"__accu"+str(avg_accu)[0:6]+"epoch0.01.pth")
    return avg_loss,avg_accu

def eval_epoch(model,loss,dataloader):
    model.eval()
    with torch.no_grad():
        total_loss = 0
        total_accu = 0
        for batch_num,(imgs,labels) in enumerate(dataloader):
                imgs = imgs.to(device)
                labels = labels.to(device)
                predicts = model(imgs)
                loss_value = loss(predicts,labels)
                correct_num = (predicts.argmax(dim=1)==labels).sum().item()
                total_accu += (correct_num/predicts.shape[0])
                total_loss += loss_value.item()
        avg_loss = total_loss/(batch_num+1)
        avg_accu = total_accu/(batch_num+1)
        return avg_loss,avg_accu

def train_process(model,loss,optimizer,train_loader,eval_loader,epochs,lr,writer):
    for epoch in range(epochs):
        avg_loss_train,avg_accu_train = train_epoch(model,loss,optimizer,train_loader,epoch)
        avg_loss_eval,avg_accu_eval = eval_epoch(model,loss,eval_loader)
        writer.add_scalars(main_tag ="loss train vs eval", tag_scalar_dict={"train":torch.tensor(avg_loss_train),"eval":torch.tensor(avg_loss_eval)},global_step=epoch)
        writer.add_scalars(main_tag ="accuracy train vs eval", tag_scalar_dict={"train":torch.tensor(avg_accu_train),"eval":torch.tensor(avg_accu_eval)},global_step=epoch)
       
if __name__ == '__main__':
    #========data set & data loader===============
    tiny_img_net_dir = "E:/DeepLearning/data/mimi_imgNet224x224"
    train_set = CTinyImgNetTrain(tiny_img_net_dir,index_list)
    train_loader = DataLoader(
        dataset= train_set,
        batch_size=100,
        shuffle=True,
        num_workers=5,
        pin_memory=True,
        pin_memory_device="cuda:0"
    )

    #===========load and test the data=================
    imgs,labels = next(iter(train_loader))
    print(f"imgs shapes:{imgs.shape}")
    print(f"labels shape:{labels.shape}")

    imgs = imgs.permute(0,2,3,1)
    fig = plt.figure(figsize=[25,25])
    for i in range(1,26):
        img = imgs[i,:,:,:]
        fig.add_subplot(5,5,i)
        plt.axis("off")
        label = labels[i]
        words = train_set.label2words(label)
        plt.title(words)
        plt.imshow(img)
    plt.show()
    
    myAlexNet = CAlexNet()
    print(f"{myAlexNet}")
        
    myAlexNet = myAlexNet.to(device)
    # myAlexNet.load_state_dict(torch.load("myAlexNet47loss0.53epoch0.01.pth"))
    
    #the eval loader
    eval0_set = CTinyImgNetEval(data_dir=tiny_img_net_dir,index_list=index_list)
    eval0_loader = DataLoader(
        dataset=eval0_set,
        batch_size=100,
        shuffle=False,
        num_workers=5,
    ) 
    
    
    writer = SummaryWriter('mini-imageNetlog')
    lr = 0.03
    epochs = 30
    loss = nn.CrossEntropyLoss().to(device=device)
    optimizer = torch.optim.SGD(params= myAlexNet.parameters(),lr= lr,weight_decay=0.003)
    train_process(myAlexNet,loss,optimizer,train_loader,eval0_loader,epochs,lr,writer)
    writer.close()

   
Visualizing loss and accuracy during training

The loss curves below show that by epoch 36 the loss has essentially stopped falling and accuracy has stopped improving: overfitting is setting in, even with dropout set to 0.5 in the network and weight decay (L2 regularization) on the optimizer.

(Figure: TensorBoard curves of train vs. eval loss and accuracy on mini-ImageNet.)

Comparing the network's performance on FashionMNIST and mini-ImageNet

The network reaches 90% accuracy on FashionMNIST in only 12 epochs, but on mini-ImageNet it manages only about 46% (on the eval set) even after 36 epochs. Taking the FashionMNIST result as the baseline, the mini-ImageNet result is clearly poor. What causes this?

Cause analysis:

The network is identical in both experiments, and the two datasets are similar in size: each has close to 60,000 training samples (the eval sets differ more). Yet mini-ImageNet overfits badly. I attribute this to two causes:

1. FashionMNIST is simpler than mini-ImageNet, in two respects. First, every FashionMNIST image is a garment or shoe on a plain background, with little texture detail and an emphasis on silhouette, and samples within a class vary little in shape. mini-ImageNet samples are much harder: scenes differ completely from class to class, and within a class both the object's shape and the background vary widely, making recognition far more difficult. In short, 60,000 samples are not enough to train AlexNet to handle textures and shapes as complex as ImageNet's.

2. FashionMNIST has only 10 classes, so its 60,000 training samples give 6,000 per class; mini-ImageNet has 100 classes with only about 570 training samples each (see the quick check below), which is simply too few. (The model itself has enough capacity, since AlexNet's accuracy on the full ImageNet is well above 46%, so model complexity is not the bottleneck for this classification task.)
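The 570-per-class figure is just 0.95 × 600 on average; it can be checked directly against the dataset object with a quick sketch (this assumes the train_set instance built in the code above):

from collections import Counter

counts = Counter(train_set.train_img_labels)   # label -> number of training images
print(len(counts))                             # 100 classes
print(sum(counts.values()) / len(counts))      # ~570 images per class on average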

How to fix AlexNet's overfitting on mini-ImageNet:

Fix 1: more data, via transfer learning

The most effective cure for overfitting is usually more data, so the model can learn from as much of the sample space as possible. Transfer learning effectively increases the number of samples the feature-extraction network has been trained on. Load the official model and weights, then replace the final three fully connected classification layers; code below:

    my_alex_net = models.alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)
    for para in my_alex_net.parameters():
        para.requires_grad = False
    my_alex_net.classifier = nn.Sequential(
            nn.Dropout(p=0.0001),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.0001),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 100),
        )
    print(f"{my_alex_net.classifier}")
    # my_alex_net.load_state_dict(torch.load("myAlexNet7loss1.1237__accu0.7018epoch0.01.pth"))
    my_alex_net = my_alex_net.to(device)
    
    #the eval loader
    eval0_set = CTinyImgNetEval(data_dir=tiny_img_net_dir,index_list=index_list)
    eval0_loader = DataLoader(
        dataset=eval0_set,
        batch_size=100,
        shuffle=False,
        num_workers=5,
    ) 
    
    
    writer = SummaryWriter('mini-imageNetTransferlog')
    lr = 0.002
    epochs = 40
    loss = nn.CrossEntropyLoss().to(device=device)
    optimizer = torch.optim.SGD(params= my_alex_net.classifier.parameters(),lr= lr,weight_decay=0.003)
    train_process(my_alex_net,loss,optimizer,train_loader,eval0_loader,epochs,lr,writer)
    writer.close()
Visualizing loss and accuracy during training

The accuracy plot shows it quickly reaches 60%, a large improvement over the ~45% from training on mini-ImageNet alone.

(Figure: TensorBoard accuracy curve for the transfer-learning run.)
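A natural next step, sketched below but not something I ran, is to unfreeze the pretrained backbone once the new classifier has converged and fine-tune the whole network, giving the pretrained convolutions a much smaller learning rate through SGD parameter groups (both learning rates here are guesses):

#sketch: fine-tune everything with per-group learning rates
for para in my_alex_net.parameters():
    para.requires_grad = True                  # unfreeze the frozen backbone

optimizer = torch.optim.SGD(
    [
        {"params": my_alex_net.features.parameters(),   "lr": 0.0002},  # pretrained convs
        {"params": my_alex_net.classifier.parameters(), "lr": 0.002},   # fresh classifier
    ],
    weight_decay=0.003,
)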

Fix 2: change the network architecture

A network can be examined along three basic axes. The first is the architecture itself: large kernels as in AlexNet, or small kernels as in VGG? A ResNet, or some other structure? The second is depth: AlexNet is a shallow network, while deep ones run to thirty or fifty layers. The third is width, i.e. whether there are enough feature channels, which is also an important factor. In the next post we will change the network along the depth axis and run the corresponding tests.
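To make the capacity comparison concrete, parameter counts are easy to pull from torchvision's model constructors; a small sketch (the counts in the comments are from memory and approximate):

from torchvision import models

def count_params(m):
    return sum(p.numel() for p in m.parameters())

print(count_params(models.alexnet()))    # ~61M parameters, mostly in the fc layers
print(count_params(models.vgg16()))      # ~138M: a deeper stack of small 3x3 kernels
print(count_params(models.resnet50()))   # ~25.6M: far deeper but thinner, with residuals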

 

Summary:

Takeaways from this experiment:

1. It is remarkable that little AlexNet can reach a respectable accuracy on ImageNet, a million-image dataset with 1,000 classes; neural networks really are powerful.

2. At the same 60,000 images, FashionMNIST is far simpler than mini-ImageNet: uniform backgrounds, well-framed foregrounds, few classes, and simple textures, shapes, and structures. That is why AlexNet trains to a good result on FashionMNIST but poorly on mini-ImageNet. The effective size of a dataset is therefore not just the image count; it depends on the task itself.

3. Transfer learning hands you a feature extractor trained on millions of images, whose feature extraction is more thorough and general. When your data is scarce relative to the task, transfer learning lets the network's feature-extraction capacity be used to the fullest.

 
