文章:《ImageNet Classification with Deep Convolutional Neural Networks》
个人认为 AlexNet 真正地将深度学习带热了:当时有了非常大的数据库 ImageNet(1000 类),硬件性能也大幅提升(如 GPU),简直可以称为天时地利人和。AlexNet 的效果也不负众望,在当时的 ImageNet 分类比赛中遥遥领先第二名。
AlexNet 在网络结构中主要引入了 ReLU 层和 Dropout 层,同时进行了 Data augmentation(数据增强),这些都是针对大数据下的过拟合问题,从而使百万级大数据的训练成为了可能。数据的增强主要包括随机 crop、resize、旋转、亮度、饱和度等调节,尽可能地增加数据量。深度学习中有句话叫"数据为王",没有数据再好的算法也没用。
关于 ReLU:函数为 f(x)=max(0,x)。在此之前主要是通过 sigmoid 来计算节点,该方法存在梯度消失和梯度爆炸的风险。ReLU 能很大程度上缓解这个问题。现在对 ReLU 有很多改进,如 PReLU、Leaky ReLU,主要是针对 x<0 的情况。
关于 Dropout:网络以一定百分比随机丢弃节点(将其输出置 0),目的是使节点变得稀疏,防止过拟合现象。
alexnet 网络如下:
再说pytorch ,最近比较火热,一些大神都在力推。大致学习了下网络代码如下:
import torch
import torch.backends.cudnn as cudnn
import cv2
from torch.autograd import Variable
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
root="../alexnet/"  # base directory holding train.txt (each line: "<image path> <int label>")
# -----------------ready the dataset--------------------------
def opencvLoad(imgPath, resizeH, resizeW):
    """Load an image with OpenCV and return it as a float32 CHW torch tensor.

    Args:
        imgPath: path to the image file.
        resizeH: target height in pixels.
        resizeW: target width in pixels.

    Returns:
        torch.FloatTensor of shape (3, resizeH, resizeW), BGR channel order
        (OpenCV default), raw 0-255 pixel values (no normalisation).

    Raises:
        IOError: if the image cannot be read (cv2.imread returns None
        silently, which previously crashed later inside cv2.resize).
    """
    image = cv2.imread(imgPath)
    if image is None:
        raise IOError('cannot read image: {}'.format(imgPath))
    # cv2.resize takes dsize as (width, height); the original passed
    # (resizeH, resizeW), which is wrong for non-square targets.
    image = cv2.resize(image, (resizeW, resizeH), interpolation=cv2.INTER_CUBIC)
    image = image.astype(np.float32)
    # HWC -> CHW. The original used (2, 1, 0), producing (C, W, H) — a
    # transposed image — instead of the (C, H, W) layout PyTorch expects.
    image = np.transpose(image, (2, 0, 1))
    return torch.from_numpy(image)
class LoadPartDataset(Dataset):
    """Lazy-loading image dataset.

    The annotation file *txt* contains one sample per line in the form
    "<image path> <int label>". Only the (path, label) pairs are kept in
    memory; images are decoded on demand in __getitem__.
    """

    def __init__(self, txt):
        """Parse the annotation file once.

        The original opened the file twice (leaking one handle) and never
        closed either; a `with` block fixes both.
        """
        imgs = []
        with open(txt, 'r') as fh:
            for line in fh:
                words = line.rstrip().split()
                if not words:
                    continue  # tolerate blank lines
                imgs.append((words[0], int(words[1])))
        self.imgs = imgs

    def __getitem__(self, item):
        """Return (image tensor, int label) for sample *item*."""
        image, label = self.imgs[item]
        img = opencvLoad(image, 227, 227)
        return img, label

    def __len__(self):
        """Number of samples listed in the annotation file."""
        return len(self.imgs)
def loadTrainData(txt=None):
    """Eagerly load every (image tensor, int label) pair listed in *txt*.

    In-memory alternative to LoadPartDataset (currently unused — see the
    commented-out callers below). Each line of *txt* is
    "<image path> <int label>".

    Returns:
        list of (torch.FloatTensor, int) tuples.
    """
    imgs = []
    # `with` closes the file; the original leaked the handle. The per-image
    # read/resize/transpose code duplicated opencvLoad verbatim, so delegate.
    with open(txt, 'r') as fh:
        for line in fh:
            words = line.rstrip().split()
            if not words:
                continue  # tolerate blank lines
            label = int(words[1])
            image = opencvLoad(words[0], 227, 227)
            imgs.append((image, label))
    return imgs
# Eager-loading alternative (decodes every image up front):
# trainSet=loadTrainData(txt=root+'train.txt')
# test_data=loadTrainData(txt=root+'train.txt')
trainSet =LoadPartDataset(txt=root+'train.txt')
# NOTE(review): the "test" set also reads train.txt — presumably a separate
# test list was intended; confirm before trusting any evaluation numbers.
test_data=LoadPartDataset(txt=root+'train.txt')
train_loader = DataLoader(dataset=trainSet, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=64)
#-----------------create the Net and training------------------------
class Net(torch.nn.Module):
    """AlexNet-style CNN: five conv stages then a three-layer classifier.

    Expects 227x227 3-channel input; the final pooled feature map is
    256 x 6 x 6 = 9216 values, classified into 50 logits.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Stage 1: 3 -> 96 channels with a large stride-4 kernel, then pool.
        self.conv1 = torch.nn.Sequential(
            torch.nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Stage 2: 96 -> 256, 5x5 with padding 2 keeps spatial size, then pool.
        self.conv2 = torch.nn.Sequential(
            torch.nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Stages 3 and 4: same-size 3x3 convolutions, no pooling.
        self.conv3 = torch.nn.Sequential(
            torch.nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
        )
        self.conv4 = torch.nn.Sequential(
            torch.nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
        )
        # Stage 5: reduce to 256 channels, final pool -> 6x6 feature map.
        self.conv5 = torch.nn.Sequential(
            torch.nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Classifier head: two dropout-regularised FC layers, then 50 logits.
        self.dense = torch.nn.Sequential(
            torch.nn.Linear(9216, 4096),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(4096, 4096),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(4096, 50),
        )

    def forward(self, x):
        """Apply the five conv stages, flatten, and classify."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            x = stage(x)
        flat = x.view(x.size(0), -1)
        return self.dense(flat)
# -- model construction and optional warm start ---------------------------
model = Net()

# Checkpoint to finetune from; set to None to train from scratch.
# (The original also had a dead `finetune = None` immediately overwritten.)
finetune = r'./model/_iter_99.pth'
if finetune is not None:
    # Python-3 print call; the original used a Python-2 print statement here
    # while the rest of the script used print() — it could not run on either
    # interpreter as written.
    print('[0] Load Model {}'.format(finetune))
    pretrained_dict = model.state_dict()
    finetune_dict = torch.load(finetune)
    # Keep only checkpoint entries whose keys exist in the current model,
    # then merge them over the freshly initialised weights.
    model_dict = {k: v for k, v in finetune_dict.items() if k in pretrained_dict}
    pretrained_dict.update(model_dict)
    model.load_state_dict(pretrained_dict)

# Data-parallel training across two GPUs; cudnn.benchmark autotunes kernels
# for the fixed 227x227 input size.
model = torch.nn.DataParallel(model, device_ids=[0, 1])
model.cuda()
cudnn.benchmark = True
print(model)

# -- loss and optimiser ----------------------------------------------------
lr = 1e-5
loss_func = torch.nn.CrossEntropyLoss()
# (The original wrapped the parameters in a pointless `list(...)[:]` copy.)
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)
# -- training loop ----------------------------------------------------------
for epoch in range(10000):
    print('epoch {}'.format(epoch + 1))
    # training-----------------------------
    train_loss = 0.
    train_acc = 0.
    for trainData, trainLabel in train_loader:
        trainData, trainLabel = Variable(trainData.cuda()), Variable(trainLabel.cuda())
        out = model(trainData)
        loss = loss_func(out, trainLabel)
        train_loss += loss.data[0]
        pred = torch.max(out, 1)[1]  # index of the highest logit per sample
        train_correct = (pred == trainLabel).sum()
        train_acc += train_correct.data[0]
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('Train Loss: {:.6f}, Acc: {:.6f}'.format(train_loss / (len(
        trainSet)), train_acc / (len(trainSet))))
    # Snapshot every 10 epochs; save model.module so the keys match a bare
    # (non-DataParallel) Net when reloading.
    if (epoch + 1) % 10 == 0:
        sodir = './model/_iter_{}.pth'.format(epoch)
        # print() call — the original used a Python-2 print statement here.
        print('[5] Model save {}'.format(sodir))
        torch.save(model.module.state_dict(), sodir)
    # Decay the learning rate 10x every 100 epochs. Update the existing
    # optimizer's param_groups in place instead of constructing a new SGD:
    # the original rebuilt it WITHOUT momentum, silently dropping the
    # momentum=0.9 setting and all accumulated momentum buffers.
    if (epoch + 1) % 100 == 0:
        lr = lr / 10
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
    # evaluation--------------------------------
    # (Original commented-out draft compared loss/accuracy against the
    # images (trainData) instead of the labels; corrected below.)
    # model.eval()
    # eval_loss = 0.
    # eval_acc = 0.
    # for testData, testLabel in test_loader:
    #     testData = Variable(testData.cuda(), volatile=True)
    #     testLabel = Variable(testLabel.cuda(), volatile=True)
    #     out = model(testData)
    #     loss = loss_func(out, testLabel)
    #     eval_loss += loss.data[0]
    #     pred = torch.max(out, 1)[1]
    #     eval_acc += (pred == testLabel).sum().data[0]
    # print('Test Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / (len(
    #     test_data)), eval_acc / (len(test_data))))
图片集太大就不上传了。
网络下载地址:https://download.csdn.net/download/eatapples/10394160