之前学习cnn的时候拿kaggle上的cat vs dog练练手。
本模型是用pytorch写的,是一个两层卷积的简单cnn。代码不太难,贴出来看看,应该能懂,就不解释了。
import os
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
import torch.nn.functional as F
from torchvision import transforms
import pandas as pd
class Model(nn.Module):
    """Simple two-conv-layer CNN for binary (cat vs dog) classification.

    Expects 3x84x84 input images: 84 -> conv(5) -> 80 -> pool -> 40 ->
    conv(5) -> 36 -> pool -> 18, hence the 16*18*18 flattened size.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 6, 5),
            nn.BatchNorm2d(6),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(6, 16, 5),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
        )
        self.fc1 = nn.Linear(16 * 18 * 18, 800)
        self.fc2 = nn.Linear(800, 120)
        self.fc3 = nn.Linear(120, 2)

    def _logits(self, x):
        # Raw (pre-softmax) class scores; CrossEntropyLoss wants these.
        x = self.conv1(x)
        x = self.conv2(x)
        x = x.view(x.size(0), -1)  # flatten: (N, 16*18*18)
        x = self.fc1(x)
        x = self.fc2(x)
        return self.fc3(x)

    def forward(self, x):
        # Probabilities for inference. argmax over these equals argmax
        # over the logits, so the prediction code below is unaffected.
        return F.softmax(self._logits(x), dim=1)

    def train_model(self, x, y):
        """Run one SGD step on batch (x, y); y holds integer class ids."""
        optimizer = torch.optim.SGD(self.parameters(), lr=0.05)
        loss_func = nn.CrossEntropyLoss()
        # BUG FIX: feed raw logits, not softmax output, to
        # CrossEntropyLoss — it applies log_softmax internally.
        # Softmax-ing twice flattens the gradients and pins the loss
        # near log(2) ~= 0.69, the exact symptom described at the
        # bottom of this file.
        out = self._logits(x)
        y = y.squeeze_()  # CrossEntropyLoss needs shape (N,), not (N, 1)
        loss = loss_func(out, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # print(loss.cpu().data.numpy())

    def model_save(self, model_path):
        """Save only the weights (state_dict), not the whole module."""
        torch.save(self.state_dict(), model_path)

    def model_load(self, model_path):
        """Return the saved state_dict; caller passes it to load_state_dict."""
        return torch.load(model_path)
def letterbox_image(img, inp_dim):
    """Resize img to fit inside inp_dim=(w, h) keeping aspect ratio,
    padding the remainder with grey (128).

    Returns an (h, w, 3) uint8 canvas with the resized image centred.
    """
    img_h, img_w = img.shape[0], img.shape[1]
    w, h = inp_dim
    scale = min(w / img_w, h / img_h)  # one factor for both axes
    new_w = int(img_w * scale)
    new_h = int(img_h * scale)
    resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
    # BUG FIX: np.full's default dtype is float64; keep uint8 so the
    # result is a normal 8-bit image (cv2.imshow and downstream dtype
    # expectations both break on a float64 canvas of 0..255 values).
    canvas = np.full((h, w, 3), 128, dtype=np.uint8)
    top = (h - new_h) // 2
    left = (w - new_w) // 2
    canvas[top:top + new_h, left:left + new_w, :] = resized_image
    return canvas
def prep_image(img, inp_dim):
    """Letterbox a BGR HWC image to inp_dim x inp_dim, convert it to an
    RGB CHW float tensor scaled to [0, 1], and add a batch axis.

    Returns a (1, 3, inp_dim, inp_dim) torch.Tensor.
    """
    boxed = letterbox_image(img, (inp_dim, inp_dim))
    # BGR -> RGB, then HWC -> CHW; .copy() gives a contiguous array
    rgb_chw = boxed[:, :, ::-1].transpose((2, 0, 1)).copy()
    tensor = torch.Tensor(rgb_chw).float().div(255.0)
    return tensor.unsqueeze(0)
if __name__ == '__main__':
    model = Model()
    model.cuda()
    model.train()

    # ---- training: 2000 random batches of 30 images from the train dir ----
    file = r'E:\python\Dogs vs. Cats\train'
    imfile = os.listdir(file)
    for i in range(2000):
        x = []
        y = []
        while len(x) < 30:
            n = np.random.randint(len(imfile))
            name = imfile[n]
            # BUG FIX: derive the label before loading. The original left
            # `yone` unbound (NameError) if the first sampled file was
            # neither 'dog*' nor 'cat*', and silently reused the previous
            # label for any later unlabelled file.
            if name[:3] == 'dog':
                yone = [1]
            elif name[:3] == 'cat':
                yone = [0]
            else:
                continue  # skip files that are not labelled dog/cat
            im = cv2.imread(os.path.join(file, name))
            im = prep_image(im, 84)
            im = im.data.numpy()
            # NOTE(review): prep_image already divides by 255; this /225
            # (sic — likely a typo for 255) rescales a second time. Kept
            # as-is to preserve the original training behaviour — confirm
            # and remove together with retraining.
            im = im / 225
            im = im - np.mean(im)
            x.append(im)
            y.append(yone)
        # Variable() is a deprecated no-op wrapper in modern torch;
        # plain tensors carry autograd themselves.
        x = torch.Tensor(x).cuda().squeeze_()
        y = torch.Tensor(np.array(y)).cuda().long()
        model.train_model(x, y)
    model.model_save(r'E:\python\Dogs vs. Cats\3.pkl')
    # model.load_state_dict(model.model_load(r'E:\python\Dogs vs. Cats\1.pkl'))

    # ---- evaluation: show predictions for 10 random test images ----
    model.eval()
    file = r'E:\python\Dogs vs. Cats\test'
    imfile = os.listdir(file)
    for i in range(10):
        n = np.random.randint(len(imfile))
        imf = os.path.join(file, imfile[n])
        image = cv2.imread(imf)
        im = prep_image(image, 84)
        im = im.data.numpy()
        # same (double) normalisation as used during training
        im = im / 225
        im = im - np.mean(im)
        x = torch.Tensor(im).cuda()
        out = model.forward(x)
        outo = torch.argmax(out)
        if outo == 1:
            print('dog', ' ', out)
        else:
            print('cat', ' ', out)
        cv2.namedWindow('IM')
        cv2.imshow('IM', image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
问题:
1. 刚开始 loss 一直在 0.69 左右,不管怎么改参数都不变。百度了一下说是类别区分不明显时,loss 会一直停在 log(类别数) 附近(二分类即 log 2 ≈ 0.693)。https://blog.csdn.net/u010725283/article/details/78929684
后来对图片做了归一化等处理,loss 才开始下降,但我觉得这并没有从根本上解决问题,望有大佬赐教!!!
(补充:根本原因通常不是归一化,而是 forward 里先做了 softmax,又把结果喂给 CrossEntropyLoss——后者内部自带 log_softmax,等于做了两次 softmax,梯度被压平,loss 就卡在 log(类别数) 附近;正确做法是把未经 softmax 的 logits 直接传给 CrossEntropyLoss。)
2. 批训练时用 CrossEntropyLoss() 会出现 'multi-target not supported',需要去掉 labels 的维度 1,即 labels.squeeze_()。