VGG16 classifier for 10 monkey species
dataset.py:
import os
import torch
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
class Datasets(Dataset):
    """Monkey-image dataset read from ``<path>/label.txt``.

    Each line of ``label.txt`` is expected to look like
    ``<relative_image_path> <class_index>``.

    ``__getitem__`` returns ``(image, label)`` where ``image`` is a float
    tensor of shape (3, 224, 224) with values scaled to roughly [-0.5, 0.5]
    and ``label`` is a 1-element float tensor holding the class index.
    """

    def __init__(self, path):
        self.path = path
        # BUGFIX: the original called open(...).readlines() and never closed
        # the file handle; use a context manager so it is closed promptly.
        with open(os.path.join(path, "label.txt")) as f:
            self.dataset = f.readlines()

    def __getitem__(self, index):
        strs = self.dataset[index].strip().split()
        image_path = os.path.join(self.path, strs[0])
        label = torch.Tensor([int(strs[1])])
        image_data = Image.open(image_path).convert('RGB')
        # Pad the image to a square on a mid-gray (127) background so the
        # later resize does not distort the aspect ratio.
        w, h = image_data.size
        side = max(w, h)
        background = Image.new('RGB', size=(side, side), color=(127, 127, 127))
        length = abs(w - h) // 2  # padding needed on one side
        box = (length, 0) if w < h else (0, length)  # paste position
        background.paste(image_data, box)
        # VGG16 expects 224x224 input.
        image_data = background.resize((224, 224))
        # Scale pixel values from [0, 255] to [-0.5, 0.5].
        image_data = torch.Tensor(np.array(image_data) / 255 - 0.5)
        # HWC -> CHW, the layout torch conv layers expect.
        image_data = image_data.permute(2, 0, 1)
        return image_data, label

    def __len__(self):
        return len(self.dataset)
if __name__ == '__main__':
    # Smoke test: iterate the training set in fixed order and dump each batch.
    train_set = Datasets(r"F:\study\MyProject2\2020_4_26_ten_monkey\ten_monkey\train")
    loader = DataLoader(train_set, batch_size=100, shuffle=False)
    for batch_index, (images, labels) in enumerate(loader):
        print(batch_index)
        print(images)
net.py (model definition and training):
import torch
import numpy as np
from dataset import Datasets
from torch.utils.data import DataLoader
import torch.nn as nn
class Net(nn.Module):
    """VGG16-style convolutional classifier.

    Input:  (N, 3, 224, 224) float tensor.
    Output: (N, 102) tensor of per-class probabilities (each row sums to 1).

    NOTE(review): the head has 102 outputs although the task is 10 monkey
    species; kept as-is because the training loop one-hot encodes into 102
    classes — confirm and shrink both together.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: five conv stages, each ended by 2x2 max pooling,
        # halving the spatial size 224 -> 112 -> 56 -> 28 -> 14 -> 7.
        self.layer1 = nn.Sequential(
            # stage 1: 3 -> 64 channels at 224x224
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 112x112x64
            # stage 2: 64 -> 128 channels at 112x112
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 56x56x128
            # stage 3: 128 -> 256 channels at 56x56
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 28x28x256
            # stage 4: 256 -> 512 channels at 28x28
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 14x14x512
            # stage 5: 512 -> 512 channels at 14x14
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1, stride=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 7x7x512
        )
        # Classifier head over the flattened 7*7*512 feature map.
        self.layer2 = nn.Sequential(
            nn.Linear(7 * 7 * 512, 4096),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Linear(4096, 102),
            # BUGFIX: nn.Softmax() without dim is deprecated and emits a
            # warning; for this 2-D input it resolves to dim=1, so dim=1 is
            # the behavior-identical explicit choice.
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Run the feature extractor, flatten, then the classifier head."""
        output = self.layer1(x)
        output = output.view(-1, 7 * 7 * 512)
        output = self.layer2(output)
        return output
if __name__ == '__main__':
    # Train the VGG16-style net on the monkey dataset with one-hot MSE loss.
    dataset1 = Datasets(r"F:\study\MyProject2\2020_4_26_ten_monkey\ten_monkey\train")
    # drop_last so every batch has exactly batch_size samples.
    train_data = DataLoader(dataset1, batch_size=100, shuffle=False, drop_last=True)
    net = Net()
    optimizer = torch.optim.Adam(net.parameters())
    loss_func = nn.MSELoss()
    for epoch in range(10000):
        for i, (x, y) in enumerate(train_data):
            # x is already a float tensor from the DataLoader; the original
            # torch.Tensor(x) re-wrap only made a redundant copy.
            output = net(x)
            # y has shape (batch, 1); flatten to (batch,) so it can be used
            # both for scatter_ (via view) and for the accuracy comparison.
            ys = y.long().view(-1)
            # One-hot encode the targets: (batch, 102).
            target = torch.zeros(ys.size(0), 102).scatter_(1, ys.view(-1, 1), 1)
            # MSELoss is symmetric in value, but the conventional argument
            # order is (input, target).
            loss = loss_func(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # BUGFIX: the original compared out of shape (batch,) against
            # ys of shape (batch, 1); broadcasting produced a (batch, batch)
            # matrix and a meaningless accuracy. With ys flattened the
            # comparison is elementwise.
            out = torch.argmax(output, dim=1)
            acc = (out == ys).float().mean().item()
            if i % 10 == 0:
                print("loss:", loss.item())
                print("acc:", acc)