"""
采用 Omniglot 数据 (https://github.com/brendenlake/omniglot) [1][2]。 该数据集包含 50 种不同字母系统中的共计 1623 种字符。对于这 1623 种字符中的每一种字符, 数据集收集了由 20 个不同人书写该字符的手写图片。用机器学习的术语,该数据集可以看作一共由 1623*20=32460 个样本组成,样本被均匀分到 1623 类中。
随机从所有类别中取出 50 类进行分类,每个类别中使用 15 张图片作为训练数据,5 张图片作为测试数据。要求对比 (a) 至少有一个隐层的全连接神经网络; (b) 一种基于卷积神经网络 (CNN) 的模型,该模型可以是现有架构,也可以为自己设计。其他超参数(如优化器、梯度下降步长、迭代次数等)均可自行设置。汇报实验结果,并比较不同的方法有何优缺点。
由于目录是二级目录,实现了自定义的 dataset,同时进行了数据增强。
同时注意其他 pytorch 使用流程,包括求导、反向传播、acc 计算、验证集上计算。
"""
# import libraries
import argparse
import numpy as np
import torch
import random
from torchsummary import summary
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
os.environ["CUDA_VISIBLE_DEVICES"]="3"
import setproctitle
setproctitle.setproctitle("pytorch@yjh")
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import copy
from torchvision import transforms, utils
# Command-line settings for the experiment; every option is an integer.
_ARG_SPECS = [
    ('--num_classes', 50, 'number of classes used'),
    ('--num_samples_train', 15, 'number of samples per class used for training'),
    ('--num_samples_test', 5, 'number of samples per class used for testing'),
    ('--seed', 1, 'random seed'),
    ('--val_period', 10, 'how long to validate'),
]
parser = argparse.ArgumentParser()
for _flag, _default, _help in _ARG_SPECS:
    parser.add_argument(_flag, type=int, default=_default, help=_help)
args = parser.parse_args()

# Model, loss function, hyperparameters and optimizer are defined below.
epochs = 150  # total number of training epochs
class OmnDataset(Dataset):
    """Omniglot dataset over a two-level directory layout.

    ``data_folder`` contains one sub-directory per alphabet ("family"), each
    of which contains one sub-directory per character holding that
    character's ``.png`` images.

    Args:
        data_folder: root directory of the resized Omniglot images.
        num_classes: number of character classes to sample; -1 keeps all.
        num_samples_per_class_train: images per class used for training.
        num_samples_per_class_test: images per class used for validation
            (informational; the val split receives every remaining image).
        mode: 'train' or 'val' -- selects which split ``__getitem__`` serves.
        seed: seeds the *global* RNGs, so a 'train' instance and a 'val'
            instance built with the same seed agree on the selected classes
            and on the per-class train/val split.
        train_transform: optional transform applied in 'train' mode.
        val_transform: optional transform applied in 'val' mode.
    """

    def __init__(self, data_folder='./omniglot_resized', num_classes=50,
                 num_samples_per_class_train=15, num_samples_per_class_test=5,
                 mode='train', seed=2021, train_transform=None, val_transform=None):
        super(OmnDataset, self).__init__()
        # Seed the global RNGs: paired train/val instances must shuffle
        # identically for their splits to be complementary.
        random.seed(seed)
        np.random.seed(seed)
        # Collect <root>/<family>/<character> directories.  os.listdir order
        # is filesystem-dependent, so sort before shuffling to make the class
        # selection reproducible across runs and machines.
        character_folders = []
        for family in sorted(os.listdir(data_folder)):
            family_path = os.path.join(data_folder, family)
            if not os.path.isdir(family_path):
                continue
            for character in sorted(os.listdir(family_path)):
                char_path = os.path.join(family_path, character)
                if os.path.isdir(char_path):
                    character_folders.append(char_path)
        self.num_samples_per_class_train = num_samples_per_class_train
        self.num_samples_per_class_test = num_samples_per_class_test
        random.shuffle(character_folders)
        if num_classes == -1:
            num_classes = len(character_folders)
        else:
            character_folders = character_folders[:num_classes]
        # Map '<family>_<character>' to a dense label index.
        self.class_idx = {}
        for idx, folder in enumerate(character_folders):
            character = os.path.split(folder)[1]
            family = os.path.split(os.path.split(folder)[0])[1]
            self.class_idx[family + '_' + character] = idx
        self.imgs_train = []
        self.imgs_val = []
        for folder in character_folders:
            character = os.path.split(folder)[1]
            family = os.path.split(os.path.split(folder)[0])[1]
            label = self.class_idx[family + '_' + character]
            # Bug fix: keep only .png files *before* splitting.  The original
            # sliced first and filtered afterwards, so a stray non-image file
            # silently shrank whichever split it landed in.
            files = sorted(f for f in os.listdir(folder) if f.endswith('.png'))
            random.shuffle(files)
            split = self.num_samples_per_class_train
            self.imgs_train.extend((os.path.join(folder, f), label)
                                   for f in files[:split])
            self.imgs_val.extend((os.path.join(folder, f), label)
                                 for f in files[split:])
        self.character_folders = character_folders
        self.mode = mode
        self.train_transform = train_transform
        self.val_transform = val_transform

    def __getitem__(self, index):
        """Return the (image, label) pair at ``index`` of the active split."""
        if self.mode == 'train':
            fn, label = self.imgs_train[index]
            transform = self.train_transform
        elif self.mode == 'val':
            fn, label = self.imgs_val[index]
            transform = self.val_transform
        else:
            raise ValueError("wrong mode")
        # NOTE(review): images open in their native PIL mode ('1' for these
        # binary pngs); add .convert('L') here if a transform requires it.
        img = Image.open(fn)
        if transform is not None:
            img = transform(img)
        return img, label

    def __len__(self):
        """Size of the split selected by ``self.mode``."""
        if self.mode == 'train':
            return len(self.imgs_train)
        elif self.mode == 'val':
            return len(self.imgs_val)
        raise ValueError("wrong mode")
# Training pipeline: resize to 28 plus random flips/rotation as augmentation.
train_transforms_fn = transforms.Compose([
    transforms.Resize(28),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(45),
    transforms.ToTensor(),
])
# Validation pipeline: deterministic resize + tensor conversion only.
val_transforms_fn = transforms.Compose([
    transforms.Resize(28),
    transforms.ToTensor(),
])

train_data = OmnDataset(mode='train', train_transform=train_transforms_fn)
test_data = OmnDataset(mode='val', val_transform=val_transforms_fn)

# Shuffle only the training loader; validation order is irrelevant.
trainloader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2)
testloader = DataLoader(test_data, batch_size=32, shuffle=False, num_workers=2)
class MLP(nn.Module):
    """Classifier for 28x28 single-channel Omniglot images.

    Note: despite its historical name this model is convolutional -- two
    conv/conv/pool stages followed by a fully-connected head.

    Args:
        dim_input: kept for backward compatibility; unused (the conv stack
            fixes the expected input to 1x28x28).
        num_classes: size of the output layer.  Defaults to the script-level
            ``args.num_classes`` when not given, preserving the original
            behavior while removing the hard-coded global coupling.
    """

    def __init__(self, dim_input=28 * 28, num_classes=None):
        super(MLP, self).__init__()
        if num_classes is None:
            # Fall back to the CLI setting, as the original always did.
            num_classes = args.num_classes
        self.base = nn.Sequential(
            # Stage 1: 1x28x28 -> 32x12x12
            nn.Conv2d(1, 32, kernel_size=3),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            # Stage 2: 32x12x12 -> 64x4x4
            nn.Conv2d(32, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
            # Fully-connected head: 64*4*4 = 1024 features -> class logits.
            nn.Linear(1024, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(64, num_classes),
        )

    def forward(self, x):
        """Return raw class logits for a (N, 1, 28, 28) batch."""
        return self.base(x)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model = MLP()
# summary expects (channels, height, width); run it on CPU before the model
# is moved to the training device.
summary(model, (1, 28, 28), batch_size=1, device="cpu")
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
loss_fn = nn.CrossEntropyLoss()
loss_list = []
acc_list = []


def evaluate(net, loader):
    """Return (accuracy, mean loss) of `net` over `loader`, without grads."""
    net.eval()
    correct, total, losses = 0, 0, []
    with torch.no_grad():
        for xv, yv in loader:
            xv = xv.to(device)
            yv = yv.to(device)
            yp = net(xv)
            losses.append(loss_fn(yp, yv).item())
            correct += (yp.max(1)[1] == yv).sum().item()
            total += yv.size(0)
    return correct / max(total, 1), float(np.mean(losses)) if losses else 0.0


for epoch in range(epochs):
    # Re-enable train mode each epoch (periodic validation switches to eval).
    model.train()
    correct, total, loss_ = 0, 0, []
    for xt, yt in trainloader:  # xt: [B, 1, 28, 28]
        xt = xt.to(device)  # .to() is not in-place; reassignment is required
        yt = yt.to(device)
        optimizer.zero_grad()
        yp = model(xt)
        loss = loss_fn(yp, yt)
        loss.backward()
        optimizer.step()
        # Bug fix: count correct predictions instead of averaging per-batch
        # means, so a smaller final batch does not bias the reported accuracy.
        correct += (yp.max(1)[1] == yt).sum().item()
        total += yt.size(0)
        loss_.append(loss.item())
    epoch_acc = correct / max(total, 1)
    acc_list.append(epoch_acc)
    loss_list.append(np.mean(loss_))
    print('train epoch:', epoch, ' train acc:', epoch_acc, ' ,loss:', np.mean(loss_))
    # Bug fix: --val_period was declared but never used; validate every
    # val_period epochs so progress on the held-out split is visible.
    if (epoch + 1) % args.val_period == 0:
        val_acc, val_loss = evaluate(model, testloader)
        print('val acc:', val_acc, ' ,loss:', val_loss)

# Final evaluation on the held-out split.  Note: test labels are used only
# here, for scoring predictions.
val_acc, val_loss = evaluate(model, testloader)
print('val acc:', val_acc, ' ,loss:', val_loss)