1 Dataset 和 DataLoader
1.1 自定义数据的加载:继承 Dataset
需要实现 __init__、__len__、__getitem__ 三个函数;数据按文件夹存放,文件夹名对应 label
class Pokemon(Dataset):
    """Custom image dataset laid out as root/<class_name>/*.png|jpg|jpeg.

    Each sub-folder name is a class; labels are consecutive integers assigned
    in sorted folder order. `mode` selects the split: 'train' (first 60%),
    'val' (60%-80%), anything else (last 20%).
    """

    def __init__(self, root, resize, mode):
        super(Pokemon, self).__init__()
        self.root = root
        self.resize = resize
        # Folder name -> integer label, e.g. {'bulbasaur': 0, 'pikachu': 1}.
        self.name2label = {}
        # os.listdir returns the names of the entries in the directory.
        for name in sorted(os.listdir(os.path.join(root))):
            if not os.path.isdir(os.path.join(root, name)):
                continue
            # Labels are the class folders, numbered in sorted order.
            self.name2label[name] = len(self.name2label.keys())
        # Build (or reuse) the path/label index stored in images.csv.
        self.images, self.labels = self.load_csv('images.csv')
        # Split the shuffled index into train/val/test partitions.
        if mode == 'train':
            self.images = self.images[:int(0.6 * len(self.images))]
            self.labels = self.labels[:int(0.6 * len(self.labels))]
        elif mode == 'val':
            self.images = self.images[int(0.6 * len(self.images)):int(0.8 * len(self.images))]
            self.labels = self.labels[int(0.6 * len(self.labels)):int(0.8 * len(self.labels))]
        else:
            self.images = self.images[int(0.8 * len(self.images)):]
            self.labels = self.labels[int(0.8 * len(self.labels)):]

    def load_csv(self, filename):
        """Return (image_paths, labels); create root/filename on first use.

        Fix: the original declared ``global labels`` here, leaking a
        module-level global — the function only needs local lists.
        """
        csv_path = os.path.join(self.root, filename)
        if not os.path.exists(csv_path):  # only build the csv once
            images = []
            for name in self.name2label.keys():
                # glob collects every matching file path in one call,
                # e.g. 'pokeman/bulbasaur/000.png'.
                images += glob.glob(os.path.join(self.root, name, '*.png'))
                images += glob.glob(os.path.join(self.root, name, '*.jpg'))
                images += glob.glob(os.path.join(self.root, name, '*.jpeg'))
            random.shuffle(images)
            # newline='' is required, otherwise csv inserts a blank line
            # after every row on Windows.
            with open(csv_path, mode='w', newline='') as f:
                writer = csv.writer(f)
                for img in images:  # e.g. 'pokeman/bulbasaur/000.png'
                    # The second-to-last path component is the class folder.
                    name = img.split(os.sep)[-2]
                    label = self.name2label[name]
                    writer.writerow([img, label])  # row: [path, label]
        images = []
        labels = []
        # Read the csv back so creation and reuse share one code path.
        with open(csv_path) as f:
            reader = csv.reader(f)
            for row in reader:
                img, label = row
                images.append(img)
                labels.append(int(label))
        assert len(labels) == len(images)
        return images, labels

    def denormalize(self, x_hat):
        """Undo ImageNet normalization so images display with natural colors."""
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        # Reshape to [3, 1, 1] so the stats broadcast over [3, H, W] images.
        mean = torch.tensor(mean).unsqueeze(1).unsqueeze(1)
        std = torch.tensor(std).unsqueeze(1).unsqueeze(1)
        x = x_hat * std + mean
        return x

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Load, augment and normalize the idx-th image.

        Returns (image_tensor, label_tensor); idx ranges over 0..len(self)-1.
        """
        # img is a path string like 'pokeman/bulbasaur/000.png'.
        img, label = self.images[idx], self.labels[idx]
        tf = transforms.Compose([
            lambda x: Image.open(x).convert('RGB'),  # path -> PIL RGB image
            transforms.Resize((int(self.resize * 1.25), int(self.resize * 1.25))),
            transforms.RandomRotation(15),
            transforms.CenterCrop(self.resize),
            transforms.ToTensor(),
            # Standard ImageNet channel statistics; maps pixels to roughly
            # zero-mean, unit-variance values.
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        img = tf(img)
        label = torch.tensor(label)
        return img, label
使用 Visdom 或 TensorBoard 来展示都可以;继承了 Dataset,就可以很方便地使用 DataLoader 来分 batch 训练模型
if __name__ == '__main__':
    # Requires a running visdom server for visualization.
    viz = visdom.Visdom()
    db = Pokemon('pokeman', 224, 'train')
    # next(iter(db)) performs exactly one __getitem__ call.
    x, y = next(iter(db))
    viz.images(db.denormalize(x), win='sample_x', opts=dict(title='sample_x'))
    # DataLoader yields whole batches instead of single samples.
    loader = DataLoader(db, batch_size=32, shuffle=True)
    for x, y in loader:
        viz.images(db.denormalize(x), win='DataLoader', nrow=8, opts=dict(title='DataLoader'))
        viz.text(str(y.numpy()), win='label', opts=dict(title='batch-y'))
        time.sleep(10)  # keep each batch on screen for 10 seconds
1.2使用torch自带的数据集加载
# Download (first run only) and load CIFAR-10 as tensors.
train_data = torchvision.datasets.CIFAR10('cifar', train=True, transform=transforms.ToTensor(), download=True)
test_data = torchvision.datasets.CIFAR10('cifar', train=False, transform=transforms.ToTensor(), download=True)
train_data_len = len(train_data)
test_data_size = len(test_data)
# Batch both splits; shuffling the test split only affects display order.
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=True)
1.3 使用ImageFolder要求是同一个文件夹下面是同一类,文件夹名称是标签
db = torchvision.datasets.ImageFolder(root='pokeman',transform=tf)#ImageFolder要求文件夹下的是同一类
2 nn.Module 模块
2.1常规的cnn框架(必须实现forward函数和初始化函数)
class lenet5(nn.Module):
    """LeNet-5-style CNN: two conv/avg-pool stages plus a 3-layer MLP head.

    Expects [b, 3, 32, 32] inputs (the conv stack yields 16*5*5 features)
    and returns raw logits over 10 classes.
    """

    def __init__(self):
        super(lenet5, self).__init__()
        # Feature extractor: [b, 3, w, h] -> [b, 16, w', h'].
        self.conv_unit = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0),
            nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            nn.AvgPool2d(kernel_size=2, stride=2, padding=0),  # early torch had no nn.Flatten()
        )
        # Classifier over the flattened 16*5*5 feature map.
        self.fv_unit = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10),
        )
        # For training, pair with nn.CrossEntropyLoss(), which already
        # applies softmax internally.

    def forward(self, x):
        """Return logits of shape [b, 10]; no softmax (see note above)."""
        n = x.size(0)  # same as x.size()[0]
        features = self.conv_unit(x)
        flat = features.view(n, -1)  # flatten each sample
        return self.fv_unit(flat)
2.2 残差网络的结构(需要定义残差模块)
class ResBlk(nn.Module):
    """Basic residual block: two 3x3 conv+BN layers with a shortcut connection.

    ch_in/ch_out are input/output channel counts (e.g. 64 -> 128); stride is
    applied by the first conv and, when needed, by the shortcut projection.
    """

    def __init__(self, ch_in, ch_out, stride=1):
        super(ResBlk, self).__init__()
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        # BatchNorm2d normalizes each of the ch_out channels over the mini-batch.
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)
        # Shortcut path — this is the key part.
        # Fix: the original projected only when ch_out != ch_in, so an
        # identity shortcut combined with stride > 1 (e.g. ResBlk(512, 512,
        # stride=2)) produced a spatial-size mismatch at the addition below.
        # Standard ResNet projects whenever channels OR spatial size change.
        self.extra = nn.Sequential()
        if ch_out != ch_in or stride != 1:
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out)
            )

    def forward(self, x):
        """Return relu(shortcut(x) + conv_path(x)), shape [b, ch_out, h/stride, w/stride]."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.extra(x) + out
        out = F.relu(out)
        return out
class ResNet18(nn.Module):
    """Small ResNet-style classifier: stem conv + four residual stages + linear head (10 classes)."""

    def __init__(self):
        super(ResNet18, self).__init__()
        # Stem: [b, 3, h, w] -> [b, 64, h', w'] via a stride-3 conv and BN.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=3, padding=0),
            nn.BatchNorm2d(64)
        )
        # Four residual stages; channels grow 64 -> 128 -> 256 -> 512.
        self.resblk1 = ResBlk(64, 128, stride=2)
        self.resblk2 = ResBlk(128, 256, stride=2)
        self.resblk3 = ResBlk(256, 512, stride=2)
        self.resblk4 = ResBlk(512, 512, stride=2)
        # Classifier over the pooled 512-dim feature vector.
        self.outlayer = nn.Linear(512, 10)

    def forward(self, x):
        """Return class logits of shape [b, 10]."""
        feat = F.relu(self.conv1(x))
        for blk in (self.resblk1, self.resblk2, self.resblk3, self.resblk4):
            feat = blk(feat)
        # adaptive_avg_pool2d collapses any remaining spatial size to 1x1.
        feat = F.adaptive_avg_pool2d(feat, [1, 1])
        feat = feat.view(feat.size(0), -1)  # flatten
        return self.outlayer(feat)
3 数据的训练、测试和验证
# Fix: hard-coded torch.device('cuda') crashes on CPU-only machines; fall
# back to CPU when no GPU is present. CUDA and CPU tensors cannot be mixed
# in one operation, so model, loss and data all move to the same device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNet18().to(device)  # .cuda() would also work on a GPU box
if os.path.exists('save_res.md1'):
    # Resume from saved weights. map_location lets a GPU-saved checkpoint
    # load on a CPU-only machine (original would raise without it).
    model.load_state_dict(torch.load('save_res.md1', map_location=device))
criteon = nn.CrossEntropyLoss().to(device)  # includes softmax internally
op = optim.Adam(model.parameters(), lr=1e-3)
for epoch in range(1000):
    model.train()  # enable BN/Dropout training behavior
    for batch_id, (x, y) in enumerate(cifar_train):
        x, y = x.to(device), y.to(device)
        logits = model(x)  # logits: [b, 10], y: [b]
        loss = criteon(logits, y)
        op.zero_grad()
        loss.backward()
        op.step()
    print(epoch, loss.item())
    model.eval()  # switch BN/Dropout to inference behavior
    with torch.no_grad():  # no autograd bookkeeping during evaluation
        total_correct = 0
        total_num = 0
        for x, y in cifar_test:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            pre = logits.argmax(dim=1)
            # .item() accumulates a plain float instead of a 0-d tensor.
            total_correct += torch.eq(pre, y).float().sum().item()
            total_num += x.size(0)  # batch size of this step
        acc = total_correct / total_num
        print('epoch:{0},acc是{1}'.format(epoch, acc))
        torch.save(model.state_dict(), 'save_res.md1')
5 一些杂碎的知识
1.因为 nn.Module 实现了 __call__ 函数并在其中调用 forward,所以直接用 model(x) 的方式调用实例就会执行 forward 函数
2.transforms.Compose([]) 里面必须是 transforms 类的方法,或者是作用于 PIL 图像的函数
3.nn.Softmax(dim=0) 是在竖的维度上做归一化;randn 是正态分布,rand 是均匀分布
tensorboard --logdir=文件夹名称
通过 model.train() / model.eval() 显式地切换 BN/Dropout 在训练与推理阶段的不同行为
Dropout在训练阶段:
1.dropout 作用在标准神经网络结构上时,会按给定比例将部分神经元的输出置为 0,换言之,让一部分隐藏单元暂时失效。
2.神经网络通过不停地更新参数获得更好的结果;使用 dropout 后可以给一个较大的学习率,而不用担心参数过大导致陷入局部最优,而且可以搜索更广阔的区域