1. 采用步进(Step into)的调试方法,从 for i, data in enumerate(train_loader) 这一行代码开始,进入到每一个被调用的函数,直到进入RMBDataset类中的__getitem__函数,记录从for循环到RMBDataset的__getitem__所涉及的类与函数。
第一步:for i, data in enumerate(train_loader);第二步:DataLoader类,__iter__函数,判断是多进程还是单进程;第三步:_SingleProcessDataLoaderIter类,__next__函数;第四步:_BaseDataLoaderIter类,_next_index函数;第五步:BatchSampler类,__iter__函数,明确每个batch应该读取哪些数据;第六步:RandomSampler类,__iter__函数;第七步:_MapDatasetFetcher类,fetch函数;第八步:RMBDataset类,__getitem__函数。
2. 训练RMB二分类模型,熟悉数据读取机制,并且从kaggle中下载猫狗二分类训练数据,自己编写一个DogCatDataset,使得pytorch可以对猫狗二分类训练集进行读取。
import os
import random
from PIL import Image
from torch. utils. data import Dataset
import numpy as np
import torch
from torch. utils. data import DataLoader
import torchvision. transforms as transforms
# Seed Python's built-in RNG as soon as the module is loaded.
random.seed(1)

# Filename prefix of an image -> integer class label.
dogcat_label = dict(dog=0, cat=1)
class DogCatDataset(Dataset):
    """Map-style dataset of dog/cat images labelled by filename prefix.

    Every ``.jpg`` file found under ``data_dir`` (searched recursively) becomes
    one sample. The first three characters of the file name select the label
    through ``self.label_name`` (``'dog'`` -> 0, ``'cat'`` -> 1).
    """

    def __init__(self, data_dir, transform=None):
        """Index the images stored under ``data_dir``.

        :param data_dir: str, root directory that holds the images
        :param transform: optional callable applied to each loaded PIL image
        :raises KeyError: if a ``.jpg`` file name does not start with a key
            of ``label_name``
        """
        self.label_name = {'dog': 0, 'cat': 1}
        self.data_info = self.get_img_info(data_dir)
        self.transform = transform

    def __getitem__(self, index):
        """Load one sample.

        :param index: position of the sample in ``self.data_info``
        :return: ``(img, label)``; ``img`` is the RGB PIL image, or whatever
            ``transform`` returns for it when a transform was given
        """
        path_img, label = self.data_info[index]
        # convert() produces an independent, fully loaded image, so the
        # underlying file can be released as soon as the conversion is done.
        with Image.open(path_img) as raw_img:
            img = raw_img.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.data_info)

    def get_img_info(self, data_dir):
        """Collect ``(image_path, label)`` pairs for every ``.jpg`` file.

        Directories and files are visited in sorted order so that a given
        index always refers to the same image, independent of the order in
        which the filesystem enumerates entries.

        :param data_dir: str, root directory to search recursively
        :return: list of ``(path, int_label)`` tuples
        :raises KeyError: if the first three characters of a ``.jpg`` file
            name are not a key of ``self.label_name``
        """
        data_info = []
        for root, dirs, files in os.walk(data_dir):
            # Sorting in place steers the order in which os.walk descends.
            dirs.sort()
            for img_name in sorted(files):
                if not img_name.endswith('.jpg'):
                    continue
                label = self.label_name[img_name[:3]]
                data_info.append((os.path.join(root, img_name), int(label)))
        return data_info
def set_seed(seed=1):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    :param seed: int, value handed to every generator (default 1)
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)
def main():
    """Build the DogCat training DataLoader and print how many batches it has."""
    set_seed()

    BATCH_SIZE = 16
    train_dir = os.path.join('data', 'DogCat')

    # Per-channel mean / std passed to Normalize (the widely used ImageNet
    # statistics).
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    # Training pipeline: resize, random crop with 4px padding, then tensor
    # conversion and normalisation.
    train_transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    # Same pipeline without the random crop. It is built here but not
    # consumed by anything in this function yet.
    valid_transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    train_data = DogCatDataset(data_dir=train_dir, transform=train_transform)
    train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)

    # len() of a DataLoader is its number of batches, not of samples.
    print(len(train_loader))
# Run the demo only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()