经过前面几章的介绍,我们已经可以得到处理好的训练样本图像,本节将利用这些图像进行训练,主要用到的是 Keras。
一、构建Dataset类
1.1 init 完成初始化工作
def __init__(self, path_name):
    """Initialise an empty dataset container.

    Nothing is read from disk here: every split attribute starts out as
    ``None`` and only the dataset location is recorded.

    Args:
        path_name: Location of the dataset; stored unchanged on the
            instance as ``self.path_name``.
    """
    # Training split (images and their labels).
    self.train_img = None
    self.train_labels = None

    # Validation split.
    self.valid_img = None
    self.valid_labels = None

    # Test split.
    self.test_img = None
    self.test_labels = None

    # Where the data lives.
    self.path_name = path_name

    # Shape of a single input sample; unknown until the data is loaded.
    self.input_shape = None
1.2 loadAllData(self,path_name):
加载正样本和负样本数据,并将两者的 images 和 labels 分别拼接在一起。
def loadAllData(self, path_name):
    """Load both sample sets and join them into a single dataset.

    ``load_dataset`` is called twice with the same ``path_name``: once for
    the ``'traindata'`` set (treated here as the positive samples) and once
    for the ``'testdata'`` set (treated as the negative samples). The two
    image arrays and the two label arrays are then concatenated along
    axis 0, positives first.

    Args:
        path_name: Passed through as the first argument of
            ``load_dataset``.

    Returns:
        A tuple ``(images, labels)`` holding the concatenated images and
        the concatenated labels.
    """
    positive_data_images, positive_data_labels = load_dataset(
        path_name, 'traindata')
    negative_data_images, negative_data_labels = load_dataset(
        path_name, 'testdata')

    # Stack along the first axis so both sets end up in one array each.
    images = np.concatenate(
        (positive_data_images, negative_data_images), axis=0)
    labels = np.concatenate(
        (positive_data_labels, negative_data_labels), axis=0)
    return images, labels
1.3 加载数据集
def load(self,img_rows=IMAGE_SIZE,img_cols=IMAGE_SIZE,
img_channels=3,nb_classes=2):
images,labels = self.loadAllData(self.path_name)
#images为四维数组,尺寸为(图片数量总(包括test+train)*IMAGE_SIZE*IMAGE_SIZE*3)
#随机划分训练集和验证集
train_images,valid_images,train_labels,valid_labels = train_test_split(images, labels,random_state = random.randint(0,100))
_, test_images,_, test_labels = train_test_split(images, labels,test_size = 0.3,random_state = random.randint(0,100))
# 当前的维度顺序如果为'th',则输入图片数据时的顺序为:channels,rows,cols,
# 否则:rows,cols,channels
# 这部分代码就是根据keras库要求的维度顺序重组训练数据集
if K.image_dim_ordering() == 'th': #theano的格式
train_images = train_images.reshape(train_images.shape[0],
img_channels, img_rows, img_cols)
valid_images = valid_images.reshape(valid_images.shape[0],
img_channels, img_rows, img_cols)
test_images = test_images.reshape( test_images.shape[0],
img_channels, img_rows, img_cols)
self.input_shape = (img_channels, img_rows, img_cols)
else: # tensorflow格式
train_images = train_images.reshape(train_images.shape[0],
img_rows, img_cols, img_channels)
valid