前言
Kaggle挑战赛的一个项目,非参赛项目,做这个只是研究实验用,另外这只是一个简单快速的算法验证实验,目前的acc=0.55,后
续可能会做一些优化,骨干网络选用resnet系列或者shufflenet系列或者MaskRcnn或者Fast-SCNN等较为优秀的模型。
数据集 数据是算法的灵魂
算法是用来分析数据总结规律的工具,并且把这个规律应用于未知数据的过程,与数据最匹配的算法即为最优的算法。所以确定具
体算法之前一定要对数据集有充分的研究分析。
数据内容是卫星云图,图中云的形状有四种类型:Fish, Flower, Gravel, Sugar。既可以利用目标检测的算法建立一个目标检测模
型,也可以建立一个多分类模型。目标检测模型可以考虑Faster-RCNN、SSD、YOLO等,多分类模型可以尝试U-NET、Mask
RCNN等语义分割网络。
博主是用U-NET模型进行的实验,你可以根据自己的需求选择合适的网络结构。
Kaggle提供的数据集由四个部分组成:train.csv, train_images, test_images, sample_submission.csv
第一步:分析训练样本
# Inspect the first rows of the training annotations
import pandas as pd
df = pd.read_csv('/data/train.csv')
df.head()  # in a notebook this displays the first five rows (no effect in a plain script)
输出train.csv中的前五行数据:
可以看到,一个训练样本对应四个mask并且有的mask为空,也即是训练样本中没有这个类别的实体。mask的标注是像素级的,
把mask展开成一维数组,EncodedPixels中奇数位置代表一维数组中的物体的起始位置,随后的偶数位置代表持续的长度,比如
(264918,937)表示从位置264918开始,有937个像素都是属于Fish这一类的。
另外图像的每一个mask对应的类别是直接与图像名字在一起的,为了方便使用数据集做一步拆分处理。
# NOTE(review): assumes pandas (pd), cv2 and matplotlib.pyplot (plt) are already
# imported; also the csv path here ('../input/train.csv') differs from the
# '/data/train.csv' used earlier -- verify which location is correct.
train_df = pd.read_csv('../input/train.csv')
print(train_df.head())
base_path='data/train_images/'
# Split Image_Label into ImageId and Label
train_df['ImageId'] = train_df['Image_Label'].apply(lambda x : x.split('_')[0])
train_df['Label'] = train_df['Image_Label'].apply(lambda x : x.split('_')[1])
# hasMask is True when the row carries a non-empty RLE annotation
train_df['hasMask'] = ~ train_df['EncodedPixels'].isna()
# Load the first training image; cv2 reads BGR, so convert to RGB for display
image = base_path + train_df['ImageId'][0]
img = cv2.imread(image)
img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
print(train_df.shape)
print(train_df.head())
plt.imshow(img)
plt.show()
可以看到Image_Label这一列被拆分成了ImageId, Label两列,这样在读取特定ImageId时可以很方便的获得所有的mask。
第二步:数据预处理
这一步包含数据增强以及从原始数据中读取训练样本等。
def np_resize(img, input_shape, graystyle=False):
    """
    Resize a numpy image to ``input_shape`` given as (height, width).

    ``cv2.resize`` expects its size argument as (width, height), so the
    tuple is flipped before the call.  When ``graystyle`` is true a
    trailing channel axis is appended so single-channel output keeps an
    explicit channel dimension.
    """
    target_h, target_w = input_shape
    out = cv2.resize(img, (target_w, target_h))
    if graystyle:
        # cv2 drops the channel axis for grayscale input; restore it
        out = out[..., None]
    return out
def rle2mask(rle, input_shape):
    """
    Decode a run-length-encoded annotation string into a binary mask.

    Parameters
    ----------
    rle : str
        Space-separated "start length" pairs.  Per the Kaggle convention,
        each start is a 1-based position into the column-major flattened
        image, and length is the number of consecutive foreground pixels.
    input_shape : tuple
        First two entries give the output mask shape; the returned array
        has shape ``input_shape[:2]``.

    Returns
    -------
    np.ndarray of uint8 (0/1 values).
    """
    width, height = input_shape[:2]
    mask = np.zeros(width * height, dtype=np.uint8)
    array = np.asarray([int(x) for x in rle.split()])
    # RLE alternates start/length: starts at even indices, lengths at odd.
    # Kaggle RLE positions are 1-based, so shift to 0-based indices
    # (the original code indexed with the raw 1-based start -- off by one).
    starts = array[0::2] - 1
    lengths = array[1::2]
    for start, length in zip(starts, lengths):
        mask[int(start):int(start + length)] = 1
    # Runs go down columns first, hence reshape then transpose.
    return mask.reshape(height, width).T
def build_masks(rles, input_shape, reshape=None):
    """
    Stack per-class RLE strings into one (H, W, n_classes) mask tensor.

    Parameters
    ----------
    rles : sequence
        One RLE per class.  Non-string entries (pandas encodes "no mask"
        as NaN, a float) yield an all-zero channel.
    input_shape : tuple
        Spatial shape the RLEs were encoded against.
    reshape : tuple, optional
        If given, each decoded mask is resized to this (height, width).

    Returns
    -------
    np.ndarray of shape (*target, len(rles)) where target is ``reshape``
    when provided, else ``input_shape``.
    """
    depth = len(rles)
    target_shape = input_shape if reshape is None else reshape
    masks = np.zeros((*target_shape, depth))
    for i, rle in enumerate(rles):
        # isinstance (not `type(...) is str`) is the idiomatic check; it
        # also correctly skips the NaN placeholders for empty masks.
        if isinstance(rle, str):
            mask = rle2mask(rle, input_shape)
            if reshape is not None:
                mask = np_resize(mask, reshape)
            masks[:, :, i] = mask
    return masks
之后可以定义一个读取数据的类。
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of cloud images (and masks).

    In mode='fit' each item is ``(X, y)`` where ``X`` is a batch of RGB
    images scaled to [0, 1] and ``y`` the matching stack of per-class
    binary masks; in mode='predict' only ``X`` is returned.

    Parameters
    ----------
    list_IDs : sequence of int
        Positional indices into ``df`` selecting the samples of this split.
    df : pandas.DataFrame
        Must provide an 'ImageId' column with the image file names.
    target_df : pandas.DataFrame, optional
        Provides 'ImageId'/'EncodedPixels' pairs for mask building
        (required when mode='fit').
    mode : {'fit', 'predict'}
    base_path : str
        Directory holding the image files.
    batch_size : int
    dim : tuple
        Native (height, width) of the images on disk.
    n_channels : int
    reshape : tuple, optional
        Target (height, width) to resize images and masks to.
    augment : bool
        Apply random flips / grid distortion to each training batch.
    n_classes : int
    random_state : int
        Seed used when shuffling sample order.
    shuffle : bool
        Reshuffle sample order at the end of every epoch.
    graystyle : bool
        Kept for API compatibility (used by np_resize elsewhere).
    """

    def __init__(self, list_IDs, df, target_df=None, mode='fit',
                 base_path='../input/train_images',
                 batch_size=32, dim=(1400, 2100), n_channels=3, reshape=None,
                 augment=False, n_classes=4, random_state=42, shuffle=True, graystyle=False):
        self.dim = dim
        self.batch_size = batch_size
        self.df = df
        self.mode = mode
        self.base_path = base_path
        self.target_df = target_df
        self.list_IDs = list_IDs
        self.reshape = reshape
        self.n_channels = n_channels
        self.augment = augment
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.random_state = random_state
        self.graystyle = graystyle
        self.on_epoch_end()
        np.random.seed(self.random_state)

    def __len__(self):
        """Number of batches per epoch (the last partial batch is dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Generate one batch of data for batch number ``index``."""
        # Indices of the samples forming this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_batch = [self.list_IDs[k] for k in indexes]
        X = self.__generate_X(list_IDs_batch)
        if self.mode == 'fit':
            y = self.__generate_y(list_IDs_batch)
            if self.augment:
                X, y = self.__augment_batch(X, y)
            return X, y
        elif self.mode == 'predict':
            return X
        else:
            raise AttributeError('The mode parameter should be set to "fit" or "predict".')

    def on_epoch_end(self):
        """Re-derive (and optionally shuffle) the sample order for the next epoch."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            # NOTE(review): reseeding with the same random_state here makes
            # every epoch use the identical permutation -- confirm whether
            # cross-epoch reshuffling was intended.
            np.random.seed(self.random_state)
            np.random.shuffle(self.indexes)

    def __generate_X(self, list_IDs_batch):
        """Load, RGB-convert, normalize and (optionally) resize a batch of images."""
        if self.reshape is None:
            X = np.empty((self.batch_size, *self.dim, self.n_channels))
        else:
            X = np.empty((self.batch_size, *self.reshape, self.n_channels))
        for i, ID in enumerate(list_IDs_batch):
            im_name = self.df['ImageId'].iloc[ID]
            img_path = f"{self.base_path}/{im_name}"
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = img.astype(np.float32) / 255.
            if self.reshape is not None:
                img = np_resize(img, self.reshape)
            X[i,] = img
        return X

    def __generate_y(self, list_IDs_batch):
        """Build the (batch, H, W, n_classes) mask tensor for a batch."""
        if self.reshape is None:
            y = np.empty((self.batch_size, *self.dim, self.n_classes), dtype=int)
        else:
            y = np.empty((self.batch_size, *self.reshape, self.n_classes), dtype=int)
        for i, ID in enumerate(list_IDs_batch):
            im_name = self.df['ImageId'].iloc[ID]
            # All annotation rows (one per class) for this image
            image_df = self.target_df[self.target_df['ImageId'] == im_name]
            rles = image_df['EncodedPixels'].values
            if self.reshape is not None:
                masks = build_masks(rles, input_shape=self.dim, reshape=self.reshape)
            else:
                masks = build_masks(rles, input_shape=self.dim)
            y[i, ] = masks
        return y

    def __load_rgb(self, img_path):
        """Read one image as float32 RGB in [0, 1]."""
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.
        return img

    def __random_transform(self, img, masks):
        """Apply one random flip / grid-distortion composition to an image+mask pair."""
        composition = albu.Compose([
            albu.HorizontalFlip(p=0.5),
            albu.VerticalFlip(p=0.5),
            albu.GridDistortion(p=0.5)])
        composed = composition(image=img, mask=masks)
        aug_img = composed['image']
        aug_masks = composed['mask']
        return aug_img, aug_masks

    def __augment_batch(self, img_batch, masks_batch):
        """Augment every sample of a batch in place."""
        for i in range(img_batch.shape[0]):
            img_batch[i, ], masks_batch[i, ] = self.__random_transform(
                img_batch[i, ], masks_batch[i, ])
        return img_batch, masks_batch

    def get_labels(self):
        """Return one-hot classification labels for the samples used this epoch.

        NOTE(review): relies on a module-level ``img_to_ohe_vector`` mapping
        and (when shuffle is off) a ``self.labels`` attribute, neither of
        which is defined in this class -- presumably set up elsewhere.
        """
        if self.shuffle:
            # Fixed: the original read `self.len`, which does not exist and
            # raised AttributeError; len(self) is the batch count.
            images_current = self.list_IDs[:len(self) * self.batch_size]
            labels = [img_to_ohe_vector[img] for img in images_current]
        else:
            labels = self.labels
        return np.array(labels)
数据增强的实现是通过albumentations这个工具包实现的,当然你可以自己定义数据增强的方式,这个利用cv2的函数可以很方便
的实现。
第三步:定义模型结构
一个很简单的U-NET模型结构。
def unet(input_shape):
    """Build a compact U-Net with ELU activations.

    Encoder: five 2x down-sampling stages (8->16->32->64->64 filters)
    into a 128-filter bottleneck, mirrored by a decoder with skip
    connections.  The head is a 1x1 convolution with 4 sigmoid channels,
    one independent binary map per cloud class.
    """
    def double_conv(tensor, filters):
        # two stacked 3x3 ELU convolutions, spatial size preserved
        tensor = Conv2D(filters, (3, 3), activation='elu', padding='same')(tensor)
        return Conv2D(filters, (3, 3), activation='elu', padding='same')(tensor)

    inputs = Input(shape=input_shape)

    # ----- contracting path -----
    skip1 = double_conv(inputs, 8)
    down = MaxPooling2D((2, 2), padding='same')(skip1)
    skip2 = double_conv(down, 16)
    down = MaxPooling2D((2, 2), padding='same')(skip2)
    skip3 = double_conv(down, 32)
    down = MaxPooling2D((2, 2), padding='same')(skip3)
    skip4 = double_conv(down, 64)
    down = MaxPooling2D((2, 2), padding='same')(skip4)
    skip5 = double_conv(down, 64)
    down = MaxPooling2D((2, 2), padding='same')(skip5)

    # ----- bottleneck -----
    x = double_conv(down, 128)

    # ----- expanding path -----
    x = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same')(x)
    x = double_conv(concatenate([x, skip5]), 64)
    x = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(x)
    x = double_conv(concatenate([x, skip4]), 32)
    x = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same')(x)
    x = double_conv(concatenate([x, skip3]), 32)
    x = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(x)
    x = double_conv(concatenate([x, skip2]), 16)
    x = Conv2DTranspose(8, (2, 2), strides=(2, 2), padding='same')(x)
    x = double_conv(concatenate([x, skip1], axis=3), 8)

    # one sigmoid channel per class
    outputs = Conv2D(4, (1, 1), activation='sigmoid')(x)
    return Model(inputs=[inputs], outputs=[outputs])
需要注意的是,outputs中卷积核的数目与实际的类别数保持一致。当然,你可以根据自己的想象改变模型结构,直到得出最适合
你的最优结构。
第四步:定义损失函数
对于outputs的每一个channel实际上都是一个二分类问题,所以可以选择binary_crossentropy加上dice_loss函数。你也可以只用
binary_crossentropy,博主也是参考的别人的经验,你可以自己尝试更优秀的loss函数。
def dice_loss(y_true, y_pred):
    """Soft Dice loss: 1 - Dice coefficient, smoothed to avoid 0/0."""
    eps = 1.
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = 2. * K.sum(truth * pred) + eps
    total = K.sum(truth) + K.sum(pred) + eps
    return 1. - overlap / total
def bce_dice_loss(y_true, y_pred):
    """Sum of pixel-wise binary cross-entropy and the soft Dice loss."""
    bce = binary_crossentropy(y_true, y_pred)
    return bce + dice_loss(y_true, y_pred)
由于引入了dice_loss,所以模型的metrics不再是'accuracy',选择dice_coef作为模型的metrics。
def dice_coef(y_true, y_pred, smooth=1):
    """Dice coefficient metric in [0, 1]; `smooth` guards against 0/0."""
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    numerator = 2. * K.sum(truth * pred) + smooth
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return numerator / denominator
第五步:训练
定义模型的optimizer,选择SGD也可以选择Adam或者别的
设置learning_rate的衰减schedule
设置earlyStopping
定义参数存储位置
设置batch_size以及epochs
训练
# NOTE(review): all callbacks monitor the *training* loss, not 'val_loss',
# even though a validation generator is supplied below -- confirm intent.
earlystopping = EarlyStopping(monitor='loss',patience=config.es_patience)
# Shrink the learning rate when the loss plateaus, bounded below at 1e-6
reduce_lr = ReduceLROnPlateau(monitor='loss',patience=config.rlrop_patience,factor=config.decay_drop,min_lr=1e-6)
# Save weights after every epoch (save_best_only=False), weights only
checkpoint = ModelCheckpoint(filepath='weights-{epoch:03d}-{loss:.2f}.h5',monitor='loss',save_best_only=False,save_weights_only=True)
metric_list = [dice_coef]
callback_list = [earlystopping,reduce_lr,checkpoint]
optimizer = Adam(lr=config.learning_rate)
model.compile(optimizer=optimizer,loss=bce_dice_loss,metrics=metric_list)
# NOTE(review): Keras calls set_model on callbacks itself during fit; this
# explicit call looks redundant -- confirm before removing.
checkpoint.set_model(model)
# NOTE(review): fit_generator is deprecated in TF2 Keras in favour of fit()
model.fit_generator(train_generator,validation_data=val_generator,callbacks=callback_list,epochs=100,initial_epoch=0)
结论
训练100个epoch,测试集上的acc=0.55。
完整的项目地址:https://github.com/luckydog5/underStandingCloud.git
欢迎指出BUG。