该篇博客主要分析别人的代码做个记录,为之后的自己的代码做个借鉴,使用tensorflow2,我想说比原来简单多了。
https://www.kaggle.com/ekhtiar/resunet-a-baseline-on-tensorflow
竞赛介绍
这个竞赛主要目的是定位和分类钢板表面缺陷,属于语义分割问题。
扁钢的生产工艺特别精细。从加热、轧制到干燥、切割,扁钢在准备装运之前要经过多台机器的处理。目前,Severstal使用高频摄像机拍摄的图像来驱动缺陷检测算法。
数据描述
竞赛会提供给你4个文件:第一个是train_images,包含训练图片;第二个是test_images,包含测试图片;第三个是sample_submission,是最终提交文件的例子;第四个是train文件,需要详细说一下。如下图:
一共三列:第一列:ImageId(图片编号);第二列:ClassId(类编号);第三列:EncodedPixels(图像标签)。注意这个图像标签和我们平常遇到的不一样:平常的标签是一个mask,里面用数字填充,背景为0;但是这里为了压缩数据,使用的是“像素列位置-长度”格式。举个例子:
我们把一个图像(h,w)flatten(注意不是按行而是按列),29102 12 29346 24 29602 24表示从29102像素位置开始的12长度均为非背景,后面以此类推。这就相当于在每个图像上画一条竖线。
一共有4种类别ClassId = [1, 2, 3, 4]。
代码分析
注意看注释!
# Build a mask from an RLE string (the EncodedPixels image label).
# from https://www.kaggle.com/robertkag/rle-to-mask-converter
def rle_to_mask(rle_string, height, width):
    '''
    Convert an RLE (run length encoding) string to a numpy array.

    The string is a whitespace-separated list of ``start length`` pairs.
    Starts are 1-based positions in the column-major flattening of the
    image (down each column, then on to the next column).

    Parameters:
    rle_string (str or int): RLE text, or the sentinel -1 meaning "no mask"
    height (int): height of the mask
    width (int): width of the mask
    Returns:
    numpy.array: mask of shape (height, width); encoded pixels are 255 and
        all others 0. The -1 sentinel returns a float array of zeros, a
        string input returns a uint8 array.
    Raises:
    ValueError: if a token is not an integer or the number of values is odd.
    '''
    rows, cols = height, width
    if rle_string == -1:
        return np.zeros((height, width))
    # split() with no argument tolerates leading/trailing/repeated whitespace
    # and yields no tokens for an empty string (which becomes an empty mask).
    rle_numbers = [int(token) for token in rle_string.split()]
    # One row per run: (start position, length).
    rle_pairs = np.array(rle_numbers, dtype=np.int64).reshape(-1, 2)
    # The mask is painted on a flat buffer first.
    img = np.zeros(rows * cols, dtype=np.uint8)
    for start, length in rle_pairs:
        start -= 1  # the encoded positions are 1-based
        img[start:start + length] = 255
    # Runs go down columns, so the buffer is (cols, rows); transpose it back
    # to (rows, cols).
    return img.reshape(cols, rows).T
以下为mask转为rle
def mask_to_rle(mask):
    '''
    Convert a mask into RLE.

    Parameters:
    mask (numpy.array): 2-D mask; every non-zero value is treated as
        foreground and 0 as background
    Returns:
    string: space-separated ``start length`` pairs, with 1-based starts
        counted in column-major order; empty string for an empty mask
    '''
    # Binarize first: with several distinct non-zero values (e.g. 1 and 255)
    # the change detection below would otherwise split a single run.
    # Transposing before flattening gives the column-major pixel order.
    pixels = (np.asarray(mask).T.flatten() != 0).astype(np.uint8)
    # Pad with background so runs touching either end are still delimited.
    pixels = np.concatenate([[0], pixels, [0]])
    # Positions (1-based) where the value differs from the previous pixel:
    # they alternate run start, run end, run start, ...
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Replace each end position by (end - start), i.e. the run length.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)
接下来,我们看下自定义的数据生成器
class DataGenerator(tf.keras.utils.Sequence):
    """Batch generator of standardized grayscale images and defect masks.

    Each batch is a pair ``(X, y)``: ``X`` has shape
    ``(batch, img_h, img_w, 1)`` and ``y`` has shape
    ``(batch, img_h, img_w, 4)`` with one 0/1 channel per defect class.
    """

    # Size of the original images that the RLE labels are decoded at.
    _src_h = 256
    _src_w = 1600

    def __init__(self, list_ids, labels, image_dir, batch_size=32,
                 img_h=256, img_w=512, shuffle=True):
        """
        Parameters:
        list_ids: sequence of image file names (image ids)
        labels: dict mapping '<image id>_<class id>' to an RLE string
        image_dir: directory containing the image files
        batch_size: number of samples per batch
        img_h, img_w: height and width the images and masks are resized to
        shuffle: whether to reshuffle the sample order after every epoch
        """
        super().__init__()
        self.list_ids = list_ids
        self.labels = labels
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.img_h = img_h
        self.img_w = img_w
        self.shuffle = shuffle
        # Build (and optionally shuffle) the initial index order.
        self.on_epoch_end()

    def __len__(self):
        'denotes the number of batches per epoch'
        # Floor division: a trailing incomplete batch is dropped.
        return int(len(self.list_ids) // self.batch_size)

    def __getitem__(self, index):
        'generate one batch of data'
        # Positions (into list_ids) of the samples belonging to this batch.
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        # Image ids (file names) of this batch.
        list_ids_temp = [self.list_ids[k] for k in indexes]
        X, y = self.__data_generation(list_ids_temp)
        return X, y

    def on_epoch_end(self):
        'reset the sample order after each epoch, shuffling it if requested'
        self.indexes = np.arange(len(self.list_ids))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_ids_temp):
        'generate the image and mask arrays for the given image ids'
        n_samples = len(list_ids_temp)
        X = np.empty((n_samples, self.img_h, self.img_w, 1))
        y = np.empty((n_samples, self.img_h, self.img_w, 4))
        for idx, image_id in enumerate(list_ids_temp):
            file_path = os.path.join(self.image_dir, image_id)
            image = cv2.imread(file_path, 0)  # 0 = read as grayscale
            if image is None:
                # cv2.imread signals failure by returning None.
                raise FileNotFoundError('cannot read image: %s' % file_path)
            image_resized = cv2.resize(image, (self.img_w, self.img_h))
            image_resized = np.array(image_resized, dtype=np.float64)
            # Per-image standardization (zero mean, unit variance).
            image_resized -= image_resized.mean()
            std = image_resized.std()
            if std > 0:
                # A constant image has zero std; skip the division rather
                # than filling the sample with NaN.
                image_resized /= std
            mask = np.empty((self.img_h, self.img_w, 4))
            for idm, image_class in enumerate(['1', '2', '3', '4']):
                rle = self.labels.get(image_id + '_' + image_class)
                # if there is no mask create empty mask
                if rle is None:
                    class_mask = np.zeros((self._src_h, self._src_w))
                else:
                    class_mask = rle_to_mask(rle, width=self._src_w,
                                             height=self._src_h)
                # The mask must be resized the same way as the image.
                class_mask_resized = cv2.resize(class_mask,
                                                (self.img_w, self.img_h))
                mask[..., idm] = class_mask_resized
            X[idx,] = np.expand_dims(image_resized, axis=2)
            y[idx,] = mask
        # normalize Y: any positive value (e.g. 255) becomes 1
        y = (y > 0).astype(int)
        return X, y
以上就是自定义数据生成器。
# Map each 'ImageId_ClassId' key that has an encoded mask to its RLE string;
# rows whose EncodedPixels is the sentinel -1 are left out.
labelled_rows = train_df[train_df['EncodedPixels'] != -1]
masks = dict(zip(labelled_rows['ImageId_ClassId'],
                 labelled_rows['EncodedPixels']))
注意这里的ImageId_ClassId是这样的形式imageid+"_"+ClassId
# repeat low represented samples more frequently to balance our dataset
# (the classes are imbalanced, so images of each class are duplicated by a
# per-class factor)
if repeat:
    has_defect = train_df['EncodedPixels'] != -1

    def _repeated_ids(class_id, times):
        """Ids of images with a `class_id` defect, each repeated `times` times."""
        rows = train_df[has_defect & (train_df['ClassId'] == class_id)]
        return np.repeat(rows['ImageId'].values, times)

    class_1_img_id = _repeated_ids('1', class_1_repeat)
    class_2_img_id = _repeated_ids('2', class_2_repeat)
    class_3_img_id = _repeated_ids('3', class_3_repeat)
    class_4_img_id = _repeated_ids('4', class_4_repeat)
    train_image_ids = np.concatenate(
        [class_1_img_id, class_2_img_id, class_3_img_id, class_4_img_id])
else:
    # no oversampling: every distinct image id is used once
    train_image_ids = train_df['ImageId'].unique()
重复数据,缓解数据不平衡问题
# Hold out part of the image ids for validation (fixed seed).
X_train, X_val = train_test_split(
    train_image_ids, test_size=val_size, random_state=42)

# Keyword arguments shared by both generators.
params = dict(
    img_h=img_h,
    img_w=img_w,
    image_dir=train_image_dir,
    batch_size=batch_size,
    shuffle=True,
)

# Get Generators (`masks` is a dict of RLE labels)
training_generator = DataGenerator(X_train, masks, **params)
validation_generator = DataGenerator(X_val, masks, **params)
以上为实例化DataGenerator
# define ResUNet network
def bn_act(x, act=True):
    """Apply batch normalization, optionally followed by a ReLU activation.

    Parameters:
    x: input tensor
    act (bool): when true, a ReLU activation is applied after normalization
    Returns:
    the normalized (and optionally activated) tensor
    """
    x = tf.keras.layers.BatchNormalization()(x)
    if act:
        x = tf.keras.layers.Activation('relu')(x)
    return x
def conv_block(x, filters, kernel_size=3, padding='same', strides=1):
    """Batch-normalize and activate `x` (via bn_act), then convolve it.

    Parameters:
    x: input tensor
    filters: number of convolution filters
    kernel_size, padding, strides: forwarded to Conv2D
    Returns:
    the output tensor of the convolution
    """
    normalized = bn_act(x)
    convolution = Conv2D(filters, kernel_size, padding=padding, strides=strides)
    return convolution(normalized)
def stem(x, filters, kernel_size=3, padding='same', strides=1):
    """First block of the network: two convolutions plus a projected shortcut.

    Parameters:
    x: input tensor
    filters: number of filters of every convolution in the block
    kernel_size, padding: forwarded to the main-path convolutions
    strides: stride of the block as a whole
    Returns:
    the element-wise sum of the main path and the shortcut
    """
    conv = Conv2D(filters, kernel_size, padding=padding, strides=strides)(x)
    # The stride is applied once by the convolution above; the second
    # convolution keeps stride 1 so the main path and the shortcut end up
    # with the same spatial size for any `strides` value.
    conv = conv_block(conv, filters, kernel_size, padding, 1)
    # 1x1 projection so the shortcut has `filters` channels as well.
    shortcut = Conv2D(filters, kernel_size=1, padding=padding, strides=strides)(x)
    shortcut = bn_act(shortcut, act=False)
    output = Add()([conv, shortcut])
    return output
def residual_block(x, filters, kernel_size=3, padding='same', strides=1):
    """Residual unit: two conv_blocks added to a convolutional shortcut.

    Parameters:
    x: input tensor
    filters: number of filters of every convolution in the block
    kernel_size, padding: forwarded to the convolutions
    strides: stride of the first main-path convolution and of the shortcut
    Returns:
    the element-wise sum of the shortcut and the main path
    """
    # Only the first conv_block is strided; the second keeps the resolution.
    res = conv_block(x, filters, kernel_size, padding, strides)
    res = conv_block(res, filters, kernel_size, padding, 1)
    # The shortcut uses the same stride so both branches match in size.
    shortcut = Conv2D(filters, kernel_size, padding=padding, strides=strides)(x)
    shortcut = bn_act(shortcut, act=False)
    output = Add()([shortcut, res])
    return output
def upsample_concat_block(x, xskip):
    """Upsample `x` by a factor of 2 in both spatial dimensions and
    concatenate the result with the skip tensor `xskip`."""
    upsampled = UpSampling2D((2, 2))(x)
    return Concatenate()([upsampled, xskip])
def ResUNet(img_h, img_w):
    """Build the ResUNet model for single-channel inputs.

    Parameters:
    img_h, img_w: height and width of the input images
        (NOTE(review): presumably both must be divisible by 16 so that the
        upsampled maps match the skip connections; confirm)
    Returns:
    a Keras model mapping (img_h, img_w, 1) inputs to 4 sigmoid output
    channels
    """
    widths = [16, 32, 64, 128, 256]
    inputs = Input((img_h, img_w, 1))

    ## Encoder: the stem followed by four stride-2 residual blocks.
    encoder_outputs = [stem(inputs, widths[0])]
    for width in widths[1:]:
        encoder_outputs.append(
            residual_block(encoder_outputs[-1], width, strides=2))

    ## Bridge: two conv blocks on the deepest encoder output.
    x = encoder_outputs[-1]
    for _ in range(2):
        x = conv_block(x, widths[4], strides=1)

    ## Decoder: upsample, concatenate the matching encoder output, refine.
    skips = reversed(encoder_outputs[:-1])
    decoder_widths = reversed(widths[1:])
    for skip, width in zip(skips, decoder_widths):
        x = upsample_concat_block(x, skip)
        x = residual_block(x, width)

    # One sigmoid output channel per defect class.
    outputs = tf.keras.layers.Conv2D(4, (1, 1), padding="same", activation="sigmoid")(x)
    return tf.keras.models.Model(inputs, outputs)
以上创建resunet
# Loss functions.
# Dice similarity coefficient loss, brought to you by: https://github.com/nabsabraham/focal-tversky-unet
def dsc(y_true, y_pred):
    """Smoothed Dice similarity coefficient, summed over the whole batch.

    Computes (2 * sum(y_true * y_pred) + smooth) /
    (sum(y_true) + sum(y_pred) + smooth) with smooth = 1.
    """
    smooth = 1.
    truth_flat = Flatten()(y_true)
    pred_flat = Flatten()(y_pred)
    overlap = reduce_sum(truth_flat * pred_flat)
    numerator = 2. * overlap + smooth
    denominator = reduce_sum(truth_flat) + reduce_sum(pred_flat) + smooth
    return numerator / denominator
def dice_loss(y_true, y_pred):
    """Dice loss: one minus the smoothed Dice coefficient `dsc`."""
    return 1 - dsc(y_true, y_pred)
def bce_dice_loss(y_true, y_pred):
    """Sum of the binary cross-entropy and the Dice loss."""
    cross_entropy = binary_crossentropy(y_true, y_pred)
    return cross_entropy + dice_loss(y_true, y_pred)
# Focal Tversky loss, brought to you by: https://github.com/nabsabraham/focal-tversky-unet
def tversky(y_true, y_pred, smooth=1e-6):
    """Smoothed Tversky index, summed over the whole batch.

    False negatives are weighted by alpha = 0.7 and false positives by
    1 - alpha = 0.3.
    """
    alpha = 0.7
    truth = tf.keras.layers.Flatten()(y_true)
    pred = tf.keras.layers.Flatten()(y_pred)
    true_pos = tf.reduce_sum(truth * pred)
    false_neg = tf.reduce_sum(truth * (1-pred))
    false_pos = tf.reduce_sum((1-truth)*pred)
    numerator = true_pos + smooth
    denominator = true_pos + alpha*false_neg + (1-alpha)*false_pos + smooth
    return numerator/denominator
def tversky_loss(y_true, y_pred):
    """Tversky loss: one minus the Tversky index."""
    index = tversky(y_true, y_pred)
    return 1 - index
def focal_tversky_loss(y_true,y_pred):
    """Focal Tversky loss: (1 - Tversky index) raised to gamma = 0.75."""
    gamma = 0.75
    tversky_index = tversky(y_true, y_pred)
    return tf.keras.backend.pow((1-tversky_index), gamma)
以上为create loss部分
这个竞赛的评估标准是Dice coefficient。公式:

$$\frac{2 * |X \cap Y|}{|X| + |Y|}$$

其中 $X$ 是预测标签,$Y$ 是真实标签。以上dsc函数在此基础上,加入smooth(平滑系数)。
$$\frac{2 * |X \cap Y| + smooth}{|X| + |Y| + smooth}$$
还要提到上面实现的Tversky 系数,Tversky系数是Dice系数和 Jaccard 系数的一种广义系数,公式如下:
$$T(A,B) = \frac{|A \cap B|}{|A \cap B| + \alpha |A - B| + \beta |B - A|}$$

其中 $A$ 是预测标签,$B$ 是真实标签(当 $\alpha = \beta$ 时反过来也一样;当 $\alpha \neq \beta$ 时,交换 $A$ 和 $B$ 相当于交换 $\alpha$ 和 $\beta$)。
# Build and compile the network.
model = ResUNet(img_h=img_h, img_w=img_w)
# `learning_rate` is the TF2 name of the argument; `lr` is a deprecated alias.
adam = tf.keras.optimizers.Adam(learning_rate=0.05, epsilon=0.1)
model.compile(optimizer=adam, loss=focal_tversky_loss, metrics=[tversky])

# Optionally start from previously saved weights.
if load_pretrained_model:
    try:
        model.load_weights(pretrained_model_path)
    except OSError:
        print('You need to run the model and load the trained model')
    else:
        print('pre-trained model loaded!')

# Model.fit accepts keras Sequence objects directly; fit_generator is
# deprecated in TF2.
history = model.fit(training_generator, validation_data=validation_generator, epochs=epochs, verbose=1)
以上为拟合数据;如果之前保存了权重,会先加载已保存的权重再继续训练。
def get_test_tensor(img_dir, img_h, img_w, channels=1):
    """Load one image as a standardized single-sample batch.

    Parameters:
    img_dir: path of the image file (despite the name, a file, not a folder)
    img_h, img_w: height and width the image is resized to
    channels: size of the last axis of the returned array; the grayscale
        image is broadcast across it
    Returns:
    numpy.array of shape (1, img_h, img_w, channels)
    Raises:
    FileNotFoundError: if the image cannot be read
    """
    image = cv2.imread(img_dir, 0)  # 0 = read as grayscale
    if image is None:
        # cv2.imread signals failure by returning None.
        raise FileNotFoundError('cannot read image: %s' % img_dir)
    image_resized = cv2.resize(image, (img_w, img_h))
    image_resized = np.array(image_resized, dtype=np.float64)
    # normalize image (zero mean, unit variance)
    image_resized -= image_resized.mean()
    std = image_resized.std()
    if std > 0:
        # A constant image has zero std; skip the division rather than
        # returning NaN.
        image_resized /= std
    X = np.empty((1, img_h, img_w, channels))
    X[0,] = np.expand_dims(image_resized, axis=2)
    return X
# this is an awesome little function to remove small spots in our predictions
from skimage import morphology
def remove_small_regions(img, size):
    """Morphologically removes small (less than size) connected regions of 0s or 1s.

    Parameters:
    img: binary mask; any non-zero value counts as foreground
    size: regions smaller than this are removed
    Returns:
    boolean array of the same shape as `img`
    """
    # scikit-image interprets integer arrays as label images, in which all
    # pixels equal to 1 form one single object; casting to bool makes it
    # find the connected components itself.
    binary = np.asarray(img).astype(bool)
    # Drop small foreground blobs ...
    binary = morphology.remove_small_objects(binary, size)
    # ... then fill small background holes inside the foreground.
    binary = morphology.remove_small_holes(binary, size)
    return binary
import glob
# get all files using glob (glob.glob already returns a list)
test_files = glob.glob('../input/severstal-steel-defect-detection/test_images/' + "*.jpg", recursive=True)
# (ImageId_ClassId, EncodedPixels) tuples are collected here
submission = []
# a function to apply all the processing steps necessary to each of the individual masks
def process_pred_mask(pred_mask):
    """Turn one predicted probability map into an RLE string.

    Parameters:
    pred_mask: 2-D array of predicted probabilities for one class
    Returns:
    str: RLE encoding of the cleaned-up mask at 1600x256 (width x height)
    """
    # Back to the resolution the RLE is expressed in.
    pred_mask = cv2.resize(pred_mask.astype('float32'), (1600, 256))
    # Keep the thresholded mask boolean: scikit-image treats integer arrays
    # as label images, which would turn all foreground into one object.
    pred_mask = pred_mask > .5
    # NOTE(review): np.prod(512) is just 512, so the size threshold is 10.24
    # pixels; confirm whether an image area was intended here.
    pred_mask = remove_small_regions(pred_mask, 0.02 * np.prod(512)) * 255
    return mask_to_rle(pred_mask)
# loop over all the test images
for f in test_files:
    # single-image batch (get_test_tensor is called with its default of
    # one channel)
    test = get_test_tensor(f, img_h, img_w)
    # prediction for the batch; the last axis holds one channel per class
    pred_masks = model.predict(test)
    # split the first (only) sample into a list of four per-class masks
    pred_masks = [pred_masks[0][..., i] for i in range(0, 4)]
    # apply all the processing steps to each of the masks
    pred_masks = [process_pred_mask(pred_mask) for pred_mask in pred_masks]
    # the image id is the file name without its directory
    image_id = os.path.basename(f)
    # create ImageId_ClassId (class ids start at 1) with its EncodedPixels
    # and add the pairs to the submission list
    submission.extend(
        (image_id + '_%s' % (k + 1), rle) for k, rle in enumerate(pred_masks))
以上用训练得到的模型对测试集图片进行预测,得到最终的提交结果。