tensorflow 图像处理:边框检测(Bounding Box Regression)

转自:Getting Started With Bounding Box Regression In TensorFlow

 

数据集

来自kaggle,有373 张图像,分为三类(黄瓜、茄子、蘑菇),图像的边框信息存放在XML 文件中

 

边框坐标表示

如图所示,一个边框可以用元组(xmin,ymin,xmax,ymax)表示

 

数据集制作

可以借助LabelImg 软件,对指定的图片添加边框,会自动生成xml格式的边框数据文件

 

数据处理

下载image-localization-dataset.zip文件,解压;使用glob 包过滤jpg文件

import numpy as np
# Side length, in pixels, of the square that every image is resized to
# before it is fed to the network.
input_dim = 228

from PIL import Image , ImageDraw
import os
import glob

# Load every training image as a float array with values scaled to [0, 1].
images = []
# glob() returns paths in arbitrary, platform-dependent order; sorting makes
# the image order deterministic from run to run.
image_paths = sorted( glob.glob( 'training_images/*.jpg' ) )
for imagefile in image_paths:
    # The context manager closes the underlying file once the pixels are read.
    with Image.open( imagefile ) as source:
        # Force three channels so every array has shape
        # ( input_dim , input_dim , 3 ), even for grayscale or CMYK JPEGs.
        image = source.convert( 'RGB' ).resize( ( input_dim , input_dim ) )
    image = np.asarray( image ) / 255.0
    images.append( image )

 

处理边框文件XML ,使用xmltodict 包将该类型文件转为python字典

import xmltodict
import os

# Parse one bounding box per image and normalise its coordinates to [0, 1].
bboxes = []
# Derive each annotation path from its image path so that bboxes[i] always
# belongs to images[i]; two independent glob() calls give no such guarantee.
annotations_paths = [ os.path.splitext( imagefile )[0] + '.xml' for imagefile in image_paths ]
for xmlfile in annotations_paths:
    # Close the file handle as soon as the XML has been parsed.
    with open( xmlfile , 'rb' ) as xml_handle:
        x = xmltodict.parse( xml_handle )
    annotation = x[ 'annotation' ]
    obj = annotation[ 'object' ]
    # xmltodict yields a list when a file holds several <object> entries;
    # the model regresses a single box, so keep the first one.
    if isinstance( obj , list ):
        obj = obj[0]
    bndbox = obj[ 'bndbox' ]
    bndbox = np.array([ int(bndbox[ 'xmin' ]) , int(bndbox[ 'ymin' ]) , int(bndbox[ 'xmax' ]) , int(bndbox[ 'ymax' ]) ])
    # The coordinates are pixels of the ORIGINAL image, so normalise by the
    # original width/height recorded in the annotation. Fall back to input_dim
    # when the <size> element is missing or zero.
    size = annotation.get( 'size' ) or {}
    width = float( size.get( 'width' ) or 0 ) or float( input_dim )
    height = float( size.get( 'height' ) or 0 ) or float( input_dim )
    bboxes.append( bndbox / np.array([ width , height , width , height ]) )

 

划分训练集与测试集

from sklearn.model_selection import train_test_split
# Stack the per-image lists into arrays: X holds the images, Y the boxes.
X = np.array( images )
# Give every box a ( 1 , 1 , 4 ) layout so the targets are 4-D like a
# convolutional output ( batch , 1 , 1 , 4 ).
Y = np.array( bboxes ).reshape( ( -1 , 1 , 1 , 4 ) )

for array in ( X , Y ):
    print( array.shape )

# Hold out 10% of the samples for evaluation.
x_train, x_test, y_train, y_test = train_test_split( X, Y, test_size=0.1 )

 

建立keras模型

先定义损失函数,同时用均方误差 ( MSE ) 和交并比Intersection over Union (IOU).

交并比是两个边框交集(重合部分)的面积与两者并集面积的比值,即 IoU = 交集面积 / ( 边框A面积 + 边框B面积 - 交集面积 )

import tensorflow as tf
# Shape of one network input: a square RGB image ( height , width , channels ).
input_shape = ( input_dim , input_dim , 3 )
# NOTE(review): dropout_rate and alpha are not referenced by any code visible
# in this file; presumably left over from a Dropout / LeakyReLU variant.
dropout_rate = 0.5
alpha = 0.2

def calculate_iou( target_boxes , pred_boxes ):
    """Return the Intersection over Union (IoU) of two sets of boxes.

    Boxes are read from the last axis as ( xmin , ymin , xmax , ymax ).

    Args:
        target_boxes: Tensor of ground-truth boxes with a last axis of size 4.
        pred_boxes: Tensor of predicted boxes, same layout as target_boxes.

    Returns:
        Tensor of IoU values with the last axis removed. The value is 0 where
        the union area is zero.
    """
    # The intersection rectangle starts at the larger of the two top-left
    # corners ...
    xA = tf.math.maximum( target_boxes[ ... , 0], pred_boxes[ ... , 0] )
    yA = tf.math.maximum( target_boxes[ ... , 1], pred_boxes[ ... , 1] )
    # ... and ends at the SMALLER of the two bottom-right corners. Taking the
    # maximum here would measure a rectangle that overshoots the overlap.
    xB = tf.math.minimum( target_boxes[ ... , 2], pred_boxes[ ... , 2] )
    yB = tf.math.minimum( target_boxes[ ... , 3], pred_boxes[ ... , 3] )
    # Clamp at zero so disjoint boxes contribute no (negative) area.
    interArea = tf.math.maximum( 0.0 , xB - xA ) * tf.math.maximum( 0.0 , yB - yA )
    boxAArea = (target_boxes[ ... , 2] - target_boxes[ ... , 0]) * (target_boxes[ ... , 3] - target_boxes[ ... , 1])
    boxBArea = (pred_boxes[ ... , 2] - pred_boxes[ ... , 0]) * (pred_boxes[ ... , 3] - pred_boxes[ ... , 1])
    # divide_no_nan yields 0 instead of NaN/Inf when the union area is zero
    # (e.g. two degenerate boxes), which keeps the loss finite.
    iou = tf.math.divide_no_nan( interArea , boxAArea + boxBArea - interArea )
    return iou

def custom_loss( y_true , y_pred ):
    """Combine mean squared error with an IoU penalty.

    The result is mse + ( 1 - iou ), so it shrinks both when the coordinates
    get closer and when the IoU computed by calculate_iou grows.
    """
    squared_error = tf.losses.mean_squared_error( y_true , y_pred )
    iou_penalty = 1 - calculate_iou( y_true , y_pred )
    return squared_error + iou_penalty

def iou_metric( y_true , y_pred ):
    """Metric wrapper that returns calculate_iou( y_true , y_pred ) unchanged."""
    iou = calculate_iou( y_true , y_pred )
    return iou

创建 CNN  模型,堆叠部分Conv2D卷积层,将其输出展开并通过全连接Dense 层

为避免过拟合,在全连接层使用Dropout 和LeakyReLU 激活函数(注:下面的示例代码实际只使用了Conv2D 卷积层和BatchNormalization,并未包含Dense、Dropout 或LeakyReLU 层)

import tensorflow.keras as keras
def _build_model_layers():
    """Return the layer list of the fully-convolutional box regressor.

    The architecture is: two stride-2 3x3 convolutions, then pairs of
    stride-1 3x3 convolutions (each pair followed by BatchNormalization,
    except the final 16-filter pair), then three 2x2 convolutions with 4
    filters. All convolutions use ReLU except the last, which uses a sigmoid
    so the 4 outputs lie in [0, 1].

    Assuming input_dim = 228 and Keras' default 'valid' padding, the spatial
    size shrinks 228 -> 113 -> 56 (strided pair), 56 -> 4 (26 stride-1 3x3
    convolutions) and 4 -> 1 (three 2x2 convolutions), so the output has
    shape ( batch , 1 , 1 , 4 ).
    """
    def conv_pair( filters , strides=1 , **first_layer_kwargs ):
        # Two identical 3x3 ReLU convolutions; extra keyword arguments
        # (the input shape) are passed to the first one only.
        return [
            keras.layers.Conv2D( filters , kernel_size=( 3 , 3 ) , strides=strides , activation='relu' , **first_layer_kwargs ),
            keras.layers.Conv2D( filters , kernel_size=( 3 , 3 ) , strides=strides , activation='relu' ),
        ]

    # Downsampling pair that also declares the input shape.
    layers = conv_pair( 256 , strides=2 , input_shape=( input_dim , input_dim , 3 ) )
    layers.append( keras.layers.BatchNormalization() )

    # ( filters , number of conv-conv-batchnorm groups ) for the stride-1 part.
    for filters , repeats in ( ( 256 , 3 ) , ( 128 , 3 ) , ( 64 , 2 ) , ( 32 , 4 ) ):
        for _ in range( repeats ):
            layers += conv_pair( filters )
            layers.append( keras.layers.BatchNormalization() )

    # The last 3x3 pair has no batch normalisation after it.
    layers += conv_pair( 16 )

    # Regression head: 2x2 convolutions with 4 channels, one per coordinate.
    layers += [
        keras.layers.Conv2D( 4 , kernel_size=( 2 , 2 ) , strides=1 , activation='relu' ),
        keras.layers.Conv2D( 4 , kernel_size=( 2 , 2 ) , strides=1 , activation='relu' ),
        keras.layers.Conv2D( 4 , kernel_size=( 2 , 2 ) , strides=1 , activation='sigmoid' ),
    ]
    return layers

model_layers = _build_model_layers()

model = keras.Sequential( model_layers )
model.compile(
    # `learning_rate` replaces the deprecated `lr` alias, which newer Keras
    # releases no longer accept.
    optimizer=keras.optimizers.Adam( learning_rate=0.0001 ),
    loss=custom_loss,
    metrics=[ iou_metric ]
)
model.summary()

 

训练模型

# Train for 100 epochs in mini-batches of 3 samples; the held-out split is
# passed as validation data.
model.fit( x=x_train , y=y_train , batch_size=3 , epochs=100 , validation_data=( x_test , y_test ) )
# Write the trained model to disk.
model.save( 'model.h5')

 

输出结果

import shutil

# Start from an empty output directory. This replaces the notebook-only
# `!rm -rf` / `!mkdir` shell escapes, so it also runs as a plain script and
# on platforms without those commands.
shutil.rmtree( 'inference_images' , ignore_errors=True )
os.makedirs( 'inference_images' )

# Predict one normalised box per held-out image and draw it on that image.
boxes = model.predict( x_test )
for i , ( box , pixels ) in enumerate( zip( boxes , x_test ) ):
    # Scale the normalised coordinates back to pixels of the resized image.
    x0 , y0 , x1 , y1 = ( box[ 0 , 0 , 0 : 4 ] * input_dim ).tolist()
    # Pillow needs x1 >= x0 and y1 >= y0; an untrained or poor model can
    # predict the corners in the wrong order, so reorder them defensively.
    b = [ min( x0 , x1 ) , min( y0 , y1 ) , max( x0 , x1 ) , max( y0 , y1 ) ]
    # Round before casting: a plain uint8 cast truncates, turning values such
    # as 254.9999 (float error from the /255 then *255 round trip) into 254.
    img = np.rint( pixels * 255 )
    source_img = Image.fromarray( img.astype( np.uint8 ) , 'RGB' )
    draw = ImageDraw.Draw( source_img )
    draw.rectangle( b , outline="black" )
    source_img.save( 'inference_images/image_{}.png'.format( i + 1 ) , 'png' )

 

 

 

 

 

 

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值