yolo 学习系列（六）：数据增广与模型转换

最新推荐文章于 2023-12-24 13:02:14 发布

la_fe_

最新推荐文章于 2023-12-24 13:02:14 发布

阅读量1.9k

点赞数 2

分类专栏： YOLO 目标检测

本文链接：https://blog.csdn.net/la_fe_/article/details/84579346

版权

YOLO 目标检测专栏收录该内容

10 篇文章 4 订阅

订阅专栏

Tensorflow-cpu + Keras安装

1. 框架安装

ubuntu下有两个版本的 python，即 python2.7 和 python3.5
在终端输入 python 默认打开的是 python2.7
输入 python3 打开的是 python3.5

本文装的是 python3.5 下的 tensorflow 和 keras

sudo pip3 install tensorflow
sudo pip3 install keras
# 打开 python3，输入以下命令无报错即可
import tensorflow 
import keras
Using TensorFlow backend

2. Keras数据增广

数据集不足时需要进行数据增广
参考这里

2.1. 数据增广方式

数据增广具有多种方式，
从几何角度来讲，包括平移、旋转、镜像、裁剪、缩放等；
从像素角度来看，包括颜色抖动，添加椒盐噪音和高斯噪音干扰等，也可以进行不同操作间的排列组合。

2.2. Keras 数据增广

配置好以上环境后，直接运行 data_augmentation_by_keras.py 文件即可
具体代码原理见参考文献

from keras.preprocessing.image import (
    ImageDataGenerator,
    array_to_img,
    img_to_array,
    load_img,
)

# Random transformations applied to every generated sample.
augment_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augment_options)

# Load one source picture and prepend a batch axis of size 1.
img = load_img('test/00216.jpg')
x = img_to_array(img)
x = x.reshape((1,) + x.shape)

# Each batch pulled from flow() is saved under directory '1' with the
# 'cucumber' prefix. The explicit break is what ends the loop; it fires
# once the 51st batch has been drawn.
for i, batch in enumerate(
        datagen.flow(x, batch_size=1,
                     save_to_dir='1', save_prefix='cucumber', save_format='jpg'),
        start=1):
    if i > 50:
        break

如果不想配置以上环境，可运行以下代码，实现对图片的旋转、加噪和镜像

import cv2
import numpy as np
import os.path
import copy
 
 
def rotate(image, angle, center=None, scale=1.0):
    """Rotate ``image`` about ``center`` and return the warped result.

    ``angle``, ``center`` and ``scale`` are forwarded to
    ``cv2.getRotationMatrix2D``. When ``center`` is None the middle of the
    image, ``(w / 2, h / 2)``, is used as the pivot. The output canvas is
    given the same width and height as the input.
    """
    height, width = image.shape[0], image.shape[1]
    # Fall back to the image centre when the caller gives no pivot.
    pivot = (width / 2, height / 2) if center is None else center
    matrix = cv2.getRotationMatrix2D(pivot, angle, scale)
    return cv2.warpAffine(image, matrix, (width, height))
 
 
def noiseing(img, param=30, grayscale=256):
    """Return a noisy copy of a 3-channel image (additive Gaussian noise).

    One zero-mean Gaussian sample with standard deviation ``param`` is drawn
    per pixel and added to all three channels of that pixel, so the noise
    changes brightness rather than colour. Results are clamped to
    ``[0, grayscale - 1]`` and returned as a new ``uint8`` array of shape
    ``(h, w, 3)``; ``img`` itself is not modified.

    Args:
        img: array indexed as ``img[row, col, channel]`` with at least three
            channels; only the first three are used.
        param: standard deviation of the noise, in intensity levels.
        grayscale: number of intensity levels; the output is ``uint8``, so
            values above 256 are not meaningful.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` and shape ``(h, w, 3)``.

    Compared with the earlier per-pixel loop this version works on Python 3
    (no ``xrange``), accepts images of odd width (pixels are no longer
    processed in pairs), and cannot hit ``log(0)`` in a hand-written
    Box-Muller transform.
    """
    h, w = img.shape[:2]
    # Shape (h, w, 1) broadcasts the same noise value over the 3 channels.
    noise = np.random.normal(0.0, param, (h, w, 1))
    noisy = img[:, :, :3].astype(np.float64) + noise
    # Clamp before the cast so out-of-range values saturate instead of
    # wrapping around; the uint8 cast then truncates the fractional part.
    return np.clip(noisy, 0, grayscale - 1).astype(np.uint8)
 
 
 
# Offline augmentation driver: for every image found under file_dir, write
# three derived images next to it (90-degree rotation, 180-degree rotation
# with Gaussian noise, horizontal mirror).
#
# NOTE: file_dir must be a directory that contains sub-directories, not
# images. Put all images into a sub-directory of it (e.g. resize/data/).
file_dir = "/home/chris/darknet/scripts/VOCdevkit/VOC2007/resize/"

# Suffixes of the files this script writes. They are skipped on input so
# that running the script a second time does not re-augment (and overwrite)
# its own output.
generated_suffixes = ('_ro90.jpg', '_rono.jpg', '_mirr.jpg')

for class_name in os.listdir(file_dir):
    class_path = file_dir + class_name + "/"
    if not os.path.isdir(class_path):
        # A stray file in file_dir would make os.listdir() below fail.
        continue
    for img_name in os.listdir(class_path):
        if img_name.endswith(generated_suffixes):
            continue
        img_path = class_path + img_name
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None for files it cannot decode.
            continue

        # Output names keep only the first 7 characters of the source name.
        # NOTE(review): presumably matches this dataset's file naming;
        # longer names sharing a 7-character prefix would collide - confirm.
        out_prefix = class_path + img_name[0:7]

        # Simple rotation by 90 degrees.
        cv2.imwrite(out_prefix + '_ro90.jpg', rotate(image, 90))

        # Rotation by 180 degrees plus Gaussian noise.
        cv2.imwrite(out_prefix + '_rono.jpg', noiseing(rotate(image, 180)))

        # Horizontal mirror: cv2.flip with flipCode 1 returns a new array
        # with the columns reversed, replacing the per-pixel Python loop.
        cv2.imwrite(out_prefix + '_mirr.jpg', cv2.flip(image, 1))

3. YOLOv2模型转换

将 darknet 框架的 cfg 和 weights 文件转换成 Keras 框架可识别的文件

3.1. YAD2K 下载

YAD2K 的运行需要 Tensorflow + Keras 的环境

git clone https://github.com/allanzelener/YAD2K

3.2. 模型转换

# 将 Darknet YOLOv2 model 转换为 Keras model 
# 注意：该工具只支持 YOLOv2 版本
python3 yad2k.py yolo.cfg yolo.weights model_data/yolo.h5

3.3. 模型测试

首先将 YAD2K-master/model_data/coco_classes.txt 中的类别名称改为自己数据集对应的类别名称

# 图片位于 images/文件夹
# 输出结果在images/out/文件夹
python3 test_yolo.py model_data/yolo.h5

3.4. 其他

修改字体大小
打开 test_yolo.py，跳转至 153 行

        # Label font: the size scales with image.size[1]; the factor was
        # raised from 3e-2 to 4e-2 here to get larger text.
        font = ImageFont.truetype(
            font='font/FiraMono-Medium.otf',
            size=np.floor(4e-2 * image.size[1] + 0.5).astype('int32'))
        # Original setting: size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

名称修改
为了避免修改源码，将 yolov2-tiny-voc.cfg 和相应的权重文件名称修改为 yolo.cfg 和 yolo.weights

4. YOLOv3模型转换

4.1. keras-yolo3下载

git clone https://github.com/qqwweee/keras-yolo3

4.2. 模型转换

 # 注意：该工具只支持 YOLOv3 版本
python3 convert.py yolov3.cfg yolov3.weights model_data/yolo.h5

4.3. 模型测试

运行以下命令，按照提示输入图片名称即可

 # 图片位于 images/文件夹
 # 输出结果在images/out/文件夹
python3 yolo_video.py --image

在这里插入图片描述

4.4. 其他

yolov3-tiny 转换问题
在转换 yolov3-tiny 后进行测试时会报错：模型与 anchor boxes 不一致。
这是因为默认加载的是 yolo_anchors.txt 而不是 tiny_yolo_anchors.txt，
这两个文件内的 anchor 数量是不一样的，只需将 tiny_yolo_anchors.txt 的名称暂时改为 yolo_anchors.txt 即可。

la_fe_

关注

2
点赞
踩
7

收藏

觉得还不错? 一键收藏
打赏
1
评论
yolo 学习系列（六）：数据增广与模型转换

Tensorflow-cpu + Keras安装1. 框架安装ubuntu下有两个版本的 python，即 python2.7 和 python3.5在终端输入 python 默认打开的是 python2.7输入 python3 打开的是 python3.5本文装的是 python3.5 下的 tensorflow 和 kerassudo pip3 install tensorf...
复制链接

扫一扫