作为一个 TensorFlow 初学者,感觉这个框架用起来比较被动、不够灵活。
之前看tensorflow的代码时候,遇到
https://github.com/guoqiangqi/PFLD
使用了如下的方式,进行数据读取,和batch打包,如果我们想对每一个batch载入的图像数据增强,应该写在_parse_data中:
dataset = tf.data.Dataset.from_tensor_slices((file_list, landmarks, attributes,euler_angles))
dataset = dataset.map(_parse_data)
dataset = dataset.shuffle(buffer_size=10000)
.......
train_dataset, num_train_file = DateSet(args.file_list, args, debug)
batch_train_dataset = train_dataset.batch(args.batch_size).repeat()
train_next_element = train_iterator.get_next()
但是,此时的数据已经是tensor格式,如果我们的数据增强全是numpy和opencv形式的自定义数据增强,我们该怎么处理,
原本我听说过@tf.function,准备试试,但是发现这是2.0的函数,并且这是把numpy行为写入tensorflow图的函数
查找了许多资料,找到了解决方式
这时候需要引入tf.py_func :
dataset = dataset.map( lambda filename, landmarks, euler_angles: tf.py_func( _read_py_function, [filename, landmarks, euler_angles], [tf.uint8, landmarks.dtype, euler_angles.dtype]))
具体参考: http://d0evi1.com/tensorflow/datasets/
原始https://github.com/guoqiangqi/PFLD的代码如下:
import tensorflow as tf
import numpy as np
import cv2
def DateSet(file_list, args, debug=False):
    """Build a shuffled tf.data pipeline that decodes, resizes and scales images.

    Args:
        file_list: path to the annotation list file consumed by gen_data.
        args: namespace providing batch_size, image_size and image_channels.
        debug: when True, keep only batch_size * 10 samples.

    Returns:
        (dataset, num_samples) — the dataset yields
        (image float32 in [0, 1), landmarks, attributes, euler_angles).
    """
    filenames, landmarks, attributes, euler_angles = gen_data(file_list)
    if debug:
        limit = args.batch_size * 10
        filenames = filenames[:limit]
        landmarks = landmarks[:limit]
        attributes = attributes[:limit]
        euler_angles = euler_angles[:limit]

    def _parse_data(filename, landmark, attribute, euler_angle):
        # Graph-mode decode: read the PNG bytes, resize bilinearly (method=0)
        # and scale pixel values into [0, 1).
        raw = tf.read_file(filename)
        img = tf.image.decode_png(raw, channels=args.image_channels)
        img = tf.image.resize_images(img, (args.image_size, args.image_size), method=0)
        img = tf.cast(img, tf.float32) / 256.0
        return (img, landmark, attribute, euler_angle)

    dataset = (tf.data.Dataset
               .from_tensor_slices((filenames, landmarks, attributes, euler_angles))
               .map(_parse_data)
               .shuffle(buffer_size=10000))
    return dataset, len(filenames)
def gen_data(file_list):
    """Parse a WFLW-style annotation list file.

    Each non-empty line is: <image path> <196 landmark coords> <6 attribute
    flags> <3 euler angles>, whitespace-separated.

    Args:
        file_list: path to the annotation text file.

    Returns:
        Tuple of numpy arrays (filenames, landmarks, attributes, euler_angles)
        with dtypes (str, float32, int32, float32) and shapes
        (N,), (N, 196), (N, 6), (N, 3).
    """
    filenames, landmarks, attributes, euler_angles = [], [], [], []
    with open(file_list, 'r') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:  # tolerate blank/trailing lines instead of crashing
                continue
            filenames.append(fields[0])
            landmarks.append(np.asarray(fields[1:197], dtype=np.float32))
            attributes.append(np.asarray(fields[197:203], dtype=np.int32))
            euler_angles.append(np.asarray(fields[203:206], dtype=np.float32))
    # np.str was removed in NumPy 1.20+; the builtin str is the supported spelling.
    filenames = np.asarray(filenames, dtype=str)
    landmarks = np.asarray(landmarks, dtype=np.float32)
    attributes = np.asarray(attributes, dtype=np.int32)
    euler_angles = np.asarray(euler_angles, dtype=np.float32)
    return (filenames, landmarks, attributes, euler_angles)
if __name__ == '__main__':
    # Visual sanity check: draw every landmark on its image, one window at a time.
    file_list = 'data/train_data/list.txt'
    # BUG FIX: gen_data returns FOUR arrays; unpacking only three raised a
    # ValueError before any image could be shown.
    filenames, landmarks, attributes, euler_angles = gen_data(file_list)
    for i in range(len(filenames)):
        print(attributes[i])
        img = cv2.imread(filenames[i])
        h, w, _ = img.shape
        # Landmarks are normalized interleaved (x, y) pairs: x scales by the
        # image WIDTH and y by the HEIGHT ([w, h], not the original [h, w]).
        landmark = landmarks[i].reshape(-1, 2) * [w, h]
        for (x, y) in landmark.astype(np.int32):
            cv2.circle(img, (x, y), 1, (0, 0, 255))
        cv2.imshow('0', img)
        cv2.waitKey(0)
改进后:
import tensorflow as tf
import numpy as np
import cv2
import random
def DateSet(file_list, args, debug=False):
    """Build a tf.data pipeline with numpy/OpenCV augmentation via tf.py_func.

    The per-sample augmentation (random rotation of image + landmarks + yaw
    angle) runs as plain Python inside tf.py_func, so cv2/numpy are usable.

    Args:
        file_list: path to the annotation list file (see gen_data).
        args: namespace providing batch_size, image_size and image_channels.
        debug: when True, keep only batch_size * 10 samples.

    Returns:
        (dataset, num_samples) — the dataset yields
        (image float32 [image_size, image_size, C] in [0, 1),
         landmarks float32, euler_angles float32).
    """
    file_list, landmarks, euler_angles = gen_data(file_list)
    if debug:
        n = args.batch_size * 10
        file_list = file_list[:n]
        landmarks = landmarks[:n]
        euler_angles = euler_angles[:n]
    dataset = tf.data.Dataset.from_tensor_slices((file_list, landmarks, euler_angles))

    def _read_py_function(filename, landmarks, euler_angles):
        # Executed eagerly by tf.py_func; arguments arrive as numpy values.
        image_decoded = cv2.imread(filename.decode('ascii'))
        H, W = image_decoded.shape[:2]
        # Sample the rotation angle from the whole [-15, 15) range instead of
        # only the two extremes that random.choice([-15, 15]) produced
        # (matches the albumentations version of this loader).
        alpha = np.random.randint(-15, 15)
        if np.random.randint(2) == 0:  # rotate roughly half of the samples
            center = ((W - 1) / 2, (H - 1) / 2)
            rot_mat = cv2.getRotationMatrix2D(center, alpha, 1)
            image_decoded = cv2.warpAffine(image_decoded, rot_mat, (W, H))
            # Landmarks are normalized interleaved (x, y): de-normalize,
            # apply the same affine matrix, then re-normalize.
            new_annotation = []
            for key in range(0, len(landmarks), 2):
                x = landmarks[key] * W
                y = landmarks[key + 1] * H
                new_annotation.append((rot_mat[0][0] * x + rot_mat[0][1] * y + rot_mat[0][2]) / W)
                new_annotation.append((rot_mat[1][0] * x + rot_mat[1][1] * y + rot_mat[1][2]) / H)
            landmarks = np.array(new_annotation).astype('float32')
            # Keep the yaw annotation consistent with the rotated image.
            # (Debug prints removed: they ran once per sample in the loader.)
            euler_angles[1] = euler_angles[1] + alpha
        return image_decoded, landmarks, euler_angles

    def _resize_function(image_decoded, landmarks, euler_angles):
        # tf.py_func strips static shape info; restore a rank-3 shape so
        # resize_images accepts the tensor.
        image_decoded.set_shape([None, None, None])
        image = tf.image.resize_images(image_decoded, (args.image_size, args.image_size), method=0)
        image = tf.cast(image, tf.float32)
        image = image / 256.0
        return (image, landmarks, euler_angles)

    dataset = dataset.map(
        lambda filename, landmarks, euler_angles: tf.py_func(
            _read_py_function, [filename, landmarks, euler_angles],
            [tf.uint8, landmarks.dtype, euler_angles.dtype]))
    dataset = dataset.map(_resize_function)
    dataset = dataset.shuffle(buffer_size=10000)
    return dataset, len(file_list)
def gen_data(file_list, img_dir="D:/code/python/untitled/data/train_data/imgs/"):
    """Parse an annotation list with 21 landmarks per face.

    Each non-empty line is: <path> <42 landmark coords> <6 attribute flags>
    <3 euler angles>. The image file is located by basename under img_dir
    (previously a hard-coded Windows path; now a parameter whose default
    preserves the old behavior).

    Args:
        file_list: path to the annotation text file.
        img_dir: directory prefix (with trailing slash) for image basenames.

    Returns:
        (filenames, landmarks, euler_angles) numpy arrays of shapes
        (N,), (N, 42), (N, 3). The 6 attribute flags are intentionally
        not returned — this loader does not use them.
    """
    filenames, landmarks, euler_angles = [], [], []
    with open(file_list, 'r') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:  # tolerate blank/trailing lines
                continue
            # Re-root the image path: keep only the basename, prepend img_dir.
            filenames.append(img_dir + fields[0].split("/")[-1])
            landmarks.append(np.asarray(fields[1:43], dtype=np.float32))
            euler_angles.append(np.asarray(fields[49:52], dtype=np.float32))
    # np.str was removed in NumPy 1.20+; use the builtin str instead.
    filenames = np.asarray(filenames, dtype=str)
    landmarks = np.asarray(landmarks, dtype=np.float32)
    euler_angles = np.asarray(euler_angles, dtype=np.float32)
    return (filenames, landmarks, euler_angles)
if __name__ == '__main__':
    # Visual sanity check: draw landmarks on each listed image.
    file_list = 'data/train_data/list.txt'
    # gen_data here returns (filenames, landmarks, euler_angles); the third
    # value was previously mis-named "attributes", which made the printout
    # misleading.
    filenames, landmarks, euler_angles = gen_data(file_list)
    for i in range(len(filenames)):
        print(euler_angles[i])
        img = cv2.imread(filenames[i])
        h, w, _ = img.shape
        # Normalized interleaved (x, y): x scales by width, y by height.
        landmark = landmarks[i].reshape(-1, 2) * [w, h]
        for (x, y) in landmark.astype(np.int32):
            cv2.circle(img, (x, y), 1, (0, 0, 255))
        cv2.imshow('0', img)
        cv2.waitKey(0)
用同样的方式调用 albumentations 库进行数据增强:
import tensorflow as tf
import numpy as np
import cv2
import random
# from albumentations import (
# HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
# Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
# IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine,
# IAASharpen, IAAEmboss, RandomContrast, RandomBrightness, Flip, OneOf, Compose
# ) # 图像变换函数
import albumentations as albu
def DateSet(file_list, args, debug=False):
    """tf.data pipeline: numpy/OpenCV rotation plus albumentations pixel-level
    augmentation, executed through tf.py_func.

    Args:
        file_list: path to the annotation list file (see gen_data).
        args: namespace providing batch_size, image_size and image_channels.
        debug: when True, keep only batch_size * 10 samples.

    Returns:
        (dataset, num_samples) — the dataset yields
        (image float32 [image_size, image_size, C] in [0, 1),
         landmarks float32, euler_angles float32).
    """
    file_list, landmarks, euler_angles = gen_data(file_list)
    if debug:
        n = args.batch_size * 10
        file_list = file_list[:n]
        landmarks = landmarks[:n]
        euler_angles = euler_angles[:n]
    dataset = tf.data.Dataset.from_tensor_slices((file_list, landmarks, euler_angles))

    def create_transformer(transformations, image, keypoints):
        # keypoints must be a sequence of (x, y) PIXEL coordinates, as
        # required by KeypointParams(format='xy').
        return albu.Compose(
            transformations, p=1,
            keypoint_params=albu.KeypointParams(format='xy'))(image=image, keypoints=keypoints)

    def vis_points(image, points, diameter=2):
        # Debug helper: draw normalized interleaved (x, y) points on a copy.
        H, W = image.shape[:2]
        im = image.copy()
        for key in range(0, len(points), 2):
            x = points[key] * W
            y = points[key + 1] * H
            cv2.circle(im, (int(x), int(y)), diameter, (0, 255, 0), -1)
        cv2.imshow("img", im)
        cv2.waitKey(10)

    def _read_py_function(filename, landmarks, euler_angles):
        # Executed eagerly by tf.py_func; arguments arrive as numpy values.
        image_decoded = cv2.imread(filename.decode('ascii'))
        if image_decoded is None:
            # Raising is loggable and recoverable; the original exit() call
            # killed the whole loader process.
            raise IOError("cannot read image: %s" % filename.decode('ascii'))
        H, W = image_decoded.shape[:2]
        alpha = np.random.randint(-15, 15)
        if np.random.randint(2) == 0:  # rotate roughly half of the samples
            center = ((W - 1) / 2, (H - 1) / 2)
            rot_mat = cv2.getRotationMatrix2D(center, alpha, 1)
            image_decoded = cv2.warpAffine(image_decoded, rot_mat, (W, H))
            # Landmarks are normalized interleaved (x, y): de-normalize,
            # apply the affine matrix, re-normalize.
            new_annotation = []
            for key in range(0, len(landmarks), 2):
                x = landmarks[key] * W
                y = landmarks[key + 1] * H
                new_annotation.append((rot_mat[0][0] * x + rot_mat[0][1] * y + rot_mat[0][2]) / W)
                new_annotation.append((rot_mat[1][0] * x + rot_mat[1][1] * y + rot_mat[1][2]) / H)
            landmarks = np.array(new_annotation).astype('float32')
            # Keep the yaw annotation consistent with the rotated image.
            euler_angles[1] = euler_angles[1] + alpha
        # BUG FIX: albumentations expects pixel-space (x, y) pairs, not the
        # flat normalized vector that was passed before.
        keypoints = (landmarks.reshape(-1, 2) * [W, H]).tolist()
        # 饱和度/色调, 亮度/对比度, 噪声, 运动模糊 — pixel-level only, so the
        # keypoints themselves are not moved.
        transformed = create_transformer(
            [albu.OneOf([albu.HueSaturationValue(p=0.5),
                         albu.RGBShift(p=0.7)], p=1),
             albu.Blur(blur_limit=11, p=0.5),
             albu.MotionBlur(blur_limit=37, p=0.5),
             albu.GaussNoise(),
             albu.RandomBrightnessContrast(p=0.5)], image_decoded, keypoints)
        image_decoded = transformed['image']
        # BUG FIX: transformed['keypoints'] is a list of tuples — calling
        # .astype on it raised AttributeError. Convert back to the flat
        # normalized float32 layout the rest of the pipeline expects.
        landmarks = (np.asarray(transformed['keypoints'], dtype=np.float32)
                     / [W, H]).reshape(-1).astype(np.float32)
        # vis_points(image_decoded, landmarks)  # debug only: cv2.imshow
        # blocks the input pipeline, so never leave it enabled in training.
        return image_decoded, landmarks, euler_angles

    def _resize_function(image_decoded, landmarks, euler_angles):
        # tf.py_func strips static shape info; restore a rank-3 shape.
        image_decoded.set_shape([None, None, None])
        image = tf.image.resize_images(image_decoded, (args.image_size, args.image_size), method=0)
        image = tf.cast(image, tf.float32)
        image = image / 256.0
        return (image, landmarks, euler_angles)

    dataset = dataset.map(
        lambda filename, landmarks, euler_angles: tf.py_func(
            _read_py_function, [filename, landmarks, euler_angles],
            [tf.uint8, landmarks.dtype, euler_angles.dtype]))
    dataset = dataset.map(_resize_function)
    dataset = dataset.shuffle(buffer_size=10000)
    return dataset, len(file_list)
def gen_data(file_list):
    """Parse an annotation list with 21 landmarks per face.

    Each non-empty line is: <path> <42 landmark coords> <6 attribute flags>
    <3 euler angles>, whitespace-separated.

    Args:
        file_list: path to the annotation text file.

    Returns:
        (filenames, landmarks, euler_angles) numpy arrays of shapes
        (N,), (N, 42), (N, 3). The 6 attribute flags are intentionally
        not returned — this loader does not use them.
    """
    filenames, landmarks, euler_angles = [], [], []
    with open(file_list, 'r') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:  # tolerate blank/trailing lines
                continue
            filenames.append(fields[0])
            landmarks.append(np.asarray(fields[1:43], dtype=np.float32))
            euler_angles.append(np.asarray(fields[49:52], dtype=np.float32))
    # np.str was removed in NumPy 1.20+; use the builtin str instead.
    filenames = np.asarray(filenames, dtype=str)
    landmarks = np.asarray(landmarks, dtype=np.float32)
    euler_angles = np.asarray(euler_angles, dtype=np.float32)
    return (filenames, landmarks, euler_angles)
if __name__ == '__main__':
    # Visual sanity check: draw landmarks on each listed image.
    file_list = 'data/train_data/list.txt'
    # gen_data here returns (filenames, landmarks, euler_angles); the third
    # value was previously mis-named "attributes", which made the printout
    # misleading.
    filenames, landmarks, euler_angles = gen_data(file_list)
    for i in range(len(filenames)):
        print(euler_angles[i])
        img = cv2.imread(filenames[i])
        h, w, _ = img.shape
        # Normalized interleaved (x, y): x scales by width, y by height.
        landmark = landmarks[i].reshape(-1, 2) * [w, h]
        for (x, y) in landmark.astype(np.int32):
            cv2.circle(img, (x, y), 1, (0, 0, 255))
        cv2.imshow('0', img)
        cv2.waitKey(0)