作为一个 TensorFlow 初学者,感觉这个框架用起来比较被动、不够灵活。
之前看tensorflow的代码时候,遇到
https://github.com/guoqiangqi/PFLD
使用了如下的方式,进行数据读取,和batch打包,如果我们想对每一个batch载入的图像数据增强,应该写在_parse_data中:
dataset = tf.data.Dataset.from_tensor_slices((file_list, landmarks, attributes,euler_angles))
dataset = dataset.map(_parse_data)
dataset = dataset.shuffle(buffer_size=10000)
.......
train_dataset, num_train_file = DateSet(args.file_list, args, debug)
batch_train_dataset = train_dataset.batch(args.batch_size).repeat()
train_next_element = train_iterator.get_next()
但是,此时的数据已经是tensor格式,如果我们的数据增强全是numpy和opencv形式的自定义数据增强,我们该怎么处理,
原本我听说过@tf.function,准备试试,但是发现这是2.0的函数,并且这是把numpy行为写入tensorflow图的函数
查找了许多资料,找到了解决方式
这时候需要引入tf.py_func :
dataset = dataset.map( lambda filename, landmarks, euler_angles: tf.py_func( _read_py_function, [filename, landmarks, euler_angles], [tf.uint8, landmarks.dtype, euler_angles.dtype]))
具体参考: http://d0evi1.com/tensorflow/datasets/
原始https://github.com/guoqiangqi/PFLD的代码如下:
import tensorflow as tf
import numpy as np
import cv2
def DateSet(file_list, args, debug=False):
    """Build a shuffled tf.data pipeline that decodes, resizes and scales images.

    Args:
        file_list: path to the annotation list file consumed by gen_data.
        args: namespace providing batch_size, image_size and image_channels.
        debug: when True, keep only batch_size * 10 samples.

    Returns:
        (dataset, num_samples) — the dataset yields
        (image float32 in [0, 1), landmarks, attributes, euler_angles).
    """
    filenames, landmarks, attributes, euler_angles = gen_data(file_list)
    if debug:
        limit = args.batch_size * 10
        filenames = filenames[:limit]
        landmarks = landmarks[:limit]
        attributes = attributes[:limit]
        euler_angles = euler_angles[:limit]

    def _parse_data(filename, landmark, attribute, euler_angle):
        # Graph-mode decode: read the PNG bytes, resize bilinearly (method=0)
        # and scale pixel values into [0, 1).
        raw = tf.read_file(filename)
        img = tf.image.decode_png(raw, channels=args.image_channels)
        img = tf.image.resize_images(img, (args.image_size, args.image_size), method=0)
        img = tf.cast(img, tf.float32) / 256.0
        return (img, landmark, attribute, euler_angle)

    dataset = (tf.data.Dataset
               .from_tensor_slices((filenames, landmarks, attributes, euler_angles))
               .map(_parse_data)
               .shuffle(buffer_size=10000))
    return dataset, len(filenames)
def gen_data(file_list):
    """Parse a WFLW-style annotation list file.

    Each non-empty line is: <image path> <196 landmark coords> <6 attribute
    flags> <3 euler angles>, whitespace-separated.

    Args:
        file_list: path to the annotation text file.

    Returns:
        Tuple of numpy arrays (filenames, landmarks, attributes, euler_angles)
        with dtypes (str, float32, int32, float32) and shapes
        (N,), (N, 196), (N, 6), (N, 3).
    """
    filenames, landmarks, attributes, euler_angles = [], [], [], []
    with open(file_list, 'r') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:  # tolerate blank/trailing lines instead of crashing
                continue
            filenames.append(fields[0])
            landmarks.append(np.asarray(fields[1:197], dtype=np.float32))
            attributes.append(np.asarray(fields[197:203], dtype=np.int32))
            euler_angles.append(np.asarray(fields[203:206], dtype=np.float32))
    # np.str was removed in NumPy 1.20+; the builtin str is the supported spelling.
    filenames = np.asarray(filenames, dtype=str)
    landmarks = np.asarray(landmarks, dtype=np.float32)
    attributes = np.asarray(attributes, dtype=np.int32)
    euler_angles = np.asarray(euler_angles, dtype=np.float32)
    return (filenames, landmarks, attributes, euler_angles)
if __name__ == '__main__':
    # Visual sanity check: draw every landmark on its image, one window at a time.
    file_list = 'data/train_data/list.txt'
    # BUG FIX: gen_data returns FOUR arrays; unpacking only three raised a
    # ValueError before any image could be shown.
    filenames, landmarks, attributes, euler_angles = gen_data(file_list)
    for i in range(len(filenames)):
        print(attributes[i])
        img = cv2.imread(filenames[i])
        h, w, _ = img.shape
        # Landmarks are normalized interleaved (x, y) pairs: x scales by the
        # image WIDTH and y by the HEIGHT ([w, h], not the original [h, w]).
        landmark = landmarks[i].reshape(-1, 2) * [w, h]
        for (x, y) in landmark.astype(np.int32):
            cv2.circle(img, (x, y), 1, (0, 0, 255))
        cv2.imshow('0', img)
        cv2.waitKey(0)
改进后:
import tensorflow as tf
import numpy as np
import cv2
import random
def DateSet(file_list, args, debug=False):
    """Build a tf.data pipeline with numpy/OpenCV augmentation via tf.py_func.

    The per-sample augmentation (random rotation of image + landmarks + yaw
    angle) runs as plain Python inside tf.py_func, so cv2/numpy are usable.

    Args:
        file_list: path to the annotation list file (see gen_data).
        args: namespace providing batch_size, image_size and image_channels.
        debug: when True, keep only batch_size * 10 samples.

    Returns:
        (dataset, num_samples) — the dataset yields
        (image float32 [image_size, image_size, C] in [0, 1),
         landmarks float32, euler_angles float32).
    """
    file_list, landmarks, euler_angles = gen_data(file_list)
    if debug:
        n = args.batch_size * 10
        file_list = file_list[:n]
        landmarks = landmarks[:n]
        euler_angles = euler_angles[:n]
    dataset = tf.data.Dataset.from_tensor_slices((file_list, landmarks, euler_angles))

    def _read_py_function(filename, landmarks, euler_angles):
        # Executed eagerly by tf.py_func; arguments arrive as numpy values.
        image_decoded = cv2.imread(filename.decode('ascii'))
        H, W = image_decoded.shape[:2]
        # Sample the rotation angle from the whole [-15, 15) range instead of
        # only the two extremes that random.choice([-15, 15]) produced
        # (matches the albumentations version of this loader).
        alpha = np.random.randint(-15, 15)
        if np.random.randint(2) == 0:  # rotate roughly half of the samples
            center = ((W - 1) / 2, (H - 1) / 2)
            rot_mat = cv2.getRotationMatrix2D(center, alpha, 1)
            image_decoded = cv2.warpAffine(image_decoded, rot_mat, (W, H))
            # Landmarks are normalized interleaved (x, y): de-normalize,
            # apply the same affine matrix, then re-normalize.
            new_annotation = []
            for key in range(0, len(landmarks), 2):
                x = landmarks[key] * W
                y = landmarks[key + 1] * H
                new_annotation.append((rot_mat[0][0] * x + rot_mat[0][1] * y + rot_mat[0][2]) / W)
                new_annotation.append((rot_mat[1][0] * x + rot_mat[1][1] * y + rot_mat[1][2]) / H)
            landmarks = np.array(new_annotation).astype('float32')
            # Keep the yaw annotation consistent with the rotated image.
            # (Debug prints removed: they ran once per sample in the loader.)
            euler_angles[1] = euler_angles[1] + alpha
        return image_decoded, landmarks, euler_angles

    def _resize_function(image_decoded, landmarks, euler_angles):
        # tf.py_func strips static shape info; restore a rank-3 shape so
        # resize_images accepts the tensor.
        image_decoded.set_shape([None, None, None])
        image = tf.image.resize_images(image_decoded, (args.image_size, args.image_size), method=0)
        image = tf.cast(image, tf.float32)
        image = image / 256.0
        return (image, landmarks, euler_angles)

    dataset = dataset.map(
        lambda filename, landmarks, euler_angles: tf.py_func(
            _read_py_function, [filename, landmarks, euler_angles],
            [tf.uint8, landmarks.dtype, euler_angles.dtype]))
    dataset = dataset.map(_resize_function)
    dataset = dataset.shuffle(buffer_size=10000)
    return dataset, len(file_list)
def gen_data(file_list, img_dir="D:/code/python/untitled/data/train_data/imgs/"):
    """Parse an annotation list with 21 landmarks per face.

    Each non-empty line is: <path> <42 landmark coords> <6 attribute flags>
    <3 euler angles>. The image file is located by basename under img_dir
    (previously a hard-coded Windows path; now a parameter whose default
    preserves the old behavior).

    Args:
        file_list: path to the annotation text file.
        img_dir: directory prefix (with trailing slash) for image basenames.

    Returns:
        (filenames, landmarks, euler_angles) numpy arrays of shapes
        (N,), (N, 42), (N, 3). The 6 attribute flags are intentionally
        not returned — this loader does not use them.
    """
    filenames, landmarks, euler_angles = [], [], []
    with open(file_list, 'r') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:  # tolerate blank/trailing lines
                continue
            # Re-root the image path: keep only the basename, prepend img_dir.
            filenames.append(img_dir + fields[0].split("/")[-1])
            landmarks.append(np.asarray(fields[1:43], dtype=np.float32))
            euler_angles.append(np.asarray(fields[49:52], dtype=np.float32))
    # np.str was removed in NumPy 1.20+; use the builtin str instead.
    filenames = np.asarray(filenames, dtype=str)
    landmarks = np.asarray(landmarks, dtype=np.float32)
    euler_angles = np.asarray(euler_angles, dtype=np.float32)
    return (filenames, landmarks, euler_angles)
if __name__ == '__main__':
    # Visual sanity check: draw landmarks on each listed image.
    file_list = 'data/train_data/list.txt'
    # gen_data here returns (filenames, landmarks, euler_angles); the third
    # value was previously mis-named "attributes", which made the printout
    # misleading.
    filenames, landmarks, euler_angles = gen_data(file_list)
    for i in range(len(filenames)):
        print(euler_angles[i])
        img = cv2.imread(filenames[i])
        h, w, _ = img.shape
        # Normalized interleaved (x, y): x scales by width, y by height.
        landmark = landmarks[i].reshape(-1, 2) * [w, h]
        for (x, y) in landmark.astype(np.int32):
            cv2.circle(img, (x, y), 1, (0, 0, 255))
        cv2.imshow('0', img)
        cv2.waitKey(0)
用同样的方式调用 albumentations 库进行数据增强:
import tensorflow as tf
import numpy as np
import cv2
import random
# from albumentations import (
# HorizontalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
# Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
# IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine,
# IAASharpen, IAAEmboss, RandomContrast, RandomBrightness, Flip, OneOf, Compose
# ) # 图像变换函数
import albumentations as albu
def DateSet(file_list, args, debug=False):
    """tf.data pipeline: numpy/OpenCV rotation plus albumentations pixel-level
    augmentation, executed through tf.py_func.

    Args:
        file_list: path to the annotation list file (see gen_data).
        args: namespace providing batch_size, image_size and image_channels.
        debug: when True, keep only batch_size * 10 samples.

    Returns:
        (dataset, num_samples) — the dataset yields
        (image float32 [image_size, image_size, C] in [0, 1),
         landmarks float32, euler_angles float32).
    """
    file_list, landmarks, euler_angles = gen_data(file_list)
    if debug:
        n = args.batch_size * 10
        file_list = file_list[:n]
        landmarks = landmarks[:n]
        euler_angles = euler_angles[:n]
    dataset = tf.data.Dataset.from_tensor_slices((file_list, landmarks, euler_angles))

    def create_transformer(transformations, image, keypoints):
        # keypoints must be a sequence of (x, y) PIXEL coordinates, as
        # required by KeypointParams(format='xy').
        return albu.Compose(
            transformations, p=1,
            keypoint_params=albu.KeypointParams(format='xy'))(image=image, keypoints=keypoints)

    def vis_points(image, points, diameter=2):
        # Debug helper: draw normalized interleaved (x, y) points on a copy.
        H, W = image.shape[:2]
        im = image.copy()
        for key in range(0, len(points), 2):
            x = points[key] * W
            y = points[key + 1] * H
            cv2.circle(im, (int(x), int(y)), diameter, (0, 255, 0), -1)
        cv2.imshow("img", im)
        cv2.waitKey(10)

    def _read_py_function(filename, landmarks, euler_angles):
        # Executed eagerly by tf.py_func; arguments arrive as numpy values.
        image_decoded = cv2.imread(filename.decode('ascii'))
        if image_decoded is None:
            # Raising is loggable and recoverable; the original exit() call
            # killed the whole loader process.
            raise IOError("cannot read image: %s" % filename.decode('ascii'))
        H, W = image_decoded.shape[:2]
        alpha = np.random.randint(-15, 15)
        if np.random.randint(2) == 0:  # rotate roughly half of the samples
            center = ((W - 1) / 2, (H - 1) / 2)
            rot_mat = cv2.getRotationMatrix2D(center, alpha, 1)
            image_decoded = cv2.warpAffine(image_decoded, rot_mat, (W, H))
            # Landmarks are normalized interleaved (x, y): de-normalize,
            # apply the affine matrix, re-normalize.
            new_annotation = []
            for key in range(0, len(landmarks), 2):
                x = landmarks[key] * W
                y = landmarks[key + 1] * H
                new_annotation.append((rot_mat[0][0] * x + rot_mat[0][1] * y + rot_mat[0][2]) / W)
                new_annotation.append((rot_mat[1][0] * x + rot_mat[1][1] * y + rot_mat[1][2]) / H)
            landmarks = np.array(new_annotation).astype('float32')
            # Keep the yaw annotation consistent with the rotated image.
            euler_angles[1] = euler_angles[1] + alpha
        # BUG FIX: albumentations expects pixel-space (x, y) pairs, not the
        # flat normalized vector that was passed before.
        keypoints = (landmarks.reshape(-1, 2) * [W, H]).tolist()
        # 饱和度/色调, 亮度/对比度, 噪声, 运动模糊 — pixel-level only, so the
        # keypoints themselves are not moved.
        transformed = create_transformer(
            [albu.OneOf([albu.HueSaturationValue(p=0.5),
                         albu.RGBShift(p=0.7)], p=1),
             albu.Blur(blur_limit=11, p=0.5),
             albu.MotionBlur(blur_limit=37, p=0.5),
             albu.GaussNoise(),
             albu.RandomBrightnessContrast(p=0.5)], image_decoded, keypoints)
        image_decoded = transformed['image']
        # BUG FIX: transformed['keypoints'] is a list of tuples — calling
        # .astype on it raised AttributeError. Convert back to the flat
        # normalized float32 layout the rest of the pipeline expects.
        landmarks = (np.asarray(transformed['keypoints'], dtype=np.float32)
                     / [W, H]).reshape(-1).astype(np.float32)
        # vis_points(image_decoded, landmarks)  # debug only: cv2.imshow
        # blocks the input pipeline, so never leave it enabled in training.
        return image_decoded, landmarks, euler_angles

    def _resize_function(image_decoded, landmarks, euler_angles):
        # tf.py_func strips static shape info; restore a rank-3 shape.
        image_decoded.set_shape([None, None, None])
        image = tf.image.resize_images(image_decoded, (args.image_size, args.image_size), method=0)
        image = tf.cast(image, tf.float32)
        image = image / 256.0
        return (image, landmarks, euler_angles)

    dataset = dataset.map(
        lambda filename, landmarks, euler_angles: tf.py_func(
            _read_py_function, [filename, landmarks, euler_angles],
            [tf.uint8, landmarks.dtype, euler_angles.dtype]))
    dataset = dataset.map(_resize_function)
    dataset = dataset.shuffle(buffer_size=10000)
    return dataset, len(file_list)
def gen_data(file_list):
    """Parse an annotation list with 21 landmarks per face.

    Each non-empty line is: <path> <42 landmark coords> <6 attribute flags>
    <3 euler angles>, whitespace-separated.

    Args:
        file_list: path to the annotation text file.

    Returns:
        (filenames, landmarks, euler_angles) numpy arrays of shapes
        (N,), (N, 42), (N, 3). The 6 attribute flags are intentionally
        not returned — this loader does not use them.
    """
    filenames, landmarks, euler_angles = [], [], []
    with open(file_list, 'r') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:  # tolerate blank/trailing lines
                continue
            filenames.append(fields[0])
            landmarks.append(np.asarray(fields[1:43], dtype=np.float32))
            euler_angles.append(np.asarray(fields[49:52], dtype=np.float32))
    # np.str was removed in NumPy 1.20+; use the builtin str instead.
    filenames = np.asarray(filenames, dtype=str)
    landmarks = np.asarray(landmarks, dtype=np.float32)
    euler_angles = np.asarray(euler_angles, dtype=np.float32)
    return (filenames, landmarks, euler_angles)
if __name__ == '__main__':
    # Visual sanity check: draw landmarks on each listed image.
    file_list = 'data/train_data/list.txt'
    # gen_data here returns (filenames, landmarks, euler_angles); the third
    # value was previously mis-named "attributes", which made the printout
    # misleading.
    filenames, landmarks, euler_angles = gen_data(file_list)
    for i in range(len(filenames)):
        print(euler_angles[i])
        img = cv2.imread(filenames[i])
        h, w, _ = img.shape
        # Normalized interleaved (x, y): x scales by width, y by height.
        landmark = landmarks[i].reshape(-1, 2) * [w, h]
        for (x, y) in landmark.astype(np.int32):
            cv2.circle(img, (x, y), 1, (0, 0, 255))
        cv2.imshow('0', img)
        cv2.waitKey(0)