Rotating an image:
dst\_img = \begin{bmatrix} 1 & 0 & w/2\\ 0 & 1 & h/2\\ 0 & 0 & 1 \end{bmatrix}\ast\begin{bmatrix} \cos\theta & \sin\theta & 0\\ -\sin\theta & \cos\theta & 0\\ 0 & 0 & 1 \end{bmatrix}\ast\begin{bmatrix} 1 & 0 & -w/2\\ 0 & 1 & -h/2\\ 0 & 0 & 1 \end{bmatrix}\ast src\_img
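Reading the product right to left: shift the image center to the origin, rotate by θ about the origin, then shift the origin back to the top-left corner. As a quick sanity check (not part of the original listing; the 640x480 size and 15° angle are arbitrary assumptions for illustration), the composed 3x3 matrix can be built directly with NumPy, and its top two rows should match what cv2.getRotationMatrix2D returns for the same center and angle:

import numpy as np
import cv2

theta = np.radians(15)           # rotation angle (assumed for illustration)
w, h = 640, 480                  # assumed image size (illustration only)
T_center = np.array([[1, 0, w / 2], [0, 1, h / 2], [0, 0, 1]])    # origin -> image center
R = np.array([[ np.cos(theta), np.sin(theta), 0],
              [-np.sin(theta), np.cos(theta), 0],
              [0, 0, 1]])                                          # rotation about the origin
T_back = np.array([[1, 0, -w / 2], [0, 1, -h / 2], [0, 0, 1]])     # image center -> origin
M = T_center @ R @ T_back        # matrices apply right-to-left to src_img

M_cv = cv2.getRotationMatrix2D((w / 2, h / 2), np.degrees(theta), 1.0)
print(np.allclose(M[:2, :], M_cv))   # expected: True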
Rotating coordinates:
\begin{bmatrix} dx\\ dy\\ 1 \end{bmatrix} = \begin{bmatrix} 1 & 0 & w/2\\ 0 & 1 & h/2\\ 0 & 0 & 1 \end{bmatrix}\ast\begin{bmatrix} \cos\theta & \sin\theta & 0\\ -\sin\theta & \cos\theta & 0\\ 0 & 0 & 1 \end{bmatrix}\ast\begin{bmatrix} 1 & 0 & -w/2\\ 0 & 1 & -h/2\\ 0 & 0 & 1 \end{bmatrix}\ast\begin{bmatrix} sx\\ sy\\ 1 \end{bmatrix}
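In other words, a source point (sx, sy) is mapped by the very same homogeneous matrix; this is exactly what the affine_transform_keypoints function further down applies to every keypoint. Continuing the sketch above (M, w, h as defined there; the source point is arbitrary), note that the rotation center is a fixed point of the transform:

dx, dy, _ = M @ np.array([150.0, 80.0, 1.0])       # arbitrary source point (illustration only)
print(dx, dy)                                       # its rotated destination
print(np.allclose(M @ np.array([w / 2, h / 2, 1.0]),
                  [w / 2, h / 2, 1.0]))             # the center maps to itself: True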
import numpy as np
import cv2
# import scipy.ndimage as ndi
def affine_rotation_matrix(angle=(-20, 20)):
    """Create an affine transform matrix for image rotation.
    NOTE: In OpenCV, x is width and y is height.

    Parameters
    -----------
    angle : int/float or tuple of two int/float
        Degree to rotate, usually -180 ~ 180.
            - int/float, a fixed angle.
            - tuple of 2 floats/ints, randomly sample a value as the angle between these 2 values.

    Returns
    -------
    numpy.array
        An affine transform matrix.

    """
    if isinstance(angle, tuple):
        theta = np.pi / 180 * np.random.uniform(angle[0], angle[1])
    else:
        theta = np.pi / 180 * angle
    rotation_matrix = np.array([[np.cos(theta), np.sin(theta), 0],
                                [-np.sin(theta), np.cos(theta), 0],
                                [0, 0, 1]])
    return rotation_matrix
def affine_horizontal_flip_matrix(prob=0.5):
    """Create an affine transformation matrix for image horizontal flipping.
    NOTE: In OpenCV, x is width and y is height.

    Parameters
    ----------
    prob : float
        Probability to flip the image. 1.0 means always flip.

    Returns
    -------
    numpy.array
        An affine transform matrix.

    """
    factor = np.random.uniform(0, 1)
    if prob >= factor:
        flip_matrix = np.array([[-1., 0., 0.],
                                [0., 1., 0.],
                                [0., 0., 1.]])
    else:
        flip_matrix = np.array([[1., 0., 0.],
                                [0., 1., 0.],
                                [0., 0., 1.]])
    return flip_matrix
def affine_shift_matrix(wrg=(-0.1, 0.1), hrg=(-0.1, 0.1), w=200, h=200):
    """Create an affine transform matrix for image shifting.
    NOTE: In OpenCV, x is width and y is height.

    Parameters
    -----------
    wrg : float or tuple of floats
        Range to shift on width axis, -1 ~ 1.
            - float, a fixed distance.
            - tuple of 2 floats, randomly sample a value as the distance between these 2 values.
    hrg : float or tuple of floats
        Range to shift on height axis, -1 ~ 1.
            - float, a fixed distance.
            - tuple of 2 floats, randomly sample a value as the distance between these 2 values.
    w, h : int
        The width and height of the image.

    Returns
    -------
    numpy.array
        An affine transform matrix.

    """
    if isinstance(wrg, tuple):
        tx = np.random.uniform(wrg[0], wrg[1]) * w
    else:
        tx = wrg * w
    if isinstance(hrg, tuple):
        ty = np.random.uniform(hrg[0], hrg[1]) * h
    else:
        ty = hrg * h
    shift_matrix = np.array([[1, 0, tx],
                             [0, 1, ty],
                             [0, 0, 1]])
    return shift_matrix
def affine_shear_matrix(x_shear=(-0.1, 0.1), y_shear=(-0.1, 0.1)):
    """Create an affine transform matrix for image shearing.
    NOTE: In OpenCV, x is width and y is height.

    Parameters
    -----------
    x_shear, y_shear : float or tuple of two floats
        Shear factor for the width and height directions.
            - float, a fixed factor.
            - tuple of 2 floats, randomly sample a value as the factor between these 2 values.

    Returns
    -------
    numpy.array
        An affine transform matrix.

    """
    if isinstance(x_shear, tuple):
        x_shear = np.random.uniform(x_shear[0], x_shear[1])
    if isinstance(y_shear, tuple):
        y_shear = np.random.uniform(y_shear[0], y_shear[1])
    shear_matrix = np.array([[1, x_shear, 0],
                             [y_shear, 1, 0],
                             [0, 0, 1]])
    return shear_matrix
def affine_zoom_matrix(zoom_range=(0.8, 1.1)):
    """Create an affine transform matrix for zooming/scaling an image's height and width.
    OpenCV format, x is width.

    Parameters
    -----------
    zoom_range : float or tuple of 2 floats
        The zooming/scaling ratio, greater than 1 means larger.
            - float, a fixed ratio.
            - tuple of 2 floats, randomly sample a value as the ratio between these 2 values.

    Returns
    -------
    numpy.array
        An affine transform matrix.

    """
    if isinstance(zoom_range, (float, int)):
        scale = zoom_range
    elif isinstance(zoom_range, tuple):
        scale = np.random.uniform(zoom_range[0], zoom_range[1])
    else:
        raise Exception("zoom_range: float or tuple of 2 floats")
    zoom_matrix = np.array([[scale, 0, 0],
                            [0, scale, 0],
                            [0, 0, 1]])
    return zoom_matrix
def affine_respective_zoom_matrix(w_range=0.8, h_range=1.1):
    """Create an affine transform matrix for zooming/scaling where height and width are changed independently.
    OpenCV format, x is width.

    Parameters
    -----------
    w_range : float or tuple of 2 floats
        The zooming/scaling ratio of width, greater than 1 means larger.
            - float, a fixed ratio.
            - tuple of 2 floats, randomly sample a value as the ratio between 2 values.
    h_range : float or tuple of 2 floats
        The zooming/scaling ratio of height, greater than 1 means larger.
            - float, a fixed ratio.
            - tuple of 2 floats, randomly sample a value as the ratio between 2 values.

    Returns
    -------
    numpy.array
        An affine transform matrix.

    """
    if isinstance(h_range, (float, int)):
        zy = h_range
    elif isinstance(h_range, tuple):
        zy = np.random.uniform(h_range[0], h_range[1])
    else:
        raise Exception("h_range: float or tuple of 2 floats")
    if isinstance(w_range, (float, int)):
        zx = w_range
    elif isinstance(w_range, tuple):
        zx = np.random.uniform(w_range[0], w_range[1])
    else:
        raise Exception("w_range: float or tuple of 2 floats")
    zoom_matrix = np.array([[zx, 0, 0],
                            [0, zy, 0],
                            [0, 0, 1]])
    return zoom_matrix
# affine transform
def transform_matrix_offset_center(matrix, x, y):
    """Convert the matrix from Cartesian coordinates (the origin in the middle of image) to Image coordinates (the origin on the top-left of image).

    Parameters
    ----------
    matrix : numpy.array
        Transform matrix.
    x and y : 2 int
        Size of the image (x is width, y is height, OpenCV format).

    Returns
    -------
    numpy.array
        The transform matrix.

    Examples
    --------
    - See ``tl.prepro.rotation``, ``tl.prepro.shear``, ``tl.prepro.zoom``.

    """
    o_x = (x - 1) / 2.0
    o_y = (y - 1) / 2.0
    offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
    reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
    transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
    return transform_matrix
# def affine_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0., order=1):
# """Return transformed images by given an affine matrix in Scipy format (x is height).
#
# Parameters
# ----------
# x : numpy.array
# An image with dimension of [row, col, channel] (default).
# transform_matrix : numpy.array
# Transform matrix (offset center), can be generated by ``transform_matrix_offset_center``
# channel_index : int
# Index of channel, default 2.
# fill_mode : str
# Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
# cval : float
# Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0
# order : int
# The order of interpolation. The order has to be in the range 0-5:
# - 0 Nearest-neighbor
# - 1 Bi-linear (default)
# - 2 Bi-quadratic
# - 3 Bi-cubic
# - 4 Bi-quartic
# - 5 Bi-quintic
# - `scipy ndimage affine_transform <https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.interpolation.affine_transform.html>`__
#
# Returns
# -------
# numpy.array
# A processed image.
#
# Examples
# --------
# >>> M_shear = tl.prepro.affine_shear_matrix(intensity=0.2, is_random=False)
# >>> M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=0.8)
# >>> M_combined = M_shear.dot(M_zoom)
# >>> transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined, h, w)
# >>> result = tl.prepro.affine_transform(image, transform_matrix)
#
# """
#
# x = np.rollaxis(x, channel_index, 0)
# final_affine_matrix = transform_matrix[:2, :2]
# final_offset = transform_matrix[:2, 2]
# channel_images = [
# ndi.interpolation.
# affine_transform(x_channel, final_affine_matrix, final_offset, order=order, mode=fill_mode, cval=cval)
# for x_channel in x
# ]
# x = np.stack(channel_images, axis=0)
# x = np.rollaxis(x, 0, channel_index + 1)
# return x
#
#
# apply_transform = affine_transform
def affine_transform_cv2(x, transform_matrix, flags=None, border_mode='constant'):
    """Return a transformed image given an affine matrix in OpenCV format (x is width). (Powered by OpenCV2, faster than ``tl.prepro.affine_transform``.)

    Parameters
    ----------
    x : numpy.array
        An image with dimension of [row, col, channel] (default).
    transform_matrix : numpy.array
        A transform matrix, OpenCV format.
    flags : int or None
        OpenCV interpolation flag; defaults to ``cv2.INTER_AREA``.
    border_mode : str
        - `constant`, pad the image with a constant value (i.e. black or 0).
        - `replicate`, the row or column at the very edge of the original is replicated to the extra border.

    Examples
    --------
    >>> M_shear = tl.prepro.affine_shear_matrix(x_shear=0.2, y_shear=0.2)
    >>> M_zoom = tl.prepro.affine_zoom_matrix(zoom_range=0.8)
    >>> M_combined = M_shear.dot(M_zoom)
    >>> result = tl.prepro.affine_transform_cv2(image, M_combined)

    """
    rows, cols = x.shape[0], x.shape[1]
    if flags is None:
        flags = cv2.INTER_AREA
    if border_mode == 'constant':
        border_mode = cv2.BORDER_CONSTANT
    elif border_mode == 'replicate':
        border_mode = cv2.BORDER_REPLICATE
    else:
        raise Exception("unsupported border_mode, check cv2.BORDER_* for more details.")
    return cv2.warpAffine(x, transform_matrix[0:2, :],
                          (cols, rows), flags=flags, borderMode=border_mode)
def affine_transform_keypoints(coords_list, transform_matrix):
    """Transform keypoint coordinates according to a given affine transform matrix.
    OpenCV format, x is width.

    Note that for pose estimation tasks, a horizontal flip also swaps the left and right body parts,
    so do not flip keypoints with this function alone; use ``tl.prepro.keypoint_random_flip`` instead.

    Parameters
    -----------
    coords_list : list of list of tuple/list
        The coordinates,
        e.g., the keypoint coordinates of every person in an image.
    transform_matrix : numpy.array
        Transform matrix, OpenCV format.

    Examples
    ---------
    >>> # 1. get all affine transform matrices
    >>> M_rotate = tl.prepro.affine_rotation_matrix(angle=20)
    >>> M_flip = tl.prepro.affine_horizontal_flip_matrix(prob=1)
    >>> # 2. combine all affine transform matrices into one matrix
    >>> M_combined = M_flip.dot(M_rotate)
    >>> # 3. transform the matrix from Cartesian coordinates (the origin in the middle of the image)
    >>> # to Image coordinates (the origin on the top-left of the image)
    >>> transform_matrix = tl.prepro.transform_matrix_offset_center(M_combined, x=w, y=h)
    >>> # 4. then we can transform the image once for all transformations
    >>> result = tl.prepro.affine_transform_cv2(image, transform_matrix) # 76 times faster
    >>> # 5. transform keypoint coordinates
    >>> coords = [[(50, 100), (100, 100), (100, 50), (200, 200)], [(250, 50), (200, 50), (200, 100)]]
    >>> coords_result = tl.prepro.affine_transform_keypoints(coords, transform_matrix)

    """
    coords_result_list = []
    for coords in coords_list:
        coords = np.asarray(coords)
        coords = coords.transpose([1, 0])
        coords = np.insert(coords, 2, 1, axis=0)  # append a row of ones -> homogeneous coordinates
        coords_result = np.matmul(transform_matrix, coords)
        coords_result = coords_result[0:2, :].transpose([1, 0])
        coords_result_list.append(coords_result)
    return coords_result_list
src = cv2.imread("1.jpg")
# src = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
h, w = src.shape[:2]  # works for both grayscale and color images
# 1. get all affine transform matrices (only rotation here, so step 2, combining matrices, is skipped)
M_rotate = affine_rotation_matrix(angle=15)
# 3. transform the matrix from Cartesian coordinates (the origin in the middle of the image) to Image coordinates (the origin on the top-left of the image)
transform_matrix = transform_matrix_offset_center(M_rotate, x=w, y=h)
# 4. then we can transform the image once for all transformations
dst = affine_transform_cv2(src, transform_matrix)  # 76 times faster
# 5. transform keypoint coordinates
coords = [[(145, 151), (250, 151), (250, 502), (145, 502)]]
# coords = [[((145+250)/2, 151), (250, (151+502)/2), ((145+250)/2, 502), (145, (151+502)/2)]]
coords_result = affine_transform_keypoints(coords, transform_matrix)
# dst_color = cv2.cvtColor(dst, cv2.COLOR_GRAY2BGR)
dst_color = dst
dst_points_lt = (int(coords_result[0][0, 0]), int(coords_result[0][0, 1]))  # first keypoint (the rectangle's top-left corner) after rotation
cv2.circle(dst_color, dst_points_lt, 3, (0, 0, 255), -1)
# coords_x = coords_result[0][:, 0]
# coords_y = coords_result[0][:, 1]
# x_min = coords_x[np.argmin(coords_x)]
# x_max = coords_x[np.argmax(coords_x)]
# y_min = coords_y[np.argmin(coords_y)]
# y_max = coords_y[np.argmax(coords_y)]
# cv2.rectangle(dst_color, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (255, 0, 0))
cv2.imwrite("rotation_result.jpg", dst_color)