A TensorFlow 2.0 implementation of the Disout algorithm open-sourced by Huawei's Noah's Ark Lab.
The algorithm tackles overfitting by injecting perturbations into features rather than masking (zeroing out) some of them the way dropout does.
Paper: https://arxiv.org/abs/2002.11022
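Before the full layer, here is a minimal sketch of the core update rule used by the implementation below (the variable names are only illustrative, not from the paper): positions covered by a block mask are not zeroed but replaced by a blend of the original activation and uniform noise drawn from the feature map's value range.

import tensorflow as tf

x = tf.random.normal((1, 8, 8, 16))                                 # feature map (batch, h, w, c)
mask = tf.cast(tf.random.uniform(tf.shape(x)) < 0.1, tf.float32)    # 1 where a block perturbs (here: random positions)
alpha = 0.5
x_max = tf.reduce_max(x, axis=(1, 2), keepdims=True)                # per-channel value range
x_min = tf.reduce_min(x, axis=(1, 2), keepdims=True)
noise = tf.random.uniform(tf.shape(x)) * (x_max - x_min) + x_min    # uniform noise inside that range
perturbed = alpha * x + (1.0 - alpha) * noise                       # blend activation with noise
y = x * (1.0 - mask) + perturbed * mask                             # perturb instead of zeroing out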
Implementation:
import tensorflow as tf
class Disout(tf.keras.layers.Layer):
    '''
    Disout for 4-D feature maps (batch_size, h, w, c).
    Paper: https://arxiv.org/abs/2002.11022
    '''
    def __init__(self, dist_prob, block_size=5, alpha=0.5, **kwargs):
        super(Disout, self).__init__(**kwargs)
        self.dist_prob = dist_prob
        self.alpha = alpha
        self.block_size = block_size

    def build(self, input_shape):
        pass

    @tf.function
    def call(self, x):
        '''x: (batch_size, h, w, c)'''
        if not self.trainable:
            return x
        else:
            if tf.math.equal(tf.rank(x), 4):
                x_shape = tf.shape(x)
                x_size = x_shape[1:3]
                x_size_f = tf.cast(x_size, tf.float32)
                # Block size
                x_block_size_f = tf.constant((self.block_size, self.block_size), tf.float32)
                x_block_size = tf.cast(x_block_size_f, tf.int32)
                # Number of blocks derived from dist_prob
                x_block_num = (x_size_f[0] * x_size_f[1]) * self.dist_prob / (x_block_size_f[0] * x_block_size_f[1])
                # Probability of a block seed at each position of the valid centre region
                x_block_rate = x_block_num / ((x_size_f[0] - x_block_size_f[0] + 1) * (x_size_f[1] - x_block_size_f[1] + 1))
                # Sample block seeds in the valid centre region; threshold first, then pad
                # with zeros so the padding itself can never pass the `< x_block_rate` test
                # and turn the border into permanent seeds
                x_block_center = tf.random.uniform((x_shape[0], x_size[0] - x_block_size[0] + 1, x_size[1] - x_block_size[1] + 1, x_shape[3]), dtype=tf.float32)
                x_block_center = tf.cast(x_block_center < x_block_rate, tf.float32)
                x_block_padding_t = x_block_size[0] // 2
                x_block_padding_b = x_size_f[0] - tf.cast(x_block_padding_t, tf.float32) - (x_size_f[0] - x_block_size_f[0] + 1.0)
                x_block_padding_b = tf.cast(x_block_padding_b, tf.int32)
                x_block_padding_l = x_block_size[1] // 2
                x_block_padding_r = x_size_f[1] - tf.cast(x_block_padding_l, tf.float32) - (x_size_f[1] - x_block_size_f[1] + 1.0)
                x_block_padding_r = tf.cast(x_block_padding_r, tf.int32)
                x_block = tf.pad(x_block_center, [[0, 0], [x_block_padding_t, x_block_padding_b], [x_block_padding_l, x_block_padding_r], [0, 0]])
                # Grow every seed into a block_size x block_size block
                x_block = tf.nn.max_pool2d(x_block, ksize=[self.block_size, self.block_size], strides=[1, 1], padding='SAME')
                # Apply the perturbation: inside blocks, blend the activation with uniform
                # noise drawn from the per-channel value range of the feature map
                x_max = tf.reduce_max(x, axis=(1, 2), keepdims=True)
                x_min = tf.reduce_min(x, axis=(1, 2), keepdims=True)
                x_block_random = tf.random.uniform(x_shape, dtype=x.dtype) * (x_max - x_min) + x_min
                x_block_random = x_block_random * (1.0 - self.alpha) + x * self.alpha
                x = x * (1 - x_block) + x_block_random * x_block
                return x
            else:
                return x

    def compute_output_shape(self, input_shape):
        '''The output shape equals the input shape.'''
        return input_shape
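The 2-D layer above can be inserted into a convolutional model like any other Keras layer. A minimal usage sketch (the architecture and the dist_prob value are only illustrative):

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(32, 32, 3)),
    Disout(dist_prob=0.1, block_size=5, alpha=0.5),   # perturb blocks of the 2-D feature map
    tf.keras.layers.Conv2D(64, 3, activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10),
])

For fully connected layers there is a 1-D variant of the same idea: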
class Disout1D(tf.keras.layers.Layer):
    '''
    Disout for 2-D inputs (batch_size, features), e.g. the output of a Dense layer.
    Paper: https://arxiv.org/abs/2002.11022
    '''
    def __init__(self, dist_prob, block_size=5, alpha=0.5, **kwargs):
        super(Disout1D, self).__init__(**kwargs)
        self.dist_prob = dist_prob
        self.alpha = alpha
        self.block_size = block_size

    def build(self, input_shape):
        pass

    @tf.function
    def call(self, x):
        '''x: (batch_size, features)'''
        if not self.trainable:
            return x
        else:
            if tf.math.equal(tf.rank(x), 2):
                x_shape = tf.shape(x)
                x_size = x_shape[1]
                x_size_f = tf.cast(x_size, tf.float32)
                # Block size
                x_block_size_f = tf.constant(self.block_size, tf.float32)
                # Alternative: treat block_size as a fraction of the feature width
                # x_block_size_f = x_size_f * self.block_size
                # x_block_size_f = tf.math.maximum(x_block_size_f, 1)
                x_block_size = tf.cast(x_block_size_f, tf.int32)
                # Number of blocks derived from dist_prob
                x_block_num = x_size_f * self.dist_prob / x_block_size_f
                # Probability of a block seed at each position of the valid centre region
                x_block_rate = x_block_num / (x_size_f - x_block_size_f + 1)
                # Sample block seeds in the valid centre region; threshold first, then pad
                # with zeros so the padding itself can never pass the `< x_block_rate` test
                x_block_center = tf.random.uniform((x_shape[0], x_size - x_block_size + 1), dtype=tf.float32)
                x_block_center = tf.cast(x_block_center < x_block_rate, tf.float32)
                x_block_padding_t = x_block_size // 2
                x_block_padding_b = x_size_f - tf.cast(x_block_padding_t, tf.float32) - (x_size_f - x_block_size_f + 1.0)
                x_block_padding_b = tf.cast(x_block_padding_b, tf.int32)
                x_block = tf.pad(x_block_center, [[0, 0], [x_block_padding_t, x_block_padding_b]])
                # Grow every seed into a block of block_size consecutive features
                x_block = tf.expand_dims(x_block, axis=-1)
                x_block = tf.nn.max_pool1d(x_block, ksize=[self.block_size], strides=[1], padding='SAME')
                x_block = tf.reshape(x_block, x_shape)
                # Apply the perturbation: inside blocks, blend the activation with uniform
                # noise drawn from the per-sample value range
                x_max = tf.reduce_max(x, axis=1, keepdims=True)
                x_min = tf.reduce_min(x, axis=1, keepdims=True)
                x_block_random = tf.random.uniform(x_shape, dtype=x.dtype) * (x_max - x_min) + x_min
                x_block_random = x_block_random * (1.0 - self.alpha) + x * self.alpha
                x = x * (1 - x_block) + x_block_random * x_block
                return x
            else:
                return x

    def compute_output_shape(self, input_shape):
        '''The output shape equals the input shape.'''
        return input_shape
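Likewise, Disout1D can follow a Dense layer. Another minimal sketch (layer sizes are only illustrative):

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(256, activation='relu'),
    Disout1D(dist_prob=0.1, block_size=5, alpha=0.5),   # perturb blocks of consecutive features
    tf.keras.layers.Dense(10),
])
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

Note that call() gates the perturbation on self.trainable rather than on the Keras training flag, so set the Disout layers' trainable attribute to False before running inference to disable the perturbation.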
Source code: https://github.com/tfwcn/tensorflow2-disout
Related resources:
Huawei's official implementation: https://github.com/huawei-noah/Disout