As a workaround (besides simply using GammaFlopsRegularizer), and for future reference: most modern convolutional networks forgo the flatten/fully_connected pattern and instead use a 1x1 conv followed by reduce_mean.
- 原模型
def base_model(x_ph, is_training_ph, scope, channels=[32, 64, 64], reuse=False):
    """Build the baseline conv net with a flatten + fully connected head.

    Three slim.conv2d layers with kernel_size=3 are stacked, the first two
    each followed by a slim.max_pool2d with kernel_size=2. The last feature
    map is flattened and fed to a 10-output fully connected layer that has
    no normalizer and no activation.

    Args:
        x_ph: Input tensor handed to the first convolution.
        is_training_ph: Accepted for interface compatibility; this function
            does not read it (see the review note below).
        scope: Name passed to tf.variable_scope.
        channels: Output channel counts; indices 0, 1 and 2 are read.
        reuse: Forwarded to tf.variable_scope.

    Returns:
        A tuple (out, pred): the output of the 'output' layer and its
        argmax along axis 1.
    """
    # NOTE(review): batch norm is pinned to is_training=False and
    # is_training_ph is never used -- confirm this is intentional.
    bn_settings = {'is_training': False, 'scale': True, 'center': False}

    # Conv scope paired with the pool scope that follows it (None = no pool).
    stages = (('conv1', 'pool1'), ('conv2', 'pool2'), ('conv3', None))

    # NOTE(review): the original formatting was collapsed onto one line;
    # everything up to `pred` is assumed to live inside both scopes -- confirm.
    with tf.variable_scope(scope, reuse=reuse):
        layer_defaults = dict(
            normalizer_fn=slim.batch_norm,
            normalizer_params=bn_settings,
            weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
            weights_regularizer=slim.l2_regularizer(0.0005),
        )
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            **layer_defaults):
            net = x_ph
            for idx, (conv_scope, pool_scope) in enumerate(stages):
                net = slim.conv2d(net, num_outputs=channels[idx],
                                  kernel_size=3, scope=conv_scope)
                if pool_scope is not None:
                    net = slim.max_pool2d(net, kernel_size=2,
                                          scope=pool_scope)

            flat = slim.flatten(net)
            # The classifier head overrides the arg_scope defaults: no batch
            # norm and no activation on the output layer.
            out = slim.fully_connected(flat, num_outputs=10,
                                       normalizer_fn=None,
                                       normalizer_params=None,
                                       activation_fn=None,
                                       scope='output')
            pred = tf.argmax(out, axis=1)

    return out, pred
- 替换全连接
def base_model(x_ph, is_training_ph, scope, channels=[32, 64, 64], reuse=False):
    """Build the conv net with a 1x1 conv + reduce_mean head.

    Three slim.conv2d layers with kernel_size=3 are stacked, the first two
    each followed by a slim.max_pool2d with kernel_size=2. In place of a
    flatten + fully connected head, a 10-output [1, 1] convolution (no
    normalizer, no activation) is applied and then averaged over axes
    1 and 2.

    Args:
        x_ph: Input tensor handed to the first convolution.
        is_training_ph: Accepted for interface compatibility; this function
            does not read it (see the review note below).
        scope: Name passed to tf.variable_scope.
        channels: Output channel counts; indices 0, 1 and 2 are read.
        reuse: Forwarded to tf.variable_scope.

    Returns:
        A tuple (out, pred): the averaged output of 'output_conv' and its
        argmax along axis 1.
    """
    # NOTE(review): batch norm is pinned to is_training=False and
    # is_training_ph is never used -- confirm this is intentional.
    bn_settings = {'is_training': False, 'scale': True, 'center': False}

    # Conv scope paired with the pool scope that follows it (None = no pool).
    stages = (('conv1', 'pool1'), ('conv2', 'pool2'), ('conv3', None))

    # NOTE(review): the original formatting was collapsed onto one line;
    # everything up to `pred` is assumed to live inside both scopes -- confirm.
    with tf.variable_scope(scope, reuse=reuse):
        layer_defaults = dict(
            normalizer_fn=slim.batch_norm,
            normalizer_params=bn_settings,
            weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
            weights_regularizer=slim.l2_regularizer(0.0005),
        )
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            **layer_defaults):
            net = x_ph
            for idx, (conv_scope, pool_scope) in enumerate(stages):
                net = slim.conv2d(net, num_outputs=channels[idx],
                                  kernel_size=3, scope=conv_scope)
                if pool_scope is not None:
                    net = slim.max_pool2d(net, kernel_size=2,
                                          scope=pool_scope)

            # 1x1 convolution down to 10 channels, overriding the arg_scope
            # defaults so the head has no batch norm and no activation.
            head = slim.conv2d(
                net, 10, [1, 1],
                activation_fn=None,
                normalizer_fn=None,
                scope='output_conv')
            # Average over axes 1 and 2, dropping those dimensions.
            out = tf.reduce_mean(head, [1, 2], name='output', keepdims=False)
            pred = tf.argmax(out, axis=1)

    return out, pred