引言
一、数据竞赛介绍
二、赛题介绍
三、赛题思路
四、baseline代码解析
1.自编码器模型
def create_autoencoder(input_dim, output_dim, noise=0.05):
    """Build a denoising autoencoder with an auxiliary classification head.

    The network reconstructs its input (input_dim -> 64 -> input_dim) while a
    small MLP on top of the reconstruction predicts the labels, so training
    optimizes a joint loss: MSE on 'decoded' plus binary cross-entropy on
    'label_output'.

    Returns:
        (autoencoder, encoder): the two-output training model and the
        stand-alone encoder sub-model that maps inputs to the 64-d code.
    """
    net_input = Input(input_dim)

    # Encoder: normalize, corrupt with Gaussian noise (denoising objective),
    # then compress to a 64-unit code.
    code = BatchNormalization()(net_input)
    code = GaussianNoise(noise)(code)
    code = Dense(64, activation='relu')(code)

    # Decoder: expand the code back to the original feature dimension.
    reconstruction = Dropout(0.2)(code)
    reconstruction = Dense(input_dim, name='decoded')(reconstruction)

    # Classification head stacked on the reconstructed features:
    # two identical 32-unit blocks, then a sigmoid output.
    head = reconstruction
    for units in (32, 32):
        head = Dense(units, activation='relu')(head)
        head = BatchNormalization()(head)
        head = Dropout(0.2)(head)
    head = Dense(output_dim, activation='sigmoid', name='label_output')(head)

    encoder = Model(inputs=net_input, outputs=code)
    autoencoder = Model(inputs=net_input, outputs=[reconstruction, head])
    # Joint loss: reconstruction (MSE) + classification (binary cross-entropy).
    autoencoder.compile(
        optimizer=Adam(0.005),
        loss={'decoded': 'mse', 'label_output': 'binary_crossentropy'},
    )
    return autoencoder, encoder
2.全连接网络(MLP)模型
def create_model(input_dim, output_dim, encoder):
    """Build an MLP classifier that consumes both raw and encoded features.

    The given ``encoder`` (from ``create_autoencoder``) supplies a learned
    low-dimensional representation, which is concatenated with the raw inputs
    before a stack of Dense/BatchNorm/ReLU/Dropout hidden layers.

    Args:
        input_dim: shape of the raw feature input.
        output_dim: number of sigmoid output units (labels).
        encoder: a Keras model mapping inputs to their encoded representation.

    Returns:
        A compiled Keras model (Adam, label-smoothed binary cross-entropy,
        AUC metric).
    """
    inputs = Input(input_dim)
    x = encoder(inputs)
    x = Concatenate()([x, inputs])  # use both raw and encoded features
    x = BatchNormalization()(x)
    x = Dropout(0.13)(x)

    # Hidden stack. (Fix: the original used enumerate() but never read the
    # index, so iterate the sizes directly.)
    hidden_units = [384, 896, 896, 394]
    for hidden_unit in hidden_units:
        x = Dense(hidden_unit)(x)
        x = BatchNormalization()(x)
        x = Lambda(tf.keras.activations.relu)(x)
        x = Dropout(0.25)(x)

    x = Dense(output_dim, activation='sigmoid')(x)
    model = Model(inputs=inputs, outputs=x)
    # label_smoothing softens the hard 0/1 targets to reduce overconfidence.
    model.compile(
        optimizer=Adam(0.0005),
        loss=BinaryCrossentropy(label_smoothing=0.05),
        metrics=[tf.keras.metrics.AUC(name='auc')],
    )
    return model
3.完整代码
tf.keras.layers.BatchNormalization
tf.keras.layers.Lambda
tf.keras.layers.GaussianNoise
tf.keras.layers.Activation
tf.keras.losses.BinaryCrossentropy
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Concatenate, Lambda