FGM(Fast Gradient Method)是一种对抗训练方法:它沿损失函数对输入(通常是嵌入层)的梯度方向施加小扰动来构造对抗样本。与 FGSM 直接取梯度符号不同,FGM 通常按梯度的 L2 范数对扰动进行归一化。在对抗训练中,FGM 通过给模型的嵌入层施加这种扰动,增强模型对输入数据中小扰动的鲁棒性。
- 计算正常训练步:正向传播,计算损失,反向传播,应用梯度更新。
- FGM对抗训练:
- 计算损失梯度。
- 生成对抗扰动并应用到嵌入层。
- 在对抗样本上计算损失,反向传播,并更新模型。
- 恢复嵌入层。
代码实现
import tensorflow as tf
class FGM:
    """Fast Gradient Method (FGM) adversarial-perturbation helper.

    Backs up the embedding weights, adds a perturbation of magnitude
    ``epsilon`` via :meth:`attack`, and undoes it via :meth:`restore`.

    NOTE(review): the original implementation perturbed with
    ``epsilon * sign(embedding)`` — the sign of the *weights*, not of the
    loss gradient. True FGM perturbs along the L2-normalized gradient of
    the loss w.r.t. the embedding. Pass ``gradients`` to :meth:`attack`
    to get the correct behavior; without it the legacy sign-of-weights
    fallback is kept for backward compatibility.
    """

    def __init__(self, model, epsilon=1.0):
        self.model = model        # model whose embedding variables get perturbed
        self.epsilon = epsilon    # default perturbation magnitude
        self.emb_name = None      # substring used to locate embedding variables
        self.emb_backup = {}      # variable name -> original values (numpy copy)

    def attack(self, emb_name, epsilon=None, gradients=None):
        """Perturb every trainable variable whose name contains *emb_name*.

        Args:
            emb_name: substring identifying the embedding variable(s).
            epsilon: perturbation size; defaults to ``self.epsilon``.
            gradients: optional sequence aligned with
                ``model.trainable_variables`` (e.g. the output of
                ``tape.gradient``). When given, the FGM perturbation
                ``epsilon * g / (||g||_2 + 1e-8)`` is used; otherwise the
                legacy ``epsilon * sign(weights)`` fallback applies.
        """
        if epsilon is None:
            epsilon = self.epsilon
        self.emb_name = emb_name
        for i, emb_layer in enumerate(self.model.trainable_variables):
            if self.emb_name in emb_layer.name:
                # Save the original embedding so restore() can undo the attack.
                self.emb_backup[emb_layer.name] = emb_layer.numpy()
                grad = None if gradients is None else gradients[i]
                if grad is not None:
                    # Proper FGM: step along the L2-normalized loss gradient.
                    delta = epsilon * grad / (tf.norm(grad) + 1e-8)
                else:
                    # Legacy fallback: sign of the weights themselves.
                    delta = epsilon * tf.sign(emb_layer)
                emb_layer.assign_add(delta)

    def restore(self):
        """Restore the embedding values saved by the last attack()."""
        for emb_layer in self.model.trainable_variables:
            if self.emb_name in emb_layer.name:
                emb_layer.assign(self.emb_backup[emb_layer.name])
        self.emb_backup = {}
# Custom training step: one clean update followed by one FGM adversarial update.
def train_step(model, data, optimizer, loss_fn, fgm, emb_name):
    """Run one clean optimization step, then one FGM adversarial step.

    Args:
        model: the Keras model being trained.
        data: ``(inputs, labels)`` batch.
        optimizer: a ``tf.keras`` optimizer.
        loss_fn: callable ``loss_fn(labels, predictions)``.
        fgm: an :class:`FGM` instance wrapping *model*.
        emb_name: substring locating the embedding variable(s).

    Returns:
        ``(loss, loss_adv)`` — the clean and adversarial batch losses.
    """
    inputs, labels = data

    # --- Clean forward/backward pass and parameter update. ---
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        loss = loss_fn(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # --- FGM adversarial pass. ---
    # Perturb the embedding before (not inside) the tape: assign ops are
    # not differentiated, so taping them adds nothing.
    fgm.attack(emb_name)
    with tf.GradientTape() as tape:
        predictions_adv = model(inputs, training=True)
        loss_adv = loss_fn(labels, predictions_adv)
    gradients_adv = tape.gradient(loss_adv, model.trainable_variables)

    # BUGFIX: restore the clean embedding BEFORE applying the adversarial
    # gradients. The original restored *after* apply_gradients, which
    # overwrote the embedding with the stale backup and silently discarded
    # the adversarial update to the embedding weights.
    fgm.restore()
    optimizer.apply_gradients(zip(gradients_adv, model.trainable_variables))

    return loss, loss_adv
# Example: how to use the code above.
# Create the model, optimizer and loss function.
model = ...  # define your model here (placeholder — must be replaced before running)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Initialize the FGM adversarial-training helper.
fgm = FGM(model, epsilon=0.5)
# Assumes `epochs` and a data generator/`dataset` are defined elsewhere — TODO confirm.
for epoch in range(epochs):
    for step, data in enumerate(dataset):
        loss, loss_adv = train_step(model, data, optimizer, loss_fn, fgm, emb_name='embedding')
        # Print or log the losses.
        print(f"Epoch {epoch} Step {step} Loss: {loss:.4f} Adv Loss: {loss_adv:.4f}")
另一种实现(将 FGM 的扰动、前向、恢复与梯度合并逻辑封装进一个训练器类):
import tensorflow as tf
class FGMTrainer:
    """Self-contained FGM adversarial trainer for a Keras model.

    Each :meth:`train_step` runs a clean pass, perturbs the embedding
    along the sign of its loss gradient (FGSM-style direction), runs an
    adversarial pass, restores the embedding, and applies the *sum* of
    clean and adversarial gradients in a single optimizer update.
    """

    def __init__(self, model, epsilon=1.0):
        self.model = model
        self.epsilon = epsilon
        self.optimizer = tf.keras.optimizers.Adam()
        self.loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    @tf.function
    def train_step(self, inputs, labels, embedding_layer_name='embedding'):
        """One FGM training step; returns ``(loss, loss_adv)`` for logging.

        Raises:
            ValueError: if no trainable variable name contains
                *embedding_layer_name* (raised at trace time).
        """
        # 1. Clean forward pass.
        with tf.GradientTape() as tape:
            predictions = self.model(inputs, training=True)
            loss = self.loss_fn(labels, predictions)
        # 2. Gradients of the clean loss.
        gradients = tape.gradient(loss, self.model.trainable_variables)
        # 3. Locate the embedding variable AND its gradient index in one scan
        #    (avoids a second O(n) list.index lookup on a tf.Variable).
        embedding_layer = None
        emb_index = -1
        for i, var in enumerate(self.model.trainable_variables):
            if embedding_layer_name in var.name:
                embedding_layer = var
                emb_index = i
                break
        if embedding_layer is None:
            raise ValueError(f"Embedding layer '{embedding_layer_name}' not found in model.")
        # 4. Adversarial perturbation from the gradient sign.
        #    NOTE(review): sign(grad) is the FGSM direction; canonical FGM
        #    uses grad / ||grad||_2 — kept as-is to preserve behavior.
        grad = gradients[emb_index]
        delta = self.epsilon * tf.sign(grad)
        # 5. Perturb the embedding in place.
        embedding_layer.assign_add(delta)
        # 6. Adversarial forward pass on the perturbed embedding.
        with tf.GradientTape() as tape_adv:
            predictions_adv = self.model(inputs, training=True)
            loss_adv = self.loss_fn(labels, predictions_adv)
        # 7. Gradients of the adversarial loss.
        gradients_adv = tape_adv.gradient(loss_adv, self.model.trainable_variables)
        # 8. Undo the perturbation before updating the weights.
        embedding_layer.assign_sub(delta)
        # 9. BUGFIX: combine clean + adversarial gradients and apply once.
        #    The original applied only gradients_adv, silently dropping the
        #    clean-loss gradients despite its own comment claiming otherwise.
        combined = [
            g_adv if g is None else (g if g_adv is None else g + g_adv)
            for g, g_adv in zip(gradients, gradients_adv)
        ]
        self.optimizer.apply_gradients(zip(combined, self.model.trainable_variables))
        # Return both losses for logging.
        return loss, loss_adv
# Inspect variable names to find the right `emb_name` substring
# (uses the `model` object defined earlier in the file).
model_variables = model.trainable_variables  # or model.variables
all_variable_names = [var.name for var in model_variables]
# Extract the variables whose name contains "emb".
emb_variable_names = [var.name for var in model_variables if "emb" in var.name]