The C&W (Carlini & Wagner) attack is an adversarial attack algorithm in the digital domain. The functions and code used below are as follows.
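For reference, the targeted C&W L2 attack minimizes the following objective over the perturbation, where Z(.) are the model's output scores, t is the target class, and kappa is the confidence margin (in the code below, confidence plays the role of the constant c, and k plays the role of kappa; loss2 is the L2 term as computed by torch.dist, and loss1 is f):

\min_{\delta}\ \|\delta\|_2 + c \cdot f(x+\delta), \qquad f(x') = \max\Big(\max_{i \neq t} Z(x')_i - Z(x')_t,\ -\kappa\Big)

A larger c weights misclassification more heavily relative to the size of the perturbation, which is why the code binary-searches over it.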
import torch
import numpy as np
import cv2
from torchvision import models

# Get the compute device; fall back to CPU if no GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load and preprocess the image (cv2 loads BGR; [..., ::-1] converts to RGB)
image_path = "../picture/cow.jpeg"
orig = cv2.imread(image_path)[..., ::-1]
orig = cv2.resize(orig, (224, 224))
img = orig.copy().astype(np.float32)

# Normalize with the ImageNet mean and std
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
img /= 255.0
img = (img - mean) / std
img = img.transpose(2, 0, 1)       # HWC -> CHW
img = np.expand_dims(img, axis=0)  # expand_dims adds a batch dimension
print(img.shape)                   # should be (1, 3, 224, 224)

# Load a pretrained AlexNet and put it in evaluation mode
model = models.alexnet(pretrained=True).to(device).eval()
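Before running the attack, it can help to confirm the preprocessing with a quick sanity check (this snippet is an addition, reusing the img and model defined above):

# Sanity check (addition): the clean image should get a sensible prediction
with torch.no_grad():
    clean_logits = model(torch.from_numpy(img).to(device).float())
print(torch.argmax(clean_logits, dim=1))  # index of the clean prediction among the 1000 ImageNet classes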
max_iterations = 1000       # maximum number of iterations
learning_rate = 0.01
binary_search_steps = 10    # maximum number of binary-search steps
initial_const = 100         # initial value of the constant c
confidence = initial_const  # "confidence" holds the current value of c
k = 40                      # the confidence margin kappa
boxmin = -3.0
boxmax = 3.0
num_labels = 1000           # ImageNet has 1000 classes

# The attack target label must be one-hot encoded
target_label = 988
tlab = torch.from_numpy(np.eye(num_labels)[target_label]).to(device).float()  # convert the target label to a torch tensor
print(tlab.shape)  # should be torch.Size([1000])

shape = (1, 3, 224, 224)

# Initial bounds for the binary search over c
lower_bound = 0
upper_bound = 1e10

# the best l2, score, and attack image
o_bestl2 = 1e10
o_bestscore = -1
o_bestattack = np.zeros(shape)
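The box constraint is handled with a change of variables: an unconstrained variable w is mapped to an image via x = tanh(w) * boxmul + boxplus, which always lands inside (boxmin, boxmax). A minimal round-trip sketch of this mapping (an illustration added here, using the bounds defined above; the 0.999999 factor keeps arctanh away from +/-1):

boxmul = (boxmax - boxmin) / 2.0
boxplus = (boxmin + boxmax) / 2.0
w = np.arctanh((img - boxplus) / boxmul * 0.999999)  # image space -> tanh space
x = np.tanh(w) * boxmul + boxplus                    # tanh space -> image space, bounded by construction
print(np.abs(x - img).max())                         # round-trip error is tiny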
# the resulting image, tanh'd to keep bounded from boxmin to boxmax
boxmul = (boxmax - boxmin) / 2.0
boxplus = (boxmin + boxmax) / 2.0

for outer_step in range(binary_search_steps):
    print("best_l2={},confidence={}".format(o_bestl2, confidence))

    # Convert the original image into tanh space; the perturbation is a trainable modifier
    timg = torch.from_numpy(np.arctanh((img - boxplus) / boxmul * 0.999999)).to(device).float()
    modifier = torch.zeros_like(timg, requires_grad=True)

    # Define the optimizer; only the modifier is optimized
    optimizer = torch.optim.Adam([modifier], lr=learning_rate)

    for iteration in range(1, max_iterations + 1):
        optimizer.zero_grad()  # clear the gradients
        newimg = torch.tanh(modifier + timg) * boxmul + boxplus
        output = model(newimg)

        # Define the C&W loss function
        # L2 norm: torch.dist() computes the distance; p=2 is Euclidean, p=1 would be Manhattan (L1)
        loss2 = torch.dist(newimg, (torch.tanh(timg) * boxmul + boxplus), p=2)
        real = torch.max(output * tlab)
        other = torch.max((1 - tlab) * output)
        loss1 = other - real + k
        loss1 = torch.clamp(loss1, min=0)  # clamp so loss1 is at least 0
        loss = confidence * loss1 + loss2

        # Backward pass + gradient update (the graph is rebuilt every iteration, so retain_graph is not needed)
        loss.backward()
        optimizer.step()

        l2 = loss2.item()
        sc = output.data.cpu().numpy()

        # print out the loss every 10%
        if iteration % (max_iterations // 10) == 0:
            print("iteration={} loss={} loss1={} loss2={}".format(iteration, loss.item(), loss1.item(), l2))

        if (l2 < o_bestl2) and (np.argmax(sc) == target_label):
            print("attack success l2={} target_label={}".format(l2, target_label))
            o_bestl2 = l2
            o_bestscore = np.argmax(sc)
            o_bestattack = newimg.data.cpu().numpy()

    # Binary search over c: shrink c after a success, grow it after a failure
    confidence_old = -1
    if (o_bestscore == target_label) and o_bestscore != -1:
        # Attack succeeded: decrease c
        upper_bound = min(upper_bound, confidence)
        if upper_bound < 1e9:
            confidence_old = confidence
            confidence = (lower_bound + upper_bound) / 2
    else:
        # Attack failed: increase c
        lower_bound = max(lower_bound, confidence)
        confidence_old = confidence
        if upper_bound < 1e9:
            confidence = (lower_bound + upper_bound) / 2
        else:
            confidence *= 10

    print("outer_step={} confidence {}->{}".format(outer_step, confidence_old, confidence))
# Print the shapes of the adversarial image and the original
print(o_bestattack.shape)
print(img.shape)
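As an optional check (an addition here, assuming the loop above found an adversarial image), you can confirm that the best attack image is actually classified as the target label:

# Re-classify the best adversarial image found
with torch.no_grad():
    adv_logits = model(torch.from_numpy(o_bestattack).to(device).float())
print(torch.argmax(adv_logits, dim=1))  # should print the target label (988) if the attack succeeded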
# Display the original image and the adversarial example side by side, reusing the helper function from earlier
def show_images_diff(original_img, original_label, adversarial_img, adversarial_label):
    import matplotlib.pyplot as plt
    plt.figure()

    # Normalize to [0, 1] if the images are still in [0, 255]
    if original_img.max() > 1.0:
        original_img = original_img / 255.0
    if adversarial_img.max() > 1.0:
        adversarial_img = adversarial_img / 255.0

    plt.subplot(131)
    plt.title('Original')
    plt.imshow(original_img)
    plt.axis('off')

    plt.subplot(132)
    plt.title('Adversarial')
    plt.imshow(adversarial_img)
    plt.axis('off')

    plt.subplot(133)
    plt.title('Adversarial-Original')
    difference = adversarial_img - original_img
    # Rescale the difference from (-1, 1) to (0, 1) for display
    difference = difference / abs(difference).max() / 2.0 + 0.5
    plt.imshow(difference, cmap=plt.cm.gray)
    plt.axis('off')

    plt.tight_layout()
    plt.show()
adv = o_bestattack[0]  # take the image out of the batch
print(adv.shape)
adv = adv.transpose(1, 2, 0)  # CHW -> HWC
adv = (adv * std) + mean      # undo the normalization
adv = adv * 255.0
adv = np.clip(adv, 0, 255).astype(np.uint8)
show_images_diff(orig, 0, adv, 0)
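If you also want to save the adversarial example to disk (an addition, not part of the original code; the output path is just an example), remember that cv2.imwrite expects BGR, so the channels must be flipped back:

# Save the adversarial example; adv is RGB, so flip back to BGR for OpenCV
cv2.imwrite("../picture/cow_adv.png", adv[..., ::-1])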