复现BadNets

  • 论文标题: BadNets: Identifying Vulnerabilities in the Machine Learning Model Supply Chain 或 BadNets: Evaluating Backdooring Attacks on Deep Neural Networks
  • 来源:arXiv/IEEE Access
  • 引用量:

highlight: agate

一、基准模型 BaseLine Model

  1. 模型构建
python 复制代码
import torch.nn as nn

class BaseLineModel(nn.Module):
  """Small CNN classifier: two conv/ReLU/avg-pool stages and two linear layers.

  The flatten size ``32*4*4`` fixes the expected input to 1-channel 28x28
  images (28 -> 24 -> 12 after the first stage, 12 -> 8 -> 4 after the second).

  ``forward`` returns raw, unnormalised class scores (logits) of shape
  ``(batch, 10)``. No Sigmoid/Softmax is applied here because
  ``torch.nn.CrossEntropyLoss`` applies log-softmax internally; squashing the
  scores first flattens the loss and slows training. ``argmax`` predictions are
  unaffected by this, since Sigmoid is monotonic.
  """

  def __init__(self) -> None:
    super().__init__()
    # Stage 1: 1 -> 16 channels, 5x5 kernel without padding, then 2x2 average pooling.
    self.cov1 = nn.Sequential(
      nn.Conv2d(
        in_channels=1,
        out_channels=16,
        kernel_size=(5, 5),
        padding=0,
        stride=1
      ),
      nn.ReLU(),
      nn.AvgPool2d(kernel_size=2, stride=2)
    )
    # Stage 2: 16 -> 32 channels, same kernel and pooling as stage 1.
    self.cov2 = nn.Sequential(
      nn.Conv2d(
        in_channels=16,
        out_channels=32,
        kernel_size=(5, 5),
        padding=0,
        stride=1
      ),
      nn.ReLU(),
      nn.AvgPool2d(kernel_size=2, stride=2)
    )
    self.fc1 = nn.Sequential(
      nn.Flatten(),
      nn.Linear(32*4*4, 512),
      nn.ReLU()
    )
    # Kept as a Sequential so the parameter names stay ``fc2.0.weight`` / ``fc2.0.bias``.
    self.fc2 = nn.Sequential(
      nn.Linear(512, 10)
    )

  def forward(self, x):
    """Return the logits of shape ``(batch, 10)`` for a batch of images ``x``."""
    x = self.cov1(x)
    x = self.cov2(x)
    x = self.fc1(x)
    return self.fc2(x)
  2. 训练模型
python 复制代码
def do_train_model(train_loader, test_loader, file_name='baseline.nn'):
  """Train a fresh BaseLineModel with SGD and save the whole model to disk.

  Runs 10 epochs of SGD (lr=0.1) with cross-entropy loss. After every epoch
  the cross-entropy loss summed over all batches of ``test_loader`` is printed.
  The trained model object is saved with ``torch.save`` into the ``save``
  directory located next to this file.

  :param train_loader: iterable yielding ``(imgs, labels)`` training batches
  :param test_loader: iterable yielding ``(imgs, labels)`` batches for the per-epoch loss report
  :param file_name: name of the saved model file inside ``save`` (default keeps the original ``baseline.nn``)
  """
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  model = BaseLineModel().to(device)
  loss_func = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
  cnt_epochs = 10

  for i in range(cnt_epochs):
    model.train()
    for imgs, labels in train_loader:
      imgs = imgs.to(device)
      labels = labels.to(device)
      loss = loss_func(model(imgs), labels)
      optimizer.zero_grad()  # clear gradients left over from the previous step
      loss.backward()
      optimizer.step()

    model.eval()
    total_loss = 0.0
    with torch.no_grad():  # evaluation only, no autograd bookkeeping needed
      for imgs, labels in test_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)
        # .item() yields a plain Python float, so the running total does not
        # stay on the GPU and prints as a number instead of a tensor repr.
        total_loss += loss_func(model(imgs), labels).item()
    print('第{}次打印损失:{}'.format(i, total_loss))

  save_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'save')
  os.makedirs(save_dir, exist_ok=True)  # torch.save does not create missing directories
  torch.save(model, os.path.join(save_dir, file_name))

二、后门模型 BadNet Model

2.1 后门生成策略

python 复制代码
# Intensity written into trigger pixels (presumably 253/255 on a [0, 1] scale — TODO confirm).
_TRIGGER_VALUE = 0.9922


def get_sigle_pixel(imgs, size=None):
    """Stamp the single-pixel backdoor trigger onto an image, in place.

    The pixel one step in from the bottom-right corner of channel 0 is set to
    the trigger intensity.

    :param imgs: image indexed as ``imgs[channel][row][col]``; modified in place
    :param size: side length of the (square) image; defaults to ``imgs.shape[-1]``
    :return: the same ``imgs`` object
    """
    if size is None:
        size = imgs.shape[-1]
    imgs[0][size - 2][size - 2] = _TRIGGER_VALUE
    return imgs


def get_pattern(imgs, size=None):
    """Stamp the four-pixel pattern backdoor trigger onto an image, in place.

    Four pixels in the bottom-right 3x3 corner of channel 0 are set to the
    trigger intensity.

    :param imgs: image indexed as ``imgs[channel][row][col]``; modified in place
    :param size: side length of the (square) image; defaults to ``imgs.shape[-1]``
    :return: the same ``imgs`` object
    """
    if size is None:
        size = imgs.shape[-1]
    imgs[0][size - 2][size - 2] = _TRIGGER_VALUE
    imgs[0][size - 1][size - 3] = _TRIGGER_VALUE
    imgs[0][size - 3][size - 1] = _TRIGGER_VALUE
    imgs[0][size - 1][size - 1] = _TRIGGER_VALUE
    return imgs

2.2 自定义数据集

为了适应后续BadNet模型的训练,训练集需要由毒化数据集和原始数据集两部分合并而成(训练集 = 毒化数据集 + 原始数据集),因此定义下面的自定义数据集类。

python 复制代码
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    """Map-style dataset over two parallel sequences of inputs and labels.

    :param input_data: indexable collection of input samples
    :param output_data: indexable collection of labels, aligned with ``input_data``
    :param transform: optional callable applied to each input sample on access
    """

    def __init__(self, input_data, output_data, transform=None):
        self.input_data = input_data
        self.output_data = output_data
        self.transform = transform

    def __len__(self):
        """Return the number of input samples."""
        return len(self.input_data)

    def __getitem__(self, idx):
        """Return ``(input_sample, output_sample)`` at ``idx``, transforming the input if requested."""
        input_sample = self.input_data[idx]
        output_sample = self.output_data[idx]
        if self.transform is not None:
            # The transform applies to the input only; the label is returned untouched.
            input_sample = self.transform(input_sample)
        return input_sample, output_sample

2.3 clean数据集

在生成后门数据的过程中,先用模型对测试集进行预测,再筛选出真实标签target与预测值pred相同(target == pred)的所有样本,作为clean data set。

随后通过SubsetRandomSampler随机采样得到待毒化的样本;这种处理方法是否正确尚待验证。

python 复制代码
def clean_data(data, model):
  """Keep the samples the model classifies correctly and pickle them as a dataset.

  Every sample of ``data`` is passed through ``model`` one at a time. Samples
  whose predicted class equals the label are collected into a CustomDataset,
  which is written to ``clean_data.pkl`` inside the module-level ``file_path``
  directory. Nothing is returned.

  :param data: dataset yielding ``(img, label)`` pairs
  :param model: callable mapping an image batch to per-class scores
  :raises FileNotFoundError: if the ``file_path`` directory does not exist
  """
  if not os.path.exists(file_path):
    raise FileNotFoundError(file_path)
  # One sample per batch so that each prediction maps to exactly one label.
  data_loader = DataLoader(data, batch_size=1)
  data_clean_features = []
  data_clean_vals = []
  for imgs, labels in data_loader:
    # .item() works on CPU and CUDA tensors alike and avoids converting a
    # one-element NumPy array to int, which recent NumPy versions deprecate.
    pred = model(imgs).argmax(dim=1).item()
    if pred == labels.item():
      # The batched tensors (leading dimension of 1) are stored unchanged.
      data_clean_features.append(imgs)
      data_clean_vals.append(labels)

  # Persist the filtered samples to disk.
  custom_dataset = CustomDataset(data_clean_features, data_clean_vals)
  with open(os.path.join(file_path, 'clean_data.pkl'), 'wb') as f:
    pickle.dump({'data':custom_dataset}, f)

2.4 poison数据集

从clean数据集中采样num_samples个样本进行毒化,采用如下方法:

python 复制代码
def poison_data(data_loader:DataLoader, is_save=False):
  """Build a poisoned dataset by stamping a backdoor trigger on every sample.

  For each batch, the first sample is taken, one of two triggers (single pixel
  or four-pixel pattern) is chosen at random and written into the bottom-right
  corner, and the sample is relabelled with the fixed attack target ``0``.
  An all-to-all variant would instead map label ``i`` to ``i + 1`` (and 9 to 0).

  :param data_loader: loader over the samples selected for poisoning
  :param is_save: when true, also pickle the result to ``poison_data.pkl``
      inside the module-level ``file_path`` directory
  :return: CustomDataset of poisoned images and their target labels
  :raises FileNotFoundError: if ``is_save`` is set and ``file_path`` does not exist
  """
  # Pixel intensity of the trigger (presumably 253/255 on a [0, 1] scale — TODO confirm).
  trigger_value = 0.9922
  target_label = 0

  def stamp_single_pixel(img):
    size = img.shape[-1]
    img[0][size - 2][size - 2] = trigger_value
    return img

  def stamp_pattern(img):
    size = img.shape[-1]
    img[0][size - 2][size - 2] = trigger_value
    img[0][size - 1][size - 3] = trigger_value
    img[0][size - 3][size - 1] = trigger_value
    img[0][size - 1][size - 1] = trigger_value
    return img

  if is_save and not os.path.exists(file_path):
    # Fail before doing any work if the output directory is missing.
    raise FileNotFoundError(file_path)

  data_atk_features = []
  data_atk_vals = []

  for imgs, _ in data_loader:
    # Strip the two leading dimensions of the batch; the original author found
    # this by trial and error to match the stored data layout.
    # Assumes the result is indexed as (channel, row, col) — TODO confirm.
    img = imgs[0][0]
    stamp = stamp_single_pixel if random.randint(0, 1) == 1 else stamp_pattern
    data_atk_features.append(stamp(img))
    # Every poisoned sample needs its label recorded; otherwise the dataset
    # has features but no labels and indexing it raises IndexError.
    data_atk_vals.append(target_label)

  data_atk = CustomDataset(data_atk_features, data_atk_vals)
  if is_save:
    with open(os.path.join(file_path, 'poison_data.pkl'), 'wb') as f:
      pickle.dump({'data':data_atk}, f)
  return data_atk

2.5 模型重训练

python 复制代码
def re_train_model(combined_atk_loader:DataLoader, test_loader:DataLoader):
  """Retrain the network on data that includes poisoned samples.

  Delegates to ``do_train_model`` and asks it to store the result as
  ``badnet.nn``.

  :param combined_atk_loader: loader over the poison set plus the clean set
  :param test_loader: loader forwarded to ``do_train_model`` as its second argument
  """
  checkpoint_name = 'badnet.nn'
  do_train_model(combined_atk_loader, test_loader, file_name=checkpoint_name)
python 复制代码
输出结果:
第0次打印损失:1672.834716796875
test acc:  0.6283166666666666
第1次打印损失:1510.174560546875
test acc:  0.7770833333333333
第2次打印损失:1498.032470703125
test acc:  0.7853833333333333
第3次打印损失:1480.6192626953125
test acc:  0.8189333333333333
第4次打印损失:1469.7681884765625
test acc:  0.7994666666666667
第5次打印损失:1449.789306640625
test acc:  0.8490166666666666
第6次打印损失:1421.9805908203125
test acc:  0.9278333333333333
第7次打印损失:1444.3782958984375
test acc:  0.8415
第8次打印损失:1434.840576171875
test acc:  0.86935
第9次打印损失:1432.73681640625
test acc:  0.8751833333333333
  • 从训练结果中可以看到,模型的准确率只有约87%:添加后门后模型精度明显下降,这与后门攻击"不应影响干净样本上的精度"的要求相冲突,需要额外验证。

三、实验

3.1 工具函数

a. 参考文章列表[1]中的BadNets复现,编写误分率(error rate)的计算方法:横坐标x为目标预测值,纵坐标y为目标真实值;先统计(i,j)位置(代码中i为预测值、j为真实标签)的样本个数,再除以真实标签为j的样本总数,得到该类别下的误分率(%)。计算时将i==j的位置置为0,因为正确分类的个数远大于误分类的个数,若保留会使对角线上的百分比过大,导致其他位置的误分率在图中不易观察。

python 复制代码
def handle_predict_list(predict_list:list, targets:torch.Tensor, preds:torch.Tensor) -> list:
  """Add one batch of predictions to the running count matrix.

  For every sample, ``predict_list[pred][target]`` is incremented by one.
  The matrix is updated in place and also returned.

  :param predict_list: square count matrix indexed as ``[predicted][target]``
  :param targets: tensor of labels for the batch
  :param preds: tensor of predicted classes for the batch
  :return: the updated ``predict_list``
  :raises RuntimeError: if ``targets`` and ``preds`` differ in length
  """
  label_values = targets.tolist()
  pred_values = preds.tolist()
  if len(label_values) != len(pred_values):
    raise RuntimeError('目标序列和预测序列长度不匹配')

  for pred_value, label_value in zip(pred_values, label_values):
    predict_list[pred_value][label_value] += 1

  return predict_list

@dec_time.run_time
def get_model_predict(data_loader:DataLoader, model=None):
  """Count, for every (predicted, label) pair, how many samples fall into it.

  Each batch is run through ``model`` on whatever device the model's
  parameters live on, so the function works on CPU-only machines as well as
  with a CUDA model.

  :param data_loader: loader yielding ``(imgs, labels)`` batches
  :param model: network mapping an image batch to per-class scores
  :return: 10x10 list of counts indexed as ``[predicted][label]``
  :raises TypeError: if ``model`` is None
  """
  if model is None:
    raise TypeError('model must be a callable network, got None')

  # Follow the model's own device instead of assuming 'cuda:0'.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else None

  range_size = 10
  predict_list = [[0 for _ in range(range_size)] for _ in range(range_size)]
  for imgs, labels in data_loader:
    if device is not None:
      imgs = imgs.to(device)
    pred = model(imgs).argmax(dim=1)  # predicted class for each sample of the batch
    # Labels are only converted to a Python list downstream, so they need no device move.
    predict_list = handle_predict_list(predict_list, labels, pred)
  return predict_list

def handle_predict(predict_list):
  """Convert a count matrix into per-column misclassification percentages.

  Each off-diagonal entry ``(i, j)`` becomes ``count[i][j] / column_total[j] * 100``.
  Diagonal entries and columns whose total is zero are left at 0; skipping the
  diagonal keeps the (much larger) correct counts from dominating the plot.
  The column totals are printed as a side effect.

  :param predict_list: square matrix of counts
  :return: tuple ``(predict_list, percentages)``; the first item is the input, unchanged
  """
  column_totals = np.sum(predict_list, axis=0)
  print(column_totals)
  side = len(predict_list)
  error_rates = [[0] * side for _ in range(side)]

  for row_idx, row in enumerate(predict_list):
    for col_idx in range(len(predict_list[0])):
      total = column_totals[col_idx]
      if row_idx != col_idx and total != 0:
        error_rates[row_idx][col_idx] = row[col_idx] / total * 100
  return predict_list, error_rates

b.可视化函数

python 复制代码
def plot_imag_cmap(predict_list, predict_percent_list, title=None):
  """Print both matrices and show the percentage matrix as a red heat map.

  :param predict_list: raw count matrix; only printed
  :param predict_percent_list: percentage matrix rendered with ``plt.imshow``
  :param title: optional figure title
  """
  print(f'start:\n{predict_list}')
  print(f'end:\n{predict_percent_list}')

  class_ticks = np.arange(10)  # one tick per class 0..9
  # NOTE(review): imshow draws the first index along the y axis; confirm that
  # the matrix orientation passed in matches the axis labels below.
  plt.imshow(predict_percent_list, cmap='Reds')
  plt.xlabel('Target Labels')
  plt.ylabel('True Labels')
  plt.title(title)
  plt.xticks(class_ticks)
  plt.yticks(class_ticks)
  plt.colorbar()
  plt.show()

3.2 数据评估

python 复制代码
def test_targeted_backdoor(data_loader, model, title=None):
  """Evaluate ``model`` on ``data_loader`` and plot its misclassification rates.

  :param data_loader: loader yielding the batches to evaluate
  :param model: network under test
  :param title: optional title for the resulting figure
  """
  raw_counts = error_rate.get_model_predict(data_loader, model)
  # handle_predict returns (counts, percentages), which are the first two plot arguments.
  visual.plot_imag_cmap(*error_rate.handle_predict(raw_counts), title)

if __name__ == '__main__':
  # The attack data should be the poisoned set: original samples plus tampered ones.
  train_data, test_data = data_handler.get_data_for_MNIST()
  atk_data, data_atk_loader, combine_atk_data, combine_atk_loader = do_generate_backdoor(test_data, clean_data_path='clean_data_test.pkl')

  model_save_path = root_path / 'save'
  badnet_model_save_path = model_save_path / 'badnet.nn'
  if not badnet_model_save_path.exists():
    # A missing model file is "not found"; FileExistsError would signal the opposite.
    raise FileNotFoundError(badnet_model_save_path)

  # NOTE(review): the file holds a whole pickled model; recent PyTorch releases
  # default torch.load to weights_only=True, which rejects such files — confirm
  # against the installed version.
  model = torch.load(badnet_model_save_path)
  # Alternative evaluations kept for reference:
  # test_targeted_backdoor(combine_atk_loader, model, title=f'badnet,test data(1000 Bad Image, 9794 Origin Image, target:i->j %)')
  # test_targeted_backdoor(data_atk_loader, model, title=f'badnet,test data(1000 Bad Image, 0 Origin Image, target:i->j %)')
  test_loader = DataLoader(test_data, batch_size=64)
  test_targeted_backdoor(test_loader, model, title=f'badnet,test data(0 Bad Image, 10000 Origin Image, target:i->j %)')

四、结果分析

如上图所示,首先,在badnet模型的分类结果中,3->2、9->8的误判率较高。此外,上述实验中将目标标签置为0,可以看出所有毒化数据的预测结果都被分类为该值,满足预期目标。部分数据如下:


  • 场景二未看

参考文章

  1. 复现BadNets: Identifying Vulnerabilities in the Machine Learning Model Supply Chain
  2. Pytorch中数据采样方法Sampler
  3. Python读取pkl文件
相关推荐
写点什么呢3 小时前
Pytorch学习12_最大池化的使用
人工智能·pytorch·python·深度学习·学习·pycharm
取个名字真难呐18 小时前
随机置矩阵列为0[矩阵乘法pytorch版]
pytorch·python·矩阵
程序员正茂1 天前
AnaConda下载PyTorch慢的解决办法
人工智能·pytorch
qq_273900231 天前
pytorch torch.full_like函数介绍
人工智能·pytorch·python
大模型铲屎官1 天前
PyTorch 框架实现逻辑回归:从数据预处理到模型训练全流程
人工智能·pytorch·python·深度学习·逻辑回归
Wishell20152 天前
小白学Pytorch
pytorch
昊昊该干饭了2 天前
2.5万字 - 用TensorFlow和PyTorch分别实现五种经典模型
人工智能·pytorch·tensorflow
超自然祈祷2 天前
pyTorch笔记
人工智能·pytorch·笔记·神经网络
qq_273900233 天前
pytorch torch.scatter_reduce函数介绍
人工智能·pytorch·python
CodeJourney.3 天前
PyTorch不同优化器比较
人工智能·pytorch·算法·能源