- Paper title: BadNets: Identifying Vulnerabilities in the Machine Learning Model Supply Chain (also published as BadNets: Evaluating Backdooring Attacks on Deep Neural Networks)
- Source: arXiv / IEEE Access
- Citations:
1. Baseline Model
- Model definition
python
import torch.nn as nn

class BaseLineModel(nn.Module):
    def __init__(self) -> None:
        super(BaseLineModel, self).__init__()
        # 28x28 input -> 24x24 after the 5x5 conv -> 12x12 after pooling
        self.cov1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(5, 5), padding=0, stride=1),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2)
        )
        # 12x12 -> 8x8 after the 5x5 conv -> 4x4 after pooling
        self.cov2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(5, 5), padding=0, stride=1),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2)
        )
        self.fc1 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 4 * 4, 512),
            nn.ReLU()
        )
        # Note: Sigmoid before CrossEntropyLoss is unusual -- the loss expects raw
        # logits, and squashing them to (0, 1) slows convergence (likely one cause
        # of the ~87% accuracy reported below).
        self.fc2 = nn.Sequential(
            nn.Linear(512, 10),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.cov1(x)
        x = self.cov2(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return x
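A quick way to confirm the 32 * 4 * 4 input size of fc1: a 28x28 MNIST image shrinks to 24x24 after the first 5x5 convolution, 12x12 after pooling, 8x8 after the second convolution, and 4x4 after the final pooling. A minimal shape check (a sketch, not part of the original notes):
python
import torch

# Feed one dummy MNIST-sized image through the network and check the output shape.
model = BaseLineModel()
dummy = torch.randn(1, 1, 28, 28)  # (batch, channel, height, width)
print(model(dummy).shape)  # expected: torch.Size([1, 10])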
- Training the model
python
import os
import torch

def do_train_model(train_loader, test_loader, file_name='baseline.nn'):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = BaseLineModel().to(device)
    loss_func = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    cnt_epochs = 10
    for i in range(cnt_epochs):
        model.train()
        for imgs, labels in train_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)
            outputs = model(imgs)
            loss = loss_func(outputs, labels)
            optimizer.zero_grad()  # clear the gradients held by the optimizer
            loss.backward()
            optimizer.step()
        # Evaluate on the held-out set after each epoch
        model.eval()
        total_loss = 0.0
        correct, total = 0, 0
        with torch.no_grad():  # no autograd needed for evaluation
            for imgs, labels in test_loader:
                imgs = imgs.to(device)
                labels = labels.to(device)
                outputs = model(imgs)
                total_loss += loss_func(outputs, labels).item()
                correct += (outputs.argmax(dim=1) == labels).sum().item()
                total += labels.numel()
        print('epoch {} test loss: {}'.format(i, total_loss))
        print('test acc:', correct / total)
    torch.save(model, os.path.join(os.path.abspath(os.path.dirname(__file__)), 'save', file_name))
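The notes later rely on a data_handler.get_data_for_MNIST helper that is not shown; a minimal sketch of building equivalent loaders with torchvision (names and batch size here are illustrative):
python
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms

# ToTensor scales pixel values to [0, 1], matching the 0.9922 trigger intensity used later.
transform = transforms.ToTensor()
train_data = torchvision.datasets.MNIST('data', train=True, download=True, transform=transform)
test_data = torchvision.datasets.MNIST('data', train=False, download=True, transform=transform)
do_train_model(DataLoader(train_data, batch_size=64, shuffle=True),
               DataLoader(test_data, batch_size=64))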
2. BadNet Model
2.1 Trigger Generation Strategy
python
size = 28  # MNIST image side length (the trigger indices below assume this)

def get_single_pixel(imgs):
    """Single-pixel trigger near the bottom-right corner (0.9922 ~ 253/255)."""
    imgs[0][size - 2][size - 2] = 0.9922
    return imgs

def get_pattern(imgs):
    """Four-pixel pattern trigger in the bottom-right corner."""
    imgs[0][size - 2][size - 2] = 0.9922
    imgs[0][size - 1][size - 3] = 0.9922
    imgs[0][size - 3][size - 1] = 0.9922
    imgs[0][size - 1][size - 1] = 0.9922
    return imgs
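To verify where the trigger lands, one can apply get_pattern to a blank image and plot it (a sketch; matplotlib is assumed):
python
import torch
import matplotlib.pyplot as plt

blank = torch.zeros(1, size, size)  # one blank (channel, H, W) image, size = 28 as above
plt.imshow(get_pattern(blank)[0], cmap='gray')  # four bright pixels in the bottom-right corner
plt.show()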
2.2 Custom Dataset
To train the BadNet model later, the training set must be the poisoned dataset combined with the original dataset, so we define the following custom Dataset class:
python
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self, input_data, output_data, transform=None):
        self.input_data = input_data
        self.output_data = output_data
        self.transform = transform

    def __len__(self):
        return len(self.input_data)

    def __getitem__(self, idx):
        input_sample = self.input_data[idx]
        output_sample = self.output_data[idx]
        if self.transform:
            input_sample = self.transform(input_sample)
        return input_sample, output_sample
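Wrapped in a DataLoader, the class behaves like any map-style dataset (an illustrative check):
python
import torch
from torch.utils.data import DataLoader

ds = CustomDataset(input_data=[torch.zeros(1, 28, 28)] * 4, output_data=[0] * 4)
for imgs, labels in DataLoader(ds, batch_size=2):
    print(imgs.shape, labels)  # torch.Size([2, 1, 28, 28]) tensor([0, 0])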
2.3 Clean Dataset
When generating the backdoor data, we sample from the test set and keep every sample whose prediction matches its label (pred == target) as the clean data set.
The candidates to poison are then drawn at random with SubsetRandomSampler (sketched after the code below); whether this approach is correct has not yet been verified.
python
import os
import pickle
import torch
from torch.utils.data import DataLoader

# file_path: directory where the generated datasets are stored (module-level setting)

def clean_data(data, model):
    """Collect the samples whose prediction matches the label, for later random sampling."""
    if not os.path.exists(file_path):
        raise FileNotFoundError(file_path)
    data_loader = DataLoader(data, batch_size=1)  # batch size 1 to match the model's input handling
    data_clean_features = []
    data_clean_vals = []
    model.eval()
    with torch.no_grad():
        for imgs, labels in data_loader:
            pred = int(model(imgs).argmax(dim=1))
            if pred == int(labels):
                data_clean_features.append(imgs)
                data_clean_vals.append(labels)
    # save to disk
    custom_dataset = CustomDataset(data_clean_features, data_clean_vals)
    with open(os.path.join(file_path, 'clean_data.pkl'), 'wb') as f:
        pickle.dump({'data': custom_dataset}, f)
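The random sampling step mentioned above is not shown in clean_data; a minimal sketch of drawing the candidates to poison with SubsetRandomSampler (num_samples is illustrative):
python
import random
from torch.utils.data import DataLoader, SubsetRandomSampler

num_samples = 1000  # how many clean samples to poison (illustrative)
indices = random.sample(range(len(custom_dataset)), num_samples)
sample_loader = DataLoader(custom_dataset, batch_size=1,
                           sampler=SubsetRandomSampler(indices))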
2.4 Poison Dataset
Sample num_samples examples from the clean dataset and poison them:
python
import random

def poison_data(data_loader: DataLoader, is_save=False):
    """
    :param data_loader: the sampled examples to be poisoned
    Attack strategy: single-target (all poisoned labels -> 0); an
    all-to-all variant (label i -> i + 1) is left commented out below.
    """
    # reuse get_single_pixel / get_pattern from Section 2.1
    data_atk_features = []
    data_atk_vals = []
    for imgs, labels in data_loader:
        # clean_data stored (1, 1, 28, 28) batches and this loader adds another
        # batch dimension, so strip the two leading dims to get a (1, 28, 28) image
        imgs = imgs[0][0]
        # trigger selection: single pixel or pattern, chosen at random
        p = random.randint(0, 1)
        imgs = get_single_pixel(imgs) if p == 1 else get_pattern(imgs)
        data_atk_features.append(imgs)
        # label selection
        # all-to-all attack: change every label i to i + 1
        # label = int(labels.numpy())
        # label = 0 if label == 9 else label + 1
        # single-target poisoning
        label = 0
        data_atk_vals.append(torch.tensor(label))
    data_atk = CustomDataset(data_atk_features, data_atk_vals)
    if is_save:
        if not os.path.exists(file_path):
            raise FileNotFoundError(file_path)
        with open(os.path.join(file_path, 'poison_data.pkl'), 'wb') as f:
            pickle.dump({'data': data_atk}, f)
    return data_atk
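The combined_atk_loader used for retraining (poison set + clean set) can then be built with ConcatDataset; a sketch, assuming both datasets yield samples of the same shape (clean_dataset and poisoned_dataset are illustrative names):
python
from torch.utils.data import ConcatDataset, DataLoader

# Mix the poisoned samples back into the clean data for retraining.
combined = ConcatDataset([clean_dataset, poisoned_dataset])
combined_atk_loader = DataLoader(combined, batch_size=64, shuffle=True)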
2.5 Model Retraining
python
def re_train_model(combined_atk_loader: DataLoader, test_loader: DataLoader):
    """Retrain the model on data that includes the poisoned samples.
    :param combined_atk_loader: poison set + clean set
    """
    do_train_model(combined_atk_loader, test_loader, file_name='badnet.nn')
Output:
epoch 0 test loss: 1672.834716796875
test acc: 0.6283166666666666
epoch 1 test loss: 1510.174560546875
test acc: 0.7770833333333333
epoch 2 test loss: 1498.032470703125
test acc: 0.7853833333333333
epoch 3 test loss: 1480.6192626953125
test acc: 0.8189333333333333
epoch 4 test loss: 1469.7681884765625
test acc: 0.7994666666666667
epoch 5 test loss: 1449.789306640625
test acc: 0.8490166666666666
epoch 6 test loss: 1421.9805908203125
test acc: 0.9278333333333333
epoch 7 test loss: 1444.3782958984375
test acc: 0.8415
epoch 8 test loss: 1434.840576171875
test acc: 0.86935
epoch 9 test loss: 1432.73681640625
test acc: 0.8751833333333333
- The log shows the retrained model reaching only about 87% test accuracy. A backdoored model is supposed to retain its accuracy on clean inputs, so this drop conflicts with the definition of a backdoor and needs further verification. One likely confound: the network applies Sigmoid before CrossEntropyLoss, which expects raw logits, and that alone can depress accuracy regardless of the poisoning. A helper for checking clean accuracy is sketched below.
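A minimal sketch for that verification: compute plain top-1 accuracy of baseline.nn and badnet.nn on the same clean test loader and compare (the helper name is illustrative):
python
import torch

def clean_accuracy(model, data_loader, device='cpu'):
    """Top-1 accuracy on clean data, for comparing the baseline and badnet models."""
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for imgs, labels in data_loader:
            preds = model(imgs.to(device)).argmax(dim=1)
            correct += (preds == labels.to(device)).sum().item()
            total += labels.numel()
    return correct / total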
3. Experiments
3.1 Utility Functions
a. Following the reference article [1] on reproducing BadNets, we implement an error-rate computation. The count matrix indexes predicted labels along one axis and true labels along the other (in the code below, predict_list[pred][true]); we count the samples falling in each cell (i, j) and then compute the misclassification rate within each true class. Cells with i == j are set to 0, because correct classifications far outnumber misclassifications and would otherwise dominate the percentages, making the error rates elsewhere hard to read.
python
import numpy as np
import torch
from torch.utils.data import DataLoader

def handle_predict_list(predict_list: list, targets: torch.Tensor, preds: torch.Tensor) -> list:
    """Accumulate counts: predict_list[pred][true] += 1 for every sample in the batch."""
    targets = targets.tolist()
    preds = preds.tolist()
    if len(targets) != len(preds):
        raise RuntimeError('target and prediction sequences have different lengths')
    cur = 0
    while cur < len(targets):
        predict_list[preds[cur]][targets[cur]] += 1
        cur += 1
    return predict_list

@dec_time.run_time  # timing decorator from the author's dec_time module
def get_model_predict(data_loader: DataLoader, model=None):
    """Collect the per-class prediction counts used for the error-rate computation.
    :param data_loader: data loader to evaluate
    :param model: model under test
    """
    range_size = 10
    predict_list = [[0 for _ in range(range_size)] for _ in range(range_size)]
    for imgs, labels in data_loader:
        imgs = imgs.to('cuda:0')
        labels = labels.to('cuda:0')
        outs = model(imgs)
        pred = outs.argmax(dim=1)  # predicted class for each sample in the batch
        predict_list = handle_predict_list(predict_list, labels, pred)
    return predict_list

def handle_predict(predict_list):
    """Convert the raw counts into per-class error-rate percentages."""
    sum_list = np.sum(predict_list, axis=0)  # total number of samples per true class
    print(sum_list)
    range_size = len(predict_list)
    predict_percent_list = [[0 for _ in range(range_size)] for _ in range(range_size)]  # error rate at each (i, j)
    for i in range(len(predict_list)):
        for j in range(len(predict_list[0])):
            # Skip the diagonal: correct classifications far outnumber errors, and the
            # resulting large values would make the off-diagonal error rates unreadable.
            if i == j or sum_list[j] == 0:
                continue
            predict_percent_list[i][j] = predict_list[i][j] / sum_list[j] * 100
    return predict_list, predict_percent_list
b. Visualization function
python
import matplotlib.pyplot as plt

def plot_imag_cmap(predict_list, predict_percent_list, title=None):
    """Plot the distribution of predicted labels against true labels."""
    print(f'counts:\n{predict_list}')
    print(f'percentages:\n{predict_percent_list}')
    plt.imshow(predict_percent_list, cmap='Reds')
    # predict_percent_list[i][j] holds predicted label i (rows) vs. true label j (columns)
    plt.ylabel('Predicted Labels')
    plt.xlabel('True Labels')
    plt.title(title)
    plt.xticks(np.arange(10))
    plt.yticks(np.arange(10))
    plt.colorbar()
    plt.show()
3.2 Evaluation
python
def test_targeted_backdoor(data_loader, model, title=None):
    predict_list = error_rate.get_model_predict(data_loader, model)  # prediction counts
    predict_list, predict_percent_list = error_rate.handle_predict(predict_list)
    visual.plot_imag_cmap(predict_list, predict_percent_list, title)

if __name__ == '__main__':
    # data_handler, do_generate_backdoor, error_rate, visual and root_path come from
    # the author's project modules, which are not shown in these notes.
    # Build the poisoned evaluation data (original data + dirty data).
    train_data, test_data = data_handler.get_data_for_MNIST()
    atk_data, data_atk_loader, combine_atk_data, combine_atk_loader = do_generate_backdoor(test_data, clean_data_path='clean_data_test.pkl')
    model_save_path = Path.joinpath(root_path, 'save')
    badnet_model_save_path = Path.joinpath(model_save_path, 'badnet.nn')
    if not badnet_model_save_path.exists():
        raise FileNotFoundError(badnet_model_save_path)
    model = torch.load(badnet_model_save_path)
    # test_targeted_backdoor(combine_atk_loader, model, title='badnet, test data (1000 Bad Image, 9794 Origin Image, target:i->j %)')
    # test_targeted_backdoor(data_atk_loader, model, title='badnet, test data (1000 Bad Image, 0 Origin Image, target:i->j %)')
    test_loader = DataLoader(test_data, batch_size=64)
    test_targeted_backdoor(test_loader, model, title='badnet, test data (0 Bad Image, 10000 Origin Image, target:i->j %)')
4. Results Analysis
As the figure above shows, under the badnet model the misclassification rates for 3->2 and 9->8 are relatively high. Moreover, since the target label in this experiment was set to 0, all poisoned samples are predicted as that value, which meets the expected goal; a sketch for checking this numerically follows at the end of the section. Some of the data:
- Scenario 2: not yet examined
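The claim that every poisoned sample lands on the target can also be checked directly; a sketch, assuming data_atk_loader yields only poisoned samples and the attack target is 0:
python
import torch

def attack_success_rate(data_loader, model, target=0, device='cpu'):
    """Fraction of poisoned samples classified as the attack target."""
    model.eval()
    hit, total = 0, 0
    with torch.no_grad():
        for imgs, labels in data_loader:
            preds = model(imgs.to(device)).argmax(dim=1)
            hit += (preds == target).sum().item()
            total += preds.numel()
    return hit / total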