Github地址:https://github.com/pralab/secml_malware
可以按照库的readme,通过pip安装
因为我需要对库源码进行较大的改动,所以我通过以下方式安装:
bash
# Clone the source
git clone https://github.com/pralab/secml_malware.git
cd secml_malware
# Create and activate a virtual environment
conda create -n secml_malware_local_env python=3.9
conda activate secml_malware_local_env
# Install dependencies
pip install -r requirements.txt
# If installing the ember library fails, download it and install it locally by hand
# (this requires commenting out git+https://github.com/zangobot/ember.git in requirements.txt)
git clone https://github.com/zangobot/ember.git
cd ember
pip install .
# Go back to the secml_malware root; without this the editable install below
# would run inside ember/ and install ember instead of secml-malware
cd ..
# Key step: install secml-malware in "development mode" (what Python imports is the source in your current directory)
pip install -e .
运行攻击示例代码:
python
import numpy as np
from pathlib import Path
from secml.array import CArray
from secml_malware.models.malconv import MalConv
from secml_malware.models.c_classifier_end2end_malware import CClassifierEnd2EndMalware
from secml_malware.attack.blackbox.c_wrapper_phi import CEnd2EndWrapperPhi
from secml_malware.attack.blackbox.c_black_box_padding_evasion import CBlackBoxPaddingEvasionProblem
from secml_malware.attack.blackbox.ga.c_base_genetic_engine import CGeneticAlgorithm
# ===============================
# Load model
# ===============================
print("[*] Loading model...")
# Build the MalConv network and hand it to the end-to-end malware classifier.
model = MalConv()
clf = CClassifierEnd2EndMalware(model)
# Presumably loads the pretrained weights bundled with secml_malware — confirm
# against the library documentation.
clf.load_pretrained_model()
# Rebind the classifier's private `_model` to the result of its `.cpu()` call
# (presumably to keep inference on the CPU — TODO confirm).
clf._model = clf._model.cpu()
# `net` is the object the rest of the script queries (`net.predict`) and
# passes to the black-box attack problem.
net = CEnd2EndWrapperPhi(clf)
# ===============================
# Save adversarial sample
# ===============================
def save_adv(x_adv, path, padding_value=256):
    """Write an adversarial sample to ``path`` as raw bytes.

    Args:
        x_adv: Object exposing ``tondarray()`` that returns a NumPy array of
            byte values (the script passes a ``CArray`` row).
        path: Destination file; opened in binary write mode and overwritten.
        padding_value: Entries equal to this value are treated as padding and
            removed before writing. Defaults to 256, the value the original
            script stripped.
    """
    arr = x_adv.tondarray().flatten()
    # Drop padding entries so they do not end up in the output file.
    arr = arr[arr != padding_value]
    # Clamp to the valid byte range before narrowing to uint8, so out-of-range
    # values saturate instead of wrapping around.
    arr = np.clip(arr, 0, 255).astype(np.uint8)
    with open(path, "wb") as f:
        f.write(arr.tobytes())
# ===============================
# Paths
# ===============================
# Directory the input samples are read from.
samples_dir = Path("/data/xjw/workspace/dataset/sorel-20m/benign/")
# Output root and the sub-directory that receives the adversarial files.
out_dir = Path("results")
adv_dir = out_dir.joinpath("adv")
# Create the parent first, then the child; directories that already exist are kept.
for directory in (out_dir, adv_dir):
    directory.mkdir(exist_ok=True)
# ===============================
# Attack loop
# ===============================
# Run the padding attack over the entries of `samples_dir` and save each
# resulting adversarial sample under `adv_dir`.
for i, sample_path in enumerate(samples_dir.iterdir()):
    # Stop after the first 100 directory entries (non-files and skipped
    # samples count towards this limit, as `i` comes from enumerate).
    if i >= 100:
        break
    if not sample_path.is_file():
        continue
    print(f"\n[*] {sample_path.name}")
    # Read the file as a row vector of byte values.
    code = sample_path.read_bytes()
    x = CArray(np.frombuffer(code, dtype=np.uint8)).atleast_2d()
    # Prediction on the unmodified sample; column 1 is read as the malware
    # confidence (see the "already malware" message below).
    _, conf = net.predict(x, True)
    orig_conf = conf[0, 1].item()
    print(f"    Original confidence: {orig_conf:.4f}")
    # Only attack samples currently scored below 0.5.
    # NOTE(review): this skips inputs the model already flags as malware, so
    # the evasion attack runs only on inputs scored as benign — confirm this
    # is the intended target set.
    if orig_conf >= 0.5:
        print("    Skip (already malware)")
        continue
    # Build the attack: a fresh problem and genetic engine for every sample.
    problem = CBlackBoxPaddingEvasionProblem(
        net,
        how_many_padding_bytes=20000,
        population_size=50,
        iterations=100,
    )
    ga = CGeneticAlgorithm(problem)
    # Run the attack; only the adversarial dataset (third return value) is used.
    _, _, adv_ds, _ = ga.run(x, CArray([orig_conf]))
    final_conf = ga.confidences_[-1]
    print(f"    Final confidence: {final_conf:.4f}")
    # Save the first (only) adversarial row next to the other results.
    adv_path = adv_dir / f"{sample_path.stem}_adv.exe"
    save_adv(adv_ds.X[0, :], adv_path)
    print(f"    Saved → {adv_path.name}")