文章目录
- [1. CAN 基础](<#1. CAN 基础>)
  - [1.1. 环境部署](<#1.1. 环境部署>)
  - [1.2. 配置虚拟CAN接口](<#1.2. 配置虚拟CAN接口>)
  - [1.3. CAN demo(old)](<#1.3. CAN demo(old)>)
    - [1.3.1. 发送](<#1.3.1. 发送>)
    - [1.3.2. 接收](<#1.3.2. 接收>)
  - [1.4. CAN demo(推荐)](<#1.4. CAN demo(推荐)>)
    - [1.4.1. 环境部署](<#1.4.1. 环境部署>)
    - [1.4.1. 发送](<#1.4.1. 发送>)
    - [1.4.2. 接收](<#1.4.2. 接收>)
- [2. 模型训练](<#2. 模型训练>)
  - [2.1. 训练 demo 1](<#2.1. 训练 demo 1>)
    - [2.1.1. 模型训练](<#2.1.1. 模型训练>)
    - [2.1.2. 深度学习](<#2.1.2. 深度学习>)
    - [2.1.3. 检测脚本(接收)](<#2.1.3. 检测脚本(接收)>)
    - [2.1.4. 攻击脚本(发送)](<#2.1.4. 攻击脚本(发送)>)
  - [2.2. 训练 demo 2](<#2.2. 训练 demo 2>)
    - [2.2.1. 模型训练(6个文件)](<#2.2.1. 模型训练(6个文件)>)
    - [2.2.2. 深度学习(3个文件)](<#2.2.2. 深度学习(3个文件)>)
    - [2.2.3. 检测脚本(接收)](<#2.2.3. 检测脚本(接收)>)
    - [2.2.4. 攻击脚本(发送)](<#2.2.4. 攻击脚本(发送)>)
1. CAN 基础
1.1. 环境部署
python
apt install python3-pip
pip install scapy -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install python-can
1.2. 配置虚拟CAN接口
bash
# 攻击机终端先执行:
# add the interface:
sudo ip link add dev can0 type vcan
# Delete the interface:
sudo ip link delete can0
# Bring the CAN interface up (both forms are equivalent):
sudo ip link set up can0
sudo ip link set can0 up
# Bring the CAN interface down:
sudo ip link set down can0
# Verify the interface is created and up
ip link show can0
# Check all CAN interfaces:
ip link show type can
ip link show type vcan
# Set CAN bitrate (for real CAN interfaces, not vcan):
sudo ip link set can0 type can bitrate 500000
1.3. CAN demo(old)
1.3.1. 发送
- demo1
python
from scapy.all import *
# from scapy.contrib.can import CAN
from scapy.layers.can import CAN
import time  # bug fix: time.sleep() is used below but `time` was never imported

# Create a CAN packet (standard 11-bit identifier, 8 data bytes)
can_packet = CAN(identifier=0x123, data=b'\x01\x02\x03\x04\x05\x06\x07\x08')
# Send the packet on the can0 interface
sendp(can_packet, iface="can0")

# Or send multiple packets with increasing IDs
for i in range(5):
    packet = CAN(identifier=0x100 + i, data=bytes([i, i + 1, i + 2]))
    sendp(packet, iface="can0")
    time.sleep(0.1)  # pace the frames 100 ms apart
- demo2
python
#!/usr/bin/env python3
from scapy.all import *
# from scapy.contrib.can import CAN
from scapy.layers.can import CAN
import time


def test_can_communication():
    """Send three test frames on can0, logging each one as it goes out."""
    print("Testing CAN communication on can0...")
    for idx in range(3):
        frame_id = 0x100 + idx
        payload = bytes([0xDE, 0xAD, 0xBE, 0xEF, idx])
        frame = CAN(identifier=frame_id, data=payload)
        sendp(frame, iface="can0", verbose=False)
        print(f"Sent CAN packet: ID=0x{frame_id:03X}, Data={payload.hex()}")
        time.sleep(0.5)
    print("Test completed!")


if __name__ == "__main__":
    test_can_communication()
1.3.2. 接收
- demo1(不可用)
python
from scapy.all import *

# Capture five CAN frames from can0 and dump each one.
print("Sniffing CAN traffic on can0...")
captured = sniff(iface="can0", count=5, filter="can")
for frame in captured:
    frame.show()
- demo2 (可用)
python
from scapy.all import *
#from scapy.contrib.can import CAN
from scapy.layers.can import CAN
import time


def can_packet_handler(packet):
    """Extract simple timing/length features from one sniffed CAN frame.

    Returns a feature dict for frames carrying a Raw payload, otherwise None.
    """
    if packet.haslayer(Raw):
        payload_hex = packet[Raw].load.hex()  # raw bytes as a hex string
        # Automatic time-series features only — no hand-written signatures.
        return {
            "timestamp": time.time(),
            "data_length": len(payload_hex),
            "data_hex": payload_hex,
        }


def continuous_can_sniff():
    """Sniff can0 indefinitely, feeding every frame to can_packet_handler."""
    print("Continuous CAN sniffing started. Press Ctrl+C to stop.")
    try:
        sniff(iface="can0", prn=can_packet_handler, store=0)
    except KeyboardInterrupt:
        print("\nSniffing stopped.")


if __name__ == "__main__":
    continuous_can_sniff()
1.4. CAN demo(推荐)
1.4.1. 环境部署
bash
pip install python-can -i https://pypi.tuna.tsinghua.edu.cn/simple
1.4.1. 发送
python
import can
import time
import random


def send_can_message():
    """Open can0 via socketcan and inject one random frame (simulated attack)."""
    # NOTE: choose the interface type that matches your hardware.
    with can.Bus(interface='socketcan', channel='can0', bitrate=250000) as bus:
        # Random standard CAN ID followed by 1-8 random payload bytes.
        rand_id = random.randint(0x000, 0x7FF)
        payload = bytes([random.randint(0x00, 0xFF) for _ in range(random.randint(1, 8))])
        message = can.Message(
            arbitration_id=rand_id,     # CAN ID
            data=payload,               # at most 8 bytes for a classic frame
            is_extended_id=False,       # standard (11-bit) frame
        )
        try:
            bus.send(message)
            print(f"发送消息: ID=0x{message.arbitration_id:04x}, "
                  f"数据={message.data.hex()}, "
                  f"时间戳={message.timestamp}")
        except can.CanError:
            print("消息发送失败")


if __name__ == "__main__":
    send_can_message()
运行结果:
bash
$ python3 send.py
发送消息: ID=0x01a5, 数据=94890f684e, 时间戳=0.0
1.4.2. 接收
python
import can


def receive_can_messages():
    """Listen on can0 and print every received frame until Ctrl+C."""
    with can.Bus(interface='socketcan', channel='can0', bitrate=250000) as bus:
        print("开始监听CAN总线...按Ctrl+C停止")
        try:
            while True:
                message = bus.recv(timeout=1.0)  # 1-second receive timeout
                if message is not None:
                    print(f"收到消息: ID=0x{message.arbitration_id:04x}, "
                          f"数据={message.data.hex()}, "
                          f"时间戳={message.timestamp}")
        except KeyboardInterrupt:
            print("\n停止监听")


if __name__ == "__main__":
    receive_can_messages()
运行结果:
bash
# python3 recv.py
开始监听CAN总线...按Ctrl+C停止
收到消息: ID=0x01a5, 数据=94890f684e, 时间戳=1764292410.186914
2. 模型训练
2.1. 训练 demo 1
2.1.1. 模型训练
train_ae_model.py
python
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# ---------------------- 无监督自编码器模型 ----------------------
class CAN_AE(nn.Module):
    """Small symmetric autoencoder for 2-D CAN features (time delta, DLC)."""

    def __init__(self, input_dim=2):
        super(CAN_AE, self).__init__()
        # Encoder: input_dim -> 16 -> 8 -> 4-dimensional latent space.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 4),
        )
        # Decoder mirrors the encoder; Sigmoid keeps outputs in [0, 1].
        self.decoder = nn.Sequential(
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, input_dim),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))
# ---------------------- Data loading & preprocessing ----------------------
# Car-Hacking Dataset: https://ocslab.hksecurity.net/Datasets/car-hacking-dataset
# Only benign traffic (Normal.csv) is used so the AE learns "normal" behaviour.
normal_data = pd.read_csv("dataset/Normal.csv")

# Two automatic features: inter-frame time delta and payload length — no
# manual feature engineering required.
normal_data["Timestamp"] = pd.to_datetime(normal_data["Timestamp"])
normal_data["time_diff"] = normal_data["Timestamp"].diff().dt.total_seconds().fillna(0)

# Feature matrix: (time delta, DLC). DLC is the CAN data-length field.
X = normal_data[["time_diff", "DLC"]].values

# Standardize to zero mean / unit variance.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train/test split.
X_train, X_test = train_test_split(X_scaled, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# ---------------------- Model training ----------------------
model = CAN_AE(input_dim=2)
criterion = nn.MSELoss()  # reconstruction-error loss
optimizer = optim.Adam(model.parameters(), lr=1e-3)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
X_test_tensor = X_test_tensor.to(device)  # move once instead of every epoch

epochs = 50
batch_size = 32
train_loader = torch.utils.data.DataLoader(X_train_tensor, batch_size=batch_size, shuffle=True)

print("开始训练自编码器模型...")
for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        outputs = model(batch)
        loss = criterion(outputs, batch)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * batch.size(0)  # sum of per-sample losses

    # Held-out loss for this epoch.
    model.eval()
    with torch.no_grad():
        test_outputs = model(X_test_tensor)
        test_loss = criterion(test_outputs, X_test_tensor).item()

    # Bug fix: train_loss is accumulated per sample, so average over the
    # number of samples (len(dataset)), not the number of batches.
    print(f"Epoch {epoch+1}/{epochs} | 训练损失:{train_loss/len(train_loader.dataset):.6f} | 测试损失:{test_loss:.6f}")

# ---------------------- Save model & artifacts ----------------------
# NOTE(review): weights are saved as .pkl here, but the detector script loads
# "can_ae_model.pth" — confirm the intended checkpoint filename/path.
torch.save(model.state_dict(), "model/can_ae_model.pkl")
# The scaler is needed again at inference time.
import joblib
joblib.dump(scaler, "scaler.pkl")

# Anomaly threshold: 95th percentile of reconstruction error on normal traffic.
with torch.no_grad():
    X_normal_tensor = torch.tensor(X_scaled, dtype=torch.float32).to(device)
    normal_outputs = model(X_normal_tensor)
    normal_mse = torch.mean((X_normal_tensor - normal_outputs) ** 2, dim=1).cpu().numpy()
ANOMALY_THRESHOLD = np.percentile(normal_mse, 95)
print(f"训练完成!异常阈值:{ANOMALY_THRESHOLD:.6f}")
2.1.2. 深度学习
deep_learning_detector.py
python
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from flask import Flask, request, jsonify
from sklearn.preprocessing import StandardScaler
# Flask app exposing the detection endpoint to the collector machine.
app = Flask(__name__)
# ---------------------- Unsupervised autoencoder model ----------------------
class CAN_AE(nn.Module):
    """Autoencoder over the two automatic CAN features (time delta, length)."""

    def __init__(self, input_dim=2):
        super(CAN_AE, self).__init__()
        # Encoder compresses input_dim -> 16 -> 8 -> 4 latent features.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 16), nn.ReLU(),
            nn.Linear(16, 8), nn.ReLU(),
            nn.Linear(8, 4),
        )
        # Decoder expands back to input_dim; Sigmoid bounds outputs to [0, 1].
        self.decoder = nn.Sequential(
            nn.Linear(4, 8), nn.ReLU(),
            nn.Linear(8, 16), nn.ReLU(),
            nn.Linear(16, input_dim), nn.Sigmoid(),
        )

    def forward(self, x):
        latent = self.encoder(x)
        return self.decoder(latent)
# ---------------------- Model loading & initialization ----------------------
# NOTE(review): this scaler is created but never fitted, so scaler.transform()
# inside detect_attack() will raise NotFittedError on the first request. Load
# the scaler persisted by the training script (e.g. joblib.load("scaler.pkl"))
# instead — confirm the artifact path.
scaler = StandardScaler()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Restore pretrained autoencoder weights (produced by the training script).
model = CAN_AE(input_dim=2)
model.load_state_dict(torch.load("can_ae_model.pth", map_location=device))
model.eval()  # inference mode
model.to(device)

# Anomaly threshold derived from normal-traffic reconstruction error.
ANOMALY_THRESHOLD = 0.05


@app.route("/detect", methods=["POST"])
def detect_attack():
    """Score the collector's feature payload and return the verdict as JSON.

    Bug fix: time.time() was used here without `time` being imported at the
    top of the file; the import has been added to the module imports.
    """
    data = request.json
    # Input features: (age of the sample in seconds, payload length).
    input_features = np.array([
        [time.time() - data["timestamp"], data["data_length"]]
    ])
    input_scaled = scaler.transform(input_features)
    with torch.no_grad():
        input_tensor = torch.tensor(input_scaled, dtype=torch.float32).to(device)
        output_tensor = model(input_tensor)
        # Reconstruction error (MSE) drives the decision.
        mse = torch.mean((input_tensor - output_tensor) ** 2).item()
    is_attack = mse > ANOMALY_THRESHOLD
    confidence = 1.0 - (mse / (ANOMALY_THRESHOLD * 2)) if is_attack else mse / (ANOMALY_THRESHOLD * 2)
    confidence = max(0.5, min(1.0, confidence))  # clamp confidence to [0.5, 1.0]
    return jsonify({
        "is_attack": is_attack,
        "confidence": confidence,
        "reconstruction_error": mse
    })


if __name__ == "__main__":
    # Start the Flask service (bind all interfaces, port 5000).
    app.run(host="0.0.0.0", port=5000, debug=False)
2.1.3. 检测脚本(接收)
can_traffic_collector.py(检测机)
python
from scapy.all import sniff, Raw
import requests
import time
import json

# Address of the deep-learning engine (Flask service) on the host machine.
DETECT_SERVER_URL = "http://192.168.17.101:5000/detect"


def process_can_packet(packet):
    """Turn a sniffed CAN frame into a timing/length feature dict.

    Returns None for frames without a Raw payload.
    """
    if packet.haslayer(Raw):
        payload_hex = packet[Raw].load.hex()
        # Automatic features only (timestamp, length) — no manual signatures.
        return {
            "timestamp": time.time(),
            "data_length": len(payload_hex),
            "data_hex": payload_hex,
        }


def send_to_detect_server(features):
    """Forward extracted features to the host detection service."""
    print(features)
    # (The HTTP POST below is currently disabled; features are only printed.)
    #try:
    #    response = requests.post(DETECT_SERVER_URL, json=features, timeout=1)
    #    if response.status_code == 200:
    #        result = response.json()
    #        print(f"检测结果:{'攻击' if result['is_attack'] else '正常'} | 置信度:{result['confidence']:.2f}")
    #except Exception as e:
    #    print(f"发送失败:{e}")


def start_collecting(iface="can0"):
    """Sniff CAN traffic on *iface* and ship features to the detector.

    Prepare the virtual interface first:
        sudo ip link add dev can0 type vcan && sudo ip link set can0 up
    """
    print(f"开始采集CAN流量(接口:{iface})...")
    sniff(iface=iface, prn=lambda pkt: send_to_detect_server(process_can_packet(pkt)), store=0)


if __name__ == "__main__":
    start_collecting()
2.1.4. 攻击脚本(发送)
python
#from scapy.all import sendp, Ether, CAN
from scapy.layers.can import CAN
#from scapy.layers.l2 import Ether
from scapy.all import sendp, Ether
## or
#from scapy.all import Ether
import time
import random


def send_attack_packets(iface="can0", duration=60):
    """Flood *iface* with random CAN frames for *duration* seconds.

    Prepare the virtual interface first:
        sudo ip link add dev can0 type vcan && sudo ip link set can0 up
    """
    print(f"开始发送攻击流量(持续{duration}秒)...")
    t0 = time.time()
    while time.time() - t0 < duration:
        # Random standard CAN ID plus a random 1-8 byte payload.
        can_id = random.randint(0x000, 0x7FF)
        attack_data = bytes([random.randint(0x00, 0xFF) for _ in range(random.randint(1, 8))])
        # NOTE(review): the CAN layer is wrapped in an Ethernet broadcast
        # frame here — confirm the receiving side expects that on vcan.
        frame = Ether(dst="ff:ff:ff:ff:ff:ff") / CAN(identifier=can_id, data=attack_data)
        sendp(frame, iface=iface, verbose=0)
        time.sleep(0.01)  # one frame every 10 ms (faster than normal traffic)
    print("攻击流量发送完成!")


if __name__ == "__main__":
    send_attack_packets(duration=60)
2.2. 训练 demo 2
2.2.1. 模型训练(6个文件)
- 数据预处理
utils/preprocessor.py
python
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import joblib
### 数据预处理
class CANDataPreprocessor:
    """Feature extraction + standardization for raw CAN log DataFrames."""

    def __init__(self):
        self.scaler = StandardScaler()
        # Names of the feature columns produced by the last extract_features().
        self.feature_names = []

    def extract_features(self, df):
        """Derive model features from a CAN log DataFrame.

        Columns 'ID', 'Timestamp' and 'DATA' are each optional; every feature
        group is only added when its source column is present. Returns
        df[features] with NaNs replaced by 0. Mutates *df* in place.
        """
        features = []
        # 1. CAN-ID frequency: how often each ID occurs in this log.
        if 'ID' in df.columns:
            id_counts = df['ID'].value_counts()
            df['id_freq'] = df['ID'].map(id_counts)
            # df['id_freq'] = df['ID'].map(id_counts).fillna(0)
            features.append('id_freq')
        # 2. Timing: inter-frame arrival delta in seconds.
        if 'Timestamp' in df.columns:
            df['Timestamp'] = pd.to_datetime(df['Timestamp'])
            df['time_diff'] = df['Timestamp'].diff().dt.total_seconds().fillna(0)
            features.extend(['time_diff'])
            # # Rolling-window statistics
            # df['msg_rate_1s'] = df['Timestamp'].rolling('1s').count()
            # features.append('msg_rate_1s')
        # 3. Payload content: length and Shannon entropy of the hex string.
        if 'DATA' in df.columns:
            df['data_length'] = df['DATA'].apply(lambda x: len(str(x)))
            features.append('data_length')
            df['data_entropy'] = df['DATA'].apply(self._calculate_entropy)
            features.append('data_entropy')
        # 4. One-hot encoding of the CAN ID.
        # Bug fix: guard on the 'ID' column like the other sections, so a
        # DataFrame without an ID column no longer raises KeyError here.
        if 'ID' in df.columns:
            can_id_dummies = pd.get_dummies(df['ID'], prefix='ID')
            df = pd.concat([df, can_id_dummies], axis=1)
            features.extend(can_id_dummies.columns.tolist())
        self.feature_names = features
        return df[features].fillna(0)

    def _calculate_entropy(self, data_str):
        """Shannon entropy (bits) over the characters of *data_str*.

        Non-string input yields 0.
        """
        from collections import Counter
        import math
        if not isinstance(data_str, str):
            return 0
        byte_counts = Counter(data_str)
        total_bytes = len(data_str)
        entropy = 0
        for count in byte_counts.values():
            p = count / total_bytes
            entropy -= p * math.log2(p)
        return entropy

    def prepare_training_data(self, df, train_ratio=0.8):
        """Extract, scale and chronologically split features for training.

        Returns (train_data, test_data, feature_column_names).
        """
        features = self.extract_features(df)
        print("提取特征:\n",features)
        scaled_features = self.scaler.fit_transform(features)
        # Chronological split: first train_ratio of rows become training data.
        split_idx = int(len(scaled_features) * train_ratio)
        train_data = scaled_features[:split_idx]
        test_data = scaled_features[split_idx:]
        return train_data, test_data, features.columns.tolist()
- 创建数据加载器
utils/dataset.py
python
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import joblib
### 加载数据集
class CANDataset(Dataset):
    """Thin torch Dataset over a feature matrix with optional labels."""

    def __init__(self, features, labels=None):
        self.features = torch.FloatTensor(features)
        self.labels = torch.FloatTensor(labels) if labels is not None else None

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        # Supervised shape (x, y) when labels were given, otherwise just x
        # (autoencoder-style training).
        if self.labels is None:
            return self.features[idx]
        return self.features[idx], self.labels[idx]
- 初始化训练器
utils/trainer.py
python
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import joblib
from utils.autoencoder import CANAutoencoder, CAN_AE
### 模型训练
class CAN_AETrainer:
    """Training/evaluation harness for the CAN autoencoder."""

    def __init__(self, input_dim, encoding_dim=32, learning_rate=1e-3):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = CANAutoencoder(input_dim, encoding_dim).to(self.device)
        # self.model = CAN_AE(input_dim=2).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.criterion = nn.MSELoss()
        self.train_losses = []
        self.val_losses = []

    def train(self, train_loader, val_loader=None, epochs=10):
        """Train for *epochs*, recording per-epoch train (and val) losses."""
        for epoch in range(epochs):
            # Bug fix: re-enter train mode at the start of every epoch.
            # validate() calls model.eval(), which previously left Dropout
            # disabled for all epochs after the first.
            self.model.train()
            train_loss = 0
            for batch_idx, data in enumerate(train_loader):
                if isinstance(data, tuple):
                    data = data[0]  # features only; drop labels if present
                data = data.to(self.device)
                self.optimizer.zero_grad()
                output = self.model(data)
                loss = self.criterion(output, data)  # reconstruction loss
                loss.backward()
                self.optimizer.step()
                train_loss += loss.item()
            avg_train_loss = train_loss / len(train_loader)
            self.train_losses.append(avg_train_loss)
            if val_loader is not None:
                val_loss = self.validate(val_loader)
                self.val_losses.append(val_loss)
            else:
                val_loss = 0
            print(f'Epoch [{epoch+1:2}/{epochs}], Train Loss: {avg_train_loss:.6f}, Val Loss: {val_loss:.6f}')

    def validate(self, val_loader):
        """Return the mean per-batch reconstruction loss on *val_loader*."""
        self.model.eval()
        val_loss = 0
        with torch.no_grad():
            for data in val_loader:
                if isinstance(data, tuple):
                    data = data[0]
                data = data.to(self.device)
                output = self.model(data)
                loss = self.criterion(output, data)
                val_loss += loss.item()
        return val_loss / len(val_loader)

    def calculate_reconstruction_error(self, data_loader):
        """Per-sample reconstruction MSE over *data_loader* as a numpy array."""
        self.model.eval()
        errors = []
        with torch.no_grad():
            for data in data_loader:
                if isinstance(data, tuple):
                    data = data[0]
                data = data.to(self.device)
                output = self.model(data)
                # Mean over the feature dimension -> one error per sample.
                error = torch.mean((output - data) ** 2, dim=1)
                errors.extend(error.cpu().numpy())
        return np.array(errors)

    def save_model(self, filepath):
        """Checkpoint model/optimizer state plus the loss history."""
        torch.save({
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'train_losses': self.train_losses,
            'val_losses': self.val_losses
        }, filepath)
        print(f"模型已保存到: {filepath}")

    def load_model(self, filepath):
        """Restore a checkpoint written by save_model()."""
        # map_location keeps checkpoints loadable across CPU/GPU machines.
        checkpoint = torch.load(filepath, map_location=self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.train_losses = checkpoint['train_losses']
        self.val_losses = checkpoint['val_losses']
        print(f"模型已从 {filepath} 加载")
- 初始化模型
utils/autoencoder.py
python
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import joblib
### 模型
class CANAutoencoder(nn.Module):
    """Deeper CAN-bus autoencoder (input -> 128 -> 64 -> encoding_dim)."""

    def __init__(self, input_dim, encoding_dim=32):
        super(CANAutoencoder, self).__init__()
        # Encoder: compress the input down to `encoding_dim` latent features.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128), nn.ReLU(True), nn.Dropout(0.2),
            nn.Linear(128, 64), nn.ReLU(True),
            nn.Linear(64, encoding_dim), nn.ReLU(True),
        )
        # Decoder: mirror of the encoder; Sigmoid bounds outputs to [0, 1].
        self.decoder = nn.Sequential(
            nn.Linear(encoding_dim, 64), nn.ReLU(True),
            nn.Linear(64, 128), nn.ReLU(True), nn.Dropout(0.2),
            nn.Linear(128, input_dim), nn.Sigmoid(),
        )

    def forward(self, x):
        return self.decoder(self.encoder(x))

    def encode(self, x):
        """Encoder half only — returns the latent representation."""
        return self.encoder(x)
class CAN_AE(nn.Module):
    """Compact autoencoder variant for the 2-feature CAN representation."""

    def __init__(self, input_dim=2):
        super(CAN_AE, self).__init__()
        # Encoder path: input_dim -> 16 -> 8 -> 4-D latent space.
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 4),
        )
        # Decoder path mirrors the encoder back to input_dim, in [0, 1].
        self.decoder = nn.Sequential(
            nn.Linear(4, 8),
            nn.ReLU(),
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, input_dim),
            nn.Sigmoid(),
        )

    def forward(self, x):
        latent = self.encoder(x)
        return self.decoder(latent)
- 异常检测
utils/detector.py
python
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import joblib
### 异常检测
class CANAnomalyDetector:
    """Threshold-based anomaly detection on autoencoder reconstruction error."""

    def __init__(self, trainer, threshold_quantile=0.95):
        # def __init__(self, trainer, threshold_quantile=0.05):
        self.trainer = trainer
        self.threshold_quantile = threshold_quantile
        self.threshold = None

    def fit_threshold(self, normal_data_loader):
        """Calibrate the threshold from the error quantile on normal data."""
        print("计算异常检测阈值...")
        normal_errors = self.trainer.calculate_reconstruction_error(normal_data_loader)
        # e.g. the 95th-percentile reconstruction error of normal traffic
        self.threshold = np.quantile(normal_errors, self.threshold_quantile)
        print(f"异常阈值: {self.threshold:.6f}")
        return self.threshold

    def detect_anomalies(self, data_loader):
        """Return (anomaly mask, normalized scores, raw errors)."""
        if self.threshold is None:
            raise ValueError("必须先调用 fit_threshold 方法设置阈值")
        errors = self.trainer.calculate_reconstruction_error(data_loader)
        anomalies = errors > self.threshold
        anomaly_scores = errors / self.threshold  # score > 1.0 means anomalous
        return anomalies, anomaly_scores, errors

    def plot_anomalies(self, errors, anomalies, save_path=None):
        """Plot errors + threshold (top) and the flagged samples (bottom)."""
        plt.figure(figsize=(12, 8))
        plt.subplot(2, 1, 1)
        plt.plot(errors, 'b-', alpha=0.7, label='重构误差')
        plt.axhline(y=self.threshold, color='r', linestyle='--', label=f'异常阈值 ({self.threshold:.4f})')
        plt.ylabel('重构误差')
        plt.legend()
        plt.title('CAN消息重构误差')
        plt.subplot(2, 1, 2)
        anomaly_indices = np.where(anomalies)[0]
        plt.plot(anomaly_indices, errors[anomaly_indices], 'ro',
                 label=f'异常点 ({len(anomaly_indices)}个)')
        plt.axhline(y=self.threshold, color='r', linestyle='--')
        plt.xlabel('样本索引')
        plt.ylabel('重构误差')
        plt.legend()
        plt.tight_layout()
        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.show()
- 训练脚本
ae_train.py
python
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import joblib
from utils.preprocessor import CANDataPreprocessor
from utils.dataset import CANDataset
from utils.autoencoder import CANAutoencoder
from utils.trainer import CAN_AETrainer
from utils.detector import CANAnomalyDetector
def complete_can_ae_training(can_data_path, model_save_path, joblib_save_path, epochs,
                             detection_png_path="model/can_anomaly_detection.png"):
    """End-to-end pipeline: load CSV -> preprocess -> train AE -> detect.

    Returns (trainer, detector, preprocessor) for further use by the caller.
    """
    # 1. Load the raw CAN log.
    print("加载CAN数据...")
    df = pd.read_csv(can_data_path)
    print(f"数据形状: {df.shape}")
    # 2. Feature extraction, scaling and chronological split.
    print("数据预处理...")
    preprocessor = CANDataPreprocessor()
    train_data, test_data, feature_names = preprocessor.prepare_training_data(df)
    print(f"特征数量: {len(feature_names)}")
    print(f"训练数据形状: {train_data.shape}")
    print(f"测试数据形状: {test_data.shape}")
    # 3. Wrap the splits in DataLoaders.
    train_loader = DataLoader(CANDataset(train_data), batch_size=32, shuffle=True)
    test_loader = DataLoader(CANDataset(test_data), batch_size=32, shuffle=False)
    # 4. Build the trainer around the extracted feature dimension.
    input_dim = train_data.shape[1]
    print("input_dim:", input_dim)
    trainer = CAN_AETrainer(input_dim=input_dim, encoding_dim=32, learning_rate=1e-3)
    # 5. Fit the autoencoder.
    print("开始训练自编码器...")
    trainer.train(train_loader, test_loader, epochs=epochs)
    # 6. Persist the weights and the fitted preprocessor.
    trainer.save_model(model_save_path)
    joblib.dump(preprocessor, joblib_save_path)
    # 7. Calibrate the detector on training data, then score the test split.
    print("设置异常检测器...")
    detector = CANAnomalyDetector(trainer, threshold_quantile=0.95)
    threshold = detector.fit_threshold(train_loader)
    print("训练数据设置阈值:", threshold)
    anomalies, scores, errors = detector.detect_anomalies(test_loader)
    print(f"检测到 {np.sum(anomalies)} 个异常样本")
    print(f"异常比例: {np.sum(anomalies) / len(anomalies) * 100:.2f}%")
    # # 8. Optional visualisation of the detection result
    # detector.plot_anomalies(errors, anomalies, detection_png_path)
    return trainer, detector, preprocessor


# Usage example
if __name__ == "__main__":
    trainer, detector, preprocessor = complete_can_ae_training(
        can_data_path="dataset/NormalAll3.csv",
        model_save_path="model/can_model_3_10.pkl",
        joblib_save_path="model/can_preprocessor_3_10.joblib",
        epochs=10,
    )
    print("训练完成!")
    print(f"最终训练损失: {trainer.train_losses[-1]:.6f}")
    print(f"最终验证损失: {trainer.val_losses[-1]:.6f}")
2.2.2. 深度学习(3个文件)
- 特征修复
utils/fixer.py
python
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import joblib
class FeatureFixer:
    """Aligns inference-time feature frames with the training-time schema."""

    def __init__(self, preprocessor):
        self.preprocessor = preprocessor
        self.expected_features = self._get_expected_features()

    def _get_expected_features(self):
        """Recover the feature names the preprocessor was fitted with."""
        if hasattr(self.preprocessor, 'feature_names_in_'):
            return self.preprocessor.feature_names_in_
        if hasattr(self.preprocessor, 'feature_names_'):
            return self.preprocessor.feature_names_
        if hasattr(self.preprocessor, 'scaler') and hasattr(self.preprocessor.scaler, 'feature_names_in_'):
            return self.preprocessor.scaler.feature_names_in_
        raise ValueError("无法确定训练时的特征名称")

    def analyze_mismatch(self, new_data):
        """Print and return the missing/extra/common feature breakdown."""
        if not isinstance(new_data, pd.DataFrame):
            new_data = pd.DataFrame(new_data)
        current_features = set(new_data.columns)
        expected_features = set(self.expected_features)
        missing = expected_features - current_features
        extra = current_features - expected_features
        common = current_features & expected_features
        print("=== 特征不匹配分析 ===")
        print(f"训练特征数: {len(expected_features)}")
        print(f"当前特征数: {len(current_features)}")
        print(f"共同特征: {len(common)}")
        print(f"缺失特征: {len(missing)}")
        print(f"多余特征: {len(extra)}")
        if missing:
            print("\n缺失的特征列表:")
            for feature in sorted(missing):
                print(f" - {feature}")
        return {'missing': missing, 'extra': extra, 'common': common}

    def fix_features(self, new_data, fill_value=0):
        """Reindex *new_data* onto the training schema, filling gaps."""
        if not isinstance(new_data, pd.DataFrame):
            new_data = pd.DataFrame(new_data)
        # Build an aligned frame column by column, in training order.
        aligned_data = pd.DataFrame(index=new_data.index)
        for feature in self.expected_features:
            aligned_data[feature] = new_data[feature] if feature in new_data.columns else fill_value
        # Columns the training schema does not know about are dropped.
        extra_features = set(new_data.columns) - set(self.expected_features)
        if extra_features:
            print(f"移除了 {len(extra_features)} 个多余特征", extra_features)
        return aligned_data

    def safe_transform(self, new_data):
        """Align, validate, then transform in one call."""
        fixed_data = self.fix_features(new_data)
        self.validate_feature_alignment(fixed_data)
        return self.preprocessor.transform(fixed_data)

    def transform(self, fixed_data):
        """Pass-through to the underlying preprocessor's transform."""
        return self.preprocessor.transform(fixed_data)

    def validate_feature_alignment(self, fixed_data):
        """Check that *fixed_data*'s columns equal the training schema."""
        print("=== 特征对齐验证 ===")
        if hasattr(self.preprocessor, 'feature_names_in_'):
            expected = set(self.preprocessor.feature_names_in_)
        elif hasattr(self.preprocessor, 'feature_names_'):
            expected = set(self.preprocessor.feature_names_)
        else:
            print("无法验证: 无法获取期望特征")
            return False
        actual = set(fixed_data.columns)
        if expected == actual:
            print("✅ 特征完美对齐!")
            return True
        print("❌ 特征未对齐")
        print(f"期望: {len(expected)} 个特征")
        print(f"实际: {len(actual)} 个特征")
        print(f"差异: {expected - actual}")
        return False
- 预测器
utils/predictor.py
python
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import joblib
from utils.preprocessor import CANDataPreprocessor
from utils.autoencoder import CANAutoencoder, CAN_AE
from utils.fixer import FeatureFixer
class CAN_AEPredictor:
    """Inference wrapper around a trained CAN autoencoder checkpoint."""

    def __init__(self, model_path, preprocessor_path):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.preprocessor = joblib.load(preprocessor_path)
        # Restore the checkpoint; infer the input width from the shape of
        # the first encoder layer's weight matrix.
        checkpoint = torch.load(model_path, map_location=self.device)
        input_dim = checkpoint['model_state_dict']['encoder.0.weight'].shape[1]
        self.model = CANAutoencoder(input_dim).to(self.device)
        # self.model = CAN_AE(input_dim).to(self.device)
        self.model.load_state_dict(checkpoint['model_state_dict'])
        self.model.eval()
        # Hard-coded anomaly threshold from a previous calibration run.
        self.threshold = float(1.5706)
        # self.threshold = float(2.4762576)
        # self.threshold = float(2.404398)

    def predict_anomaly_old(self, can_messages):
        """Older prediction path using FeatureFixer.safe_transform()."""
        frame = pd.DataFrame(can_messages)
        features = self.preprocessor.extract_features(frame)
        print(features)
        # Align inference features with the fitted scaler's schema.
        fixer = FeatureFixer(self.preprocessor.scaler)
        print("5. 进行预测...")
        transformed_data = fixer.safe_transform(features)
        features_tensor = torch.FloatTensor(transformed_data).to(self.device)
        with torch.no_grad():
            reconstructed = self.model(features_tensor)
            errors = torch.mean((reconstructed - features_tensor) ** 2, dim=1)
        return errors.cpu().numpy()

    def predict_anomaly(self, can_messages):
        """Per-message reconstruction error for a list of CAN message dicts."""
        frame = pd.DataFrame(can_messages)
        features = self.preprocessor.extract_features(frame)
        fixer = FeatureFixer(self.preprocessor.scaler)
        # Repair feature mismatch between live traffic and the training set.
        fix_features = fixer.fix_features(features)
        print(fix_features)
        is_valid = fixer.validate_feature_alignment(fix_features)
        if not is_valid:
            print("警告: 特征验证失败,但继续执行...")
        print("5. 进行预测...")
        transformed_data = fixer.transform(fix_features)
        with torch.no_grad():
            features_tensor = torch.FloatTensor(transformed_data).to(self.device)
            reconstructed = self.model(features_tensor)
            errors = torch.mean((reconstructed - features_tensor) ** 2, dim=1)
            print("err:", type(errors.cpu().numpy()), errors.cpu().numpy())
            mse = torch.mean((features_tensor - reconstructed) ** 2).item()
            print("mse:", type(mse), mse)
        return errors.cpu().numpy()

    def real_time_monitoring(self, can_msg: dict, can_interface='can0'):
        """Score one live CAN message; returns (is_attack, error, confidence)."""
        print(f"开始实时监控 {can_interface}...")
        print(type(can_msg), can_msg)
        error = float(self.predict_anomaly([can_msg])[0])
        # error = self.predict_anomaly_old([can_msg])[0]
        if error > self.threshold:
            print(f"🚨 异常检测! CAN ID: {can_msg['ID']}, "
                  f"重构误差: {error:.4f}, 阈值:{self.threshold},数据: {can_msg['DATA']}")
        else:
            print(f"👌 正常检测! CAN ID: {can_msg['ID']}, "
                  f"重构误差: {error:.4f}, 阈值:{self.threshold},数据: {can_msg['DATA']}")
        is_attack = bool(error > self.threshold)
        is_attack = error > self.threshold
        # Confidence: relative distance of the error from the threshold,
        # clamped to [0.1, 1.0].
        confidence = float((error-self.threshold)/(self.threshold)) if is_attack else ((self.threshold-error)/(self.threshold))
        confidence = max(0.1, min(1.0, confidence))
        return is_attack, error, confidence
- 学习服务
ae_predi.py
python
from utils.predictor import CAN_AEPredictor
from flask import Flask, request, jsonify
import time
# 初始化Flask服务(供检测机调用)
app = Flask(__name__)
@app.route("/detect", methods=["POST"])
def detect_attack():
    """Receive CAN-frame features from the detector host and return a verdict.

    Expects a JSON body shaped like
        {"Timestamp": float, "ID": "07e8", "DLC": 4.0, "DATA": "e452974c"}
    and responds with
        {"is_attack": bool, "confidence": float, "reconstruction_error": float}

    Fix: the original had a second, unreachable ``return jsonify(...)`` after
    the first return, plus large spans of dead commented-out fuzzing code —
    both removed.
    """
    can_msg = request.get_json()
    print("#########", type(can_msg), can_msg)
    # `predictor` is the module-level CAN_AEPredictor created in __main__.
    is_attack, mse, confidence = predictor.real_time_monitoring(can_msg=can_msg)
    return jsonify({
        "is_attack": is_attack,
        "confidence": confidence,
        "reconstruction_error": mse
    })
if __name__ == "__main__":
    # Earlier model checkpoints kept for reference:
    # predictor = CAN_AEPredictor(model_path="model/can_model.pkl", preprocessor_path="model/can_preprocessor.joblib")
    # predictor = CAN_AEPredictor(model_path="model/can_model_2.pkl", preprocessor_path="model/can_preprocessor_2.joblib")
    # predictor = CAN_AEPredictor(model_path="model/can_model_3.pkl", preprocessor_path="model/can_preprocessor_3.joblib")
    # predictor = CAN_AEPredictor(model_path="model/can_model_3_50.pkl", preprocessor_path="model/can_preprocessor_3_50.joblib")
    # Load the trained autoencoder and its preprocessing pipeline.
    predictor = CAN_AEPredictor(model_path="model/can_model_3_10.pkl", preprocessor_path="model/can_preprocessor_3_10.joblib")
    # Start the Flask detection service (all interfaces, port 5000).
    app.run(host="0.0.0.0", port=5000, debug=False)
2.2.3. 检测脚本(接收)
python
import can
import requests
# Address of the host-side deep-learning detection engine (Flask service).
# DETECT_SERVER_URL = "http://192.168.17.101:5000/detect"
DETECT_SERVER_URL = "http://192.168.17.1:5000/detect"
def send_to_detect_server(features):
    """POST extracted CAN features to the host detection service and log the verdict.

    Args:
        features: dict with keys 'Timestamp'/'ID'/'DLC'/'DATA', sent as JSON.

    Best-effort: network/parsing errors are printed, never raised, so the
    CAN listener loop keeps running.
    """
    try:
        response = requests.post(DETECT_SERVER_URL, json=features, timeout=1)
        if response.status_code == 200:
            result = response.json()
            verdict = '攻击' if result['is_attack'] else '正常'
            print(f"检测结果:{verdict} | 置信度:{result['confidence']:.2f}")
        else:
            # Fix: a non-200 reply used to be silently dropped; surface it.
            print(f"发送失败:HTTP {response.status_code}")
    except Exception as e:
        print(f"发送失败:{e}")
def receive_can_messages():
    """Listen on can0 and forward each received frame's features to the detector."""
    with can.Bus(interface='socketcan', channel='can0', bitrate=250000) as bus:
        print("开始监听CAN总线...按Ctrl+C停止")
        try:
            while True:
                # Block for up to one second waiting for a frame.
                frame = bus.recv(timeout=1.0)
                if frame is None:
                    continue  # receive timeout — poll again
                print(f"收到消息: ID=0x{frame.arbitration_id:04x}, "
                      f"数据={frame.data.hex()}, "
                      f"时间戳={frame.timestamp}")
                can_id_hex = format(frame.arbitration_id, '04x')
                payload_hex = frame.data.hex()
                # Feature dict consumed by the remote detection service.
                features = {'Timestamp': frame.timestamp,
                            'ID': can_id_hex,
                            'DLC': len(payload_hex) / 2,
                            'DATA': payload_hex}
                print(features)
                send_to_detect_server(features)
        except KeyboardInterrupt:
            print("\n停止监听")

if __name__ == "__main__":
    receive_can_messages()
2.2.4. 攻击脚本(发送)
python
import can
import time
import random
def send_can_message():
    """Emit one randomly generated standard CAN frame on can0 (attack simulation)."""
    # Interface type must match the actual hardware (socketcan here).
    with can.Bus(interface='socketcan', channel='can0', bitrate=250000) as bus:
        # Random standard (11-bit) arbitration ID and 1-8 random payload bytes.
        arb_id = random.randint(0x000, 0x7FF)
        payload = bytes(random.randint(0x00, 0xFF)
                        for _ in range(random.randint(1, 8)))
        frame = can.Message(
            arbitration_id=arb_id,
            data=payload,            # at most 8 bytes for a classic CAN frame
            is_extended_id=False,    # standard frame
        )
        try:
            bus.send(frame)
            print(f"发送消息: ID=0x{frame.arbitration_id:04x}, "
                  f"数据={frame.data.hex()}, "
                  f"时间戳={frame.timestamp}, "
                  f"数据长度={len(payload)}")
        except can.CanError:
            print("消息发送失败")

if __name__ == "__main__":
    send_can_message()