一、算法原理与架构
┌─────────────────────────────────────────────────────────────┐
│ 孤立森林异常检测系统 │
├─────────────────────────────────────────────────────────────┤
│ 数据预处理 │ 孤立森林构建 │ 异常评分 │ 决策输出 │
│ │ │ │ │
│ • 数据清洗 │ • 构建 iTrees │ • 路径长度 │ • 异常阈值 │
│ • 特征选择 │ • 随机采样 │ • 异常分数 │ • 告警级别 │
│ • 归一化 │ • 递归分割 │ • 归一化 │ • 可视化 │
│ • 维度缩减 │ • 森林集成 │ • 排序 │ • 报告生成 │
└─────────────────────────────────────────────────────────────┘
核心思想:异常点更容易被孤立(路径更短),正常点需要更多分割才能孤立。
二、完整 C# 实现
2.1 孤立树节点结构 (IsolationTreeNode.cs)
csharp
using System;
using System.Collections.Generic;
namespace IsolationForest
{
/// <summary>
/// A single node of an isolation tree. A node is either an internal split
/// (a feature index plus a threshold and two children) or a leaf.
/// </summary>
public class IsolationTreeNode
{
    /// <summary>Index of the feature this node splits on; -1 marks a leaf.</summary>
    public int SplitFeature { get; set; } = -1;

    /// <summary>Threshold of the split: values &lt;= it go left, larger values go right.</summary>
    public double SplitValue { get; set; }

    /// <summary>Child receiving the samples whose feature value is &lt;= <see cref="SplitValue"/>.</summary>
    public IsolationTreeNode Left { get; set; }

    /// <summary>Child receiving the samples whose feature value is &gt; <see cref="SplitValue"/>.</summary>
    public IsolationTreeNode Right { get; set; }

    /// <summary>Number of training samples that reached this node.</summary>
    public int Size { get; set; }

    /// <summary>
    /// Level of the node as assigned by the tree builder. IsolationTree stores the
    /// node's depth below the root here (the root gets 0).
    /// </summary>
    public int Height { get; set; }

    /// <summary>True while no split feature has been assigned (SplitFeature is -1).</summary>
    public bool IsLeaf
    {
        get { return SplitFeature == -1; }
    }
}
}
2.2 孤立树构建 (IsolationTree.cs)
csharp
using System;
using System.Collections.Generic;
using System.Linq;
namespace IsolationForest
{
/// <summary>
/// A single isolation tree (iTree). Each internal node picks a random feature and a
/// random threshold between that feature's minimum and maximum among the samples
/// that reached the node, so isolated points tend to end up in shallow leaves.
/// </summary>
public class IsolationTree
{
    private readonly int _maxHeight;
    private readonly int _sampleSize; // stored from the constructor; not read by this class
    private readonly Random _random;

    /// <summary>Root of the tree; null until <see cref="Build"/> has been called.</summary>
    public IsolationTreeNode Root { get; private set; }

    /// <summary>Depth of the deepest node created by the most recent <see cref="Build"/> (0 for a single leaf).</summary>
    public int TreeHeight { get; private set; } = 0;

    /// <summary>Creates an empty tree.</summary>
    /// <param name="maxHeight">Depth at which nodes are no longer split.</param>
    /// <param name="sampleSize">Size of the sample the tree is built from.</param>
    /// <param name="random">Random source; a new <see cref="Random"/> is created when null.</param>
    public IsolationTree(int maxHeight, int sampleSize, Random random)
    {
        _maxHeight = maxHeight;
        _sampleSize = sampleSize;
        _random = random ?? new Random();
    }

    /// <summary>Builds the tree from <paramref name="samples"/>, replacing any previous tree.</summary>
    /// <param name="samples">Training vectors; all are expected to have the same length.</param>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public void Build(List<double[]> samples)
    {
        if (samples == null)
        {
            throw new ArgumentNullException(nameof(samples));
        }

        // Reset so that a rebuild does not report the height of the previous tree.
        TreeHeight = 0;
        Root = BuildRecursive(samples, 0);
    }

    /// <summary>Recursively partitions <paramref name="samples"/> and returns the subtree root.</summary>
    private IsolationTreeNode BuildRecursive(List<double[]> samples, int currentHeight)
    {
        var node = new IsolationTreeNode
        {
            Size = samples.Count,
            Height = currentHeight
        };

        // Stop when the sample is isolated or the height limit is reached.
        if (samples.Count <= 1 || currentHeight >= _maxHeight)
        {
            return node;
        }

        // Pick a feature that still varies within this node; none left means a leaf.
        int featureIndex = SelectRandomFeature(samples);
        if (featureIndex < 0)
        {
            return node;
        }

        double splitValue = SelectSplitValue(samples, featureIndex);
        if (double.IsNaN(splitValue))
        {
            return node;
        }

        var leftSamples = new List<double[]>();
        var rightSamples = new List<double[]>();
        foreach (var sample in samples)
        {
            if (sample[featureIndex] <= splitValue)
            {
                leftSamples.Add(sample);
            }
            else
            {
                rightSamples.Add(sample);
            }
        }

        // Defensive: floating-point rounding (or NaN inputs) can still leave one side empty.
        if (leftSamples.Count == 0 || rightSamples.Count == 0)
        {
            return node;
        }

        node.SplitFeature = featureIndex;
        node.SplitValue = splitValue;
        node.Left = BuildRecursive(leftSamples, currentHeight + 1);
        node.Right = BuildRecursive(rightSamples, currentHeight + 1);
        TreeHeight = Math.Max(TreeHeight, currentHeight + 1);
        return node;
    }

    /// <summary>
    /// Picks, uniformly at random, one of the features whose value is not the same for
    /// every sample. Returns -1 when there are no samples or no feature can split them.
    /// </summary>
    private int SelectRandomFeature(List<double[]> samples)
    {
        if (samples.Count == 0)
        {
            return -1;
        }

        int featureCount = samples[0].Length;
        var splittable = new List<int>(featureCount);
        for (int feature = 0; feature < featureCount; feature++)
        {
            double first = samples[0][feature];
            for (int i = 1; i < samples.Count; i++)
            {
                if (samples[i][feature] != first)
                {
                    splittable.Add(feature);
                    break;
                }
            }
        }

        return splittable.Count == 0 ? -1 : splittable[_random.Next(splittable.Count)];
    }

    /// <summary>
    /// Draws the split threshold uniformly from [min, max) of the chosen feature.
    /// Returns NaN when the feature cannot separate the samples (min is not below max).
    /// </summary>
    private double SelectSplitValue(List<double[]> samples, int featureIndex)
    {
        if (samples.Count == 0)
        {
            return double.NaN;
        }

        double min = samples[0][featureIndex];
        double max = min;
        for (int i = 1; i < samples.Count; i++)
        {
            double value = samples[i][featureIndex];
            if (value < min)
            {
                min = value;
            }
            else if (value > max)
            {
                max = value;
            }
        }

        if (!(min < max))
        {
            return double.NaN;
        }

        // NextDouble() is in [0, 1): the minimum always goes left and the maximum goes right.
        return min + _random.NextDouble() * (max - min);
    }

    /// <summary>
    /// Number of edges traversed from the root to the leaf that <paramref name="sample"/>
    /// falls into. This is the raw edge count; no correction is added for leaves that
    /// still hold several samples. Returns 0 when the tree has not been built.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
    public int PathLength(double[] sample)
    {
        if (sample == null)
        {
            throw new ArgumentNullException(nameof(sample));
        }

        int depth = 0;
        var node = Root;
        while (node != null && !node.IsLeaf)
        {
            node = sample[node.SplitFeature] <= node.SplitValue ? node.Left : node.Right;
            depth++;
        }

        return depth;
    }
}
}
2.3 孤立森林集成 (IsolationForest.cs)
csharp
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace IsolationForest
{
/// <summary>
/// Isolation-forest anomaly detector: an ensemble of isolation trees whose average
/// path length for a sample is turned into a score in [0, 1] (closer to 1 = more anomalous).
/// </summary>
public class IsolationForest
{
    private readonly List<IsolationTree> _trees;
    private readonly int _numTrees;
    private readonly int _sampleSize;
    private readonly int _maxHeight;
    private readonly Random _random;

    // c(sampleSize): average path length used to normalise scores; constant per forest.
    private readonly double _expectedPathLength;

    /// <summary>Creates an untrained forest.</summary>
    /// <param name="numTrees">Number of trees to build; must be at least 1.</param>
    /// <param name="sampleSize">Number of samples drawn for each tree; must be at least 1.</param>
    /// <param name="maxHeight">Height limit per tree; values &lt;= 0 select ceil(log2(sampleSize)).</param>
    /// <exception cref="ArgumentOutOfRangeException">numTrees or sampleSize is below 1.</exception>
    public IsolationForest(int numTrees = 100, int sampleSize = 256, int maxHeight = 0)
    {
        if (numTrees < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(numTrees), "numTrees must be at least 1.");
        }
        if (sampleSize < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(sampleSize), "sampleSize must be at least 1.");
        }

        _numTrees = numTrees;
        _sampleSize = sampleSize;
        _maxHeight = maxHeight > 0 ? maxHeight : (int)Math.Ceiling(Math.Log(sampleSize, 2));
        _trees = new List<IsolationTree>(numTrees);
        _random = new Random();
        _expectedPathLength = AveragePathLength(sampleSize);
    }

    /// <summary>Builds all trees from <paramref name="trainingData"/>, replacing any previous forest.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="trainingData"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="trainingData"/> is empty.</exception>
    public void Train(List<double[]> trainingData)
    {
        if (trainingData == null)
        {
            throw new ArgumentNullException(nameof(trainingData));
        }
        if (trainingData.Count == 0)
        {
            throw new ArgumentException("Training data must contain at least one sample.", nameof(trainingData));
        }

        Console.WriteLine($"开始训练孤立森林...");
        Console.WriteLine($" 树的数量: {_numTrees}");
        Console.WriteLine($" 每棵树样本数: {_sampleSize}");
        Console.WriteLine($" 最大树高: {_maxHeight}");
        _trees.Clear();

        // System.Random is not thread-safe, so the shared generator is only used here,
        // on the calling thread, to derive one seed per tree.
        var seeds = new int[_numTrees];
        for (int i = 0; i < seeds.Length; i++)
        {
            seeds[i] = _random.Next();
        }

        var builtTrees = new IsolationTree[_numTrees];
        var progressLock = new object();
        int completed = 0;

        // Build the trees in parallel; every iteration owns its Random and its output slot.
        Parallel.For(0, _numTrees, i =>
        {
            var treeRandom = new Random(seeds[i]);
            var sampledData = BootstrapSample(trainingData, _sampleSize, treeRandom);
            var tree = new IsolationTree(_maxHeight, _sampleSize, treeRandom);
            tree.Build(sampledData);
            builtTrees[i] = tree;

            // Report the number of finished trees, not the loop index, which completes out of order.
            int done;
            lock (progressLock)
            {
                done = ++completed;
            }
            if (done % 10 == 0)
            {
                Console.WriteLine($" 已完成 {done}/{_numTrees} 棵树");
            }
        });

        _trees.AddRange(builtTrees);
        Console.WriteLine($"孤立森林训练完成!共 {_trees.Count} 棵树");
    }

    /// <summary>Draws <paramref name="sampleSize"/> samples with replacement using <paramref name="random"/>.</summary>
    private static List<double[]> BootstrapSample(List<double[]> data, int sampleSize, Random random)
    {
        var sample = new List<double[]>(sampleSize);
        for (int i = 0; i < sampleSize; i++)
        {
            sample.Add(data[random.Next(data.Count)]);
        }
        return sample;
    }

    /// <summary>
    /// Anomaly score of <paramref name="sample"/>: 2^(-E[h] / c(sampleSize)), where E[h] is the
    /// average adjusted path length over all trees.
    /// </summary>
    /// <exception cref="InvalidOperationException">The forest has not been trained.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
    public double AnomalyScore(double[] sample)
    {
        if (_trees.Count == 0)
            throw new InvalidOperationException("请先训练孤立森林");
        if (sample == null)
            throw new ArgumentNullException(nameof(sample));

        double totalPathLength = 0;
        foreach (var tree in _trees)
        {
            totalPathLength += AdjustedPathLength(tree, sample);
        }

        return NormalizeScore(totalPathLength / _trees.Count);
    }

    /// <summary>Scores every sample in order; see <see cref="AnomalyScore"/>.</summary>
    public List<double> BatchAnomalyScores(List<double[]> samples)
    {
        if (samples == null)
        {
            throw new ArgumentNullException(nameof(samples));
        }

        var scores = new List<double>(samples.Count);
        foreach (var sample in samples)
        {
            scores.Add(AnomalyScore(sample));
        }
        return scores;
    }

    /// <summary>
    /// Depth of the leaf reached by <paramref name="sample"/> plus c(leaf.Size): a leaf that
    /// still holds several training samples stands for an unbuilt subtree, so its expected
    /// depth is added to the edge count.
    /// </summary>
    private static double AdjustedPathLength(IsolationTree tree, double[] sample)
    {
        int depth = 0;
        var node = tree.Root;
        while (node != null && !node.IsLeaf)
        {
            node = sample[node.SplitFeature] <= node.SplitValue ? node.Left : node.Right;
            depth++;
        }

        return node == null ? depth : depth + AveragePathLength(node.Size);
    }

    /// <summary>
    /// c(n) = 2·H(n-1) - 2·(n-1)/n, the average path length of an unsuccessful search in a
    /// binary search tree with n nodes; 0 for n &lt;= 1.
    /// </summary>
    private static double AveragePathLength(int n)
    {
        if (n <= 1)
        {
            return 0.0;
        }
        return 2.0 * HarmonicNumber(n - 1) - 2.0 * (n - 1) / n;
    }

    /// <summary>Maps an average path length to a score in [0, 1]; shorter paths score higher.</summary>
    private double NormalizeScore(double pathLength)
    {
        // With a sample size of 1 there is nothing to compare against; report the neutral score.
        if (_expectedPathLength <= 0)
        {
            return 0.5;
        }

        // A path length of 0 yields 2^0 = 1, i.e. the most anomalous score.
        double score = Math.Pow(2, -pathLength / _expectedPathLength);
        return Math.Max(0, Math.Min(1, score));
    }

    /// <summary>Harmonic number H(n) = 1 + 1/2 + ... + 1/n (0 for n &lt;= 0).</summary>
    private static double HarmonicNumber(int n)
    {
        double sum = 0;
        for (int i = 1; i <= n; i++)
        {
            sum += 1.0 / i;
        }
        return sum;
    }

    /// <summary>True when the score of <paramref name="sample"/> is strictly above <paramref name="threshold"/>.</summary>
    public bool IsAnomaly(double[] sample, double threshold = 0.6)
    {
        return AnomalyScore(sample) > threshold;
    }

    /// <summary>
    /// Scores every sample and collects scores, per-sample flags and the indices of the
    /// samples whose score is strictly above <paramref name="threshold"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="samples"/> is null.</exception>
    public AnomalyDetectionResult DetectAnomalies(List<double[]> samples, double threshold = 0.6)
    {
        if (samples == null)
        {
            throw new ArgumentNullException(nameof(samples));
        }

        var result = new AnomalyDetectionResult
        {
            Threshold = threshold,
            Scores = new List<double>(samples.Count),
            IsAnomaly = new List<bool>(samples.Count),
            AnomalyIndices = new List<int>()
        };

        for (int i = 0; i < samples.Count; i++)
        {
            double score = AnomalyScore(samples[i]);
            bool isAnomaly = score > threshold;
            result.Scores.Add(score);
            result.IsAnomaly.Add(isAnomaly);
            if (isAnomaly)
            {
                result.AnomalyIndices.Add(i);
            }
        }
        return result;
    }
}
/// <summary>
/// Outcome of a batch detection run: the threshold that was applied, one score and one
/// flag per input sample, and the indices of the samples flagged as anomalous.
/// </summary>
public class AnomalyDetectionResult
{
    /// <summary>Score threshold the flags were computed with.</summary>
    public double Threshold { get; set; }

    /// <summary>Anomaly score per input sample, in input order.</summary>
    public List<double> Scores { get; set; } = new List<double>();

    /// <summary>Per-sample flag, parallel to <see cref="Scores"/>.</summary>
    public List<bool> IsAnomaly { get; set; } = new List<bool>();

    /// <summary>Indices (into the input list) of the samples flagged as anomalous.</summary>
    public List<int> AnomalyIndices { get; set; } = new List<int>();

    /// <summary>Number of flagged samples; 0 when <see cref="AnomalyIndices"/> is null.</summary>
    public int AnomalyCount => AnomalyIndices?.Count ?? 0;

    /// <summary>
    /// Fraction of samples flagged as anomalous. Returns 0 for an empty result instead of
    /// NaN from a 0/0 division.
    /// </summary>
    public double AnomalyRate =>
        Scores == null || Scores.Count == 0 ? 0.0 : (double)AnomalyCount / Scores.Count;
}
}
2.4 工业数据生成与测试 (Program.cs)
csharp
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
namespace IsolationForest
{
/// <summary>
/// Console demo: trains a forest on synthetic data, evaluates it against the generator's
/// labels and prints single-sample and sensor-stream examples.
/// </summary>
class Program
{
    // Score threshold used by every detection call in this demo.
    private const double DetectionThreshold = 0.65;

    static void Main(string[] args)
    {
        Console.WriteLine("====== 孤立森林异常检测系统 ======\n");

        // 1. Generate the synthetic dataset.
        Console.WriteLine("生成测试数据...");
        var dataGenerator = new IndustrialDataGenerator();
        var (trainingData, testData) = dataGenerator.GenerateDataset(
            normalCount: 1000,
            anomalyCount: 50,
            featureCount: 5
        );
        Console.WriteLine($"训练数据: {trainingData.Count} 个样本");
        Console.WriteLine($"测试数据: {testData.Count} 个样本");

        // 2. Train the forest.
        var forest = new IsolationForest(
            numTrees: 100,
            sampleSize: 256,
            maxHeight: 8
        );
        var stopwatch = Stopwatch.StartNew();
        forest.Train(trainingData);
        stopwatch.Stop();
        Console.WriteLine($"训练耗时: {stopwatch.ElapsedMilliseconds} ms\n");

        // 3. Detect anomalies in the test set.
        Console.WriteLine("开始异常检测...");
        var detectionResult = forest.DetectAnomalies(testData, threshold: DetectionThreshold);

        // 4. Evaluate. AnomalyLabels may also cover the training samples, which are generated
        //    before every test sample; keeping only the trailing testData.Count labels makes
        //    label i describe testData[i] either way.
        var allLabels = dataGenerator.AnomalyLabels;
        var testLabels = allLabels.Skip(allLabels.Count - testData.Count).ToList();
        EvaluateResults(detectionResult, testLabels);

        // 5. Single-sample demonstration.
        DemonstrateSingleDetection(forest);

        // 6. Industrial example. The sensor channels have different ranges than the synthetic
        //    dataset above, so they are scored by a forest trained on normal sensor readings.
        var sensorGenerator = new IndustrialDataGenerator();
        var sensorTrainingData = new List<double[]>();
        for (int i = 0; i < 1000; i++)
        {
            sensorTrainingData.Add(sensorGenerator.GenerateSensorReading());
        }
        var sensorForest = new IsolationForest(numTrees: 100, sampleSize: 256, maxHeight: 8);
        sensorForest.Train(sensorTrainingData);
        IndustrialApplicationExample(sensorForest);

        // Console.ReadKey throws when stdin is redirected (e.g. in a container without a TTY).
        if (!Console.IsInputRedirected)
        {
            Console.WriteLine("\n按任意键退出...");
            Console.ReadKey();
        }
    }

    /// <summary>Prints the confusion matrix, derived metrics and score statistics.</summary>
    /// <param name="result">Detection output; one flag per evaluated sample.</param>
    /// <param name="trueLabels">Ground truth, index-aligned with <paramref name="result"/>.</param>
    /// <exception cref="ArgumentException">The label count differs from the prediction count.</exception>
    static void EvaluateResults(AnomalyDetectionResult result, List<bool> trueLabels)
    {
        if (trueLabels.Count != result.IsAnomaly.Count)
        {
            throw new ArgumentException(
                "trueLabels must contain exactly one label per evaluated sample.", nameof(trueLabels));
        }

        Console.WriteLine("\n====== 检测结果评估 ======");
        Console.WriteLine($"检测到的异常点: {result.AnomalyCount}");
        Console.WriteLine($"异常率: {result.AnomalyRate:P2}");

        // Confusion matrix.
        int tp = 0, fp = 0, tn = 0, fn = 0;
        for (int i = 0; i < result.IsAnomaly.Count; i++)
        {
            bool predicted = result.IsAnomaly[i];
            bool actual = trueLabels[i];
            if (actual)
            {
                if (predicted) tp++; else fn++;
            }
            else
            {
                if (predicted) fp++; else tn++;
            }
        }
        Console.WriteLine($"\n混淆矩阵:");
        Console.WriteLine($" 真阳性(TP): {tp}");
        Console.WriteLine($" 假阳性(FP): {fp}");
        Console.WriteLine($" 真阴性(TN): {tn}");
        Console.WriteLine($" 假阴性(FN): {fn}");

        // Metrics; every ratio falls back to 0 when its denominator is 0.
        int total = tp + tn + fp + fn;
        double accuracy = total > 0 ? (double)(tp + tn) / total : 0;
        double precision = tp + fp > 0 ? (double)tp / (tp + fp) : 0;
        double recall = tp + fn > 0 ? (double)tp / (tp + fn) : 0;
        double f1Score = precision + recall > 0 ? 2 * precision * recall / (precision + recall) : 0;
        Console.WriteLine($"\n性能指标:");
        Console.WriteLine($" 准确率(Accuracy): {accuracy:P2}");
        Console.WriteLine($" 精确率(Precision): {precision:P2}");
        Console.WriteLine($" 召回率(Recall): {recall:P2}");
        Console.WriteLine($" F1分数: {f1Score:P2}");

        // Score distribution; Min/Max/Average throw on an empty sequence, so skip it then.
        if (result.Scores.Count > 0)
        {
            Console.WriteLine($"\n异常分数统计:");
            Console.WriteLine($" 最小值: {result.Scores.Min():F4}");
            Console.WriteLine($" 最大值: {result.Scores.Max():F4}");
            Console.WriteLine($" 平均值: {result.Scores.Average():F4}");
            Console.WriteLine($" 标准差: {CalculateStdDev(result.Scores):F4}");
        }
    }

    /// <summary>Scores one typical and one extreme hand-written sample and prints the outcome.</summary>
    static void DemonstrateSingleDetection(IsolationForest forest)
    {
        Console.WriteLine("\n====== 单个样本检测演示 ======");

        // Normal sample: every feature lies within one standard deviation of the
        // training distribution generated in Main (mean 50, standard deviation 10).
        double[] normalSample = { 48.2, 53.1, 50.8, 46.5, 51.3 };
        double normalScore = forest.AnomalyScore(normalSample);
        bool isNormalAnomaly = normalScore > DetectionThreshold;
        Console.WriteLine($"正常样本: [{string.Join(", ", normalSample.Select(x => x.ToString("F1")))}]");
        Console.WriteLine($" 异常分数: {normalScore:F4}");
        Console.WriteLine($" 是否异常: {(isNormalAnomaly ? "是" : "否")}");

        // Anomalous sample: far outside the training range on every feature.
        double[] anomalySample = { 999.9, 888.8, 777.7, 666.6, 555.5 };
        double anomalyScore = forest.AnomalyScore(anomalySample);
        bool isAnomalyAnomaly = anomalyScore > DetectionThreshold;
        Console.WriteLine($"异常样本: [{string.Join(", ", anomalySample.Select(x => x.ToString("F1")))}]");
        Console.WriteLine($" 异常分数: {anomalyScore:F4}");
        Console.WriteLine($" 是否异常: {(isAnomalyAnomaly ? "是" : "否")}");
    }

    /// <summary>Scores ten simulated sensor readings with <paramref name="forest"/> and prints each verdict.</summary>
    static void IndustrialApplicationExample(IsolationForest forest)
    {
        Console.WriteLine("\n====== 工业应用示例 ======");
        Console.WriteLine("实时传感器监测:");
        var sensorData = new IndustrialDataGenerator();
        for (int i = 0; i < 10; i++)
        {
            var sample = sensorData.GenerateSensorReading();
            // Score once and derive the flag from it instead of scoring the sample twice.
            double score = forest.AnomalyScore(sample);
            bool isAnomaly = score > DetectionThreshold;
            string status = isAnomaly ? "⚠️ 异常" : "✅ 正常";
            Console.WriteLine($" 传感器{i+1}: 温度={sample[0]:F1}°C, 压力={sample[1]:F1}MPa, " +
                $"振动={sample[2]:F1}Hz, 流量={sample[3]:F1}L/min, 电流={sample[4]:F1}A " +
                $"[{status}] (分数:{score:F3})");
        }
    }

    /// <summary>Population standard deviation of <paramref name="values"/> (divides by the count).</summary>
    static double CalculateStdDev(List<double> values)
    {
        double avg = values.Average();
        double sum = values.Sum(v => Math.Pow(v - avg, 2));
        return Math.Sqrt(sum / values.Count);
    }
}
/// <summary>
/// Generates synthetic data for the demo: a labelled train/test dataset and single
/// simulated sensor readings.
/// </summary>
public class IndustrialDataGenerator
{
    /// <summary>
    /// Ground-truth labels of the test set returned by the last <see cref="GenerateDataset"/>
    /// call; entry i describes test sample i (true = anomaly).
    /// </summary>
    public List<bool> AnomalyLabels { get; private set; } = new List<bool>();

    private readonly Random _random = new Random();

    /// <summary>
    /// Generates normal samples (each feature Gaussian, mean 50, standard deviation 10) and
    /// anomalous samples. The first 80% of the normal samples form the training set; the
    /// remaining normal samples followed by all anomalies form the test set.
    /// </summary>
    /// <param name="normalCount">Number of normal samples to generate.</param>
    /// <param name="anomalyCount">Number of anomalous samples to generate.</param>
    /// <param name="featureCount">Length of every sample vector.</param>
    /// <returns>The training set and the test set, in that order.</returns>
    /// <exception cref="ArgumentOutOfRangeException">Any argument is negative.</exception>
    public (List<double[]>, List<double[]>) GenerateDataset(int normalCount, int anomalyCount, int featureCount)
    {
        if (normalCount < 0) throw new ArgumentOutOfRangeException(nameof(normalCount));
        if (anomalyCount < 0) throw new ArgumentOutOfRangeException(nameof(anomalyCount));
        if (featureCount < 0) throw new ArgumentOutOfRangeException(nameof(featureCount));

        var trainingData = new List<double[]>();
        var testData = new List<double[]>();
        AnomalyLabels.Clear();

        // Normal data.
        for (int i = 0; i < normalCount; i++)
        {
            var sample = new double[featureCount];
            for (int j = 0; j < featureCount; j++)
            {
                sample[j] = GaussianRandom(50, 10);
            }

            if (i < normalCount * 0.8)
            {
                trainingData.Add(sample);
            }
            else
            {
                // Only test samples are labelled so that AnomalyLabels stays index-aligned
                // with the returned test set.
                testData.Add(sample);
                AnomalyLabels.Add(false);
            }
        }

        // Anomalous data: per feature, either Gaussian(100, 5) or uniform in [0, 200).
        for (int i = 0; i < anomalyCount; i++)
        {
            var sample = new double[featureCount];
            for (int j = 0; j < featureCount; j++)
            {
                if (_random.NextDouble() < 0.5)
                    sample[j] = GaussianRandom(100, 5);
                else
                    sample[j] = _random.NextDouble() * 200;
            }
            testData.Add(sample);
            AnomalyLabels.Add(true);
        }

        return (trainingData, testData);
    }

    /// <summary>
    /// Simulates one reading of five sensor channels: temperature (°C), pressure (MPa),
    /// vibration (Hz), flow (L/min) and current (A).
    /// </summary>
    public double[] GenerateSensorReading()
    {
        return new double[]
        {
            GaussianRandom(45, 5),     // temperature (°C)
            GaussianRandom(2.5, 0.3),  // pressure (MPa)
            GaussianRandom(50, 10),    // vibration (Hz)
            GaussianRandom(100, 15),   // flow (L/min)
            GaussianRandom(10, 2)      // current (A)
        };
    }

    /// <summary>Draws one normally distributed value using the Box-Muller transform.</summary>
    private double GaussianRandom(double mean, double stdDev)
    {
        // NextDouble() is in [0, 1); flipping it to (0, 1] keeps Math.Log away from 0.
        double u1 = 1.0 - _random.NextDouble();
        double u2 = _random.NextDouble();
        double randStdNormal = Math.Sqrt(-2.0 * Math.Log(u1)) * Math.Sin(2.0 * Math.PI * u2);
        return mean + stdDev * randStdNormal;
    }
}
}
2.5 实时监控系统扩展 (RealTimeMonitor.cs)
csharp
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;
namespace IsolationForest
{
/// <summary>
/// Real-time anomaly monitor: samples are queued by producers, a background loop collects
/// them into windows, scores each full window with the forest and raises an event for
/// every anomalous sample.
/// </summary>
public class RealTimeMonitor
{
    private readonly IsolationForest _forest;
    private readonly ConcurrentQueue<double[]> _dataQueue;
    private readonly int _windowSize;
    private readonly double _threshold;
    private readonly object _lifecycleLock = new object();

    // Replaced on every Start so that the monitor can be restarted after Stop.
    private CancellationTokenSource _cancellationToken;

    /// <summary>Raised on the monitoring thread for every sample flagged as anomalous.</summary>
    public event EventHandler<AnomalyDetectedEventArgs> OnAnomalyDetected;

    /// <summary>Raised with a status text on start, on stop and when the loop catches an exception.</summary>
    public event EventHandler<string> OnStatusUpdate;

    /// <summary>True between <see cref="Start"/> and <see cref="Stop"/>.</summary>
    public bool IsRunning { get; private set; } = false;

    /// <summary>Number of samples taken from the queue so far.</summary>
    public int ProcessedCount { get; private set; } = 0;

    /// <summary>Number of samples flagged as anomalous so far.</summary>
    public int AnomalyCount { get; private set; } = 0;

    /// <summary>Creates a stopped monitor.</summary>
    /// <param name="forest">Trained forest used for scoring.</param>
    /// <param name="windowSize">Number of samples scored per batch; must be at least 1.</param>
    /// <param name="threshold">Score above which a sample is reported.</param>
    /// <exception cref="ArgumentNullException"><paramref name="forest"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="windowSize"/> is below 1.</exception>
    public RealTimeMonitor(IsolationForest forest, int windowSize = 100, double threshold = 0.65)
    {
        _forest = forest ?? throw new ArgumentNullException(nameof(forest));
        if (windowSize < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(windowSize), "windowSize must be at least 1.");
        }

        _dataQueue = new ConcurrentQueue<double[]>();
        _cancellationToken = new CancellationTokenSource();
        _windowSize = windowSize;
        _threshold = threshold;
    }

    /// <summary>
    /// Queues one sample. When the queue grows beyond twice the window size, the oldest
    /// entries are dropped.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public void AddData(double[] data)
    {
        if (data == null)
        {
            throw new ArgumentNullException(nameof(data));
        }

        _dataQueue.Enqueue(data);
        while (_dataQueue.Count > _windowSize * 2)
        {
            _dataQueue.TryDequeue(out _);
        }
    }

    /// <summary>Starts the background loop; does nothing when it is already running.</summary>
    public void Start()
    {
        CancellationToken token;
        lock (_lifecycleLock)
        {
            if (IsRunning) return;

            // A cancelled source cannot be reused, so every run gets a fresh one.
            _cancellationToken = new CancellationTokenSource();
            token = _cancellationToken.Token;
            IsRunning = true;
        }

        Task.Run(() => MonitorLoop(token), token);
        OnStatusUpdate?.Invoke(this, "实时监控系统已启动");
    }

    /// <summary>Requests the background loop to stop and reports the new status.</summary>
    public void Stop()
    {
        CancellationTokenSource source;
        lock (_lifecycleLock)
        {
            IsRunning = false;
            source = _cancellationToken;
        }

        source.Cancel();
        OnStatusUpdate?.Invoke(this, "实时监控系统已停止");
    }

    /// <summary>
    /// Drains the queue into a window buffer, scores every full window and then waits 100 ms.
    /// The loop is governed solely by <paramref name="token"/>, so a loop from a previous run
    /// cannot be revived by a later Start.
    /// </summary>
    private async Task MonitorLoop(CancellationToken token)
    {
        var buffer = new List<double[]>();
        while (!token.IsCancellationRequested)
        {
            try
            {
                while (!token.IsCancellationRequested && _dataQueue.TryDequeue(out double[] data))
                {
                    buffer.Add(data);
                    ProcessedCount++;
                    if (buffer.Count >= _windowSize)
                    {
                        try
                        {
                            ProcessWindow(buffer);
                        }
                        finally
                        {
                            // Clear even on failure; otherwise the same window would be
                            // re-scored (and fail again) for every later sample.
                            buffer.Clear();
                        }
                    }
                }

                await Task.Delay(100, token); // poll every 100 ms
            }
            catch (OperationCanceledException)
            {
                break;
            }
            catch (Exception ex)
            {
                OnStatusUpdate?.Invoke(this, $"监控异常: {ex.Message}");
            }
        }
    }

    /// <summary>Scores one full window and raises <see cref="OnAnomalyDetected"/> per flagged sample.</summary>
    private void ProcessWindow(List<double[]> window)
    {
        var result = _forest.DetectAnomalies(window, _threshold);

        // Stream index of the first sample of this window.
        int firstIndex = ProcessedCount - window.Count;
        for (int i = 0; i < result.IsAnomaly.Count; i++)
        {
            if (!result.IsAnomaly[i])
            {
                continue;
            }

            AnomalyCount++;
            OnAnomalyDetected?.Invoke(this, new AnomalyDetectedEventArgs
            {
                Timestamp = DateTime.Now,
                Data = window[i],
                Score = result.Scores[i],
                Index = firstIndex + i
            });
        }
    }
}
/// <summary>
/// Payload of the anomaly-detected event: which sample was flagged, when, and with what score.
/// </summary>
public class AnomalyDetectedEventArgs : EventArgs
{
    /// <summary>Position of the flagged sample in the processed stream.</summary>
    public int Index { get; set; }

    /// <summary>Anomaly score assigned to the sample.</summary>
    public double Score { get; set; }

    /// <summary>Feature vector of the flagged sample.</summary>
    public double[] Data { get; set; }

    /// <summary>Time at which the anomaly was reported.</summary>
    public DateTime Timestamp { get; set; }
}
}
三、工业应用场景
3.1 适用场景
| 场景 | 说明 | 优势 |
|---|---|---|
| 设备健康监测 | 振动、温度、电流异常检测 | 无需标签数据,实时性强 |
| 网络安全入侵检测 | 流量异常、行为异常 | 对新型攻击敏感 |
| 金融欺诈检测 | 交易金额、频率异常 | 快速响应 |
| 工业质量控制 | 产品尺寸、重量偏差 | 减少人工检验 |
| 环境监测 | 污染物浓度突变 | 早期预警 |
3.2 参数调优建议
| 参数 | 建议值 | 说明 |
|---|---|---|
| numTrees | 100-200 | 树越多越稳定,但计算量增加 |
| sampleSize | 256-512 | 每棵树的子采样规模;256 通常已足够,继续增大收益有限,且会增加计算与内存开销 |
| maxHeight | ceil(log2(sampleSize)) | 树高上限;异常点的路径较短,超过平均树高的分割对区分异常帮助不大,限制树高可减少计算量 |
| threshold | 0.6-0.7 | 异常分数阈值,根据业务调整 |
3.3 性能优化
csharp
// 1. Parallel scoring. Each iteration writes only its own slot of a pre-sized array, so no
//    lock is needed and scores[i] always belongs to samples[i]. (Calling Add on a shared
//    List<double> from several threads is not safe and loses the ordering.)
//    Assumes samples is an indexable list such as List<double[]> — TODO confirm.
var scores = new double[samples.Count];
Parallel.For(0, samples.Count, i =>
{
    scores[i] = forest.AnomalyScore(samples[i]);
});
// 2. Incremental update (online learning).
//    NOTE(review): AddTrainingData is not defined by the IsolationForest class in section 2.3;
//    it has to be implemented before this line compiles.
forest.AddTrainingData(newSamples);
// 3. Feature selection (dimensionality reduction).
//    NOTE(review): FeatureSelector is not defined anywhere in this document — presumably a
//    user-supplied helper; confirm.
var selectedFeatures = FeatureSelector.SelectTopK(samples, 10);
参考代码:孤立森林——基于 Ensemble 的快速异常检测方法,见 www.youwenfan.com/contentcsu/63115.html
四、部署与集成
4.1 与现有系统集成
csharp
// 1. Integration with an STM32 data-acquisition system.
/// <summary>
/// Scores incoming sensor readings with an isolation forest and raises an alarm when the
/// score exceeds the alarm threshold.
/// NOTE(review): ConvertToFeatures, TriggerAlarm and LogAnomaly are not defined in this
/// snippet — presumably supplied by the host application; confirm.
/// </summary>
public class SensorMonitor
{
    // Score above which a reading is reported as anomalous.
    private const double AlarmThreshold = 0.7;

    private readonly IsolationForest _forest;

    /// <summary>Creates a monitor that scores readings with <paramref name="forest"/>.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="forest"/> is null.</exception>
    public SensorMonitor(IsolationForest forest)
    {
        // Without this assignment the field stayed null and every call below failed.
        _forest = forest ?? throw new System.ArgumentNullException(nameof(forest));
    }

    /// <summary>Converts one set of raw readings to features, scores it and alarms/logs if anomalous.</summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="readings"/> is null.</exception>
    public void ProcessSensorData(float[] readings)
    {
        if (readings == null)
        {
            throw new System.ArgumentNullException(nameof(readings));
        }

        double[] features = ConvertToFeatures(readings);
        double score = _forest.AnomalyScore(features);
        if (score > AlarmThreshold)
        {
            TriggerAlarm("传感器异常", score);
            LogAnomaly(readings, score);
        }
    }
}
4.2 Docker 部署
dockerfile
# Runtime-only .NET 6.0 base image (no SDK); the application is built and published outside the image.
FROM mcr.microsoft.com/dotnet/runtime:6.0
# All following paths are relative to /app.
WORKDIR /app
# Copy the published output from the build context; assumes `dotnet publish -c Release`
# targeting net6.0 was run beforehand — TODO confirm against the build pipeline.
COPY bin/Release/net6.0/publish .
# Start the application with the dotnet host.
ENTRYPOINT ["dotnet", "IsolationForest.dll"]