机器学习评估指标详解 - 高级篇

本文是机器学习评估指标系列的第三篇，聚焦于后处理、模型量化、模型部署和全流程监控指标，帮助您在实际项目中全面评估和优化机器学习系统。

引言

在前两篇中，我们学习了数据质量、前处理、训练过程和模型验证指标。在高级篇中，我们将深入探讨：

📊 图表生成说明 ：本文档中的所有图表都可以通过运行 docs/ml_metrics_visualization_complete.py 脚本生成。每个指标部分都包含了相应的图表说明和代码引用。

后处理优化：如何优化模型输出以提高性能
模型量化：如何压缩模型以加速推理
模型部署：如何评估生产环境中的模型性能
全流程监控：如何监控整个机器学习系统的健康状态

这些指标帮助我们：

优化模型后处理流程
平衡模型精度和推理速度
确保模型在生产环境中的稳定性
持续改进模型性能

后处理指标

1. 非极大值抑制 (NMS) 指标

NMS效果评估

定义：评估NMS对检测结果的影响

关键指标：

检测数量变化：NMS前后检测框数量
mAP变化：NMS对mAP的影响
FP/TP变化：误检和正确检测的变化

Python实现：

python 复制代码

import numpy as np
from typing import List, Dict

def evaluate_nms_effect(detections: List[Dict], ground_truth: List[Dict], 
                       iou_thresholds=[0.3, 0.5, 0.7], score_threshold=0.5):
    """
    评估NMS效果
    
    Args:
        detections: 检测结果列表
        ground_truth: 真实标注列表
        iou_thresholds: NMS的IoU阈值列表
        score_threshold: 置信度阈值
    
    Returns:
        dict: NMS效果评估结果
    """
    results = {}
    
    # 原始检测结果
    original_count = len(detections)
    original_metrics = calculate_detection_metrics(detections, ground_truth)
    
    results['original'] = {
        'detection_count': original_count,
        'metrics': original_metrics
    }
    
    # 不同IoU阈值下的NMS效果
    for iou_thresh in iou_thresholds:
        nms_detections = apply_nms(detections, iou_threshold=iou_thresh, 
                                   score_threshold=score_threshold)
        nms_count = len(nms_detections)
        nms_metrics = calculate_detection_metrics(nms_detections, ground_truth)
        
        results[f'nms_iou_{iou_thresh}'] = {
            'detection_count': nms_count,
            'reduction_rate': (original_count - nms_count) / original_count * 100,
            'metrics': nms_metrics,
            'map_change': nms_metrics['mAP'] - original_metrics['mAP']
        }
    
    return results

def apply_nms(detections: List[Dict], iou_threshold=0.5, score_threshold=0.5):
    """
    应用NMS
    """
    # 按置信度排序
    detections_sorted = sorted(detections, key=lambda x: x['confidence'], reverse=True)
    
    kept = []
    while detections_sorted:
        # 选择置信度最高的
        current = detections_sorted.pop(0)
        if current['confidence'] < score_threshold:
            continue
        
        kept.append(current)
        
        # 移除与当前框IoU > 阈值的框
        detections_sorted = [
            det for det in detections_sorted
            if calculate_iou(current['bbox'], det['bbox']) < iou_threshold
        ]
    
    return kept

可视化图表：

生成图表代码：

python 复制代码

from docs.ml_metrics_visualization_complete import plot_nms_comparison
plot_nms_comparison()

Soft-NMS效果评估

定义：评估Soft-NMS相比标准NMS的改进

Python实现：

python 复制代码

def apply_soft_nms(detections: List[Dict], iou_threshold=0.5, 
                   score_threshold=0.5, sigma=0.5, method='linear'):
    """
    应用Soft-NMS
    
    Args:
        method: 'linear' 或 'gaussian'
    """
    detections_sorted = sorted(detections, key=lambda x: x['confidence'], reverse=True)
    
    kept = []
    while detections_sorted:
        current = detections_sorted.pop(0)
        if current['confidence'] < score_threshold:
            continue
        
        kept.append(current)
        
        # 降低重叠框的置信度
        for det in detections_sorted:
            iou = calculate_iou(current['bbox'], det['bbox'])
            if iou > iou_threshold:
                if method == 'linear':
                    det['confidence'] *= (1 - iou)
                elif method == 'gaussian':
                    det['confidence'] *= np.exp(-(iou ** 2) / sigma)
        
        # 重新排序
        detections_sorted = sorted(detections_sorted, 
                                  key=lambda x: x['confidence'], reverse=True)
    
    return kept

def compare_nms_methods(detections: List[Dict], ground_truth: List[Dict]):
    """
    比较标准NMS和Soft-NMS
    """
    # 标准NMS
    standard_nms = apply_nms(detections, iou_threshold=0.5)
    standard_metrics = calculate_detection_metrics(standard_nms, ground_truth)
    
    # Soft-NMS (Linear)
    soft_nms_linear = apply_soft_nms(detections, method='linear')
    soft_linear_metrics = calculate_detection_metrics(soft_nms_linear, ground_truth)
    
    # Soft-NMS (Gaussian)
    soft_nms_gaussian = apply_soft_nms(detections, method='gaussian')
    soft_gaussian_metrics = calculate_detection_metrics(soft_nms_gaussian, ground_truth)
    
    return {
        'standard_nms': {
            'count': len(standard_nms),
            'metrics': standard_metrics
        },
        'soft_nms_linear': {
            'count': len(soft_nms_linear),
            'metrics': soft_linear_metrics
        },
        'soft_nms_gaussian': {
            'count': len(soft_nms_gaussian),
            'metrics': soft_gaussian_metrics
        }
    }

2. 置信度阈值优化

最优阈值搜索

定义：找到使F1分数最大的置信度阈值

Python实现：

python 复制代码

def find_optimal_threshold(detections: List[Dict], ground_truth: List[Dict], 
                          metric='f1', iou_threshold=0.5):
    """
    寻找最优置信度阈值
    
    Args:
        metric: 'f1', 'precision', 'recall', 'mAP'
    """
    thresholds = np.arange(0.1, 1.0, 0.05)
    results = []
    
    for threshold in thresholds:
        filtered_detections = [d for d in detections if d['confidence'] >= threshold]
        metrics = calculate_detection_metrics(filtered_detections, ground_truth, 
                                            iou_threshold=iou_threshold)
        
        results.append({
            'threshold': threshold,
            'metric_value': metrics[metric],
            'detection_count': len(filtered_detections),
            'precision': metrics['precision'],
            'recall': metrics['recall']
        })
    
    # 找到最优阈值
    best_result = max(results, key=lambda x: x['metric_value'])
    
    return {
        'optimal_threshold': best_result['threshold'],
        'optimal_metric_value': best_result['metric_value'],
        'all_results': results,
        'best_result': best_result
    }

def plot_threshold_analysis(threshold_results):
    """
    绘制阈值分析图
    """
    thresholds = [r['threshold'] for r in threshold_results['all_results']]
    f1_scores = [r['metric_value'] for r in threshold_results['all_results']]
    precisions = [r['precision'] for r in threshold_results['all_results']]
    recalls = [r['recall'] for r in threshold_results['all_results']]
    
    plt.figure(figsize=(12, 5))
    
    plt.subplot(1, 2, 1)
    plt.plot(thresholds, f1_scores, 'o-', label='F1 Score', color='blue')
    plt.plot(thresholds, precisions, 'o-', label='Precision', color='green')
    plt.plot(thresholds, recalls, 'o-', label='Recall', color='red')
    plt.axvline(x=threshold_results['optimal_threshold'], 
                color='black', linestyle='--', label='Optimal Threshold')
    plt.xlabel('Confidence Threshold')
    plt.ylabel('Score')
    plt.title('Threshold Analysis')
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    plt.subplot(1, 2, 2)
    detection_counts = [r['detection_count'] for r in threshold_results['all_results']]
    plt.plot(thresholds, detection_counts, 'o-', color='purple')
    plt.xlabel('Confidence Threshold')
    plt.ylabel('Detection Count')
    plt.title('Detection Count vs Threshold')
    plt.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

可视化图表：

生成图表代码：

python 复制代码

from docs.ml_metrics_visualization_complete import plot_threshold_optimization
plot_threshold_optimization()

类别特定阈值优化

Python实现：

python 复制代码

def optimize_class_specific_thresholds(detections: List[Dict], ground_truth: List[Dict]):
    """
    为每个类别优化置信度阈值
    """
    classes = set([d['class'] for d in detections])
    optimal_thresholds = {}
    
    for cls in classes:
        class_detections = [d for d in detections if d['class'] == cls]
        class_ground_truth = [gt for gt in ground_truth if gt['class'] == cls]
        
        threshold_result = find_optimal_threshold(class_detections, class_ground_truth)
        optimal_thresholds[cls] = threshold_result['optimal_threshold']
    
    return optimal_thresholds

3. 后处理性能指标

后处理时间

Python实现：

python 复制代码

import time

def measure_postprocessing_time(detections: List[Dict], nms_threshold=0.5, 
                               score_threshold=0.5, num_runs=100):
    """
    测量后处理时间
    """
    times = []
    
    for _ in range(num_runs):
        start_time = time.time()
        processed = apply_nms(detections, iou_threshold=nms_threshold, 
                             score_threshold=score_threshold)
        end_time = time.time()
        times.append(end_time - start_time)
    
    return {
        'avg_time_ms': np.mean(times) * 1000,
        'std_time_ms': np.std(times) * 1000,
        'min_time_ms': np.min(times) * 1000,
        'max_time_ms': np.max(times) * 1000,
        'throughput': len(detections) / np.mean(times)  # detections per second
    }

模型量化指标

1. 量化精度指标

量化精度损失

定义：量化前后模型精度的变化

Python实现：

python 复制代码

import torch
import torch.quantization as quantization

def evaluate_quantization_accuracy(model_fp32, model_int8, test_loader, device='cuda'):
    """
    评估量化精度损失
    
    Args:
        model_fp32: FP32模型
        model_int8: INT8量化模型
        test_loader: 测试数据加载器
    """
    # FP32模型精度
    fp32_accuracy = evaluate_model(model_fp32, test_loader, device)
    
    # INT8模型精度
    int8_accuracy = evaluate_model(model_int8, test_loader, device)
    
    # 精度损失
    accuracy_drop = fp32_accuracy - int8_accuracy
    relative_drop = (accuracy_drop / fp32_accuracy) * 100 if fp32_accuracy > 0 else 0
    
    return {
        'fp32_accuracy': fp32_accuracy,
        'int8_accuracy': int8_accuracy,
        'accuracy_drop': accuracy_drop,
        'relative_drop_percent': relative_drop,
        'retention_rate': (int8_accuracy / fp32_accuracy) * 100 if fp32_accuracy > 0 else 0
    }

def evaluate_model(model, test_loader, device='cuda'):
    """
    评估模型精度
    """
    model.eval()
    correct = 0
    total = 0
    
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    
    return correct / total

逐层精度分析

Python实现：

python 复制代码

def analyze_layerwise_quantization_error(model_fp32, model_int8, test_loader, device='cuda'):
    """
    分析逐层量化误差
    """
    layer_errors = {}
    
    model_fp32.eval()
    model_int8.eval()
    
    with torch.no_grad():
        for inputs, _ in test_loader:
            inputs = inputs.to(device)
            
            # FP32前向传播（记录中间层输出）
            fp32_outputs = {}
            x = inputs
            for name, module in model_fp32.named_modules():
                if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
                    x = module(x)
                    fp32_outputs[name] = x.clone()
            
            # INT8前向传播（记录中间层输出）
            int8_outputs = {}
            x = inputs
            for name, module in model_int8.named_modules():
                if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
                    x = module(x)
                    int8_outputs[name] = x.clone()
            
            # 计算每层误差
            for name in fp32_outputs:
                if name in int8_outputs:
                    fp32_out = fp32_outputs[name]
                    int8_out = int8_outputs[name]
                    
                    mse = torch.mean((fp32_out - int8_out) ** 2).item()
                    mae = torch.mean(torch.abs(fp32_out - int8_out)).item()
                    
                    if name not in layer_errors:
                        layer_errors[name] = {'mse': [], 'mae': []}
                    
                    layer_errors[name]['mse'].append(mse)
                    layer_errors[name]['mae'].append(mae)
    
    # 计算平均误差
    layer_avg_errors = {}
    for name, errors in layer_errors.items():
        layer_avg_errors[name] = {
            'avg_mse': np.mean(errors['mse']),
            'avg_mae': np.mean(errors['mae'])
        }
    
    return layer_avg_errors

可视化图表：

生成图表代码：

python 复制代码

from docs.ml_metrics_visualization_complete import plot_quantization_detailed
plot_quantization_detailed()

2. 量化压缩指标

模型大小压缩比

Python实现：

python 复制代码

def calculate_quantization_compression(model_fp32, model_int8):
    """
    计算量化压缩比
    """
    # FP32模型大小
    fp32_size = calculate_model_size(model_fp32)
    
    # INT8模型大小
    int8_size = calculate_model_size(model_int8)
    
    compression_ratio = fp32_size / int8_size
    
    return {
        'fp32_size_mb': fp32_size,
        'int8_size_mb': int8_size,
        'compression_ratio': compression_ratio,
        'size_reduction_percent': (1 - int8_size / fp32_size) * 100
    }

def calculate_model_size(model):
    """
    计算模型大小（MB）
    """
    import os
    import tempfile
    
    with tempfile.NamedTemporaryFile(delete=False) as f:
        torch.save(model.state_dict(), f.name)
        size_mb = os.path.getsize(f.name) / (1024 * 1024)
        os.unlink(f.name)
    
    return size_mb

内存占用压缩

Python实现：

python 复制代码

def calculate_memory_compression(model_fp32, model_int8, input_size, batch_size=1):
    """
    计算内存压缩比
    """
    # FP32内存占用
    fp32_memory = estimate_memory_usage(model_fp32, input_size, batch_size)
    
    # INT8内存占用（参数内存减少4倍）
    int8_memory = estimate_memory_usage(model_int8, input_size, batch_size)
    
    memory_compression = fp32_memory['total_memory_mb'] / int8_memory['total_memory_mb']
    
    return {
        'fp32_memory_mb': fp32_memory['total_memory_mb'],
        'int8_memory_mb': int8_memory['total_memory_mb'],
        'memory_compression_ratio': memory_compression,
        'memory_reduction_percent': (1 - int8_memory['total_memory_mb'] / fp32_memory['total_memory_mb']) * 100
    }

3. 量化加速指标

推理速度提升

Python实现：

python 复制代码

def measure_quantization_speedup(model_fp32, model_int8, input_size, 
                                 num_runs=100, device='cuda'):
    """
    测量量化加速比
    """
    # FP32推理时间
    fp32_time = measure_inference_time(model_fp32, input_size, num_runs, device)
    
    # INT8推理时间
    int8_time = measure_inference_time(model_int8, input_size, num_runs, device)
    
    speedup = fp32_time['avg_inference_time_ms'] / int8_time['avg_inference_time_ms']
    
    return {
        'fp32_time_ms': fp32_time['avg_inference_time_ms'],
        'int8_time_ms': int8_time['avg_inference_time_ms'],
        'speedup': speedup,
        'time_reduction_percent': (1 - int8_time['avg_inference_time_ms'] / fp32_time['avg_inference_time_ms']) * 100,
        'fp32_fps': fp32_time['fps'],
        'int8_fps': int8_time['fps']
    }

吞吐量提升

Python实现：

python 复制代码

def measure_quantization_throughput_improvement(model_fp32, model_int8, 
                                               input_size, batch_sizes=[1, 4, 8, 16, 32],
                                               device='cuda'):
    """
    测量量化吞吐量提升
    """
    results = {}
    
    for batch_size in batch_sizes:
        batch_input_size = (batch_size,) + input_size[1:]
        
        fp32_throughput = measure_throughput(model_fp32, batch_input_size, 
                                            [batch_size], device)[batch_size]
        int8_throughput = measure_throughput(model_int8, batch_input_size, 
                                            [batch_size], device)[batch_size]
        
        speedup = int8_throughput['samples_per_second'] / fp32_throughput['samples_per_second']
        
        results[batch_size] = {
            'fp32_samples_per_sec': fp32_throughput['samples_per_second'],
            'int8_samples_per_sec': int8_throughput['samples_per_second'],
            'speedup': speedup
        }
    
    return results

4. 量化方法对比

静态量化 vs 动态量化

Python实现：

python 复制代码

def compare_quantization_methods(model, test_loader, calibration_loader, device='cuda'):
    """
    比较静态量化和动态量化
    """
    # 静态量化
    model_static = quantization.quantize(model, calibration_loader, device)
    static_metrics = evaluate_quantization_metrics(model, model_static, test_loader, device)
    
    # 动态量化
    model_dynamic = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
    dynamic_metrics = evaluate_quantization_metrics(model, model_dynamic, test_loader, device)
    
    return {
        'static_quantization': static_metrics,
        'dynamic_quantization': dynamic_metrics,
        'comparison': {
            'accuracy_drop': {
                'static': static_metrics['accuracy_drop'],
                'dynamic': dynamic_metrics['accuracy_drop']
            },
            'speedup': {
                'static': static_metrics['speedup'],
                'dynamic': dynamic_metrics['speedup']
            }
        }
    }

模型压缩指标

1. 模型剪枝指标

剪枝率

定义：被剪枝的参数占总参数的比例

Python实现：

python 复制代码

def calculate_pruning_ratio(model_pruned):
    """
    计算剪枝率
    """
    total_params = 0
    pruned_params = 0
    
    for name, param in model_pruned.named_parameters():
        total_params += param.numel()
        pruned_params += (param == 0).sum().item()
    
    pruning_ratio = pruned_params / total_params if total_params > 0 else 0
    
    return {
        'total_params': total_params,
        'pruned_params': pruned_params,
        'remaining_params': total_params - pruned_params,
        'pruning_ratio': pruning_ratio,
        'pruning_ratio_percent': pruning_ratio * 100
    }

剪枝精度损失

Python实现：

python 复制代码

def evaluate_pruning_impact(model_original, model_pruned, test_loader, device='cuda'):
    """
    评估剪枝影响
    """
    # 原始模型精度
    original_accuracy = evaluate_model(model_original, test_loader, device)
    
    # 剪枝后模型精度
    pruned_accuracy = evaluate_model(model_pruned, test_loader, device)
    
    # 剪枝率
    pruning_stats = calculate_pruning_ratio(model_pruned)
    
    return {
        'original_accuracy': original_accuracy,
        'pruned_accuracy': pruned_accuracy,
        'accuracy_drop': original_accuracy - pruned_accuracy,
        'pruning_stats': pruning_stats
    }

2. 知识蒸馏指标

教师-学生模型性能对比

Python实现：

python 复制代码

def compare_teacher_student_models(teacher_model, student_model, test_loader, device='cuda'):
    """
    比较教师模型和学生模型
    """
    teacher_accuracy = evaluate_model(teacher_model, test_loader, device)
    student_accuracy = evaluate_model(student_model, test_loader, device)
    
    teacher_size = calculate_model_size(teacher_model)
    student_size = calculate_model_size(student_model)
    
    teacher_time = measure_inference_time(teacher_model, (1, 3, 224, 224), device=device)
    student_time = measure_inference_time(student_model, (1, 3, 224, 224), device=device)
    
    return {
        'teacher': {
            'accuracy': teacher_accuracy,
            'size_mb': teacher_size,
            'inference_time_ms': teacher_time['avg_inference_time_ms']
        },
        'student': {
            'accuracy': student_accuracy,
            'size_mb': student_size,
            'inference_time_ms': student_time['avg_inference_time_ms']
        },
        'compression': {
            'size_ratio': teacher_size / student_size,
            'speedup': teacher_time['avg_inference_time_ms'] / student_time['avg_inference_time_ms'],
            'accuracy_retention': student_accuracy / teacher_accuracy
        }
    }

模型部署指标

1. 延迟指标 (Latency)

端到端延迟

定义：从输入到输出完成的总时间

Python实现：

python 复制代码

def measure_end_to_end_latency(model, preprocess_fn, postprocess_fn, 
                               input_data, num_runs=100):
    """
    测量端到端延迟
    """
    latencies = []
    
    for _ in range(num_runs):
        start_time = time.time()
        
        # 预处理
        processed_input = preprocess_fn(input_data)
        
        # 推理
        output = model(processed_input)
        
        # 后处理
        final_output = postprocess_fn(output)
        
        end_time = time.time()
        latencies.append(end_time - start_time)
    
    return {
        'avg_latency_ms': np.mean(latencies) * 1000,
        'p50_latency_ms': np.percentile(latencies, 50) * 1000,
        'p95_latency_ms': np.percentile(latencies, 95) * 1000,
        'p99_latency_ms': np.percentile(latencies, 99) * 1000,
        'min_latency_ms': np.min(latencies) * 1000,
        'max_latency_ms': np.max(latencies) * 1000
    }

延迟分解分析

Python实现：

python 复制代码

def analyze_latency_breakdown(model, preprocess_fn, postprocess_fn, input_data, num_runs=100):
    """
    分析延迟分解
    """
    preprocess_times = []
    inference_times = []
    postprocess_times = []
    
    for _ in range(num_runs):
        # 预处理时间
        start = time.time()
        processed_input = preprocess_fn(input_data)
        preprocess_times.append(time.time() - start)
        
        # 推理时间
        start = time.time()
        output = model(processed_input)
        inference_times.append(time.time() - start)
        
        # 后处理时间
        start = time.time()
        final_output = postprocess_fn(output)
        postprocess_times.append(time.time() - start)
    
    return {
        'preprocess': {
            'avg_ms': np.mean(preprocess_times) * 1000,
            'p95_ms': np.percentile(preprocess_times, 95) * 1000
        },
        'inference': {
            'avg_ms': np.mean(inference_times) * 1000,
            'p95_ms': np.percentile(inference_times, 95) * 1000
        },
        'postprocess': {
            'avg_ms': np.mean(postprocess_times) * 1000,
            'p95_ms': np.percentile(postprocess_times, 95) * 1000
        },
        'total': {
            'avg_ms': (np.mean(preprocess_times) + np.mean(inference_times) + 
                      np.mean(postprocess_times)) * 1000
        }
    }

2. 吞吐量指标 (Throughput)

QPS (Queries Per Second)

Python实现：

python 复制代码

def measure_qps(model, input_generator, duration_seconds=60):
    """
    测量QPS
    """
    start_time = time.time()
    query_count = 0
    
    while time.time() - start_time < duration_seconds:
        input_data = next(input_generator)
        _ = model(input_data)
        query_count += 1
    
    elapsed_time = time.time() - start_time
    qps = query_count / elapsed_time
    
    return {
        'qps': qps,
        'total_queries': query_count,
        'duration_seconds': elapsed_time
    }

并发吞吐量

Python实现：

python 复制代码

import concurrent.futures
import threading

def measure_concurrent_throughput(model, input_generator, num_threads=4, duration_seconds=60):
    """
    测量并发吞吐量
    """
    query_count = [0]  # 使用列表以便在线程间共享
    lock = threading.Lock()
    
    def worker():
        local_count = 0
        start_time = time.time()
        while time.time() - start_time < duration_seconds:
            input_data = next(input_generator)
            _ = model(input_data)
            local_count += 1
        
        with lock:
            query_count[0] += local_count
    
    threads = []
    start_time = time.time()
    for _ in range(num_threads):
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)
    
    for t in threads:
        t.join()
    
    elapsed_time = time.time() - start_time
    total_qps = query_count[0] / elapsed_time
    
    return {
        'qps': total_qps,
        'qps_per_thread': total_qps / num_threads,
        'total_queries': query_count[0],
        'num_threads': num_threads,
        'duration_seconds': elapsed_time
    }

3. 资源占用指标

CPU使用率

Python实现：

python 复制代码

import psutil

def monitor_cpu_usage(model, input_generator, duration_seconds=60):
    """
    监控CPU使用率
    """
    cpu_percentages = []
    
    def monitor():
        while True:
            cpu_percentages.append(psutil.cpu_percent(interval=1))
            if len(cpu_percentages) * 1 >= duration_seconds:
                break
    
    monitor_thread = threading.Thread(target=monitor)
    monitor_thread.start()
    
    # 运行模型
    start_time = time.time()
    query_count = 0
    while time.time() - start_time < duration_seconds:
        input_data = next(input_generator)
        _ = model(input_data)
        query_count += 1
    
    monitor_thread.join()
    
    return {
        'avg_cpu_percent': np.mean(cpu_percentages),
        'max_cpu_percent': np.max(cpu_percentages),
        'min_cpu_percent': np.min(cpu_percentages),
        'cpu_samples': cpu_percentages
    }

内存占用

Python实现：

python 复制代码

def monitor_memory_usage(model, input_generator, duration_seconds=60):
    """
    监控内存占用
    """
    process = psutil.Process()
    memory_samples = []
    
    start_time = time.time()
    while time.time() - start_time < duration_seconds:
        input_data = next(input_generator)
        _ = model(input_data)
        memory_samples.append(process.memory_info().rss / 1024 / 1024)  # MB
    
    return {
        'avg_memory_mb': np.mean(memory_samples),
        'max_memory_mb': np.max(memory_samples),
        'min_memory_mb': np.min(memory_samples),
        'memory_samples': memory_samples
    }

GPU使用率

Python实现：

python 复制代码

try:
    import pynvml
    
    def monitor_gpu_usage(duration_seconds=60):
        """
        监控GPU使用率
        """
        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
        
        gpu_utilizations = []
        gpu_memories = []
        
        start_time = time.time()
        while time.time() - start_time < duration_seconds:
            util = pynvml.nvmlDeviceGetUtilizationRates(handle)
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            
            gpu_utilizations.append(util.gpu)
            gpu_memories.append(mem_info.used / 1024 / 1024)  # MB
            
            time.sleep(1)
        
        return {
            'avg_gpu_util_percent': np.mean(gpu_utilizations),
            'max_gpu_util_percent': np.max(gpu_utilizations),
            'avg_gpu_memory_mb': np.mean(gpu_memories),
            'max_gpu_memory_mb': np.max(gpu_memories)
        }
except ImportError:
    print("请安装pynvml: pip install nvidia-ml-py3")

可视化图表：

生成图表代码：

python 复制代码

from docs.ml_metrics_visualization_complete import plot_deployment_performance
plot_deployment_performance()

4. 可用性指标

服务可用性 (Availability)

定义：服务正常运行时间占总时间的比例

计算公式：

复制代码

可用性 = (总时间 - 故障时间) / 总时间 × 100%

Python实现：

python 复制代码

class ServiceAvailabilityMonitor:
    """
    服务可用性监控
    """
    def __init__(self):
        self.start_time = time.time()
        self.downtime = 0
        self.last_failure_time = None
    
    def record_failure(self):
        """
        记录故障
        """
        if self.last_failure_time is None:
            self.last_failure_time = time.time()
    
    def record_recovery(self):
        """
        记录恢复
        """
        if self.last_failure_time is not None:
            self.downtime += time.time() - self.last_failure_time
            self.last_failure_time = None
    
    def get_availability(self):
        """
        计算可用性
        """
        total_time = time.time() - self.start_time
        if self.last_failure_time is not None:
            current_downtime = self.downtime + (time.time() - self.last_failure_time)
        else:
            current_downtime = self.downtime
        
        availability = (total_time - current_downtime) / total_time * 100
        return {
            'availability_percent': availability,
            'uptime_seconds': total_time - current_downtime,
            'downtime_seconds': current_downtime
        }

错误率 (Error Rate)

Python实现：

python 复制代码

class ErrorRateMonitor:
    """
    错误率监控
    """
    def __init__(self):
        self.total_requests = 0
        self.error_requests = 0
        self.error_types = {}
    
    def record_request(self, success=True, error_type=None):
        """
        记录请求
        """
        self.total_requests += 1
        if not success:
            self.error_requests += 1
            if error_type:
                self.error_types[error_type] = self.error_types.get(error_type, 0) + 1
    
    def get_error_rate(self):
        """
        计算错误率
        """
        error_rate = (self.error_requests / self.total_requests * 100) if self.total_requests > 0 else 0
        
        return {
            'error_rate_percent': error_rate,
            'total_requests': self.total_requests,
            'error_requests': self.error_requests,
            'success_rate_percent': 100 - error_rate,
            'error_types': self.error_types
        }

模型监控指标

1. 数据漂移指标 (Data Drift)

特征分布变化

Python实现：

python 复制代码

from scipy import stats

def detect_feature_drift(reference_data, current_data, feature_name):
    """
    检测特征漂移
    """
    # Kolmogorov-Smirnov测试
    ks_statistic, ks_pvalue = stats.ks_2samp(reference_data[feature_name], 
                                            current_data[feature_name])
    
    # 均值变化
    mean_reference = np.mean(reference_data[feature_name])
    mean_current = np.mean(current_data[feature_name])
    mean_change = abs(mean_current - mean_reference) / mean_reference * 100 if mean_reference != 0 else 0
    
    # 标准差变化
    std_reference = np.std(reference_data[feature_name])
    std_current = np.std(current_data[feature_name])
    std_change = abs(std_current - std_reference) / std_reference * 100 if std_reference != 0 else 0
    
    return {
        'feature_name': feature_name,
        'ks_statistic': ks_statistic,
        'ks_pvalue': ks_pvalue,
        'drift_detected': ks_pvalue < 0.05,
        'mean_change_percent': mean_change,
        'std_change_percent': std_change
    }

可视化图表：

生成图表代码：

python 复制代码

from docs.ml_metrics_visualization_complete import plot_data_drift_detection
plot_data_drift_detection()

类别分布变化

Python实现：

python 复制代码

from scipy.stats import chi2_contingency

def detect_label_drift(reference_labels, current_labels):
    """
    检测标签分布漂移
    """
    # 构建列联表
    ref_counts = pd.Series(reference_labels).value_counts()
    curr_counts = pd.Series(current_labels).value_counts()
    
    all_labels = set(ref_counts.index) | set(curr_counts.index)
    contingency_table = []
    for label in all_labels:
        contingency_table.append([
            ref_counts.get(label, 0),
            curr_counts.get(label, 0)
        ])
    
    contingency_table = np.array(contingency_table)
    
    # 卡方检验
    chi2_stat, p_value, dof, expected = chi2_contingency(contingency_table)
    
    return {
        'chi2_statistic': chi2_stat,
        'p_value': p_value,
        'drift_detected': p_value < 0.05,
        'reference_distribution': ref_counts.to_dict(),
        'current_distribution': curr_counts.to_dict()
    }

2. 模型性能监控

预测分布监控

Python实现：

python 复制代码

def monitor_prediction_distribution(predictions, reference_distribution=None):
    """
    监控预测分布
    """
    if reference_distribution is None:
        # 使用当前分布作为参考
        reference_distribution = predictions
    
    # 计算KL散度
    from scipy.stats import entropy
    
    # 将预测转换为分布
    pred_dist = np.histogram(predictions, bins=50, density=True)[0]
    ref_dist = np.histogram(reference_distribution, bins=50, density=True)[0]
    
    # 避免零值
    pred_dist = pred_dist + 1e-10
    ref_dist = ref_dist + 1e-10
    
    kl_divergence = entropy(pred_dist, ref_dist)
    
    return {
        'kl_divergence': kl_divergence,
        'distribution_shift': kl_divergence > 0.1,  # 阈值可调
        'prediction_mean': np.mean(predictions),
        'prediction_std': np.std(predictions)
    }

性能下降检测

Python实现：

python 复制代码

class PerformanceMonitor:
    """
    性能监控器
    """
    def __init__(self, baseline_metric, threshold=0.05):
        self.baseline_metric = baseline_metric
        self.threshold = threshold
        self.metrics_history = []
    
    def update(self, current_metric):
        """
        更新当前指标
        """
        self.metrics_history.append(current_metric)
        
        performance_drop = (self.baseline_metric - current_metric) / self.baseline_metric
        
        return {
            'current_metric': current_metric,
            'baseline_metric': self.baseline_metric,
            'performance_drop': performance_drop,
            'performance_drop_percent': performance_drop * 100,
            'alert': performance_drop > self.threshold
        }

A/B测试指标

1. 统计显著性检验

T检验

Python实现：

python 复制代码

from scipy import stats

def ab_test_significance(group_a_metrics, group_b_metrics, alpha=0.05):
    """
    A/B测试统计显著性检验
    """
    # T检验
    t_statistic, p_value = stats.ttest_ind(group_a_metrics, group_b_metrics)
    
    # 计算效应量（Cohen's d）
    mean_a = np.mean(group_a_metrics)
    mean_b = np.mean(group_b_metrics)
    std_a = np.std(group_a_metrics)
    std_b = np.std(group_b_metrics)
    pooled_std = np.sqrt((std_a**2 + std_b**2) / 2)
    cohens_d = (mean_b - mean_a) / pooled_std
    
    return {
        't_statistic': t_statistic,
        'p_value': p_value,
        'significant': p_value < alpha,
        'mean_a': mean_a,
        'mean_b': mean_b,
        'improvement_percent': ((mean_b - mean_a) / mean_a) * 100 if mean_a != 0 else 0,
        'cohens_d': cohens_d,
        'effect_size': 'small' if abs(cohens_d) < 0.2 else 
                      'medium' if abs(cohens_d) < 0.5 else 'large'
    }

2. 置信区间

Python实现：

python 复制代码

def calculate_confidence_interval(metrics, confidence=0.95):
    """
    计算置信区间
    """
    mean = np.mean(metrics)
    std = np.std(metrics)
    n = len(metrics)
    
    # 使用t分布
    from scipy.stats import t
    t_critical = t.ppf((1 + confidence) / 2, n - 1)
    margin_error = t_critical * (std / np.sqrt(n))
    
    return {
        'mean': mean,
        'lower_bound': mean - margin_error,
        'upper_bound': mean + margin_error,
        'confidence': confidence,
        'margin_error': margin_error
    }

全流程性能指标

1. 端到端性能指标

全流程延迟

Python实现：

python 复制代码

def measure_full_pipeline_performance(data_loader, preprocess_fn, model, 
                                      postprocess_fn, num_samples=1000):
    """
    测量全流程性能
    """
    latencies = []
    accuracies = []
    
    for i, (input_data, ground_truth) in enumerate(data_loader):
        if i >= num_samples:
            break
        
        start_time = time.time()
        
        # 预处理
        processed_input = preprocess_fn(input_data)
        
        # 推理
        output = model(processed_input)
        
        # 后处理
        final_output = postprocess_fn(output)
        
        latency = time.time() - start_time
        latencies.append(latency)
        
        # 计算准确率
        accuracy = calculate_accuracy(final_output, ground_truth)
        accuracies.append(accuracy)
    
    return {
        'avg_latency_ms': np.mean(latencies) * 1000,
        'p95_latency_ms': np.percentile(latencies, 95) * 1000,
        'avg_accuracy': np.mean(accuracies),
        'throughput_qps': num_samples / np.sum(latencies)
    }

2. 系统资源利用率

Python实现：

python 复制代码

def measure_system_resource_utilization(duration_seconds=300):
    """
    测量系统资源利用率
    """
    cpu_samples = []
    memory_samples = []
    disk_io_samples = []
    network_io_samples = []
    
    start_time = time.time()
    while time.time() - start_time < duration_seconds:
        # CPU
        cpu_samples.append(psutil.cpu_percent(interval=1))
        
        # 内存
        memory = psutil.virtual_memory()
        memory_samples.append(memory.percent)
        
        # 磁盘IO
        disk_io = psutil.disk_io_counters()
        if disk_io:
            disk_io_samples.append({
                'read_mb': disk_io.read_bytes / 1024 / 1024,
                'write_mb': disk_io.write_bytes / 1024 / 1024
            })
        
        # 网络IO
        net_io = psutil.net_io_counters()
        if net_io:
            network_io_samples.append({
                'sent_mb': net_io.bytes_sent / 1024 / 1024,
                'recv_mb': net_io.bytes_recv / 1024 / 1024
            })
    
    return {
        'cpu': {
            'avg_percent': np.mean(cpu_samples),
            'max_percent': np.max(cpu_samples)
        },
        'memory': {
            'avg_percent': np.mean(memory_samples),
            'max_percent': np.max(memory_samples)
        },
        'disk_io': disk_io_samples,
        'network_io': network_io_samples
    }

模型解释性指标

1. SHAP值分析

Python实现：

python 复制代码

try:
    import shap
    
    def analyze_feature_importance_shap(model, X, sample_size=100):
        """
        使用SHAP分析特征重要性
        """
        # 采样
        X_sample = X[:sample_size] if len(X) > sample_size else X
        
        # 创建SHAP解释器
        explainer = shap.TreeExplainer(model)  # 或 shap.DeepExplainer for neural networks
        shap_values = explainer.shap_values(X_sample)
        
        # 计算特征重要性
        feature_importance = np.abs(shap_values).mean(axis=0)
        
        return {
            'shap_values': shap_values,
            'feature_importance': feature_importance,
            'feature_ranking': np.argsort(feature_importance)[::-1]
        }
except ImportError:
    print("请安装shap: pip install shap")

2. 特征重要性一致性

Python实现：

python 复制代码

def measure_feature_importance_consistency(model, X, y, num_runs=10):
    """
    测量特征重要性的一致性
    """
    importances_list = []
    
    for _ in range(num_runs):
        # 训练模型（使用不同的随机种子）
        model_copy = clone_model(model)
        model_copy.fit(X, y)
        
        # 获取特征重要性
        if hasattr(model_copy, 'feature_importances_'):
            importances_list.append(model_copy.feature_importances_)
    
    importances_array = np.array(importances_list)
    
    # 计算一致性（使用标准差）
    consistency = 1 - (np.std(importances_array, axis=0) / (np.mean(importances_array, axis=0) + 1e-10))
    
    return {
        'mean_importance': np.mean(importances_array, axis=0),
        'std_importance': np.std(importances_array, axis=0),
        'consistency': consistency,
        'avg_consistency': np.mean(consistency)
    }

实际应用场景分析

1. 实时推理场景

关键指标：

延迟：P95延迟 < 100ms
吞吐量：QPS > 1000
准确率：> 95%

优化策略：

模型量化
批处理优化
缓存机制

2. 批量处理场景

关键指标：

吞吐量：处理速度最大化
资源利用率：CPU/GPU利用率 > 80%
成本：每样本处理成本

优化策略：

批处理大小优化
并行处理
资源调度优化

3. 边缘设备部署

关键指标：

模型大小：< 10MB
内存占用：< 100MB
功耗：< 2W

优化策略：

模型剪枝
知识蒸馏
量化到INT8或更低精度

总结与最佳实践

核心要点回顾

后处理指标
- NMS效果评估
- 置信度阈值优化
- 后处理性能
模型量化指标
- 精度损失
- 压缩比
- 加速比
模型部署指标
- 延迟
- 吞吐量
- 资源占用
模型监控指标
- 数据漂移
- 性能下降
- 错误率

最佳实践清单

后处理优化
- 优化NMS参数
- 寻找最优置信度阈值
- 考虑类别特定阈值
模型量化
- 评估精度损失（< 1%）
- 测量压缩比和加速比
- 选择适合的量化方法
模型部署
- 监控延迟（P95）
- 测量吞吐量（QPS）
- 监控资源占用
持续监控
- 设置数据漂移告警
- 监控性能下降
- 定期A/B测试

指标选择指南

应用场景	关键指标	目标值
实时推理	P95延迟	< 100ms
批量处理	吞吐量	最大化
边缘设备	模型大小	< 10MB
高精度应用	准确率	> 99%
成本敏感	资源占用	最小化

相关资源：

可视化脚本如下：

复制代码

"""
机器学习评估指标完整可视化脚本
为所有指标生成图表
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches                                                                                        生成的图片里面的中文显示错误，请修改代码
from matplotlib.patches import Rectangle, FancyBboxPatch
import seaborn as sns
from sklearn.metrics import (confusion_matrix, precision_recall_curve, 
                            average_precision_score, roc_curve, auc,
                            silhouette_score, silhouette_samples)
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.datasets import make_classification, make_regression, make_blobs
from scipy import stats
import os
from typing import List, Tuple, Dict
import warnings
warnings.filterwarnings('ignore')

# 设置中文字体
plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans', 'Arial Unicode MS']
plt.rcParams['axes.unicode_minus'] = False

# 设置图表样式
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 8)
sns.set_palette("husl")

# 创建输出目录
os.makedirs('docs/images', exist_ok=True)


# ==================== 数据质量指标图表 ====================

def plot_missing_value_analysis():
    """绘制缺失值分析图表"""
    # 生成示例数据
    np.random.seed(42)
    data = {
        '年龄': np.random.normal(35, 10, 1000),
        '收入': np.random.normal(50000, 15000, 1000),
        '信用评分': np.random.normal(700, 100, 1000),
        '贷款金额': np.random.normal(200000, 50000, 1000),
        '工作年限': np.random.normal(5, 3, 1000),
    }
    df = pd.DataFrame(data)
    
    # 添加缺失值
    df.loc[df.sample(50).index, '年龄'] = np.nan
    df.loc[df.sample(100).index, '收入'] = np.nan
    df.loc[df.sample(150).index, '信用评分'] = np.nan
    df.loc[df.sample(200).index, '贷款金额'] = np.nan
    
    missing_rates = df.isnull().sum() / len(df) * 100
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # 缺失值率柱状图
    ax1 = axes[0]
    colors = ['green' if x < 5 else 'orange' if x < 10 else 'red' for x in missing_rates.values]
    bars = ax1.bar(missing_rates.index, missing_rates.values, color=colors, alpha=0.7, edgecolor='black')
    ax1.axhline(y=5, color='green', linestyle='--', label='优秀阈值 (5%)')
    ax1.axhline(y=10, color='orange', linestyle='--', label='良好阈值 (10%)')
    ax1.set_xlabel('特征', fontsize=12)
    ax1.set_ylabel('缺失值率 (%)', fontsize=12)
    ax1.set_title('各特征缺失值分析（贷款申请数据集）', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3, axis='y')
    ax1.tick_params(axis='x', rotation=45)
    
    for bar, value in zip(bars, missing_rates.values):
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width()/2., height,
                f'{value:.1f}%', ha='center', va='bottom', fontsize=10)
    
    # 缺失值热力图
    ax2 = axes[1]
    missing_matrix = df.isnull()
    sns.heatmap(missing_matrix, cmap='YlOrRd', cbar=True, ax=ax2, 
                yticklabels=False, xticklabels=True)
    ax2.set_title('缺失值分布热力图（贷款申请数据集）', fontsize=14, fontweight='bold')
    ax2.set_xlabel('特征', fontsize=12)
    ax2.set_ylabel('样本索引', fontsize=12)
    
    plt.tight_layout()
    plt.savefig('docs/images/missing_value_analysis.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 缺失值分析图表已生成")


def plot_data_distribution_analysis():
    """绘制数据分布分析图表"""
    np.random.seed(42)
    
    # 生成不同分布的数据
    normal_data = np.random.normal(0, 1, 1000)
    skewed_data = np.random.gamma(2, 2, 1000)
    uniform_data = np.random.uniform(-3, 3, 1000)
    
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    
    # 正态分布
    ax1 = axes[0, 0]
    ax1.hist(normal_data, bins=50, color='skyblue', edgecolor='black', alpha=0.7)
    ax1.axvline(np.mean(normal_data), color='red', linestyle='--', linewidth=2, label=f'均值: {np.mean(normal_data):.2f}')
    skewness = stats.skew(normal_data)
    kurt = stats.kurtosis(normal_data)
    ax1.set_title(f'正态分布\n偏度: {skewness:.2f}, 峰度: {kurt:.2f}', fontsize=12, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # 偏斜分布
    ax2 = axes[0, 1]
    ax2.hist(skewed_data, bins=50, color='lightcoral', edgecolor='black', alpha=0.7)
    ax2.axvline(np.mean(skewed_data), color='red', linestyle='--', linewidth=2, label=f'均值: {np.mean(skewed_data):.2f}')
    skewness = stats.skew(skewed_data)
    kurt = stats.kurtosis(skewed_data)
    ax2.set_title(f'偏斜分布（消费支出特征）\n偏度: {skewness:.2f}, 峰度: {kurt:.2f}', fontsize=12, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    # 均匀分布
    ax3 = axes[0, 2]
    ax3.hist(uniform_data, bins=50, color='lightgreen', edgecolor='black', alpha=0.7)
    ax3.axvline(np.mean(uniform_data), color='red', linestyle='--', linewidth=2, label=f'均值: {np.mean(uniform_data):.2f}')
    skewness = stats.skew(uniform_data)
    kurt = stats.kurtosis(uniform_data)
    ax3.set_title(f'均匀分布（年龄特征）\n偏度: {skewness:.2f}, 峰度: {kurt:.2f}', fontsize=12, fontweight='bold')
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    
    # Q-Q图（正态分布）
    ax4 = axes[1, 0]
    stats.probplot(normal_data, dist="norm", plot=ax4)
    ax4.set_xlabel('理论分位数', fontsize=12)
    ax4.set_ylabel('样本分位数', fontsize=12)
    ax4.set_title('Q-Q图 (正态分布)', fontsize=12, fontweight='bold')
    ax4.grid(True, alpha=0.3)
    
    # Q-Q图（偏斜分布）
    ax5 = axes[1, 1]
    stats.probplot(skewed_data, dist="norm", plot=ax5)
    ax5.set_xlabel('理论分位数', fontsize=12)
    ax5.set_ylabel('样本分位数', fontsize=12)
    ax5.set_title('Q-Q图 (偏斜分布)', fontsize=12, fontweight='bold')
    ax5.grid(True, alpha=0.3)
    
    # 箱线图对比
    ax6 = axes[1, 2]
    data_to_plot = [normal_data, skewed_data, uniform_data]
    bp = ax6.boxplot(data_to_plot, labels=['正态', '偏斜', '均匀'], patch_artist=True)
    colors = ['skyblue', 'lightcoral', 'lightgreen']
    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)
    ax6.set_title('分布对比箱线图（多特征对比）', fontsize=12, fontweight='bold')
    ax6.set_xlabel('分布类型', fontsize=12)
    ax6.set_ylabel('值', fontsize=12)
    ax6.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/data_distribution_analysis.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 数据分布分析图表已生成")


def plot_class_imbalance_analysis():
    """绘制类别不平衡分析图表"""
    np.random.seed(42)
    
    # 生成不平衡数据
    classes = ['正常', '异常', '警告', '严重', '紧急']
    counts = [1000, 500, 200, 100, 50]  # 不平衡分布
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # 柱状图
    ax1 = axes[0]
    colors = plt.cm.viridis(np.linspace(0, 1, len(classes)))
    bars = ax1.bar(classes, counts, color=colors, alpha=0.7, edgecolor='black')
    ax1.set_xlabel('类别', fontsize=12)
    ax1.set_ylabel('样本数量', fontsize=12)
    ax1.set_title('类别分布（不平衡数据集 - 异常检测任务）', fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3, axis='y')
    ax1.tick_params(axis='x', rotation=45)
    
    for bar, count in zip(bars, counts):
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width()/2., height,
                f'{count}', ha='center', va='bottom', fontsize=10, fontweight='bold')
    
    # 饼图
    ax2 = axes[1]
    proportions = [c/sum(counts)*100 for c in counts]
    wedges, texts, autotexts = ax2.pie(counts, labels=classes, autopct='%1.1f%%',
                                       colors=colors, startangle=90)
    ax2.set_title('类别占比分布（异常检测任务）', fontsize=14, fontweight='bold')
    
    # 计算不平衡度
    imbalance_ratio = max(counts) / min(counts)
    fig.suptitle(f'类别不平衡度: {imbalance_ratio:.1f}:1', fontsize=16, fontweight='bold', y=1.02)
    
    plt.tight_layout()
    plt.savefig('docs/images/class_imbalance_analysis.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 类别不平衡分析图表已生成")


# ==================== 前处理指标图表 ====================

def plot_scaling_comparison():
    """绘制标准化/归一化对比图表"""
    np.random.seed(42)
    original_data = np.random.normal(100, 50, 1000)  # 均值100，标准差50
    
    # Z-score标准化
    standardized = (original_data - np.mean(original_data)) / np.std(original_data)
    
    # Min-Max归一化
    min_max_scaled = (original_data - np.min(original_data)) / (np.max(original_data) - np.min(original_data))
    
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # 原始数据分布
    ax1 = axes[0, 0]
    ax1.hist(original_data, bins=50, color='skyblue', edgecolor='black', alpha=0.7)
    ax1.axvline(np.mean(original_data), color='red', linestyle='--', linewidth=2, 
               label=f'均值: {np.mean(original_data):.2f}')
    ax1.set_title('原始数据分布（收入特征）', fontsize=12, fontweight='bold')
    ax1.set_xlabel('值', fontsize=12)
    ax1.set_ylabel('频数', fontsize=12)
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # Z-score标准化后
    ax2 = axes[0, 1]
    ax2.hist(standardized, bins=50, color='lightgreen', edgecolor='black', alpha=0.7)
    ax2.axvline(0, color='red', linestyle='--', linewidth=2, label='均值: 0')
    ax2.axvline(1, color='orange', linestyle='--', linewidth=2, label='标准差: 1')
    ax2.axvline(-1, color='orange', linestyle='--', linewidth=2)
    ax2.set_title(f'Z-score标准化后（收入特征）\n均值: {np.mean(standardized):.3f}, 标准差: {np.std(standardized):.3f}', 
                 fontsize=12, fontweight='bold')
    ax2.set_xlabel('标准化值', fontsize=12)
    ax2.set_ylabel('频数', fontsize=12)
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    # Min-Max归一化后
    ax3 = axes[1, 0]
    ax3.hist(min_max_scaled, bins=50, color='lightcoral', edgecolor='black', alpha=0.7)
    ax3.axvline(0, color='red', linestyle='--', linewidth=2, label='最小值: 0')
    ax3.axvline(1, color='orange', linestyle='--', linewidth=2, label='最大值: 1')
    ax3.set_title(f'Min-Max归一化后（收入特征）\n最小值: {np.min(min_max_scaled):.3f}, 最大值: {np.max(min_max_scaled):.3f}', 
                 fontsize=12, fontweight='bold')
    ax3.set_xlabel('归一化值', fontsize=12)
    ax3.set_ylabel('频数', fontsize=12)
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    
    # 对比箱线图
    ax4 = axes[1, 1]
    data_to_plot = [original_data, standardized, min_max_scaled]
    bp = ax4.boxplot(data_to_plot, labels=['原始', 'Z-score', 'Min-Max'], patch_artist=True)
    colors = ['skyblue', 'lightgreen', 'lightcoral']
    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)
    ax4.set_title('数据变换对比（收入特征）', fontsize=12, fontweight='bold')
    ax4.set_xlabel('数据变换方法', fontsize=12)
    ax4.set_ylabel('值', fontsize=12)
    ax4.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/scaling_comparison.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 标准化/归一化对比图表已生成")


def plot_feature_importance():
    """绘制特征重要性图表"""
    np.random.seed(42)
    feature_names = ['信用评分', '收入', '贷款金额', '工作年限', '年龄', 
                     '负债比率', '房产价值', '教育水平', '婚姻状况', '地区']
    importance_scores = np.random.rand(10)
    importance_scores = np.sort(importance_scores)[::-1]  # 降序排列
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # 水平柱状图
    ax1 = axes[0]
    colors = plt.cm.viridis(np.linspace(0, 1, len(feature_names)))
    bars = ax1.barh(feature_names, importance_scores, color=colors, alpha=0.7, edgecolor='black')
    ax1.set_xlabel('重要性得分', fontsize=12)
    ax1.set_title('特征重要性排序（信用评分预测）', fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3, axis='x')
    
    for i, (bar, score) in enumerate(zip(bars, importance_scores)):
        ax1.text(score, bar.get_y() + bar.get_height()/2,
                f'{score:.3f}', ha='left', va='center', fontsize=9)
    
    # 累积重要性
    ax2 = axes[1]
    cumulative_importance = np.cumsum(importance_scores)
    cumulative_importance_pct = cumulative_importance / cumulative_importance[-1] * 100
    ax2.plot(range(1, len(feature_names)+1), cumulative_importance_pct, 
            marker='o', linewidth=2, markersize=8, color='blue')
    ax2.axhline(y=80, color='red', linestyle='--', linewidth=2, label='80%阈值')
    ax2.fill_between(range(1, len(feature_names)+1), cumulative_importance_pct, alpha=0.3)
    ax2.set_xlabel('特征数量（Top K）', fontsize=12)
    ax2.set_ylabel('累积重要性 (%)', fontsize=12)
    ax2.set_title('累积特征重要性（信用评分预测）', fontsize=14, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/feature_importance.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 特征重要性图表已生成")


def plot_correlation_heatmap():
    """绘制特征相关性热力图"""
    np.random.seed(42)
    n_features = 8
    feature_names = ['年龄', '收入', '信用评分', '贷款金额', '工作年限', '负债比率', '房产价值', '教育水平']
    
    # 生成相关矩阵
    corr_matrix = np.random.rand(n_features, n_features)
    corr_matrix = (corr_matrix + corr_matrix.T) / 2  # 对称矩阵
    np.fill_diagonal(corr_matrix, 1.0)  # 对角线为1
    
    # 确保相关性在合理范围内
    corr_matrix = np.clip(corr_matrix, -1, 1)
    
    fig, ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', center=0,
               square=True, linewidths=0.5, cbar_kws={"shrink": 0.8},
               xticklabels=feature_names, yticklabels=feature_names, ax=ax)
    ax.set_title('特征相关性热力图（信用评分预测）', fontsize=14, fontweight='bold')
    ax.set_xlabel('特征', fontsize=12)
    ax.set_ylabel('特征', fontsize=12)
    
    plt.tight_layout()
    plt.savefig('docs/images/correlation_heatmap.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 特征相关性热力图已生成")


# ==================== 分类任务指标图表 ====================

def plot_confusion_matrix_detailed():
    """绘制详细混淆矩阵"""
    # 生成示例数据
    y_true = np.random.randint(0, 3, 200)
    y_pred = y_true.copy()
    # 添加一些错误
    error_indices = np.random.choice(200, 30, replace=False)
    y_pred[error_indices] = np.random.randint(0, 3, 30)
    
    cm = confusion_matrix(y_true, y_pred)
    classes = ['猫', '狗', '鸟']
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # 数值混淆矩阵
    ax1 = axes[0]
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax1,
               xticklabels=classes, yticklabels=classes, cbar=True)
    ax1.set_xlabel('预测类别', fontsize=12)
    ax1.set_ylabel('真实类别', fontsize=12)
    ax1.set_title('混淆矩阵（数值）- 图像分类任务', fontsize=14, fontweight='bold')
    
    # 百分比混淆矩阵
    ax2 = axes[1]
    cm_percent = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100
    sns.heatmap(cm_percent, annot=True, fmt='.1f', cmap='Blues', ax=ax2,
               xticklabels=classes, yticklabels=classes, cbar=True)
    ax2.set_xlabel('预测类别', fontsize=12)
    ax2.set_ylabel('真实类别', fontsize=12)
    ax2.set_title('混淆矩阵（百分比）- 图像分类任务', fontsize=14, fontweight='bold')
    
    plt.tight_layout()
    plt.savefig('docs/images/confusion_matrix_detailed.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 详细混淆矩阵图表已生成")


def plot_roc_curve_multiple():
    """绘制多个模型的ROC曲线对比"""
    np.random.seed(42)
    
    # 生成示例数据
    y_true = np.random.randint(0, 2, 1000)
    y_scores_model1 = np.random.rand(1000)
    y_scores_model2 = y_scores_model1 + np.random.randn(1000) * 0.1
    y_scores_model3 = y_scores_model1 + np.random.randn(1000) * 0.2
    
    fig, ax = plt.subplots(figsize=(10, 8))
    
    # 计算ROC曲线
    fpr1, tpr1, _ = roc_curve(y_true, y_scores_model1)
    fpr2, tpr2, _ = roc_curve(y_true, y_scores_model2)
    fpr3, tpr3, _ = roc_curve(y_true, y_scores_model3)
    
    auc1 = auc(fpr1, tpr1)
    auc2 = auc(fpr2, tpr2)
    auc3 = auc(fpr3, tpr3)
    
    ax.plot(fpr1, tpr1, label=f'逻辑回归 (AUC = {auc1:.3f})', linewidth=2, color='blue')
    ax.plot(fpr2, tpr2, label=f'随机森林 (AUC = {auc2:.3f})', linewidth=2, color='green')
    ax.plot(fpr3, tpr3, label=f'XGBoost (AUC = {auc3:.3f})', linewidth=2, color='red')
    ax.plot([0, 1], [0, 1], 'k--', label='随机分类器', linewidth=1)
    
    ax.set_xlabel('假正率 (FPR)', fontsize=12)
    ax.set_ylabel('真正率 (TPR)', fontsize=12)
    ax.set_title('ROC曲线对比（二分类任务）', fontsize=14, fontweight='bold')
    ax.legend(loc='lower right', fontsize=11)
    ax.grid(True, alpha=0.3)
    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
    
    plt.tight_layout()
    plt.savefig('docs/images/roc_curve_multiple.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ ROC曲线对比图表已生成")


def plot_pr_curve_detailed():
    """绘制详细PR曲线"""
    np.random.seed(42)
    
    y_true = np.random.randint(0, 2, 1000)
    y_scores = np.random.rand(1000)
    
    precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
    ap = average_precision_score(y_true, y_scores)
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # PR曲线
    ax1 = axes[0]
    ax1.plot(recall, precision, linewidth=2, color='blue', label=f'AP = {ap:.3f}')
    ax1.fill_between(recall, precision, alpha=0.3)
    ax1.set_xlabel('召回率 (Recall)', fontsize=12)
    ax1.set_ylabel('精确率 (Precision)', fontsize=12)
    ax1.set_title('Precision-Recall曲线（二分类任务）', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    ax1.set_xlim([0, 1])
    ax1.set_ylim([0, 1])
    
    # Precision和Recall随阈值变化
    ax2 = axes[1]
    ax2.plot(thresholds, precision[:-1], label='精确率', linewidth=2, color='blue')
    ax2.plot(thresholds, recall[:-1], label='召回率', linewidth=2, color='red')
    ax2.set_xlabel('分类阈值', fontsize=12)
    ax2.set_ylabel('分数', fontsize=12)
    ax2.set_title('Precision和Recall随阈值变化（二分类任务）', fontsize=14, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/pr_curve_detailed.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 详细PR曲线图表已生成")


# ==================== 回归任务指标图表 ====================

def plot_regression_metrics():
    """绘制回归任务指标图表"""
    np.random.seed(42)
    
    # 生成示例数据（房价预测）
    n_samples = 100
    y_true = np.random.randn(n_samples) * 10 + 50  # 真实房价（万元）
    y_pred = y_true + np.random.randn(n_samples) * 3  # 预测房价（万元）
    
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # 预测值 vs 真实值散点图
    ax1 = axes[0, 0]
    ax1.scatter(y_true, y_pred, alpha=0.6, s=50)
    min_val = min(min(y_true), min(y_pred))
    max_val = max(max(y_true), max(y_pred))
    ax1.plot([min_val, max_val], [min_val, max_val], 'r--', linewidth=2, label='完美预测线')
    ax1.set_xlabel('真实房价 (万元)', fontsize=12)
    ax1.set_ylabel('预测房价 (万元)', fontsize=12)
    ax1.set_title('房价预测：预测值 vs 真实值', fontsize=12, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # 计算R²
    from sklearn.metrics import r2_score
    r2 = r2_score(y_true, y_pred)
    ax1.text(0.05, 0.95, f'R² = {r2:.3f}', transform=ax1.transAxes,
            fontsize=12, verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    
    # 残差图
    ax2 = axes[0, 1]
    residuals = y_true - y_pred
    ax2.scatter(y_pred, residuals, alpha=0.6, s=50)
    ax2.axhline(y=0, color='r', linestyle='--', linewidth=2)
    ax2.set_xlabel('预测房价 (万元)', fontsize=12)
    ax2.set_ylabel('残差 (万元)', fontsize=12)
    ax2.set_title('房价预测残差图', fontsize=12, fontweight='bold')
    ax2.grid(True, alpha=0.3)
    
    # 误差分布直方图
    ax3 = axes[1, 0]
    errors = y_true - y_pred
    ax3.hist(errors, bins=20, color='skyblue', edgecolor='black', alpha=0.7)
    ax3.axvline(np.mean(errors), color='red', linestyle='--', linewidth=2,
               label=f'均值: {np.mean(errors):.2f}')
    ax3.set_xlabel('预测误差 (万元)', fontsize=12)
    ax3.set_ylabel('频数', fontsize=12)
    ax3.set_title('房价预测误差分布', fontsize=12, fontweight='bold')
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    
    # 时间序列预测（如果是时间序列数据）
    ax4 = axes[1, 1]
    indices = np.arange(n_samples)
    ax4.plot(indices, y_true, 'o-', label='真实值', linewidth=2, markersize=4, alpha=0.7)
    ax4.plot(indices, y_pred, 's-', label='预测值', linewidth=2, markersize=4, alpha=0.7)
    ax4.set_xlabel('样本索引', fontsize=12)
    ax4.set_ylabel('房价 (万元)', fontsize=12)
    ax4.set_title('房价预测对比（时间序列）', fontsize=12, fontweight='bold')
    ax4.legend()
    ax4.grid(True, alpha=0.3)
    
    # 添加指标文本
    from sklearn.metrics import mean_squared_error, mean_absolute_error
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    
    fig.suptitle(f'回归任务指标分析\nMSE: {mse:.2f}, MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.3f}',
                fontsize=14, fontweight='bold')
    
    plt.tight_layout()
    plt.savefig('docs/images/regression_metrics.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 回归任务指标图表已生成")


# ==================== 聚类任务指标图表 ====================

def plot_clustering_metrics():
    """绘制聚类任务指标图表"""
    np.random.seed(42)
    
    # 生成示例数据
    X, y_true = make_blobs(n_samples=300, centers=4, n_features=2, random_state=42)
    
    # 模拟聚类结果
    from sklearn.cluster import KMeans
    kmeans = KMeans(n_clusters=4, random_state=42)
    y_pred = kmeans.fit_predict(X)
    
    # 计算轮廓系数
    silhouette_avg = silhouette_score(X, y_pred)
    sample_silhouette_values = silhouette_samples(X, y_pred)
    
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # 聚类结果可视化
    ax1 = axes[0, 0]
    scatter = ax1.scatter(X[:, 0], X[:, 1], c=y_pred, cmap='viridis', s=50, alpha=0.6)
    centers = kmeans.cluster_centers_
    ax1.scatter(centers[:, 0], centers[:, 1], c='red', marker='x', s=200, linewidths=3, label='聚类中心')
    ax1.set_xlabel('收入 (万元)', fontsize=12)
    ax1.set_ylabel('消费支出 (万元)', fontsize=12)
    ax1.set_title('客户聚类结果可视化（K=4）', fontsize=12, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    plt.colorbar(scatter, ax=ax1)
    
    # 轮廓系数图
    ax2 = axes[0, 1]
    y_lower = 10
    n_clusters = len(np.unique(y_pred))
    colors = plt.cm.viridis(np.linspace(0, 1, n_clusters))
    
    for i in range(n_clusters):
        ith_cluster_silhouette_values = sample_silhouette_values[y_pred == i]
        ith_cluster_silhouette_values.sort()
        
        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i
        
        ax2.fill_betweenx(np.arange(y_lower, y_upper),
                        0, ith_cluster_silhouette_values,
                        facecolor=colors[i], edgecolor=colors[i], alpha=0.7)
        
        ax2.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
        y_lower = y_upper + 10
    
    ax2.axvline(x=silhouette_avg, color="red", linestyle="--", linewidth=2,
               label=f'平均轮廓系数: {silhouette_avg:.3f}')
    ax2.set_xlabel('轮廓系数值', fontsize=12)
    ax2.set_ylabel('簇标签', fontsize=12)
    ax2.set_title('轮廓系数分析（K=4聚类）', fontsize=12, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    # 不同K值的轮廓系数
    ax3 = axes[1, 0]
    k_range = range(2, 11)
    silhouette_scores = []
    for k in k_range:
        kmeans_k = KMeans(n_clusters=k, random_state=42)
        labels_k = kmeans_k.fit_predict(X)
        silhouette_scores.append(silhouette_score(X, labels_k))
    
    ax3.plot(k_range, silhouette_scores, 'o-', linewidth=2, markersize=8, color='blue')
    ax3.set_xlabel('簇数量 (K)', fontsize=12)
    ax3.set_ylabel('平均轮廓系数', fontsize=12)
    ax3.set_title('不同K值的轮廓系数（客户聚类）', fontsize=12, fontweight='bold')
    ax3.grid(True, alpha=0.3)
    ax3.set_xticks(k_range)
    
    # 肘部法则（Inertia）
    ax4 = axes[1, 1]
    inertias = []
    for k in k_range:
        kmeans_k = KMeans(n_clusters=k, random_state=42)
        kmeans_k.fit(X)
        inertias.append(kmeans_k.inertia_)
    
    ax4.plot(k_range, inertias, 'o-', linewidth=2, markersize=8, color='green')
    ax4.set_xlabel('簇数量 (K)', fontsize=12)
    ax4.set_ylabel('Inertia (簇内平方和)', fontsize=12)
    ax4.set_title('肘部法则 (Elbow Method) - 客户聚类', fontsize=12, fontweight='bold')
    ax4.grid(True, alpha=0.3)
    ax4.set_xticks(k_range)
    
    plt.tight_layout()
    plt.savefig('docs/images/clustering_metrics.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 聚类任务指标图表已生成")


# ==================== 目标检测指标图表 ====================

def plot_iou_visualization():
    """可视化IOU计算过程"""
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    
    # 示例1: 高IOU
    ax1 = axes[0]
    gt_box = [2, 2, 6, 6]
    pred_box = [2.5, 2.5, 6.5, 6.5]
    
    rect1 = Rectangle((gt_box[0], gt_box[1]), gt_box[2]-gt_box[0], gt_box[3]-gt_box[1],
                     linewidth=2, edgecolor='green', facecolor='lightgreen', alpha=0.3, label='真实框')
    ax1.add_patch(rect1)
    
    rect2 = Rectangle((pred_box[0], pred_box[1]), pred_box[2]-pred_box[0], pred_box[3]-pred_box[1],
                     linewidth=2, edgecolor='red', facecolor='lightcoral', alpha=0.3, label='预测框')
    ax1.add_patch(rect2)
    
    intersection = max(0, min(gt_box[2], pred_box[2]) - max(gt_box[0], pred_box[0])) * \
                   max(0, min(gt_box[3], pred_box[3]) - max(gt_box[1], pred_box[1]))
    union = (gt_box[2]-gt_box[0])*(gt_box[3]-gt_box[1]) + (pred_box[2]-pred_box[0])*(pred_box[3]-pred_box[1]) - intersection
    iou = intersection / union if union > 0 else 0
    
    ax1.set_xlim(0, 10)
    ax1.set_ylim(0, 10)
    ax1.set_xlabel('X坐标', fontsize=12)
    ax1.set_ylabel('Y坐标', fontsize=12)
    ax1.set_title(f'高IOU示例 = {iou:.3f}', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    ax1.set_aspect('equal')
    
    # 示例2: 中等IOU
    ax2 = axes[1]
    gt_box2 = [2, 2, 6, 6]
    pred_box2 = [4, 4, 8, 8]
    
    rect3 = Rectangle((gt_box2[0], gt_box2[1]), gt_box2[2]-gt_box2[0], gt_box2[3]-gt_box2[1],
                     linewidth=2, edgecolor='green', facecolor='lightgreen', alpha=0.3, label='真实框')
    ax2.add_patch(rect3)
    
    rect4 = Rectangle((pred_box2[0], pred_box2[1]), pred_box2[2]-pred_box2[0], pred_box2[3]-pred_box2[1],
                     linewidth=2, edgecolor='red', facecolor='lightcoral', alpha=0.3, label='预测框')
    ax2.add_patch(rect4)
    
    intersection2 = max(0, min(gt_box2[2], pred_box2[2]) - max(gt_box2[0], pred_box2[0])) * \
                    max(0, min(gt_box2[3], pred_box2[3]) - max(gt_box2[1], pred_box2[1]))
    union2 = (gt_box2[2]-gt_box2[0])*(gt_box2[3]-gt_box2[1]) + (pred_box2[2]-pred_box2[0])*(pred_box2[3]-pred_box2[1]) - intersection2
    iou2 = intersection2 / union2 if union2 > 0 else 0
    
    ax2.set_xlim(0, 10)
    ax2.set_ylim(0, 10)
    ax2.set_xlabel('X坐标', fontsize=12)
    ax2.set_ylabel('Y坐标', fontsize=12)
    ax2.set_title(f'中等IOU示例 = {iou2:.3f}', fontsize=14, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    ax2.set_aspect('equal')
    
    # 示例3: 低IOU
    ax3 = axes[2]
    gt_box3 = [2, 2, 6, 6]
    pred_box3 = [7, 7, 9, 9]
    
    rect5 = Rectangle((gt_box3[0], gt_box3[1]), gt_box3[2]-gt_box3[0], gt_box3[3]-gt_box3[1],
                     linewidth=2, edgecolor='green', facecolor='lightgreen', alpha=0.3, label='真实框')
    ax3.add_patch(rect5)
    
    rect6 = Rectangle((pred_box3[0], pred_box3[1]), pred_box3[2]-pred_box3[0], pred_box3[3]-pred_box3[1],
                     linewidth=2, edgecolor='red', facecolor='lightcoral', alpha=0.3, label='预测框')
    ax3.add_patch(rect6)
    
    intersection3 = max(0, min(gt_box3[2], pred_box3[2]) - max(gt_box3[0], pred_box3[0])) * \
                    max(0, min(gt_box3[3], pred_box3[3]) - max(gt_box3[1], pred_box3[1]))
    union3 = (gt_box3[2]-gt_box3[0])*(gt_box3[3]-gt_box3[1]) + (pred_box3[2]-pred_box3[0])*(pred_box3[3]-pred_box3[1]) - intersection3
    iou3 = intersection3 / union3 if union3 > 0 else 0
    
    ax3.set_xlim(0, 10)
    ax3.set_ylim(0, 10)
    ax3.set_xlabel('X坐标', fontsize=12)
    ax3.set_ylabel('Y坐标', fontsize=12)
    ax3.set_title(f'低IOU示例 = {iou3:.3f}', fontsize=14, fontweight='bold')
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    ax3.set_aspect('equal')
    
    plt.tight_layout()
    plt.savefig('docs/images/iou_visualization.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ IOU可视化图表已生成")


def plot_confidence_distribution():
    """绘制置信度分布直方图"""
    np.random.seed(42)
    confidence_scores = np.random.beta(5, 2, 1000)
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # 置信度分布直方图
    ax1 = axes[0]
    ax1.hist(confidence_scores, bins=50, color='skyblue', edgecolor='black', alpha=0.7)
    ax1.axvline(x=0.5, color='red', linestyle='--', linewidth=2, label='Threshold=0.5')
    ax1.set_xlabel('置信度分数', fontsize=12)
    ax1.set_ylabel('频数', fontsize=12)
    ax1.set_title('检测置信度分布（目标检测任务）', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # 累积分布
    ax2 = axes[1]
    sorted_scores = np.sort(confidence_scores)[::-1]
    cumulative = np.arange(1, len(sorted_scores) + 1) / len(sorted_scores) * 100
    ax2.plot(sorted_scores, cumulative, linewidth=2, color='blue')
    ax2.axvline(x=0.5, color='red', linestyle='--', linewidth=2, label='Threshold=0.5')
    ax2.set_xlabel('置信度分数', fontsize=12)
    ax2.set_ylabel('累积百分比 (%)', fontsize=12)
    ax2.set_title('置信度累积分布（目标检测任务）', fontsize=14, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/confidence_distribution.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 置信度分布图表已生成")


# ==================== 训练过程指标图表 ====================

def plot_training_curves():
    """绘制训练曲线"""
    np.random.seed(42)
    epochs = np.arange(1, 101)
    
    # 模拟训练过程
    train_loss = 2.5 * np.exp(-epochs/30) + 0.3 + 0.1 * np.random.randn(100)
    val_loss = 2.8 * np.exp(-epochs/35) + 0.4 + 0.15 * np.random.randn(100)
    train_acc = 0.2 + 0.7 * (1 - np.exp(-epochs/25)) + 0.05 * np.random.randn(100)
    val_acc = 0.15 + 0.65 * (1 - np.exp(-epochs/30)) + 0.08 * np.random.randn(100)
    
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # 损失曲线
    ax1 = axes[0, 0]
    ax1.plot(epochs, train_loss, label='训练损失', linewidth=2, color='blue')
    ax1.plot(epochs, val_loss, label='验证损失', linewidth=2, color='red')
    ax1.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax1.set_ylabel('损失值 (Loss)', fontsize=12)
    ax1.set_title('训练和验证损失（图像分类任务）', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # 准确率曲线
    ax2 = axes[0, 1]
    ax2.plot(epochs, train_acc, label='训练准确率', linewidth=2, color='blue')
    ax2.plot(epochs, val_acc, label='验证准确率', linewidth=2, color='red')
    ax2.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax2.set_ylabel('准确率 (Accuracy)', fontsize=12)
    ax2.set_title('训练和验证准确率（图像分类任务）', fontsize=14, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    # 学习率调度
    ax3 = axes[1, 0]
    lr_schedule = 0.01 * (0.95 ** epochs)
    ax3.plot(epochs, lr_schedule, linewidth=2, color='purple')
    ax3.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax3.set_ylabel('学习率 (Learning Rate)', fontsize=12)
    ax3.set_title('学习率调度（图像分类任务）', fontsize=14, fontweight='bold')
    ax3.set_yscale('log')
    ax3.grid(True, alpha=0.3)
    
    # 梯度范数
    ax4 = axes[1, 1]
    grad_norm = 10 * np.exp(-epochs/20) + 1 + 0.5 * np.random.randn(100)
    ax4.plot(epochs, grad_norm, linewidth=2, color='green')
    ax4.axhline(y=1.0, color='red', linestyle='--', linewidth=2, label='正常范围')
    ax4.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax4.set_ylabel('梯度范数 (Gradient Norm)', fontsize=12)
    ax4.set_title('梯度范数（图像分类任务）', fontsize=14, fontweight='bold')
    ax4.set_yscale('log')
    ax4.legend()
    ax4.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/training_curves.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 训练曲线图表已生成")


# ==================== 模型参数指标图表 ====================

def plot_model_complexity():
    """绘制模型复杂度分析图表"""
    model_names = ['MobileNet', 'ResNet18', 'ResNet50', 'VGG16', 'EfficientNet']
    params_million = [1.2, 3.5, 8.9, 15.3, 25.7]
    flops_giga = [0.5, 1.2, 3.5, 6.8, 12.3]
    accuracy = [0.85, 0.89, 0.92, 0.94, 0.95]
    
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    
    # 参数量 vs 准确率
    ax1 = axes[0]
    scatter = ax1.scatter(params_million, accuracy, s=200, alpha=0.6, c=flops_giga, cmap='viridis')
    for i, name in enumerate(model_names):
        ax1.annotate(name, (params_million[i], accuracy[i]), fontsize=9)
    ax1.set_xlabel('参数量 (百万)', fontsize=12)
    ax1.set_ylabel('准确率', fontsize=12)
    ax1.set_title('参数量 vs 准确率（ImageNet数据集）', fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3)
    plt.colorbar(scatter, ax=ax1, label='FLOPs (十亿次)')
    
    # FLOPs vs 准确率
    ax2 = axes[1]
    scatter2 = ax2.scatter(flops_giga, accuracy, s=200, alpha=0.6, c=params_million, cmap='plasma')
    for i, name in enumerate(model_names):
        ax2.annotate(name, (flops_giga[i], accuracy[i]), fontsize=9)
    ax2.set_xlabel('FLOPs (十亿次)', fontsize=12)
    ax2.set_ylabel('准确率', fontsize=12)
    ax2.set_title('FLOPs vs 准确率（ImageNet数据集）', fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3)
    plt.colorbar(scatter2, ax=ax2, label='参数量 (百万)')
    
    # 效率雷达图（简化版）
    ax3 = axes[2]
    efficiency_metrics = {
        '准确率': [a * 100 for a in accuracy],
        '参数量效率': [100 - p/max(params_million)*100 for p in params_million],
        '计算效率': [100 - f/max(flops_giga)*100 for f in flops_giga]
    }
    x = np.arange(len(model_names))
    width = 0.25
    multiplier = 0
    
    for metric, values in efficiency_metrics.items():
        offset = width * multiplier
        ax3.bar(x + offset, values, width, label=metric, alpha=0.7)
        multiplier += 1
    
    ax3.set_xlabel('模型', fontsize=12)
    ax3.set_ylabel('归一化分数', fontsize=12)
    ax3.set_title('模型效率对比（ImageNet数据集）', fontsize=14, fontweight='bold')
    ax3.set_xticks(x + width, model_names)
    ax3.legend()
    ax3.grid(True, alpha=0.3, axis='y')
    
    plt.tight_layout()
    plt.savefig('docs/images/model_complexity.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 模型复杂度分析图表已生成")


# ==================== 量化指标图表 ====================

def plot_quantization_metrics():
    """绘制量化指标图表"""
    models = ['FP32', 'INT8', 'FP16']
    accuracy = [0.95, 0.94, 0.945]
    model_size_mb = [100, 25, 50]
    inference_time_ms = [50, 15, 25]
    
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    
    # 精度对比
    ax1 = axes[0]
    bars1 = ax1.bar(models, accuracy, color=['blue', 'green', 'orange'], alpha=0.7, edgecolor='black')
    ax1.set_xlabel('量化方法', fontsize=12)
    ax1.set_ylabel('准确率', fontsize=12)
    ax1.set_title('量化后精度对比（ResNet50模型）', fontsize=14, fontweight='bold')
    ax1.set_ylim([0.93, 0.96])
    ax1.grid(True, alpha=0.3, axis='y')
    for bar, acc in zip(bars1, accuracy):
        height = bar.get_height()
        ax1.text(bar.get_x() + bar.get_width()/2., height,
                f'{acc:.3f}', ha='center', va='bottom', fontsize=11, fontweight='bold')
    
    # 模型大小对比
    ax2 = axes[1]
    bars2 = ax2.bar(models, model_size_mb, color=['blue', 'green', 'orange'], alpha=0.7, edgecolor='black')
    ax2.set_xlabel('量化方法', fontsize=12)
    ax2.set_ylabel('模型大小 (MB)', fontsize=12)
    ax2.set_title('模型大小对比（ResNet50模型）', fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3, axis='y')
    for bar, size in zip(bars2, model_size_mb):
        height = bar.get_height()
        ax2.text(bar.get_x() + bar.get_width()/2., height,
                f'{size}MB', ha='center', va='bottom', fontsize=11, fontweight='bold')
    
    # 推理时间对比
    ax3 = axes[2]
    bars3 = ax3.bar(models, inference_time_ms, color=['blue', 'green', 'orange'], alpha=0.7, edgecolor='black')
    ax3.set_xlabel('量化方法', fontsize=12)
    ax3.set_ylabel('推理时间 (ms)', fontsize=12)
    ax3.set_title('推理时间对比（ResNet50模型）', fontsize=14, fontweight='bold')
    ax3.grid(True, alpha=0.3, axis='y')
    for bar, time_ms in zip(bars3, inference_time_ms):
        height = bar.get_height()
        ax3.text(bar.get_x() + bar.get_width()/2., height,
                f'{time_ms}ms', ha='center', va='bottom', fontsize=11, fontweight='bold')
    
    # 计算压缩比和加速比
    compression_ratio = model_size_mb[0] / model_size_mb[1]
    speedup = inference_time_ms[0] / inference_time_ms[1]
    accuracy_drop = (accuracy[0] - accuracy[1]) / accuracy[0] * 100
    
    fig.suptitle(f'量化效果总结: 压缩比 {compression_ratio:.1f}x, 加速比 {speedup:.1f}x, 精度下降 {accuracy_drop:.2f}%',
                fontsize=14, fontweight='bold', y=1.02)
    
    plt.tight_layout()
    plt.savefig('docs/images/quantization_metrics.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 量化指标图表已生成")


# ==================== 部署指标图表 ====================

def plot_deployment_metrics():
    """绘制部署指标图表"""
    batch_sizes = [1, 4, 8, 16, 32]
    latency_ms = [50, 45, 42, 40, 38]
    throughput_qps = [20, 89, 190, 400, 842]
    cpu_usage = [25, 45, 65, 85, 95]
    memory_mb = [500, 800, 1200, 2000, 3500]
    
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # 延迟 vs Batch Size
    ax1 = axes[0, 0]
    ax1.plot(batch_sizes, latency_ms, 'o-', linewidth=2, markersize=8, color='blue')
    ax1.set_xlabel('批次大小', fontsize=12)
    ax1.set_ylabel('延迟 (毫秒)', fontsize=12)
    ax1.set_title('延迟 vs 批次大小（图像分类模型部署）', fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3)
    ax1.set_xticks(batch_sizes)
    
    # 吞吐量 vs Batch Size
    ax2 = axes[0, 1]
    ax2.plot(batch_sizes, throughput_qps, 'o-', linewidth=2, markersize=8, color='green')
    ax2.set_xlabel('批次大小', fontsize=12)
    ax2.set_ylabel('吞吐量 (QPS)', fontsize=12)
    ax2.set_title('吞吐量 vs 批次大小（图像分类模型部署）', fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3)
    ax2.set_xticks(batch_sizes)
    
    # CPU使用率
    ax3 = axes[1, 0]
    ax3.plot(batch_sizes, cpu_usage, 'o-', linewidth=2, markersize=8, color='red')
    ax3.axhline(y=80, color='orange', linestyle='--', linewidth=2, label='推荐阈值 (80%)')
    ax3.set_xlabel('批次大小', fontsize=12)
    ax3.set_ylabel('CPU使用率 (%)', fontsize=12)
    ax3.set_title('CPU使用率 vs 批次大小（图像分类模型部署）', fontsize=14, fontweight='bold')
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    ax3.set_xticks(batch_sizes)
    ax3.set_ylim([0, 100])
    
    # 内存占用
    ax4 = axes[1, 1]
    ax4.plot(batch_sizes, memory_mb, 'o-', linewidth=2, markersize=8, color='purple')
    ax4.set_xlabel('批次大小', fontsize=12)
    ax4.set_ylabel('内存占用 (MB)', fontsize=12)
    ax4.set_title('内存占用 vs 批次大小（图像分类模型部署）', fontsize=14, fontweight='bold')
    ax4.grid(True, alpha=0.3)
    ax4.set_xticks(batch_sizes)
    
    plt.tight_layout()
    plt.savefig('docs/images/deployment_metrics.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 部署指标图表已生成")


# ==================== 进阶篇：训练过程指标图表 ====================

def plot_loss_function_comparison():
    """绘制不同损失函数对比"""
    epochs = np.arange(1, 101)
    
    # 模拟不同损失函数的行为
    mse_loss = 2.5 * np.exp(-epochs/30) + 0.3
    mae_loss = 1.8 * np.exp(-epochs/35) + 0.4
    huber_loss = 2.0 * np.exp(-epochs/32) + 0.35
    focal_loss = 3.0 * np.exp(-epochs/25) + 0.5
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # 损失曲线对比
    ax1 = axes[0]
    ax1.plot(epochs, mse_loss, label='MSE Loss', linewidth=2, color='blue')
    ax1.plot(epochs, mae_loss, label='MAE Loss', linewidth=2, color='green')
    ax1.plot(epochs, huber_loss, label='Huber Loss', linewidth=2, color='orange')
    ax1.plot(epochs, focal_loss, label='Focal Loss', linewidth=2, color='red')
    ax1.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax1.set_ylabel('损失值', fontsize=12)
    ax1.set_title('不同损失函数收敛对比（回归任务）', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # 对数尺度
    ax2 = axes[1]
    ax2.semilogy(epochs, mse_loss, label='MSE损失', linewidth=2, color='blue')
    ax2.semilogy(epochs, mae_loss, label='MAE损失', linewidth=2, color='green')
    ax2.semilogy(epochs, huber_loss, label='Huber损失', linewidth=2, color='orange')
    ax2.semilogy(epochs, focal_loss, label='Focal损失', linewidth=2, color='red')
    ax2.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax2.set_ylabel('损失值（对数尺度）', fontsize=12)
    ax2.set_title('损失函数收敛对比（对数尺度，回归任务）', fontsize=14, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/loss_function_comparison.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 损失函数对比图表已生成")


def plot_gradient_analysis():
    """绘制梯度分析详细图表"""
    epochs = np.arange(1, 101)
    
    # 模拟梯度范数变化
    normal_grad = 5 * np.exp(-epochs/25) + 1 + 0.2 * np.random.randn(100)
    exploding_grad = np.concatenate([
        normal_grad[:50],
        normal_grad[50:60] * np.exp(np.arange(10) * 0.5),
        normal_grad[60:] * 100
    ])
    vanishing_grad = normal_grad * np.exp(-epochs/15)
    
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # 正常梯度
    ax1 = axes[0, 0]
    ax1.plot(epochs, normal_grad, linewidth=2, color='green')
    ax1.axhline(y=1.0, color='red', linestyle='--', linewidth=2, label='正常范围')
    ax1.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax1.set_ylabel('梯度范数', fontsize=12)
    ax1.set_title('正常梯度范数（图像分类任务）', fontsize=14, fontweight='bold')
    ax1.set_yscale('log')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # 梯度爆炸
    ax2 = axes[0, 1]
    ax2.plot(epochs, exploding_grad, linewidth=2, color='red')
    ax2.axhline(y=100, color='orange', linestyle='--', linewidth=2, label='危险阈值')
    ax2.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax2.set_ylabel('梯度范数', fontsize=12)
    ax2.set_title('梯度爆炸（图像分类任务）', fontsize=14, fontweight='bold')
    ax2.set_yscale('log')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    # 梯度消失
    ax3 = axes[1, 0]
    ax3.plot(epochs, vanishing_grad, linewidth=2, color='blue')
    ax3.axhline(y=1e-6, color='orange', linestyle='--', linewidth=2, label='消失阈值')
    ax3.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax3.set_ylabel('梯度范数', fontsize=12)
    ax3.set_title('梯度消失（图像分类任务）', fontsize=14, fontweight='bold')
    ax3.set_yscale('log')
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    
    # 梯度分布
    ax4 = axes[1, 1]
    ax4.hist(normal_grad, bins=30, alpha=0.7, color='green', label='正常梯度')
    ax4.hist(exploding_grad, bins=30, alpha=0.5, color='red', label='爆炸梯度')
    ax4.set_xlabel('梯度范数', fontsize=12)
    ax4.set_ylabel('频数', fontsize=12)
    ax4.set_title('梯度分布对比（图像分类任务）', fontsize=14, fontweight='bold')
    ax4.set_xscale('log')
    ax4.legend()
    ax4.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/gradient_analysis.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 梯度分析图表已生成")


def plot_learning_rate_schedules():
    """绘制不同学习率调度策略对比"""
    epochs = np.arange(1, 101)
    initial_lr = 0.01
    
    # 不同调度策略
    fixed_lr = np.ones(100) * initial_lr
    step_lr = initial_lr * (0.1 ** (epochs // 30))
    exp_lr = initial_lr * (0.95 ** epochs)
    cosine_lr = initial_lr * (1 + np.cos(np.pi * epochs / 100)) / 2
    reduce_on_plateau = initial_lr * np.ones(100)
    reduce_on_plateau[30:] *= 0.5
    reduce_on_plateau[60:] *= 0.5
    
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    axes = axes.flatten()
    
    schedules = [
        ('固定学习率', fixed_lr),
        ('StepLR (每30epoch降低)', step_lr),
        ('ExponentialLR', exp_lr),
        ('CosineAnnealingLR', cosine_lr),
        ('ReduceLROnPlateau', reduce_on_plateau),
    ]
    
    for idx, (name, lr_values) in enumerate(schedules):
        ax = axes[idx]
        ax.plot(epochs, lr_values, linewidth=2)
        ax.set_xlabel('训练轮数 (Epoch)', fontsize=12)
        ax.set_ylabel('学习率', fontsize=12)
        ax.set_title(f'{name}（图像分类任务）', fontsize=14, fontweight='bold')
        ax.set_yscale('log')
        ax.grid(True, alpha=0.3)
    
    # 对比图
    ax = axes[5]
    for name, lr_values in schedules:
        ax.plot(epochs, lr_values, linewidth=2, label=name)
    ax.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax.set_ylabel('学习率', fontsize=12)
    ax.set_title('所有调度策略对比（图像分类任务）', fontsize=14, fontweight='bold')
    ax.set_yscale('log')
    ax.legend(fontsize=9)
    ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/learning_rate_schedules.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 学习率调度策略对比图表已生成")


def plot_weight_distribution_analysis():
    """绘制权重分布分析图表"""
    np.random.seed(42)
    
    # 模拟不同层的权重分布
    layer1_weights = np.random.normal(0, 0.1, 1000)  # 正常初始化
    layer2_weights = np.random.normal(0, 1.0, 1000)  # 初始化过大
    layer3_weights = np.random.normal(0, 0.01, 1000)  # 初始化过小
    layer4_weights = np.concatenate([
        np.random.normal(0, 0.1, 800),
        np.random.normal(0, 5.0, 200)  # 异常值
    ])
    
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    layers = [
        ('Conv1 (正常初始化)', layer1_weights, 'green'),
        ('Conv2 (初始化过大)', layer2_weights, 'red'),
        ('FC1 (初始化过小)', layer3_weights, 'blue'),
        ('FC2 (异常值)', layer4_weights, 'orange'),
    ]
    
    for idx, (name, weights, color) in enumerate(layers):
        ax = axes[idx // 2, idx % 2]
        ax.hist(weights, bins=50, alpha=0.7, color=color, edgecolor='black')
        ax.axvline(np.mean(weights), color='red', linestyle='--', 
                  linewidth=2, label=f'均值: {np.mean(weights):.4f}')
        ax.axvline(np.std(weights), color='orange', linestyle='--', 
                  linewidth=2, label=f'标准差: {np.std(weights):.4f}')
        ax.set_xlabel('权重值', fontsize=12)
        ax.set_ylabel('频数', fontsize=12)
        ax.set_title(f'{name}（ResNet50模型）', fontsize=14, fontweight='bold')
        ax.legend()
        ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/weight_distribution_analysis.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 权重分布分析图表已生成")


def plot_hyperparameter_tuning():
    """绘制超参数调优可视化"""
    np.random.seed(42)
    
    # 模拟超参数搜索结果
    learning_rates = np.logspace(-4, -1, 20)
    batch_sizes = [16, 32, 64, 128, 256]
    
    # 生成网格搜索结果
    results = []
    for lr in learning_rates:
        for bs in batch_sizes:
            # 模拟准确率（添加一些噪声）
            acc = 0.85 + 0.1 * np.exp(-lr * 100) + 0.05 * np.exp(-bs/100) + np.random.randn() * 0.02
            results.append({'lr': lr, 'batch_size': bs, 'accuracy': acc})
    
    df = pd.DataFrame(results)
    
    # 创建热力图
    pivot_table = df.pivot(index='batch_size', columns='lr', values='accuracy')
    
    fig, axes = plt.subplots(1, 2, figsize=(18, 6))
    
    # 热力图
    ax1 = axes[0]
    sns.heatmap(pivot_table, annot=True, fmt='.3f', cmap='viridis', ax=ax1, cbar=True)
    ax1.set_xlabel('学习率', fontsize=12)
    ax1.set_ylabel('批次大小', fontsize=12)
    ax1.set_title('超参数网格搜索热力图（图像分类任务）', fontsize=14, fontweight='bold')
    
    # 3D表面图
    ax2 = axes[1]
    X = np.log10(learning_rates)
    Y = batch_sizes
    X_grid, Y_grid = np.meshgrid(X, Y)
    Z = pivot_table.values
    
    contour = ax2.contourf(X_grid, Y_grid, Z, levels=20, cmap='viridis')
    ax2.scatter(np.log10(df['lr']), df['batch_size'], c=df['accuracy'], 
               s=50, cmap='viridis', edgecolors='black')
    ax2.set_xlabel('学习率 (log10)', fontsize=12)
    ax2.set_ylabel('批次大小', fontsize=12)
    ax2.set_title('超参数搜索3D视图（图像分类任务）', fontsize=14, fontweight='bold')
    plt.colorbar(contour, ax=ax2, label='准确率')
    
    plt.tight_layout()
    plt.savefig('docs/images/hyperparameter_tuning.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 超参数调优可视化图表已生成")


def plot_cross_validation_results():
    """绘制交叉验证结果可视化"""
    from sklearn.model_selection import cross_val_score, StratifiedKFold
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
    from sklearn.svm import SVC
    from sklearn.datasets import make_classification
    
    # 生成示例数据
    X, y = make_classification(n_samples=200, n_features=10, random_state=42)
    
    # 不同模型的交叉验证
    models = {
        '逻辑回归': LogisticRegression(random_state=42),
        '支持向量机': SVC(random_state=42),
        '随机森林': RandomForestClassifier(random_state=42),
        '梯度提升': GradientBoostingClassifier(random_state=42),
    }
    
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    
    results = {}
    for name, model in models.items():
        scores = cross_val_score(model, X, y, cv=kfold, scoring='accuracy')
        results[name] = scores
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # 箱线图
    ax1 = axes[0]
    data_to_plot = [results[name] for name in models.keys()]
    bp = ax1.boxplot(data_to_plot, labels=list(models.keys()), patch_artist=True)
    colors = plt.cm.viridis(np.linspace(0, 1, len(models)))
    for patch, color in zip(bp['boxes'], colors):
        patch.set_facecolor(color)
    ax1.set_xlabel('模型', fontsize=12)
    ax1.set_ylabel('准确率', fontsize=12)
    ax1.set_title('5折交叉验证结果对比（信用评分预测）', fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3, axis='y')
    ax1.tick_params(axis='x', rotation=45)
    
    # 条形图（平均值）
    ax2 = axes[1]
    means = [np.mean(results[name]) for name in models.keys()]
    stds = [np.std(results[name]) for name in models.keys()]
    bars = ax2.bar(models.keys(), means, yerr=stds, alpha=0.7, 
                  color=colors, edgecolor='black', capsize=5)
    ax2.set_xlabel('模型', fontsize=12)
    ax2.set_ylabel('平均准确率', fontsize=12)
    ax2.set_title('交叉验证平均准确率（信用评分预测）', fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3, axis='y')
    ax2.tick_params(axis='x', rotation=45)
    
    for bar, mean in zip(bars, means):
        height = bar.get_height()
        ax2.text(bar.get_x() + bar.get_width()/2., height,
                f'{mean:.3f}', ha='center', va='bottom', fontsize=10)
    
    plt.tight_layout()
    plt.savefig('docs/images/cross_validation_results.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 交叉验证结果可视化图表已生成")


def plot_optimizer_comparison():
    """绘制优化器对比图表"""
    epochs = np.arange(1, 101)
    
    # 模拟不同优化器的收敛曲线
    sgd_loss = 2.5 * np.exp(-epochs/40) + 0.3
    adam_loss = 2.5 * np.exp(-epochs/25) + 0.25
    adamw_loss = 2.5 * np.exp(-epochs/22) + 0.23
    rmsprop_loss = 2.5 * np.exp(-epochs/30) + 0.28
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # 损失曲线对比
    ax1 = axes[0]
    ax1.plot(epochs, sgd_loss, label='SGD', linewidth=2, color='blue')
    ax1.plot(epochs, adam_loss, label='Adam', linewidth=2, color='green')
    ax1.plot(epochs, adamw_loss, label='AdamW', linewidth=2, color='red')
    ax1.plot(epochs, rmsprop_loss, label='RMSprop', linewidth=2, color='orange')
    ax1.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax1.set_ylabel('损失值', fontsize=12)
    ax1.set_title('优化器收敛速度对比（图像分类任务）', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # 最终性能对比
    ax2 = axes[1]
    optimizers = ['SGD', 'Adam', 'AdamW', 'RMSprop']
    final_losses = [sgd_loss[-1], adam_loss[-1], adamw_loss[-1], rmsprop_loss[-1]]
    bars = ax2.bar(optimizers, final_losses, alpha=0.7, 
                   color=['blue', 'green', 'red', 'orange'], edgecolor='black')
    ax2.set_xlabel('优化器', fontsize=12)
    ax2.set_ylabel('最终损失值', fontsize=12)
    ax2.set_title('优化器最终性能对比（图像分类任务）', fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3, axis='y')
    
    for bar, loss in zip(bars, final_losses):
        height = bar.get_height()
        ax2.text(bar.get_x() + bar.get_width()/2., height,
                f'{loss:.3f}', ha='center', va='bottom', fontsize=11, fontweight='bold')
    
    plt.tight_layout()
    plt.savefig('docs/images/optimizer_comparison.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 优化器对比图表已生成")


def plot_regularization_effects():
    """绘制正则化效果对比"""
    epochs = np.arange(1, 101)
    
    # 模拟不同正则化强度的效果
    no_reg_train = 0.2 + 0.7 * (1 - np.exp(-epochs/20))
    no_reg_val = 0.15 + 0.6 * (1 - np.exp(-epochs/25))
    
    l2_weak_train = 0.2 + 0.68 * (1 - np.exp(-epochs/22))
    l2_weak_val = 0.15 + 0.65 * (1 - np.exp(-epochs/28))
    
    l2_strong_train = 0.2 + 0.65 * (1 - np.exp(-epochs/25))
    l2_strong_val = 0.15 + 0.68 * (1 - np.exp(-epochs/30))
    
    dropout_train = 0.2 + 0.66 * (1 - np.exp(-epochs/23))
    dropout_val = 0.15 + 0.67 * (1 - np.exp(-epochs/27))
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # 训练准确率
    ax1 = axes[0]
    ax1.plot(epochs, no_reg_train, label='无正则化', linewidth=2, color='blue')
    ax1.plot(epochs, l2_weak_train, label='L2弱正则化', linewidth=2, color='green')
    ax1.plot(epochs, l2_strong_train, label='L2强正则化', linewidth=2, color='orange')
    ax1.plot(epochs, dropout_train, label='Dropout', linewidth=2, color='red')
    ax1.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax1.set_ylabel('训练准确率', fontsize=12)
    ax1.set_title('正则化对训练准确率的影响（图像分类任务）', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # 验证准确率
    ax2 = axes[1]
    ax2.plot(epochs, no_reg_val, label='无正则化', linewidth=2, color='blue')
    ax2.plot(epochs, l2_weak_val, label='L2弱正则化', linewidth=2, color='green')
    ax2.plot(epochs, l2_strong_val, label='L2强正则化', linewidth=2, color='orange')
    ax2.plot(epochs, dropout_val, label='Dropout', linewidth=2, color='red')
    ax2.set_xlabel('训练轮数 (Epoch)', fontsize=12)
    ax2.set_ylabel('验证准确率', fontsize=12)
    ax2.set_title('正则化对验证准确率的影响（图像分类任务）', fontsize=14, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/regularization_effects.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 正则化效果对比图表已生成")


# ==================== 高级篇：后处理和量化指标图表 ====================

def plot_nms_comparison():
    """绘制NMS效果对比"""
    # 模拟不同IoU阈值下的NMS效果
    iou_thresholds = np.arange(0.3, 0.95, 0.05)
    detection_counts = []
    map_scores = []
    
    for iou_thresh in iou_thresholds:
        # 模拟：IoU阈值越高，检测数量越少，但mAP可能先升后降
        count = 100 * (1 - iou_thresh) + np.random.randn() * 5
        map_score = 0.5 + 0.3 * np.exp(-(iou_thresh - 0.5)**2 / 0.1) + np.random.randn() * 0.02
        detection_counts.append(max(10, count))
        map_scores.append(max(0.3, min(0.9, map_score)))
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # 检测数量变化
    ax1 = axes[0]
    ax1.plot(iou_thresholds, detection_counts, 'o-', linewidth=2, markersize=6, color='blue')
    ax1.set_xlabel('IoU阈值', fontsize=12)
    ax1.set_ylabel('检测数量', fontsize=12)
    ax1.set_title('NMS后检测数量 vs IoU阈值（目标检测任务）', fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3)
    
    # mAP变化
    ax2 = axes[1]
    ax2.plot(iou_thresholds, map_scores, 'o-', linewidth=2, markersize=6, color='green')
    ax2.set_xlabel('IoU阈值', fontsize=12)
    ax2.set_ylabel('mAP', fontsize=12)
    ax2.set_title('mAP vs IoU阈值（目标检测任务）', fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/nms_comparison.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ NMS效果对比图表已生成")


def plot_threshold_optimization():
    """绘制阈值优化详细分析"""
    # 模拟不同阈值下的性能
    thresholds = np.arange(0.1, 1.0, 0.05)
    precisions = []
    recalls = []
    f1_scores = []
    detection_counts = []
    
    for thresh in thresholds:
        # 模拟：阈值越高，精确率越高，召回率越低
        precision = 0.5 + 0.4 * thresh + np.random.randn() * 0.05
        recall = 0.9 - 0.6 * thresh + np.random.randn() * 0.05
        f1 = 2 * precision * recall / (precision + recall + 1e-10)
        count = 100 * (1 - thresh) + np.random.randn() * 5
        
        precisions.append(max(0.3, min(0.95, precision)))
        recalls.append(max(0.1, min(0.95, recall)))
        f1_scores.append(max(0.2, min(0.9, f1)))
        detection_counts.append(max(5, count))
    
    # 找到最优阈值
    optimal_idx = np.argmax(f1_scores)
    optimal_threshold = thresholds[optimal_idx]
    
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    
    # Precision/Recall/F1曲线
    ax1 = axes[0]
    ax1.plot(thresholds, precisions, 'o-', label='精确率', linewidth=2, color='blue')
    ax1.plot(thresholds, recalls, 's-', label='召回率', linewidth=2, color='red')
    ax1.plot(thresholds, f1_scores, '^-', label='F1分数', linewidth=2, color='green')
    ax1.axvline(x=optimal_threshold, color='black', linestyle='--', 
               linewidth=2, label=f'最优阈值: {optimal_threshold:.2f}')
    ax1.set_xlabel('置信度阈值', fontsize=12)
    ax1.set_ylabel('分数', fontsize=12)
    ax1.set_title('阈值优化分析（目标检测任务）', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # 检测数量
    ax2 = axes[1]
    ax2.plot(thresholds, detection_counts, 'o-', linewidth=2, markersize=6, color='purple')
    ax2.axvline(x=optimal_threshold, color='black', linestyle='--', 
               linewidth=2, label=f'最优阈值: {optimal_threshold:.2f}')
    ax2.set_xlabel('置信度阈值', fontsize=12)
    ax2.set_ylabel('检测数量', fontsize=12)
    ax2.set_title('检测数量 vs 阈值（目标检测任务）', fontsize=14, fontweight='bold')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/threshold_optimization.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 阈值优化分析图表已生成")


def plot_quantization_detailed():
    """绘制量化详细分析图表"""
    # 模拟量化前后的逐层误差
    layers = ['Conv1', 'Conv2', 'Conv3', 'Conv4', 'Conv5', 
              'FC1', 'FC2', 'FC3', 'FC4', 'Output']
    fp32_outputs = np.random.randn(10) * 0.1 + 1.0
    int8_outputs = fp32_outputs + np.random.randn(10) * 0.05
    quantization_errors = np.abs(fp32_outputs - int8_outputs)
    
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # 逐层误差
    ax1 = axes[0, 0]
    ax1.barh(layers, quantization_errors, alpha=0.7, color='red')
    ax1.set_xlabel('量化误差', fontsize=12)
    ax1.set_ylabel('网络层', fontsize=12)
    ax1.set_title('逐层量化误差（ResNet50模型）', fontsize=14, fontweight='bold')
    ax1.grid(True, alpha=0.3, axis='x')
    
    # FP32 vs INT8输出对比
    ax2 = axes[0, 1]
    x_pos = np.arange(len(layers))
    width = 0.35
    ax2.bar(x_pos - width/2, fp32_outputs, width, label='FP32', alpha=0.7, color='blue')
    ax2.bar(x_pos + width/2, int8_outputs, width, label='INT8', alpha=0.7, color='green')
    ax2.set_xlabel('网络层', fontsize=12)
    ax2.set_ylabel('输出值', fontsize=12)
    ax2.set_title('FP32 vs INT8 输出对比（ResNet50模型）', fontsize=14, fontweight='bold')
    ax2.set_xticks(x_pos)
    ax2.set_xticklabels(layers, rotation=45, ha='right')
    ax2.legend()
    ax2.grid(True, alpha=0.3, axis='y')
    
    # 精度损失分布
    ax3 = axes[1, 0]
    accuracy_drops = np.random.randn(100) * 0.01 + 0.01
    ax3.hist(accuracy_drops, bins=20, alpha=0.7, color='orange', edgecolor='black')
    ax3.axvline(np.mean(accuracy_drops), color='red', linestyle='--', 
               linewidth=2, label=f'平均损失: {np.mean(accuracy_drops):.4f}')
    ax3.set_xlabel('精度下降', fontsize=12)
    ax3.set_ylabel('频数', fontsize=12)
    ax3.set_title('量化精度损失分布（ResNet50模型）', fontsize=14, fontweight='bold')
    ax3.legend()
    ax3.grid(True, alpha=0.3)
    
    # 压缩比和加速比
    ax4 = axes[1, 1]
    compression_ratios = [1.0, 2.0, 4.0, 8.0]
    speedups = [1.0, 1.5, 2.5, 3.5]
    methods = ['FP32', 'FP16', 'INT8', 'INT4']
    
    ax4_twin = ax4.twinx()
    bars1 = ax4.bar(np.arange(len(methods)) - 0.2, compression_ratios, 0.4, 
                   label='压缩比', alpha=0.7, color='blue')
    bars2 = ax4_twin.bar(np.arange(len(methods)) + 0.2, speedups, 0.4, 
                        label='加速比', alpha=0.7, color='green')
    ax4.set_xlabel('量化方法', fontsize=12)
    ax4.set_ylabel('压缩比', fontsize=12, color='blue')
    ax4_twin.set_ylabel('加速比', fontsize=12, color='green')
    ax4.set_xticks(np.arange(len(methods)))
    ax4.set_xticklabels(methods)
    ax4.set_title('量化压缩比和加速比', fontsize=14, fontweight='bold')
    ax4.tick_params(axis='y', labelcolor='blue')
    ax4_twin.tick_params(axis='y', labelcolor='green')
    ax4.legend(loc='upper left')
    ax4_twin.legend(loc='upper right')
    ax4.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/quantization_detailed.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 量化详细分析图表已生成")


def plot_deployment_performance():
    """绘制部署性能详细分析"""
    batch_sizes = [1, 4, 8, 16, 32, 64]
    latency_ms = [50, 45, 42, 40, 38, 37]
    throughput_qps = [20, 89, 190, 400, 842, 1729]
    cpu_usage = [25, 45, 65, 85, 95, 98]
    memory_mb = [500, 800, 1200, 2000, 3500, 6000]
    gpu_usage = [15, 35, 55, 75, 90, 95]
    
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    
    # 延迟
    axes[0, 0].plot(batch_sizes, latency_ms, 'o-', linewidth=2, markersize=8, color='blue')
    axes[0, 0].set_xlabel('批次大小 (Batch Size)', fontsize=12)
    axes[0, 0].set_ylabel('延迟 (毫秒)', fontsize=12)
    axes[0, 0].set_title('延迟 vs 批次大小（图像分类模型部署）', fontsize=14, fontweight='bold')
    axes[0, 0].grid(True, alpha=0.3)
    
    # 吞吐量
    axes[0, 1].plot(batch_sizes, throughput_qps, 'o-', linewidth=2, markersize=8, color='green')
    axes[0, 1].set_xlabel('批次大小 (Batch Size)', fontsize=12)
    axes[0, 1].set_ylabel('吞吐量 (QPS)', fontsize=12)
    axes[0, 1].set_title('吞吐量 vs 批次大小（图像分类模型部署）', fontsize=14, fontweight='bold')
    axes[0, 1].grid(True, alpha=0.3)
    
    # CPU使用率
    axes[0, 2].plot(batch_sizes, cpu_usage, 'o-', linewidth=2, markersize=8, color='red')
    axes[0, 2].axhline(y=80, color='orange', linestyle='--', linewidth=2, label='推荐阈值')
    axes[0, 2].set_xlabel('批次大小 (Batch Size)', fontsize=12)
    axes[0, 2].set_ylabel('CPU使用率 (%)', fontsize=12)
    axes[0, 2].set_title('CPU使用率 vs 批次大小（图像分类模型部署）', fontsize=14, fontweight='bold')
    axes[0, 2].legend()
    axes[0, 2].grid(True, alpha=0.3)
    axes[0, 2].set_ylim([0, 100])
    
    # 内存占用
    axes[1, 0].plot(batch_sizes, memory_mb, 'o-', linewidth=2, markersize=8, color='purple')
    axes[1, 0].set_xlabel('批次大小 (Batch Size)', fontsize=12)
    axes[1, 0].set_ylabel('内存占用 (MB)', fontsize=12)
    axes[1, 0].set_title('内存占用 vs 批次大小（图像分类模型部署）', fontsize=14, fontweight='bold')
    axes[1, 0].grid(True, alpha=0.3)
    
    # GPU使用率
    axes[1, 1].plot(batch_sizes, gpu_usage, 'o-', linewidth=2, markersize=8, color='orange')
    axes[1, 1].set_xlabel('批次大小 (Batch Size)', fontsize=12)
    axes[1, 1].set_ylabel('GPU使用率 (%)', fontsize=12)
    axes[1, 1].set_title('GPU使用率 vs 批次大小（图像分类模型部署）', fontsize=14, fontweight='bold')
    axes[1, 1].grid(True, alpha=0.3)
    axes[1, 1].set_ylim([0, 100])
    
    # 效率对比（延迟/吞吐量）
    efficiency = np.array(throughput_qps) / np.array(latency_ms)
    axes[1, 2].plot(batch_sizes, efficiency, 'o-', linewidth=2, markersize=8, color='teal')
    axes[1, 2].set_xlabel('批次大小 (Batch Size)', fontsize=12)
    axes[1, 2].set_ylabel('效率 (QPS/ms)', fontsize=12)
    axes[1, 2].set_title('效率指标：吞吐量/延迟（图像分类模型部署）', fontsize=14, fontweight='bold')
    axes[1, 2].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('docs/images/deployment_performance.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 部署性能详细分析图表已生成")


def plot_data_drift_detection():
    """绘制数据漂移检测图表"""
    np.random.seed(42)
    
    # 模拟参考数据和当前数据
    reference_data = np.random.normal(0, 1, 1000)
    current_data = np.random.normal(0.3, 1.2, 1000)  # 有漂移
    
    # 计算统计量
    ref_mean, ref_std = np.mean(reference_data), np.std(reference_data)
    curr_mean, curr_std = np.mean(current_data), np.std(current_data)
    
    # KS测试
    ks_statistic, ks_pvalue = stats.ks_2samp(reference_data, current_data)
    
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # 分布对比
    ax1 = axes[0, 0]
    ax1.hist(reference_data, bins=50, alpha=0.6, label='参考数据', color='blue', density=True)
    ax1.hist(current_data, bins=50, alpha=0.6, label='当前数据', color='red', density=True)
    ax1.axvline(ref_mean, color='blue', linestyle='--', linewidth=2, label=f'参考均值: {ref_mean:.2f}')
    ax1.axvline(curr_mean, color='red', linestyle='--', linewidth=2, label=f'当前均值: {curr_mean:.2f}')
    ax1.set_xlabel('特征值', fontsize=12)
    ax1.set_ylabel('密度', fontsize=12)
    ax1.set_title('数据分布对比（信用评分特征）', fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # Q-Q图
    ax2 = axes[0, 1]
    stats.probplot(reference_data, dist="norm", plot=ax2)
    ax2.set_xlabel('理论分位数', fontsize=12)
    ax2.set_ylabel('样本分位数', fontsize=12)
    ax2.set_title('参考数据 Q-Q图（信用评分特征）', fontsize=14, fontweight='bold')
    ax2.grid(True, alpha=0.3)
    
    # 统计量对比
    ax3 = axes[1, 0]
    metrics = ['均值', '标准差']
    ref_values = [ref_mean, ref_std]
    curr_values = [curr_mean, curr_std]
    x = np.arange(len(metrics))
    width = 0.35
    ax3.bar(x - width/2, ref_values, width, label='参考数据', alpha=0.7, color='blue')
    ax3.bar(x + width/2, curr_values, width, label='当前数据', alpha=0.7, color='red')
    ax3.set_xlabel('统计量', fontsize=12)
    ax3.set_ylabel('数值', fontsize=12)
    ax3.set_title('统计量对比（信用评分特征）', fontsize=14, fontweight='bold')
    ax3.set_xticks(x)
    ax3.set_xticklabels(metrics)
    ax3.legend()
    ax3.grid(True, alpha=0.3, axis='y')
    
    # 漂移检测结果
    ax4 = axes[1, 1]
    drift_detected = ks_pvalue < 0.05
    colors = ['green' if not drift_detected else 'red']
    bars = ax4.bar(['漂移检测'], [1 if drift_detected else 0], 
                   color=colors, alpha=0.7, edgecolor='black')
    ax4.set_ylabel('检测结果', fontsize=12)
    ax4.set_title(f'数据漂移检测 (KS统计量: {ks_statistic:.3f}, p值: {ks_pvalue:.4f})', 
                 fontsize=14, fontweight='bold')
    ax4.set_ylim([0, 1.2])
    ax4.text(0, 0.5, '检测到漂移' if drift_detected else '未检测到漂移', 
            ha='center', va='center', fontsize=14, fontweight='bold')
    ax4.grid(True, alpha=0.3, axis='y')
    
    plt.tight_layout()
    plt.savefig('docs/images/data_drift_detection.png', dpi=300, bbox_inches='tight')
    plt.close()
    print("✓ 数据漂移检测图表已生成")


# ==================== 主函数 ====================

def main():
    """生成所有图表"""
    print("=" * 60)
    print("开始生成机器学习评估指标完整可视化图表...")
    print("=" * 60)
    
    # 数据质量指标
    print("\n【数据质量指标】")
    plot_missing_value_analysis()
    plot_data_distribution_analysis()
    plot_class_imbalance_analysis()
    
    # 前处理指标
    print("\n【前处理指标】")
    plot_scaling_comparison()
    plot_feature_importance()
    plot_correlation_heatmap()
    
    # 分类任务指标
    print("\n【分类任务指标】")
    plot_confusion_matrix_detailed()
    plot_roc_curve_multiple()
    plot_pr_curve_detailed()
    
    # 回归任务指标
    print("\n【回归任务指标】")
    plot_regression_metrics()
    
    # 聚类任务指标
    print("\n【聚类任务指标】")
    plot_clustering_metrics()
    
    # 目标检测指标
    print("\n【目标检测指标】")
    plot_iou_visualization()
    plot_confidence_distribution()
    
    # 训练过程指标
    print("\n【训练过程指标】")
    plot_training_curves()
    
    # 模型参数指标
    print("\n【模型参数指标】")
    plot_model_complexity()
    
    # 量化指标
    print("\n【量化指标】")
    plot_quantization_metrics()
    
    # 部署指标
    print("\n【部署指标】")
    plot_deployment_metrics()
    
    # 进阶篇图表
    print("\n【进阶篇：训练过程指标】")
    plot_loss_function_comparison()
    plot_gradient_analysis()
    plot_learning_rate_schedules()
    plot_weight_distribution_analysis()
    
    print("\n【进阶篇：超参数和验证】")
    plot_hyperparameter_tuning()
    plot_cross_validation_results()
    
    print("\n【进阶篇：优化器和正则化】")
    plot_optimizer_comparison()
    plot_regularization_effects()
    
    # 高级篇图表
    print("\n【高级篇：后处理指标】")
    plot_nms_comparison()
    plot_threshold_optimization()
    
    print("\n【高级篇：量化和部署】")
    plot_quantization_detailed()
    plot_deployment_performance()
    
    print("\n【高级篇：监控指标】")
    plot_data_drift_detection()
    
    print("\n" + "=" * 60)
    print("✓ 所有图表生成完成！")
    print("=" * 60)


if __name__ == '__main__':
    main()
# 机器学习评估指标可视化脚本依赖
# Python 3.11 兼容版本
numpy>=1.24.0,<2.0.0
matplotlib>=3.7.0
seaborn>=0.12.0
scikit-learn>=1.3.0
scipy>=1.10.0
pandas>=2.0.0