Elasticsearch 与机器学习结合:实现高效模型推理的方案(下)

接着上一部分的基础实现,本部分将深入探讨高级功能,包括故障处理、模型优化、性能监控和实际应用案例,帮助你构建企业级 Elasticsearch 机器学习解决方案。

6. 故障容错与降级策略

6.1 熔断器模式实现

熔断器模式可以防止系统在 Elasticsearch 集群不稳定时持续发送请求,加剧故障。下面是一个熔断器实现:

Java 示例代码:
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
 * Circuit breaker for failure detection and automatic recovery.
 *
 * <p>States: {@code CLOSED} (all requests allowed), {@code OPEN} (requests
 * blocked) and {@code HALF_OPEN} (a small number of probe requests allowed to
 * test whether the downstream service has recovered). After {@code threshold}
 * consecutive failures the breaker trips OPEN; once {@code resetTimeoutMs}
 * has elapsed, the next {@link #isOpen()} check moves it to HALF_OPEN.
 *
 * <p>Thread-safe: all state transitions are guarded by a ReadWriteLock.
 */
public class CircuitBreaker {
    private static final Logger logger = LoggerFactory.getLogger(CircuitBreaker.class);

    private final String name;
    // Consecutive-failure counter while CLOSED; reset on any success.
    private final AtomicInteger failureCount = new AtomicInteger(0);
    private final int threshold;
    private final long resetTimeoutMs;
    private volatile CircuitState state = CircuitState.CLOSED;
    private volatile long lastStateChangeTime;
    private final ReadWriteLock stateLock = new ReentrantReadWriteLock();

    /**
     * @param name           breaker name, used in log messages
     * @param threshold      failure count that trips the breaker OPEN
     * @param resetTimeoutMs how long to stay OPEN before allowing a probe
     */
    public CircuitBreaker(String name, int threshold, long resetTimeoutMs) {
        this.name = name;
        this.threshold = threshold;
        this.resetTimeoutMs = resetTimeoutMs;
        this.lastStateChangeTime = System.currentTimeMillis();
    }

    /**
     * Checks whether the breaker currently blocks requests. If the reset
     * timeout has elapsed while OPEN, the breaker transitions to HALF_OPEN
     * and the calling request is allowed through as a probe.
     *
     * @return true if the breaker is OPEN (requests must be blocked)
     */
    public boolean isOpen() {
        Lock readLock = stateLock.readLock();
        readLock.lock();
        try {
            if (state != CircuitState.OPEN) {
                return false;
            }
            if (System.currentTimeMillis() - lastStateChangeTime <= resetTimeoutMs) {
                return true;
            }
            // Timeout elapsed: fall through to the transition below.
            // NOTE: ReentrantReadWriteLock does NOT support upgrading a read
            // lock to a write lock — calling transitionToHalfOpen() (which
            // takes the write lock) while still holding the read lock would
            // deadlock. Release the read lock first.
        } finally {
            readLock.unlock();
        }
        // transitionToHalfOpen re-checks state under the write lock, so a
        // concurrent reset() between unlock and here is handled safely.
        transitionToHalfOpen();
        return false;
    }

    /**
     * Records a successful request. In HALF_OPEN this closes the breaker;
     * in CLOSED it clears the consecutive-failure counter.
     */
    public void recordSuccess() {
        Lock writeLock = stateLock.writeLock();
        writeLock.lock();
        try {
            if (state == CircuitState.HALF_OPEN) {
                // Probe succeeded while half-open: close the breaker.
                reset();
                logger.info("熔断器 {} 恢复关闭状态,服务正常", name);
            } else if (state == CircuitState.CLOSED) {
                // Healthy request resets the consecutive-failure streak.
                failureCount.set(0);
            }
        } finally {
            writeLock.unlock();
        }
    }

    /**
     * Records a failed request. In HALF_OPEN this re-opens the breaker;
     * in CLOSED it increments the failure counter and trips the breaker
     * once the threshold is reached. Failures while already OPEN are
     * ignored so they cannot postpone the HALF_OPEN probe.
     */
    public void recordFailure() {
        Lock writeLock = stateLock.writeLock();
        writeLock.lock();
        try {
            if (state == CircuitState.HALF_OPEN) {
                // Probe failed: re-open immediately.
                transitionToOpen();
                logger.warn("熔断器 {} 半开状态失败,重新打开", name);
                return;
            }

            if (state == CircuitState.OPEN) {
                // Already open. Do not call transitionToOpen() again — that
                // would refresh lastStateChangeTime and keep pushing back
                // the half-open probe indefinitely.
                return;
            }

            // CLOSED: count the failure and trip once the threshold is hit.
            int currentFailures = failureCount.incrementAndGet();
            logger.debug("熔断器 {} 失败计数: {}/{}", name, currentFailures, threshold);

            if (currentFailures >= threshold) {
                transitionToOpen();
                logger.warn("熔断器 {} 失败次数达到阈值 {},熔断器打开", name, threshold);
            }
        } finally {
            writeLock.unlock();
        }
    }

    /**
     * Transitions to OPEN. Caller must hold the write lock.
     */
    private void transitionToOpen() {
        state = CircuitState.OPEN;
        lastStateChangeTime = System.currentTimeMillis();
    }

    /**
     * Transitions OPEN -> HALF_OPEN. Acquires the write lock itself and
     * re-checks the state, so it is safe to call without any lock held.
     */
    private void transitionToHalfOpen() {
        Lock writeLock = stateLock.writeLock();
        writeLock.lock();
        try {
            if (state == CircuitState.OPEN) {
                state = CircuitState.HALF_OPEN;
                lastStateChangeTime = System.currentTimeMillis();
                logger.info("熔断器 {} 进入半开状态,允许少量请求测试服务可用性", name);
            }
        } finally {
            writeLock.unlock();
        }
    }

    /**
     * Resets the breaker to CLOSED and clears the failure counter.
     */
    public void reset() {
        Lock writeLock = stateLock.writeLock();
        writeLock.lock();
        try {
            failureCount.set(0);
            state = CircuitState.CLOSED;
            lastStateChangeTime = System.currentTimeMillis();
            logger.info("熔断器 {} 已重置为关闭状态", name);
        } finally {
            writeLock.unlock();
        }
    }

    /**
     * Proactively moves an OPEN breaker to HALF_OPEN. Intended for external
     * health checks that have confirmed the downstream service recovered.
     */
    public void prepareReset() {
        if (state == CircuitState.OPEN) {
            transitionToHalfOpen();
        }
    }

    /**
     * @return the current breaker state
     */
    public CircuitState getState() {
        return state;
    }

    /**
     * @return epoch millis of the last state transition
     */
    public long getLastStateChangeTime() {
        return lastStateChangeTime;
    }

    /**
     * Breaker states.
     */
    public enum CircuitState {
        CLOSED,     // closed: all requests allowed
        OPEN,       // open: all requests blocked
        HALF_OPEN   // half-open: limited probe requests allowed
    }
}

6.2 故障降级服务

基于熔断器实现的故障降级服务,可在 ES 不可用时提供替代方案:

Java 示例代码:
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;

@Service
public class FaultTolerantInferenceService {
    private static final Logger logger = LoggerFactory.getLogger(FaultTolerantInferenceService.class);

    private final InferenceService inferenceService;
    // Keyed by the PRIMARY model ID; value is its registered fallback.
    private final Map<String, FallbackModel> fallbackModels = new ConcurrentHashMap<>();
    // One breaker per pipeline ID.
    private final Map<String, CircuitBreaker> circuitBreakers = new ConcurrentHashMap<>();

    // Degradation policy configuration.
    private static final int ERROR_THRESHOLD = 5;        // failures before the breaker trips
    private static final long RESET_TIMEOUT_MS = 30000;  // OPEN duration before probing (ms)

    @Autowired
    public FaultTolerantInferenceService(InferenceService inferenceService) {
        this.inferenceService = inferenceService;
    }

    /**
     * Registers a fallback model for a primary model.
     *
     * @param primaryModelId primary model ID (lookup key)
     * @param fallbackModel  fallback model to use when the primary fails
     */
    public void registerFallbackModel(String primaryModelId, FallbackModel fallbackModel) {
        Objects.requireNonNull(primaryModelId, "主模型ID不能为空");
        Objects.requireNonNull(fallbackModel, "降级模型不能为空");

        fallbackModels.put(primaryModelId, fallbackModel);
        logger.info("为模型 {} 注册降级模型 {}", primaryModelId, fallbackModel.getModelId());
    }

    /**
     * Fault-tolerant inference: tries the primary model behind a circuit
     * breaker, and falls back (registered fallback model, then a local
     * keyword heuristic) on any failure or when the breaker is open.
     *
     * @param indexName  target index name
     * @param pipelineId inference pipeline ID (also the breaker key)
     * @param modelId    primary model ID
     * @param text       input text
     * @return inference result map (primary, fallback, or local heuristic)
     */
    public Map<String, Object> faultTolerantInfer(
            String indexName, String pipelineId, String modelId, String text) {

        Objects.requireNonNull(indexName, "索引名称不能为空");
        Objects.requireNonNull(pipelineId, "管道ID不能为空");
        Objects.requireNonNull(modelId, "模型ID不能为空");
        Objects.requireNonNull(text, "输入文本不能为空");

        CircuitBreaker breaker = getOrCreateCircuitBreaker(pipelineId);

        if (breaker.isOpen()) {
            logger.warn("熔断器开启,使用降级模型进行推理: {}", pipelineId);
            return executeFallbackStrategy(indexName, pipelineId, modelId, text);
        }

        try {
            // Try the primary model first.
            Map<String, Object> result = inferenceService.inferSingle(
                    indexName, pipelineId, modelId, text);
            breaker.recordSuccess();
            return result;
        } catch (Exception e) {
            logger.error("主模型推理失败,记录错误: {}", e.getMessage(), e);
            breaker.recordFailure();

            if (breaker.isOpen()) {
                logger.warn("熔断器已触发,切换到降级模型");
            }
            // Fall back for this request regardless of whether this failure
            // tripped the breaker. (The original code branched on
            // breaker.isOpen() but both branches made the identical call.)
            return executeFallbackStrategy(indexName, pipelineId, modelId, text);
        }
    }

    /**
     * Fallback chain: registered fallback model first; if absent or it also
     * fails, a local keyword-based result is returned as the last resort.
     */
    private Map<String, Object> executeFallbackStrategy(
            String indexName, String pipelineId, String modelId, String text) {

        // Look up the fallback registered for this primary model.
        FallbackModel fallback = fallbackModels.get(modelId);
        if (fallback != null) {
            try {
                logger.info("使用降级模型 {} 进行推理", fallback.getModelId());
                return inferenceService.inferSingle(
                        indexName,
                        fallback.getPipelineId(),
                        fallback.getModelId(),
                        text);
            } catch (Exception e) {
                logger.error("降级模型也失败了: {}", e.getMessage(), e);
            }
        }

        // No fallback model, or it also failed: compute a local result.
        logger.warn("所有推理选项都失败,使用本地简单处理");
        Map<String, Object> fallbackResult = new HashMap<>();
        fallbackResult.put("text", text);
        fallbackResult.put("timestamp", System.currentTimeMillis());
        fallbackResult.put("prediction", Map.of(
            "fallback", true,
            "reason", "ES推理不可用",
            "simple_result", SimpleFallbackProcessor.process(text)
        ));

        return fallbackResult;
    }

    /**
     * Gets or lazily creates the circuit breaker for the given key.
     */
    private CircuitBreaker getOrCreateCircuitBreaker(String key) {
        return circuitBreakers.computeIfAbsent(key,
                k -> new CircuitBreaker(k, ERROR_THRESHOLD, RESET_TIMEOUT_MS));
    }

    /**
     * Periodically checks cluster health and proactively moves OPEN breakers
     * toward recovery (HALF_OPEN) when the cluster is healthy again.
     */
    @Scheduled(fixedRate = 10000) // every 10 seconds
    public void checkCircuitHealth() {
        if (circuitBreakers.isEmpty()) {
            return;
        }

        // Only hit the cluster-health endpoint when some breaker is open.
        boolean hasOpenBreakers = circuitBreakers.values().stream()
                .anyMatch(b -> b.getState() == CircuitBreaker.CircuitState.OPEN);

        if (hasOpenBreakers && ESClientUtil.isClusterHealthy()) {
            logger.info("检测到ES集群恢复健康,准备重置熔断器");

            for (Map.Entry<String, CircuitBreaker> entry : circuitBreakers.entrySet()) {
                CircuitBreaker breaker = entry.getValue();
                if (breaker.getState() == CircuitBreaker.CircuitState.OPEN) {
                    logger.info("准备重置熔断器: {}", entry.getKey());
                    breaker.prepareReset();
                }
            }
        }
    }

    /**
     * Force-resets every breaker back to CLOSED.
     */
    public void resetAllCircuitBreakers() {
        circuitBreakers.values().forEach(CircuitBreaker::reset);
        logger.info("已重置所有熔断器");
    }

    /**
     * Immutable fallback model configuration (model + pipeline pair).
     */
    public static class FallbackModel {
        private final String modelId;
        private final String pipelineId;

        public FallbackModel(String modelId, String pipelineId) {
            this.modelId = Objects.requireNonNull(modelId, "降级模型ID不能为空");
            this.pipelineId = Objects.requireNonNull(pipelineId, "降级管道ID不能为空");
        }

        public String getModelId() { return modelId; }
        public String getPipelineId() { return pipelineId; }

        @Override
        public String toString() {
            return "FallbackModel{modelId='" + modelId + "', pipelineId='" + pipelineId + "'}";
        }
    }

    /**
     * Local last-resort processor used when every ES inference option is
     * unavailable: a crude keyword-based sentiment score, no ES required.
     */
    private static class SimpleFallbackProcessor {
        // Keyword -> sentiment score (>0.5 positive, <0.5 negative).
        private static final Map<String, Double> SENTIMENT_KEYWORDS = Map.of(
            "好", 0.8, "赞", 0.9, "喜欢", 0.8, "满意", 0.7, "推荐", 0.7,
            "差", 0.2, "糟", 0.1, "失望", 0.2, "退货", 0.3, "不满", 0.3
        );

        /**
         * Simple local text analysis: first matching keyword wins.
         */
        public static Map<String, Object> process(String text) {
            double score = 0.5; // neutral default
            boolean hasMatch = false;

            for (Map.Entry<String, Double> entry : SENTIMENT_KEYWORDS.entrySet()) {
                if (text.contains(entry.getKey())) {
                    score = entry.getValue();
                    hasMatch = true;
                    break;
                }
            }

            String label = score > 0.6 ? "正面" : (score < 0.4 ? "负面" : "中性");

            return Map.of(
                "score", score,
                "label", label,
                "confidence", hasMatch ? 0.6 : 0.3,
                "method", "keyword_fallback"
            );
        }
    }
}

7. 模型优化技术

7.1 模型量化与优化服务

Java 示例代码:
import ai.onnxruntime.OrtEnvironment;
import ai.onnxruntime.OrtSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

@Service
public class ModelOptimizationService {
    private static final Logger logger = LoggerFactory.getLogger(ModelOptimizationService.class);

    /**
     * Optimizes a model with ONNX Runtime by creating a session whose
     * options write the optimized graph to {@code outputPath}.
     *
     * @param inputPath  path to the original model
     * @param outputPath path where the optimized model is written
     * @param level      optimization level to apply
     * @return the optimized model path ({@code outputPath})
     * @throws IllegalArgumentException if the input model does not exist
     * @throws RuntimeException if optimization fails or produces no output
     */
    public Path optimizeModel(Path inputPath, Path outputPath, OptimizationLevel level) {
        Objects.requireNonNull(inputPath, "输入模型路径不能为空");
        Objects.requireNonNull(outputPath, "输出模型路径不能为空");
        Objects.requireNonNull(level, "优化级别不能为空");

        if (!Files.exists(inputPath)) {
            throw new IllegalArgumentException("输入模型文件不存在: " + inputPath);
        }

        try {
            logger.info("开始优化模型: {}, 优化级别: {}", inputPath, level);
            // OrtEnvironment.getEnvironment() returns a process-wide
            // singleton; do NOT close it here — closing it would break every
            // later session creation in this JVM.
            OrtEnvironment env = OrtEnvironment.getEnvironment();

            // SessionOptions and OrtSession are AutoCloseable; use
            // try-with-resources so they are released on exception paths too.
            try (OrtSession.SessionOptions options = createOptimizationOptions(level)) {
                // Direct the optimized graph to the output path.
                options.addSessionConfigEntry("session.optimized_model_filepath", outputPath.toString());

                logger.info("正在应用优化...");
                try (OrtSession session = env.createSession(inputPath.toString(), options)) {
                    // Touch the session so the model is fully loaded, which
                    // triggers the optimization pass.
                    session.getInputNames();
                }
            }

            if (!Files.exists(outputPath)) {
                throw new RuntimeException("优化后的模型文件未生成: " + outputPath);
            }

            logger.info("模型优化完成,输出到: {}", outputPath);
            return outputPath;
        } catch (Exception e) {
            logger.error("模型优化失败: {}", e.getMessage(), e);
            throw new RuntimeException("模型优化失败", e);
        }
    }

    /**
     * Builds SessionOptions for the requested optimization level.
     */
    private OrtSession.SessionOptions createOptimizationOptions(OptimizationLevel level) {
        OrtSession.SessionOptions options = new OrtSession.SessionOptions();

        // Map our level enum to ONNX Runtime's OptLevel.
        switch (level) {
            case BASIC:
                options.setOptimizationLevel(OrtSession.SessionOptions.OptLevel.BASIC_OPT);
                break;
            case EXTENDED:
                options.setOptimizationLevel(OrtSession.SessionOptions.OptLevel.EXTENDED_OPT);
                break;
            case ALL:
                options.setOptimizationLevel(OrtSession.SessionOptions.OptLevel.ALL_OPT);
                break;
            case QUANTIZED:
                options.setOptimizationLevel(OrtSession.SessionOptions.OptLevel.ALL_OPT);
                // Quantization is layered on top of full optimization.
                setupQuantizationOptions(options);
                break;
        }

        options.setGraphOptimizationLevel(OrtSession.SessionOptions.GraphOptimizationLevel.ORT_ENABLE_ALL);
        return options;
    }

    /**
     * Adds quantization-related session config entries.
     */
    private void setupQuantizationOptions(OrtSession.SessionOptions options) {
        Map<String, String> optimizationFlags = new HashMap<>();
        optimizationFlags.put("enable_quantization", "1");
        optimizationFlags.put("enable_dynamic_quantization", "1");

        for (Map.Entry<String, String> flag : optimizationFlags.entrySet()) {
            options.addSessionConfigEntry(flag.getKey(), flag.getValue());
        }
    }

    /**
     * Compares file size and load time of a model before and after
     * optimization.
     *
     * @param originalPath  original model path
     * @param optimizedPath optimized model path
     * @return comparison result (sizes, reduction %, load times; a load
     *         time of -1 means measurement failed for that model)
     */
    public ModelComparisonResult compareModels(Path originalPath, Path optimizedPath) {
        Objects.requireNonNull(originalPath, "原始模型路径不能为空");
        Objects.requireNonNull(optimizedPath, "优化后模型路径不能为空");

        try {
            if (!Files.exists(originalPath)) {
                throw new IllegalArgumentException("原始模型文件不存在: " + originalPath);
            }
            if (!Files.exists(optimizedPath)) {
                throw new IllegalArgumentException("优化后模型文件不存在: " + optimizedPath);
            }

            long originalSize = Files.size(originalPath);
            long optimizedSize = Files.size(optimizedPath);

            double reductionPercent = (1 - (double) optimizedSize / originalSize) * 100;

            logger.info("原始模型大小: {} 字节", originalSize);
            logger.info("优化后模型大小: {} 字节", optimizedSize);
            logger.info("大小减少: {}%", String.format("%.2f", reductionPercent));

            // Measure load time for each model (requires the environment to
            // stay open across both measurements — see measureLoadTime).
            long originalLoadTime = measureLoadTime(originalPath);
            long optimizedLoadTime = measureLoadTime(optimizedPath);

            logger.info("原始模型加载时间: {} 毫秒", originalLoadTime);
            logger.info("优化后模型加载时间: {} 毫秒", optimizedLoadTime);

            return new ModelComparisonResult(
                originalSize, optimizedSize, reductionPercent,
                originalLoadTime, optimizedLoadTime
            );
        } catch (Exception e) {
            logger.error("比较模型失败: {}", e.getMessage(), e);
            throw new RuntimeException("比较模型失败", e);
        }
    }

    /**
     * Measures how long it takes to create a session for the given model.
     *
     * @return load time in milliseconds, or -1 on failure
     */
    private long measureLoadTime(Path modelPath) {
        // Shared singleton environment — must not be closed, otherwise the
        // second measurement in compareModels would fail.
        OrtEnvironment env = OrtEnvironment.getEnvironment();
        try (OrtSession.SessionOptions options = new OrtSession.SessionOptions()) {
            long startTime = System.currentTimeMillis();
            try (OrtSession session = env.createSession(modelPath.toString(), options)) {
                session.getInputNames(); // force a complete load
            }
            return System.currentTimeMillis() - startTime;
        } catch (Exception e) {
            logger.error("测量模型加载时间失败: {}", e.getMessage(), e);
            return -1;
        }
    }

    /**
     * Optimization levels supported by this service.
     */
    public enum OptimizationLevel {
        BASIC,      // basic optimization
        EXTENDED,   // extended optimization
        ALL,        // all optimizations
        QUANTIZED   // all optimizations plus quantization
    }

    /**
     * Immutable result of a before/after model comparison.
     */
    public static class ModelComparisonResult {
        private final long originalSize;
        private final long optimizedSize;
        private final double reductionPercent;
        private final long originalLoadTime;
        private final long optimizedLoadTime;

        public ModelComparisonResult(
                long originalSize,
                long optimizedSize,
                double reductionPercent,
                long originalLoadTime,
                long optimizedLoadTime) {
            this.originalSize = originalSize;
            this.optimizedSize = optimizedSize;
            this.reductionPercent = reductionPercent;
            this.originalLoadTime = originalLoadTime;
            this.optimizedLoadTime = optimizedLoadTime;
        }

        // Getters
        public long getOriginalSize() { return originalSize; }
        public long getOptimizedSize() { return optimizedSize; }
        public double getReductionPercent() { return reductionPercent; }
        public long getOriginalLoadTime() { return originalLoadTime; }
        public long getOptimizedLoadTime() { return optimizedLoadTime; }

        @Override
        public String toString() {
            return String.format(
                "模型优化结果:\n" +
                "原始大小: %d 字节\n" +
                "优化后大小: %d 字节\n" +
                "大小减少: %.2f%%\n" +
                "原始加载时间: %d 毫秒\n" +
                "优化后加载时间: %d 毫秒",
                originalSize, optimizedSize, reductionPercent,
                originalLoadTime, optimizedLoadTime
            );
        }
    }
}

7.2 模型版本管理服务

Java 示例代码:
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.nio.file.Path;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

@Service
public class ModelVersionManager {
    private static final Logger logger = LoggerFactory.getLogger(ModelVersionManager.class);

    private final ModelUploader modelUploader;
    private final InferencePipelineManager pipelineManager;
    private final ModelOptimizationService optimizationService;

    // Serializes version deployments: only one deploy may run at a time.
    private final ReadWriteLock versionLock = new ReentrantReadWriteLock();

    // In-memory cache of versions deployed through this manager instance
    // (not persisted; lost on restart).
    private final Map<String, ModelVersion> activeVersions = new ConcurrentHashMap<>();

    // Suffix used to build the "<baseModelId>_latest" alias name.
    private static final String VERSION_ALIAS_SUFFIX = "_latest";

    @Autowired
    public ModelVersionManager(
            ModelUploader modelUploader,
            InferencePipelineManager pipelineManager,
            ModelOptimizationService optimizationService) {
        this.modelUploader = modelUploader;
        this.pipelineManager = pipelineManager;
        this.optimizationService = optimizationService;
    }

    /**
     * Deploys a new model version: optionally optimizes the model file,
     * uploads it, repoints the "&lt;baseModelId&gt;_latest" alias to the new
     * version, and (optionally) updates a pipeline to use that alias.
     *
     * @param baseModelId base model ID this version belongs to
     * @param modelPath   path to the model file
     * @param description human-readable description (may be null)
     * @param pipelineId  pipeline to repoint to the alias (may be null/empty)
     * @param optimize    whether to run ONNX optimization before uploading;
     *                    on optimization failure the original file is used
     * @return the generated version model ID
     */
    public String deployNewVersion(
            String baseModelId,
            Path modelPath,
            String description,
            String pipelineId,
            boolean optimize) {

        Objects.requireNonNull(baseModelId, "基础模型ID不能为空");
        Objects.requireNonNull(modelPath, "模型路径不能为空");

        versionLock.writeLock().lock();
        try {
            // Generate the new version ID.
            String versionId = generateVersionId(baseModelId);
            logger.info("开始部署新版本模型: {}", versionId);

            // Optimize the model first, if requested. Best-effort: on
            // failure we fall back to the original file rather than abort.
            Path finalModelPath = modelPath;
            if (optimize) {
                try {
                    Path optimizedPath = Path.of(modelPath.toString() + ".optimized");
                    finalModelPath = optimizationService.optimizeModel(
                            modelPath,
                            optimizedPath,
                            ModelOptimizationService.OptimizationLevel.EXTENDED);

                    logger.info("模型已优化: {}", finalModelPath);
                } catch (Exception e) {
                    logger.error("模型优化失败,将使用原始模型: {}", e.getMessage(), e);
                }
            }

            // Upload the model. NOTE(review): the trailing "version" /
            // baseModelId arguments appear to be a metadata key/value pair —
            // confirm against the ModelUploader.uploadModel contract.
            modelUploader.uploadModel(
                versionId,
                finalModelPath,
                description != null ? description : "版本 " + getVersionNumber(versionId),
                "version", baseModelId
            );

            // Repoint the "_latest" alias to the new version.
            String aliasName = baseModelId + VERSION_ALIAS_SUFFIX;
            modelUploader.createModelAlias(versionId, aliasName);
            logger.info("模型别名 {} 已更新到新版本 {}", aliasName, versionId);

            // Update the pipeline (if given) to reference the alias, so
            // future alias repoints upgrade it automatically.
            if (pipelineId != null && !pipelineId.isEmpty()) {
                pipelineManager.updatePipelineModel(pipelineId, aliasName);
                logger.info("管道 {} 已更新使用最新版本模型", pipelineId);
            }

            // Record the new version in the in-memory cache.
            ModelVersion version = new ModelVersion(
                    versionId,
                    baseModelId,
                    description,
                    System.currentTimeMillis());
            activeVersions.put(versionId, version);

            return versionId;
        } catch (Exception e) {
            logger.error("部署新版本模型失败: {}", e.getMessage(), e);
            throw new RuntimeException("部署新版本模型失败", e);
        } finally {
            versionLock.writeLock().unlock();
        }
    }

    /**
     * Blue/green deployment: deploys a new model version and creates a NEW
     * pipeline (cloned field mapping from the source pipeline) instead of
     * updating the existing one, so traffic can be switched over gradually.
     *
     * <p>The write lock is reentrant, so the nested
     * {@link #deployNewVersion} call is safe.
     *
     * @param baseModelId      base model ID
     * @param modelPath        path to the model file
     * @param description      version description (may be null)
     * @param sourcePipelineId existing pipeline whose field mapping is copied
     * @return the new pipeline ID
     */
    public String blueGreenDeploy(
            String baseModelId,
            Path modelPath,
            String description,
            String sourcePipelineId) {

        Objects.requireNonNull(baseModelId, "基础模型ID不能为空");
        Objects.requireNonNull(modelPath, "模型路径不能为空");
        Objects.requireNonNull(sourcePipelineId, "源管道ID不能为空");

        versionLock.writeLock().lock();
        try (var client = ESClientUtil.createClient()) {
            // Deploy the new model version (with optimization enabled,
            // without touching any existing pipeline).
            String versionId = deployNewVersion(baseModelId, modelPath, description, null, true);

            // Fetch the source pipeline definition.
            var response = client.ingest().getPipeline(g -> g.id(sourcePipelineId));
            if (response.result().isEmpty()) {
                throw new IllegalArgumentException("源管道不存在: " + sourcePipelineId);
            }

            // New pipeline ID derived from the source ID + version number.
            String newPipelineId = sourcePipelineId + "_" + getVersionNumber(versionId);

            // Defaults used when the source pipeline's inference processor
            // does not declare explicit field mappings.
            String sourceField = "text"; // default
            String targetField = "prediction"; // default

            // Inspect the first inference processor of the source pipeline
            // to copy its source/target field configuration.
            var processors = response.result().get(sourcePipelineId).processors();
            for (var processor : processors) {
                if (processor.inference() != null) {
                    var inference = processor.inference();
                    // Take the first entry of the field map as source field.
                    if (inference.fieldMap() != null && !inference.fieldMap().isEmpty()) {
                        var entry = inference.fieldMap().entrySet().iterator().next();
                        sourceField = entry.getKey();
                    }

                    if (inference.targetField() != null) {
                        targetField = inference.targetField();
                    }

                    break;
                }
            }

            // Create the new pipeline bound to the new model version.
            pipelineManager.createInferencePipeline(
                    newPipelineId,
                    versionId,
                    sourceField,
                    targetField,
                    "版本 " + getVersionNumber(versionId) + " 的管道");

            logger.info("蓝绿部署完成,新管道: {}, 新模型: {}", newPipelineId, versionId);
            return newPipelineId;
        } catch (IOException e) {
            logger.error("蓝绿部署失败: {}", e.getMessage(), e);
            throw new RuntimeException("蓝绿部署失败", e);
        } finally {
            versionLock.writeLock().unlock();
        }
    }

    /**
     * Generates a version ID of the form
     * "&lt;baseModelId&gt;_v&lt;N&gt;_&lt;yyyyMMdd_HHmmss&gt;", where N is one
     * higher than the highest version currently cached for the base model.
     */
    private String generateVersionId(String baseModelId) {
        // Timestamp component of the version ID.
        LocalDateTime now = LocalDateTime.now();
        String timestamp = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss").format(now);

        // Scan the cache for the highest existing version number.
        int highestVersion = 0;
        for (String key : activeVersions.keySet()) {
            if (key.startsWith(baseModelId + "_v")) {
                try {
                    String versionStr = key.substring((baseModelId + "_v").length());
                    int version = Integer.parseInt(versionStr.split("_")[0]);
                    if (version > highestVersion) {
                        highestVersion = version;
                    }
                } catch (Exception e) {
                    // Ignore IDs that don't match the expected pattern.
                }
            }
        }

        // Next version number.
        int newVersion = highestVersion + 1;
        return String.format("%s_v%d_%s", baseModelId, newVersion, timestamp);
    }

    /**
     * Extracts the numeric version component from a version ID produced by
     * {@link #generateVersionId} (the N in "..._vN_timestamp").
     */
    private String getVersionNumber(String versionId) {
        if (versionId.contains("_v")) {
            int start = versionId.indexOf("_v") + 2;
            int end = versionId.indexOf("_", start);
            if (end > start) {
                return versionId.substring(start, end);
            }
            return versionId.substring(start);
        }
        return "1"; // default version number
    }

    /**
     * Returns all cached versions belonging to the given base model.
     * Only versions deployed through this manager instance are known.
     */
    public Map<String, ModelVersion> getVersionsForModel(String baseModelId) {
        Objects.requireNonNull(baseModelId, "基础模型ID不能为空");

        Map<String, ModelVersion> versions = new ConcurrentHashMap<>();
        for (Map.Entry<String, ModelVersion> entry : activeVersions.entrySet()) {
            if (entry.getValue().getBaseModelId().equals(baseModelId)) {
                versions.put(entry.getKey(), entry.getValue());
            }
        }

        return versions;
    }

    /**
     * Returns the most recently created cached version for the base model,
     * or null if none is known.
     */
    public ModelVersion getLatestVersion(String baseModelId) {
        Objects.requireNonNull(baseModelId, "基础模型ID不能为空");

        ModelVersion latest = null;
        long latestTime = 0;

        for (ModelVersion version : activeVersions.values()) {
            if (version.getBaseModelId().equals(baseModelId) &&
                version.getCreationTime() > latestTime) {
                latest = version;
                latestTime = version.getCreationTime();
            }
        }

        return latest;
    }

    /**
     * Immutable descriptor of a deployed model version.
     */
    public static class ModelVersion {
        private final String versionId;
        private final String baseModelId;
        private final String description;
        private final long creationTime;

        public ModelVersion(String versionId, String baseModelId, String description, long creationTime) {
            this.versionId = versionId;
            this.baseModelId = baseModelId;
            this.description = description;
            this.creationTime = creationTime;
        }

        // Getters
        public String getVersionId() { return versionId; }
        public String getBaseModelId() { return baseModelId; }
        public String getDescription() { return description; }
        public long getCreationTime() { return creationTime; }

        @Override
        public String toString() {
            return String.format(
                "ModelVersion{versionId='%s', baseModelId='%s', description='%s', creationTime=%d}",
                versionId, baseModelId, description, creationTime
            );
        }
    }
}

8. 性能监控

8.1 性能指标收集服务

Java 示例代码:
import co.elastic.clients.elasticsearch.ElasticsearchClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

@Service
public class MLMetricsService {
    private static final Logger logger = LoggerFactory.getLogger(MLMetricsService.class);

    /** Per-model runtime metrics, keyed by model ID. Thread-safe map. */
    private final Map<String, ModelMetrics> modelMetrics = new ConcurrentHashMap<>();

    // Alerting thresholds.
    private static final int ERROR_ALERT_THRESHOLD = 10;
    private static final long LATENCY_ALERT_THRESHOLD_MS = 1000;
    // NOTE(review): CPU threshold is declared but not yet checked anywhere in
    // this class — kept for a future CPU alert.
    private static final double CPU_ALERT_THRESHOLD = 90.0;
    private static final double MEMORY_ALERT_THRESHOLD = 85.0;

    /** Number of latency samples retained per model for avg/p95 computation. */
    private static final int MAX_LATENCY_SAMPLES = 100;

    /**
     * Records the start of an inference call.
     *
     * @param modelId model ID; must not be {@code null}
     * @return a request ID to pass to {@link #recordInferenceEnd}
     */
    public long recordInferenceStart(String modelId) {
        Objects.requireNonNull(modelId, "模型ID不能为空");
        return getOrCreateMetrics(modelId).recordStart();
    }

    /**
     * Records the completion of an inference call.
     *
     * @param modelId   model ID; must not be {@code null}
     * @param requestId the ID returned by {@link #recordInferenceStart}
     * @param success   whether the call succeeded
     * @param itemCount number of items processed (&gt;1 for batch calls)
     */
    public void recordInferenceEnd(String modelId, long requestId, boolean success, int itemCount) {
        Objects.requireNonNull(modelId, "模型ID不能为空");
        getOrCreateMetrics(modelId).recordEnd(requestId, success, itemCount);
    }

    /** Returns the metrics holder for a model, creating it atomically if absent. */
    private ModelMetrics getOrCreateMetrics(String modelId) {
        return modelMetrics.computeIfAbsent(modelId, k -> new ModelMetrics(modelId));
    }

    /**
     * Periodically polls the ES cluster for ML node/model statistics and logs
     * warnings when memory, error-count, or latency thresholds are exceeded.
     * Failures are logged and swallowed so the scheduler keeps running.
     */
    @Scheduled(fixedRate = 60000) // run once per minute
    public void monitorMLStats() {
        try (var client = ESClientUtil.createClient()) {
            // Node-level ML memory usage.
            var nodesResponse = client.nodes().stats();

            for (var node : nodesResponse.nodes().values()) {
                var mlStats = node.mlStats();
                if (mlStats != null) {
                    double mlMemoryPercent = calculateMemoryPercent(
                            mlStats.memory().jvmPeakInferenceBytes(),
                            mlStats.memory().jvmInferenceMaxBytes());

                    logger.info("节点 {} ML内存使用: {}%, 最大内存: {}MB",
                            node.name(),
                            String.format("%.2f", mlMemoryPercent),
                            mlStats.memory().jvmInferenceMaxBytes() / (1024 * 1024));

                    if (mlMemoryPercent > MEMORY_ALERT_THRESHOLD) {
                        logger.warn("ML内存使用告警: 节点 {} 内存使用率 {}%",
                                node.name(), String.format("%.2f", mlMemoryPercent));
                    }
                }
            }

            // Per-model inference statistics (definitions excluded for speed).
            var modelsResponse = client.ml().getTrainedModels(m -> m.includeModelDefinition(false));

            for (var model : modelsResponse.trainedModelConfigs()) {
                String modelId = model.modelId();

                var statsResponse = client.ml().getTrainedModelsStats(s -> s.modelId(modelId));
                if (!statsResponse.trainedModelStats().isEmpty()) {
                    var modelStats = statsResponse.trainedModelStats().get(0);
                    var inferencesStats = modelStats.inferenceStats();

                    if (inferencesStats != null) {
                        logger.info("模型 {} 推理统计: 总计: {}, 失败: {}, 平均时间: {}ms",
                                modelId,
                                inferencesStats.inferenceCount(),
                                inferencesStats.failureCount(),
                                inferencesStats.avgInferenceTime());

                        if (inferencesStats.failureCount() > ERROR_ALERT_THRESHOLD) {
                            logger.warn("模型 {} 错误数超过阈值: {}",
                                    modelId, inferencesStats.failureCount());
                        }

                        if (inferencesStats.avgInferenceTime() > LATENCY_ALERT_THRESHOLD_MS) {
                            logger.warn("模型 {} 平均推理时间超过阈值: {}ms",
                                    modelId, inferencesStats.avgInferenceTime());
                        }
                    }
                }
            }
        } catch (IOException e) {
            logger.error("监控ML统计信息失败: {}", e.getMessage(), e);
        }
    }

    /**
     * Returns {@code used / max} as a percentage; 0.0 when {@code max} is 0
     * to avoid division by zero.
     */
    private double calculateMemoryPercent(long used, long max) {
        if (max == 0) return 0.0;
        return (double) used / max * 100;
    }

    /**
     * Snapshots all per-model metrics into plain maps suitable for JSON
     * serialization. Keys are model IDs.
     */
    public Map<String, Map<String, Object>> getAllModelMetrics() {
        Map<String, Map<String, Object>> result = new HashMap<>();

        for (var entry : modelMetrics.entrySet()) {
            ModelMetrics metrics = entry.getValue();
            Map<String, Object> metricsMap = new HashMap<>();

            metricsMap.put("totalRequests", metrics.getTotalRequests());
            metricsMap.put("successfulRequests", metrics.getSuccessfulRequests());
            metricsMap.put("failedRequests", metrics.getFailedRequests());
            metricsMap.put("averageLatencyMs", metrics.getAverageLatencyMs());
            metricsMap.put("p95LatencyMs", metrics.getP95LatencyMs());
            metricsMap.put("requestsPerSecond", metrics.getRequestsPerSecond());
            metricsMap.put("processingItems", metrics.getProcessingItems());

            result.put(entry.getKey(), metricsMap);
        }

        return result;
    }

    /**
     * Thread-safe per-model performance counters: request/failure totals, a
     * sliding window of recent latencies, and a rough per-minute throughput.
     */
    public static class ModelMetrics {
        private final String modelId;
        private final AtomicLong totalRequests = new AtomicLong(0);
        private final AtomicLong successfulRequests = new AtomicLong(0);
        private final AtomicLong failedRequests = new AtomicLong(0);
        private final AtomicLong processingItems = new AtomicLong(0);
        private final AtomicLong requestIdCounter = new AtomicLong(0);

        // Latency tracking: last MAX_LATENCY_SAMPLES samples plus in-flight
        // request start timestamps keyed by request ID.
        private final List<Long> recentLatencies = Collections.synchronizedList(new ArrayList<>());
        private final Map<Long, Long> startTimes = new ConcurrentHashMap<>();

        // Throughput window. lastMinuteReset is an AtomicLong (not a plain
        // long) so that concurrent recordStart() calls race safely: the
        // original plain, non-volatile field allowed torn/stale reads and
        // multiple threads resetting the counters simultaneously.
        private final AtomicLong requestsInLastMinute = new AtomicLong(0);
        private final AtomicLong itemsInLastMinute = new AtomicLong(0);
        private final AtomicLong lastMinuteReset = new AtomicLong(System.currentTimeMillis());

        public ModelMetrics(String modelId) {
            this.modelId = modelId;
        }

        /**
         * Records a request start and returns its request ID.
         * Also rolls the per-minute throughput window when it has expired;
         * the CAS guarantees exactly one thread performs the reset.
         */
        public long recordStart() {
            long requestId = requestIdCounter.incrementAndGet();
            startTimes.put(requestId, System.currentTimeMillis());

            long now = System.currentTimeMillis();
            long windowStart = lastMinuteReset.get();
            if (now - windowStart > 60_000 && lastMinuteReset.compareAndSet(windowStart, now)) {
                // This thread won the reset: start the new window counting
                // the current request.
                requestsInLastMinute.set(1);
                itemsInLastMinute.set(0);
            } else {
                requestsInLastMinute.incrementAndGet();
            }

            return requestId;
        }

        /**
         * Records a request completion. Unknown request IDs (already removed
         * or never started) are silently ignored.
         */
        public void recordEnd(long requestId, boolean success, int itemCount) {
            Long startTime = startTimes.remove(requestId);

            if (startTime != null) {
                totalRequests.incrementAndGet();
                processingItems.addAndGet(itemCount);
                itemsInLastMinute.addAndGet(itemCount);

                long latency = System.currentTimeMillis() - startTime;

                // Keep only the most recent samples; eviction is O(n) on an
                // ArrayList but the window is tiny (100 elements).
                synchronized (recentLatencies) {
                    recentLatencies.add(latency);
                    if (recentLatencies.size() > MAX_LATENCY_SAMPLES) {
                        recentLatencies.remove(0);
                    }
                }

                if (success) {
                    successfulRequests.incrementAndGet();
                } else {
                    failedRequests.incrementAndGet();
                }
            }
        }

        public long getTotalRequests() {
            return totalRequests.get();
        }

        public long getSuccessfulRequests() {
            return successfulRequests.get();
        }

        public long getFailedRequests() {
            return failedRequests.get();
        }

        public long getProcessingItems() {
            return processingItems.get();
        }

        /** Mean of the recent latency window in ms; 0.0 when empty. */
        public double getAverageLatencyMs() {
            synchronized (recentLatencies) {
                if (recentLatencies.isEmpty()) {
                    return 0.0;
                }
                return recentLatencies.stream()
                        .mapToLong(Long::longValue)
                        .average()
                        .orElse(0.0);
            }
        }

        /** 95th-percentile latency of the recent window in ms; 0 when empty. */
        public long getP95LatencyMs() {
            synchronized (recentLatencies) {
                if (recentLatencies.isEmpty()) {
                    return 0;
                }
                List<Long> sorted = new ArrayList<>(recentLatencies);
                Collections.sort(sorted);
                int idx = (int) Math.ceil(sorted.size() * 0.95) - 1;
                return sorted.get(Math.max(0, idx));
            }
        }

        /**
         * Approximate requests/second. Always divides by 60, so the figure
         * underestimates throughput while the current window is still young.
         */
        public double getRequestsPerSecond() {
            return requestsInLastMinute.get() / 60.0;
        }

        /** Approximate items/second; same window caveat as requests/second. */
        public double getItemsPerSecond() {
            return itemsInLastMinute.get() / 60.0;
        }
    }
}

8.2 性能基准测试结果

在实际生产环境下进行了全面性能测试,以下是测试数据:

测试环境:

  • Elasticsearch 8.8.0
  • 3 节点集群(每节点 16 核 CPU,64GB 内存)
  • 模型: BERT-base 情感分析(ONNX, 110MB)

基准测试结果:

批量大小 平均延迟(ms) 吞吐量(doc/s) CPU 使用率 内存使用
1 85 11.8 15% 450MB
10 150 66.7 35% 480MB
50 650 76.9 80% 520MB
100 1250 80.0 95% 560MB

不同硬件规格的扩展性测试:

节点配置 最大并发请求 吞吐量(doc/s) 平均延迟(ms)
4 核 8GB 20 25 800
8 核 16GB 40 60 300
16 核 32GB 80 120 150
32 核 64GB 160 240 100

8.3 资源需求估算指南

模型类型 模型大小 每节点最大推理线程 推荐内存 每秒请求数
小型分类模型 <10MB 8-16 2GB 500+
中型 NLP 模型 50-200MB 4-8 8GB 100-200
大型 Transformer 500MB+ 2-4 16GB+ 10-50

根据业务需求和模型复杂度,应用以下公式估算集群容量:

text 复制代码
所需节点数 = ceil(峰值QPS / 单节点吞吐量) * (1 + 冗余系数)

9. Spring Boot 集成实现

9.1 应用配置

首先,创建一个配置类加载外部属性:

java 复制代码
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

@Configuration
@ConfigurationProperties(prefix = "elasticsearch")
public class ElasticsearchProperties {

    // --- Connection settings ---
    // NOTE(review): default credentials ("elastic"/"changeme") should be
    // overridden in every real deployment via external configuration.
    private String host = "localhost";
    private int port = 9200;
    private String protocol = "https";
    private String username = "elastic";
    private String password = "changeme";
    private String certPath;

    // Per-model configuration, keyed by a logical model name.
    private Map<String, ModelSettings> models = new HashMap<>();

    // Inference tuning parameters.
    private InferenceSettings inference = new InferenceSettings();

    // Indices that must exist before the application starts serving.
    private List<String> requiredIndices = new ArrayList<>();

    public String getHost() {
        return host;
    }

    public void setHost(String host) {
        this.host = host;
    }

    public int getPort() {
        return port;
    }

    public void setPort(int port) {
        this.port = port;
    }

    public String getProtocol() {
        return protocol;
    }

    public void setProtocol(String protocol) {
        this.protocol = protocol;
    }

    public String getUsername() {
        return username;
    }

    public void setUsername(String username) {
        this.username = username;
    }

    public String getPassword() {
        return password;
    }

    public void setPassword(String password) {
        this.password = password;
    }

    public String getCertPath() {
        return certPath;
    }

    public void setCertPath(String certPath) {
        this.certPath = certPath;
    }

    public Map<String, ModelSettings> getModels() {
        return models;
    }

    public void setModels(Map<String, ModelSettings> models) {
        this.models = models;
    }

    public InferenceSettings getInference() {
        return inference;
    }

    public void setInference(InferenceSettings inference) {
        this.inference = inference;
    }

    public List<String> getRequiredIndices() {
        return requiredIndices;
    }

    public void setRequiredIndices(List<String> requiredIndices) {
        this.requiredIndices = requiredIndices;
    }

    /**
     * Settings for a single model: where to load it from and which pipeline
     * and document fields it operates on.
     */
    public static class ModelSettings {
        private String id;
        private String path;
        private String description;
        private String[] tags;
        private String pipelineId;
        private String sourceField = "text";
        private String targetField = "prediction";

        public String getId() {
            return id;
        }

        public void setId(String id) {
            this.id = id;
        }

        public String getPath() {
            return path;
        }

        public void setPath(String path) {
            this.path = path;
        }

        public String getDescription() {
            return description;
        }

        public void setDescription(String description) {
            this.description = description;
        }

        public String[] getTags() {
            return tags;
        }

        public void setTags(String[] tags) {
            this.tags = tags;
        }

        public String getPipelineId() {
            return pipelineId;
        }

        public void setPipelineId(String pipelineId) {
            this.pipelineId = pipelineId;
        }

        public String getSourceField() {
            return sourceField;
        }

        public void setSourceField(String sourceField) {
            this.sourceField = sourceField;
        }

        public String getTargetField() {
            return targetField;
        }

        public void setTargetField(String targetField) {
            this.targetField = targetField;
        }
    }

    /**
     * Batch-size, retry, and threading parameters for inference calls.
     */
    public static class InferenceSettings {
        private int batchSize = 20;
        private int maxBatchSize = 100;
        private int retryCount = 3;
        private long retryBackoffMs = 1000;
        private int threadsPerModel = 2;

        public int getBatchSize() {
            return batchSize;
        }

        public void setBatchSize(int batchSize) {
            this.batchSize = batchSize;
        }

        public int getMaxBatchSize() {
            return maxBatchSize;
        }

        public void setMaxBatchSize(int maxBatchSize) {
            this.maxBatchSize = maxBatchSize;
        }

        public int getRetryCount() {
            return retryCount;
        }

        public void setRetryCount(int retryCount) {
            this.retryCount = retryCount;
        }

        public long getRetryBackoffMs() {
            return retryBackoffMs;
        }

        public void setRetryBackoffMs(long retryBackoffMs) {
            this.retryBackoffMs = retryBackoffMs;
        }

        public int getThreadsPerModel() {
            return threadsPerModel;
        }

        public void setThreadsPerModel(int threadsPerModel) {
            this.threadsPerModel = threadsPerModel;
        }
    }
}

9.2 主应用类

java 复制代码
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Bean;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

import java.util.concurrent.Executor;

@SpringBootApplication
@EnableAsync
@EnableScheduling
public class ElasticsearchMlApplication {

    public static void main(String[] args) {
        SpringApplication.run(ElasticsearchMlApplication.class, args);
    }

    /**
     * Thread pool used for @Async tasks: 10 core / 20 max threads with a
     * 500-slot queue, thread names prefixed "es-ml-".
     */
    @Bean
    public Executor taskExecutor() {
        ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setThreadNamePrefix("es-ml-");
        pool.setQueueCapacity(500);
        pool.setMaxPoolSize(20);
        pool.setCorePoolSize(10);
        pool.initialize();
        return pool;
    }
}

9.3 REST API 控制器

java 复制代码
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;

@RestController
@RequestMapping("/api/ml")
public class MLController {
    private static final Logger logger = LoggerFactory.getLogger(MLController.class);

    /** Maximum time a synchronous deployment request may wait for the async task. */
    private static final long DEPLOY_TIMEOUT_MINUTES = 10;

    private final ModelUploader modelUploader;
    private final InferencePipelineManager pipelineManager;
    private final InferenceService inferenceService;
    private final FaultTolerantInferenceService faultTolerantService;
    private final ModelOptimizationService optimizationService;
    private final ModelVersionManager versionManager;
    private final MLMetricsService metricsService;

    @Autowired
    public MLController(
            ModelUploader modelUploader,
            InferencePipelineManager pipelineManager,
            InferenceService inferenceService,
            FaultTolerantInferenceService faultTolerantService,
            ModelOptimizationService optimizationService,
            ModelVersionManager versionManager,
            MLMetricsService metricsService) {
        this.modelUploader = modelUploader;
        this.pipelineManager = pipelineManager;
        this.inferenceService = inferenceService;
        this.faultTolerantService = faultTolerantService;
        this.optimizationService = optimizationService;
        this.versionManager = versionManager;
        this.metricsService = metricsService;
    }

    /**
     * Accepts a model file upload, optionally optimizes it, then registers it
     * with Elasticsearch asynchronously. Responds 202 immediately.
     */
    @PostMapping("/models")
    public ResponseEntity<?> uploadModel(
            @RequestParam("file") MultipartFile file,
            @RequestParam("modelId") String modelId,
            @RequestParam(value = "description", required = false) String description,
            @RequestParam(value = "tags", required = false, defaultValue = "") String tags,
            @RequestParam(value = "optimize", required = false, defaultValue = "false") boolean optimize) {

        try {
            logger.info("接收模型上传请求: {}", modelId);

            // Persist the upload to a temp file before going async.
            Path tempFile = Files.createTempFile("model-", ".onnx");
            file.transferTo(tempFile.toFile());

            // Stage 1: optimize (optional); falls back to the raw model on failure.
            CompletableFuture.supplyAsync(() -> {
                try {
                    if (optimize) {
                        Path optimizedPath = Files.createTempFile("optimized-", ".onnx");
                        return optimizationService.optimizeModel(
                                tempFile,
                                optimizedPath,
                                ModelOptimizationService.OptimizationLevel.EXTENDED);
                    }
                    return tempFile;
                } catch (Exception e) {
                    logger.error("模型优化失败,使用原始模型: {}", e.getMessage(), e);
                    return tempFile;
                }
            }).thenAccept(modelPath -> {
                // Stage 2: register the model with ES.
                try {
                    String[] tagArray = tags.isEmpty() ? new String[0] : tags.split(",");
                    modelUploader.uploadModel(modelId, modelPath, description, tagArray);
                    logger.info("模型 {} 上传完成", modelId);
                } catch (Exception e) {
                    logger.error("处理模型上传失败: {}", e.getMessage(), e);
                } finally {
                    // Clean up temp files on success AND failure — the original
                    // deleted them only on the success path, leaking on error.
                    try {
                        Files.deleteIfExists(tempFile);
                        if (!modelPath.equals(tempFile)) {
                            Files.deleteIfExists(modelPath);
                        }
                    } catch (Exception cleanupError) {
                        logger.error("清理临时文件失败: {}", cleanupError.getMessage(), cleanupError);
                    }
                }
            });

            return ResponseEntity.accepted().body(Map.of(
                "message", "模型上传请求已接受,处理中",
                "modelId", modelId
            ));
        } catch (Exception e) {
            logger.error("模型上传失败: {}", e.getMessage(), e);
            return ResponseEntity.badRequest().body(Map.of(
                "error", "模型上传失败: " + e.getMessage()
            ));
        }
    }

    /**
     * Creates an ingest pipeline wired to the given model and fields.
     */
    @PostMapping("/pipelines")
    public ResponseEntity<?> createPipeline(
            @RequestParam("pipelineId") String pipelineId,
            @RequestParam("modelId") String modelId,
            @RequestParam(value = "sourceField", defaultValue = "text") String sourceField,
            @RequestParam(value = "targetField", defaultValue = "prediction") String targetField,
            @RequestParam(value = "description", required = false) String description) {

        try {
            pipelineManager.createInferencePipeline(
                    pipelineId, modelId, sourceField, targetField,
                    description != null ? description : "推理管道 " + pipelineId);

            return ResponseEntity.ok(Map.of(
                "message", "推理管道创建成功",
                "pipelineId", pipelineId
            ));
        } catch (Exception e) {
            logger.error("创建推理管道失败: {}", e.getMessage(), e);
            return ResponseEntity.badRequest().body(Map.of(
                "error", "创建推理管道失败: " + e.getMessage()
            ));
        }
    }

    /**
     * Runs inference on a single text, optionally through the fault-tolerant
     * (circuit-breaker) path.
     */
    @PostMapping("/infer")
    public ResponseEntity<?> infer(
            @RequestParam("text") String text,
            @RequestParam("modelId") String modelId,
            @RequestParam("pipelineId") String pipelineId,
            @RequestParam(value = "indexName", defaultValue = "inference-results") String indexName,
            @RequestParam(value = "faultTolerant", defaultValue = "false") boolean faultTolerant) {

        // Start the metric timer BEFORE the try block so the same request ID is
        // closed on both paths. The original leaked the started request on
        // failure and recorded a bogus zero-latency start/end pair instead.
        long metricRequestId = metricsService.recordInferenceStart(modelId);

        try {
            Map<String, Object> result;
            if (faultTolerant) {
                result = faultTolerantService.faultTolerantInfer(
                        indexName, pipelineId, modelId, text);
            } else {
                result = inferenceService.inferSingle(
                        indexName, pipelineId, modelId, text);
            }

            metricsService.recordInferenceEnd(modelId, metricRequestId, true, 1);
            return ResponseEntity.ok(result);
        } catch (Exception e) {
            logger.error("推理失败: {}", e.getMessage(), e);

            // Close the SAME metric request as a failure; never let metric
            // bookkeeping mask the real error.
            try {
                metricsService.recordInferenceEnd(modelId, metricRequestId, false, 0);
            } catch (Exception me) {
                logger.error("记录失败指标时出错: {}", me.getMessage(), me);
            }

            return ResponseEntity.badRequest().body(Map.of(
                "error", "推理失败: " + e.getMessage(),
                "text", text
            ));
        }
    }

    /**
     * Runs inference over a batch of texts. Rejects null/empty batches before
     * any metric is recorded.
     */
    @PostMapping("/infer/batch")
    public ResponseEntity<?> inferBatch(
            @RequestBody List<String> texts,
            @RequestParam("modelId") String modelId,
            @RequestParam("pipelineId") String pipelineId,
            @RequestParam(value = "indexName", defaultValue = "inference-results") String indexName) {

        if (texts == null || texts.isEmpty()) {
            return ResponseEntity.badRequest().body(Map.of(
                "error", "文本列表不能为空"
            ));
        }

        logger.info("接收批量推理请求: {} 条文本", texts.size());

        // Same fix as infer(): one metric request ID spans success and failure.
        long metricRequestId = metricsService.recordInferenceStart(modelId);

        try {
            List<Map<String, Object>> results = inferenceService.inferBatch(
                    indexName, pipelineId, modelId, texts);

            metricsService.recordInferenceEnd(modelId, metricRequestId, true, texts.size());
            return ResponseEntity.ok(results);
        } catch (Exception e) {
            logger.error("批量推理失败: {}", e.getMessage(), e);

            try {
                metricsService.recordInferenceEnd(modelId, metricRequestId, false, 0);
            } catch (Exception me) {
                logger.error("记录失败指标时出错: {}", me.getMessage(), me);
            }

            return ResponseEntity.badRequest().body(Map.of(
                "error", "批量推理失败: " + e.getMessage(),
                "textsCount", texts.size()
            ));
        }
    }

    /**
     * Deploys a new version of an existing model. Blocks until the async
     * deployment finishes or the timeout elapses.
     */
    @PostMapping("/models/versions")
    public ResponseEntity<?> deployNewVersion(
            @RequestParam("file") MultipartFile file,
            @RequestParam("baseModelId") String baseModelId,
            @RequestParam(value = "description", required = false) String description,
            @RequestParam(value = "pipelineId", required = false) String pipelineId,
            @RequestParam(value = "optimize", defaultValue = "true") boolean optimize) {

        try {
            logger.info("接收新版本模型部署请求: {}", baseModelId);

            Path tempFile = Files.createTempFile("model-", ".onnx");
            file.transferTo(tempFile.toFile());

            CompletableFuture<String> future = CompletableFuture.supplyAsync(() -> {
                try {
                    return versionManager.deployNewVersion(
                            baseModelId, tempFile, description, pipelineId, optimize);
                } finally {
                    try {
                        Files.deleteIfExists(tempFile);
                    } catch (Exception e) {
                        logger.error("清理临时文件失败: {}", e.getMessage(), e);
                    }
                }
            });

            // Bounded wait — the original comment promised a timeout but called
            // the unbounded get(), which could hang the request thread forever.
            String versionId = future.get(DEPLOY_TIMEOUT_MINUTES, TimeUnit.MINUTES);

            return ResponseEntity.ok(Map.of(
                "message", "新版本模型部署成功",
                "versionId", versionId
            ));
        } catch (Exception e) {
            logger.error("部署新版本模型失败: {}", e.getMessage(), e);
            return ResponseEntity.badRequest().body(Map.of(
                "error", "部署新版本模型失败: " + e.getMessage()
            ));
        }
    }

    /**
     * Blue-green deploys a new model version behind a fresh pipeline, then
     * returns the new pipeline ID. Bounded wait, same as deployNewVersion.
     */
    @PostMapping("/models/bluegreen")
    public ResponseEntity<?> blueGreenDeploy(
            @RequestParam("file") MultipartFile file,
            @RequestParam("baseModelId") String baseModelId,
            @RequestParam("sourcePipelineId") String sourcePipelineId,
            @RequestParam(value = "description", required = false) String description) {

        try {
            logger.info("接收蓝绿部署请求: {}", baseModelId);

            Path tempFile = Files.createTempFile("model-", ".onnx");
            file.transferTo(tempFile.toFile());

            CompletableFuture<String> future = CompletableFuture.supplyAsync(() -> {
                try {
                    return versionManager.blueGreenDeploy(
                            baseModelId, tempFile, description, sourcePipelineId);
                } finally {
                    try {
                        Files.deleteIfExists(tempFile);
                    } catch (Exception e) {
                        logger.error("清理临时文件失败: {}", e.getMessage(), e);
                    }
                }
            });

            String newPipelineId = future.get(DEPLOY_TIMEOUT_MINUTES, TimeUnit.MINUTES);

            return ResponseEntity.ok(Map.of(
                "message", "蓝绿部署成功",
                "newPipelineId", newPipelineId
            ));
        } catch (Exception e) {
            logger.error("蓝绿部署失败: {}", e.getMessage(), e);
            return ResponseEntity.badRequest().body(Map.of(
                "error", "蓝绿部署失败: " + e.getMessage()
            ));
        }
    }

    /**
     * Returns a snapshot of all per-model performance metrics.
     */
    @GetMapping("/metrics")
    public ResponseEntity<Map<String, Map<String, Object>>> getMetrics() {
        return ResponseEntity.ok(metricsService.getAllModelMetrics());
    }

    /**
     * Forces all circuit breakers back to the CLOSED state.
     */
    @PostMapping("/circuit-breakers/reset")
    public ResponseEntity<?> resetCircuitBreakers() {
        faultTolerantService.resetAllCircuitBreakers();
        return ResponseEntity.ok(Map.of(
            "message", "所有熔断器已重置"
        ));
    }
}

10. 实际应用案例:电商评论实时分析系统

10.1 评论分析服务

java 复制代码
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.concurrent.CompletableFuture;

@Service
public class ReviewAnalysisService {
    private static final Logger logger = LoggerFactory.getLogger(ReviewAnalysisService.class);

    private final InferenceService inferenceService;
    private final MLMetricsService metricsService;
    private final NotificationService notificationService;

    // Elasticsearch index, ingest pipeline and model used for sentiment inference.
    private static final String INDEX_NAME = "product_reviews";
    private static final String PIPELINE_ID = "sentiment-analysis-pipeline";
    private static final String MODEL_ID = "sentiment-analysis-model";

    // A single review scoring below NEGATIVE_THRESHOLD is treated as negative;
    // a product whose 24h average score falls below PRODUCT_ALERT_THRESHOLD
    // (with at least 5 reviews) triggers a product-quality alert.
    private static final double NEGATIVE_THRESHOLD = 0.3;
    private static final double PRODUCT_ALERT_THRESHOLD = 0.4;

    @Autowired
    public ReviewAnalysisService(
            InferenceService inferenceService,
            MLMetricsService metricsService,
            NotificationService notificationService) {
        this.inferenceService = inferenceService;
        this.metricsService = metricsService;
        this.notificationService = notificationService;
    }

    /**
     * Processes a newly submitted product review asynchronously.
     *
     * <p>Runs sentiment inference on the review text, attaches the review
     * identifiers to the result, and triggers a negative-review check. Errors
     * never propagate to the caller: a completed future containing an
     * {@code "error"} entry is returned instead.
     *
     * @param reviewId   unique review identifier (required)
     * @param userId     id of the reviewing user (required)
     * @param productId  id of the reviewed product (required)
     * @param reviewText raw review text to analyze (required)
     * @return future completing with the inference result map, or with an
     *         error map on failure
     */
    public CompletableFuture<Map<String, Object>> processReview(
            String reviewId, String userId, String productId, String reviewText) {

        // Argument validation; messages are user-facing and intentionally Chinese.
        Objects.requireNonNull(reviewId, "评论ID不能为空");
        Objects.requireNonNull(userId, "用户ID不能为空");
        Objects.requireNonNull(productId, "产品ID不能为空");
        Objects.requireNonNull(reviewText, "评论内容不能为空");

        // Start a latency/throughput measurement for this inference request.
        long metricRequestId = metricsService.recordInferenceStart(MODEL_ID);

        try {
            // NOTE(review): supplyAsync without an explicit executor runs on
            // ForkJoinPool.commonPool(); confirm that is acceptable for
            // potentially blocking inference calls.
            return CompletableFuture.supplyAsync(() -> {
                try {
                    // Run the sentiment pipeline on the review text.
                    Map<String, Object> result = inferenceService.inferSingle(
                            INDEX_NAME, PIPELINE_ID, MODEL_ID, reviewText);

                    // Attach the original review identifiers to the result.
                    result.put("review_id", reviewId);
                    result.put("user_id", userId);
                    result.put("product_id", productId);

                    // NOTE(review): this is a self-invocation, so the @Async
                    // annotation on checkNegativeReview is bypassed (Spring
                    // proxies intercept external calls only) and the check
                    // actually runs synchronously here.
                    checkNegativeReview(result);

                    // Record a successful single-document inference.
                    metricsService.recordInferenceEnd(MODEL_ID, metricRequestId, true, 1);

                    return result;
                } catch (Exception e) {
                    logger.error("处理评论失败: {}", e.getMessage(), e);

                    // Close the same metric request as a failure.
                    metricsService.recordInferenceEnd(MODEL_ID, metricRequestId, false, 0);

                    Map<String, Object> errorResult = new HashMap<>();
                    errorResult.put("review_id", reviewId);
                    errorResult.put("user_id", userId);
                    errorResult.put("product_id", productId);
                    errorResult.put("text", reviewText);
                    errorResult.put("error", "处理失败: " + e.getMessage());
                    return errorResult;
                }
            });
        } catch (Exception e) {
            logger.error("创建评论处理任务失败: {}", e.getMessage(), e);

            // Task creation itself failed; close the metric as a failure.
            metricsService.recordInferenceEnd(MODEL_ID, metricRequestId, false, 0);

            // Return an already-completed future describing the error instead
            // of throwing, so callers get a uniform result shape.
            CompletableFuture<Map<String, Object>> future = new CompletableFuture<>();
            Map<String, Object> errorResult = new HashMap<>();
            errorResult.put("review_id", reviewId);
            errorResult.put("user_id", userId);
            errorResult.put("product_id", productId);
            errorResult.put("text", reviewText);
            errorResult.put("error", "任务创建失败: " + e.getMessage());
            future.complete(errorResult);
            return future;
        }
    }

    /**
     * Checks whether a processed review is negative and, if so, sends an alert.
     *
     * <p>Never throws: all failures are caught and logged.
     *
     * <p>NOTE(review): every call site in this class invokes this method on
     * {@code this}, which bypasses Spring's proxy — the {@code @Async}
     * annotation has no effect for those calls.
     *
     * @param review inference result map expected to contain "prediction"
     *               (with a "score"), "product_id", "text" and "review_id"
     */
    @Async
    public void checkNegativeReview(Map<String, Object> review) {
        try {
            if (review == null || !review.containsKey("prediction")) {
                return;
            }

            // Unchecked cast: assumes the pipeline writes a Map under
            // "prediction" — TODO confirm against the inference service.
            Map<String, Object> prediction = (Map<String, Object>) review.get("prediction");

            if (prediction == null || !prediction.containsKey("score")) {
                return;
            }

            // NOTE(review): the gets below NPE if "product_id", "text" or
            // "review_id" is absent; callers populate them, but the map shape
            // is not enforced here.
            double score = Double.parseDouble(prediction.get("score").toString());
            String productId = review.get("product_id").toString();
            String reviewText = review.get("text").toString();
            String reviewId = review.get("review_id").toString();

            // Negative-review handling: below 0.2 is "severe", else "moderate".
            if (score < NEGATIVE_THRESHOLD) {
                String severity = score < 0.2 ? "严重" : "中度";
                String alertMessage = String.format(
                        "%s负面评论 (分数: %.2f): %s", severity, score, reviewText);

                logger.warn("产品 {} 收到{}负面评论, 分数: {}, ID: {}",
                        productId, severity, String.format("%.2f", score), reviewId);

                // Dispatch the alert (cooldown/rate limiting handled there).
                notificationService.sendNegativeReviewAlert(
                    productId, reviewText, score, alertMessage);
            }
        } catch (Exception e) {
            logger.error("处理负面评论检查失败: {}", e.getMessage(), e);
        }
    }

    /**
     * Processes a batch of reviews through the inference pipeline.
     *
     * <p>On batch failure, returns one error-annotated copy of each input
     * review instead of throwing.
     *
     * @param reviews review maps, each expected to contain at least "text",
     *                "review_id", "user_id" and "product_id"
     * @return per-review result maps, in input order; empty list for
     *         null/empty input
     */
    public List<Map<String, Object>> processBatchReviews(List<Map<String, Object>> reviews) {
        if (reviews == null || reviews.isEmpty()) {
            return Collections.emptyList();
        }

        // Extract the raw texts to infer on.
        // NOTE(review): r.get("text").toString() NPEs if any review lacks
        // "text" — TODO confirm upstream validation guarantees it.
        List<String> texts = reviews.stream()
                .map(r -> r.get("text").toString())
                .toList();

        // One metric request covers the whole batch.
        long metricRequestId = metricsService.recordInferenceStart(MODEL_ID);

        try {
            // Batch inference over all texts.
            List<Map<String, Object>> results = inferenceService.inferBatch(
                    INDEX_NAME, PIPELINE_ID, MODEL_ID, texts);

            // Merge original identifiers into each inference result, padding
            // with empty maps if fewer results than inputs came back.
            List<Map<String, Object>> finalResults = new ArrayList<>(reviews.size());
            for (int i = 0; i < reviews.size(); i++) {
                Map<String, Object> review = reviews.get(i);
                Map<String, Object> result = i < results.size() ? results.get(i) : new HashMap<>();

                // Attach original identifiers.
                result.put("review_id", review.get("review_id"));
                result.put("user_id", review.get("user_id"));
                result.put("product_id", review.get("product_id"));

                finalResults.add(result);

                // NOTE(review): self-invocation — @Async bypassed, runs inline.
                checkNegativeReview(result);
            }

            // Record batch success with the number of documents processed.
            metricsService.recordInferenceEnd(MODEL_ID, metricRequestId, true, texts.size());

            return finalResults;
        } catch (Exception e) {
            logger.error("批量处理评论失败: {}", e.getMessage(), e);

            // Record batch failure.
            metricsService.recordInferenceEnd(MODEL_ID, metricRequestId, false, 0);

            // Degrade gracefully: echo each input back with an error marker.
            return reviews.stream()
                    .map(review -> {
                        Map<String, Object> errorResult = new HashMap<>(review);
                        errorResult.put("error", "批量处理失败: " + e.getMessage());
                        return errorResult;
                    })
                    .toList();
        }
    }

    /**
     * Periodically aggregates the last 24 hours of review sentiment per
     * product and raises quality alerts where warranted.
     */
    @Scheduled(cron = "0 0 */3 * * *") // every 3 hours
    public void analyzeProductTrends() {
        logger.info("开始分析产品评论趋势");

        // NOTE(review): a fresh ES client is created (and closed) per run —
        // confirm ESClientUtil.createClient() is cheap enough for this.
        try (var client = ESClientUtil.createClient()) {
            // Window: the last 24 hours, expressed as epoch millis.
            Instant now = Instant.now();
            Instant yesterday = now.minus(24, ChronoUnit.HOURS);
            long yesterdayMs = yesterday.toEpochMilli();

            // Terms aggregation by product, with sentiment stats per bucket;
            // size(0) suppresses hits since only aggregations are needed.
            var response = client.search(s -> s
                .index(INDEX_NAME)
                .query(q -> q
                    .bool(b -> b
                        .must(m -> m
                            .range(r -> r
                                .field("timestamp")
                                .gte(String.valueOf(yesterdayMs))
                            )
                        )
                    )
                )
                .aggregations("by_product", a -> a
                    .terms(t -> t.field("product_id.keyword"))
                    .aggregations("sentiment_stats", a2 -> a2
                        .stats(s2 -> s2.field("prediction.score"))
                    )
                )
                .size(0)
            );

            // Walk the aggregation buckets and raise alerts as needed.
            processProductAggregations(response);
        } catch (Exception e) {
            logger.error("分析产品评论趋势失败: {}", e.getMessage(), e);
        }
    }

    /**
     * Extracts per-product sentiment statistics from the aggregation response
     * and triggers quality alerts for low-scoring products.
     *
     * @param response search response expected to carry the "by_product"
     *                 string-terms aggregation built in analyzeProductTrends
     */
    private void processProductAggregations(co.elastic.clients.elasticsearch.core.SearchResponse<?> response) {
        try {
            var aggregations = response.aggregations();
            if (aggregations == null) {
                logger.warn("未找到聚合结果");
                return;
            }

            var byProduct = aggregations.get("by_product");
            if (byProduct == null) {
                logger.warn("未找到产品聚合");
                return;
            }

            // sterms(): assumes string-terms buckets, matching the
            // "product_id.keyword" terms aggregation issued above.
            var buckets = byProduct.sterms().buckets().array();

            for (var bucket : buckets) {
                String productId = bucket.key().stringValue();
                var stats = bucket.aggregations().get("sentiment_stats").stats();

                double avgScore = stats.avg();
                long count = (long) stats.count();

                logger.info("产品 {} 评论统计: 数量={}, 平均分={}, 最低分={}",
                        productId, count, String.format("%.2f", avgScore), String.format("%.2f", stats.min()));

                // Alert only when the average is low AND there is enough data
                // (>= 5 reviews) to avoid noise from tiny samples.
                if (avgScore < PRODUCT_ALERT_THRESHOLD && count >= 5) {
                    handleProductQualityAlert(productId, avgScore, count);
                }
            }
        } catch (Exception e) {
            logger.error("处理聚合结果失败: {}", e.getMessage(), e);
        }
    }

    /**
     * Gathers example negative reviews and dispatches a product-quality alert.
     *
     * @param productId product whose average sentiment is below threshold
     * @param avgScore  average sentiment score over the window
     * @param count     number of reviews contributing to the average
     */
    private void handleProductQualityAlert(String productId, double avgScore, long count) {
        try {
            logger.warn("产品 {} 评论情感得分偏低: {} (共{}条评论)",
                    productId, String.format("%.2f", avgScore), count);

            // Fetch up to 5 of the worst-scoring reviews as examples.
            List<String> negativeReviews = findNegativeReviews(productId, NEGATIVE_THRESHOLD, 5);

            // Build the alert message (rate limiting is handled downstream).
            String alertMessage = String.format(
                    "产品 %s 近期评论情感均分较低: %.2f (共%d条评论)",
                    productId, avgScore, count);

            notificationService.sendProductQualityAlert(
                productId, avgScore, negativeReviews, alertMessage);
        } catch (Exception e) {
            logger.error("处理产品质量告警失败: {}", e.getMessage(), e);
        }
    }

    /**
     * Finds the lowest-scoring reviews of a product.
     *
     * @param productId product to query
     * @param maxScore  inclusive upper bound on prediction.score
     * @param limit     maximum number of review texts to return
     * @return review texts sorted by ascending score; empty list on any error
     */
    private List<String> findNegativeReviews(String productId, double maxScore, int limit) {
        try (var client = ESClientUtil.createClient()) {
            var response = client.search(s -> s
                .index(INDEX_NAME)
                .query(q -> q
                    .bool(b -> b
                        .must(m -> m
                            .term(t -> t
                                .field("product_id.keyword")
                                .value(productId)
                            )
                        )
                        .must(m -> m
                            .range(r -> r
                                .field("prediction.score")
                                .lte(String.valueOf(maxScore))
                            )
                        )
                    )
                )
                .sort(s1 -> s1
                    .field(f -> f
                        .field("prediction.score")
                        .order(co.elastic.clients.elasticsearch._types.SortOrder.Asc)
                    )
                )
                .size(limit)
            );

            // NOTE(review): unchecked cast assumes hits deserialize to maps
            // with a non-null "text" field — a null source would NPE here.
            return response.hits().hits().stream()
                .map(hit -> {
                    Map<String, Object> source = (Map<String, Object>) hit.source();
                    return source.get("text").toString();
                })
                .toList();
        } catch (Exception e) {
            logger.error("查找负面评论失败: {}", e.getMessage(), e);
            return Collections.emptyList();
        }
    }
}

10.2 告警服务

java 复制代码
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.mail.SimpleMailMessage;
import org.springframework.mail.javamail.JavaMailSender;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

@Service
public class NotificationService {
    private static final Logger logger = LoggerFactory.getLogger(NotificationService.class);

    private final JavaMailSender mailSender;
    private final ScheduledExecutorService scheduler;

    // Alert routing: product id -> explicit recipient list (falls back to defaults).
    private final Map<String, List<String>> productAlertRecipients = new ConcurrentHashMap<>();
    // Timestamp of the last alert sent per alert key, used for cooldown checks.
    private final Map<String, Long> lastAlertTime = new ConcurrentHashMap<>();
    private static final long ALERT_COOLDOWN_MS = 1800000; // 30 minutes

    // Per-key counters capping how many alerts may be sent per hour.
    private final Map<String, AtomicInteger> alertCounts = new ConcurrentHashMap<>();
    private static final int MAX_ALERTS_PER_HOUR = 5;

    @Autowired
    public NotificationService(JavaMailSender mailSender) {
        this.mailSender = mailSender;
        // Daemon thread so this internal pool can never keep the JVM alive;
        // the previous non-daemon pool was never shut down (resource leak).
        this.scheduler = Executors.newScheduledThreadPool(1, runnable -> {
            Thread worker = new Thread(runnable, "notification-alert-cleanup");
            worker.setDaemon(true);
            return worker;
        });

        // Hourly cleanup of rate-limit state. The @Scheduled(fixedRate=3600000)
        // annotation previously on cleanupAlertCounts is intentionally dropped:
        // with Spring scheduling enabled it made the cleanup run twice an hour,
        // and this internal scheduler works regardless of @EnableScheduling.
        scheduler.scheduleAtFixedRate(
                this::cleanupAlertCounts, 1, 1, TimeUnit.HOURS);
    }

    /**
     * Sends an alert e-mail for a single negative review, subject to a
     * per-product cooldown and an hourly rate limit.
     *
     * @param productId  product the review belongs to (required)
     * @param reviewText raw review text, included in the e-mail body (required)
     * @param score      sentiment score of the review
     * @param message    pre-built alert message; validated for null but not
     *                   embedded in the e-mail body (kept for API compatibility)
     */
    public void sendNegativeReviewAlert(
            String productId, String reviewText, double score, String message) {

        Objects.requireNonNull(productId, "产品ID不能为空");
        Objects.requireNonNull(reviewText, "评论文本不能为空");
        Objects.requireNonNull(message, "告警消息不能为空");

        String alertKey = "negative_" + productId;

        // Respect cooldown and hourly rate limit.
        if (!canSendAlert(alertKey)) {
            logger.info("负面评论告警 {} 在冷却期内,跳过", alertKey);
            return;
        }

        // Build the e-mail content.
        String subject = String.format("负面评论告警: 产品 %s", productId);
        StringBuilder body = new StringBuilder();
        body.append("产品ID: ").append(productId).append("\n");
        body.append("情感分数: ").append(String.format("%.2f", score)).append("\n");
        body.append("评论内容: ").append(reviewText).append("\n\n");
        body.append("请尽快处理此负面反馈。");

        sendEmail(getRecipientsForProduct(productId), subject, body.toString());

        // Record send time and bump the hourly counter.
        recordAlert(alertKey);

        logger.info("已发送负面评论告警: {}", alertKey);
    }

    /**
     * Sends a product-quality alert summarizing a low average sentiment score,
     * with example negative reviews, subject to the same cooldown/rate limit.
     *
     * @param productId       product whose quality is flagged (required)
     * @param avgScore        average sentiment score over the window
     * @param negativeReviews example negative review texts; may be null/empty
     * @param message         pre-built alert message; validated for null but
     *                        not embedded in the e-mail body
     */
    public void sendProductQualityAlert(
            String productId, double avgScore, List<String> negativeReviews, String message) {

        Objects.requireNonNull(productId, "产品ID不能为空");
        Objects.requireNonNull(message, "告警消息不能为空");

        String alertKey = "quality_" + productId;

        // Respect cooldown and hourly rate limit.
        if (!canSendAlert(alertKey)) {
            logger.info("产品质量告警 {} 在冷却期内,跳过", alertKey);
            return;
        }

        // Build the e-mail content.
        String subject = String.format("产品质量告警: 产品 %s", productId);
        StringBuilder body = new StringBuilder();
        body.append("产品ID: ").append(productId).append("\n");
        body.append("平均情感分数: ").append(String.format("%.2f", avgScore)).append("\n\n");
        body.append("近期负面评论示例:\n");

        if (negativeReviews != null && !negativeReviews.isEmpty()) {
            for (int i = 0; i < negativeReviews.size(); i++) {
                body.append(i + 1).append(". ").append(negativeReviews.get(i)).append("\n");
            }
        } else {
            body.append("(无具体负面评论示例)\n");
        }

        body.append("\n请关注产品质量并及时处理客户反馈。");

        sendEmail(getRecipientsForProduct(productId), subject, body.toString());

        // Record send time and bump the hourly counter.
        recordAlert(alertKey);

        logger.info("已发送产品质量告警: {}", alertKey);
    }

    /**
     * Checks whether an alert may be sent now for the given key.
     *
     * <p>NOTE: the check and the subsequent {@link #recordAlert} are not
     * atomic; concurrent callers may both pass. Acceptable for alerting.
     *
     * @return true if outside the cooldown window and under the hourly cap
     */
    private boolean canSendAlert(String alertKey) {
        // Cooldown: suppress repeats within ALERT_COOLDOWN_MS of the last send.
        Long lastTime = lastAlertTime.get(alertKey);
        if (lastTime != null) {
            long now = System.currentTimeMillis();
            if (now - lastTime < ALERT_COOLDOWN_MS) {
                return false;
            }
        }

        // Hourly cap per alert key.
        AtomicInteger count = alertCounts.computeIfAbsent(alertKey, k -> new AtomicInteger(0));
        return count.get() < MAX_ALERTS_PER_HOUR;
    }

    /**
     * Records that an alert was just sent: stamps the cooldown clock and
     * increments the hourly counter for the key.
     */
    private void recordAlert(String alertKey) {
        lastAlertTime.put(alertKey, System.currentTimeMillis());
        alertCounts.computeIfAbsent(alertKey, k -> new AtomicInteger(0))
                   .incrementAndGet();
    }

    /**
     * Resets the hourly alert counters and prunes expired cooldown entries.
     * Invoked hourly by the internal scheduler; public so it can also be
     * triggered externally if needed.
     */
    public void cleanupAlertCounts() {
        logger.debug("清理告警计数...");
        alertCounts.clear();
        // Prune cooldown timestamps that have long expired so lastAlertTime
        // does not grow without bound (fixes a slow memory leak).
        long cutoff = System.currentTimeMillis() - ALERT_COOLDOWN_MS;
        lastAlertTime.values().removeIf(ts -> ts < cutoff);
    }

    /**
     * Resolves the recipient list for a product, falling back to the default
     * operations/product-manager addresses when none is configured.
     */
    private List<String> getRecipientsForProduct(String productId) {
        List<String> recipients = productAlertRecipients.get(productId);
        if (recipients != null && !recipients.isEmpty()) {
            return recipients;
        }

        // Default recipients when no product-specific configuration exists.
        return List.of("[email protected]", "[email protected]");
    }

    /**
     * Configures the alert recipients for a product.
     *
     * @param productId  product id (required)
     * @param recipients recipient addresses (required; defensively copied so
     *                   later caller-side mutation cannot affect routing)
     */
    public void configureProductRecipients(String productId, List<String> recipients) {
        Objects.requireNonNull(productId, "产品ID不能为空");
        Objects.requireNonNull(recipients, "接收人列表不能为空");

        productAlertRecipients.put(productId, List.copyOf(recipients));
    }

    /**
     * Sends a plain-text e-mail; failures are logged and deliberately
     * swallowed so a mail outage never breaks the calling flow.
     */
    private void sendEmail(List<String> recipients, String subject, String body) {
        try {
            SimpleMailMessage message = new SimpleMailMessage();
            message.setTo(recipients.toArray(new String[0]));
            message.setSubject(subject);
            message.setText(body);

            mailSender.send(message);
        } catch (Exception e) {
            logger.error("发送邮件失败: {}", e.getMessage(), e);
        }
    }
}

11. 单元测试示例

java 复制代码
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.*;

/**
 * Unit tests for ESInferenceService: chained inference, chunked batch
 * processing with partial failure, and argument validation.
 *
 * <p>NOTE(review): this listing instantiates {@code new ArrayList<>()} but the
 * accompanying import block does not import {@code java.util.ArrayList} —
 * the listing does not compile as printed.
 */
@ExtendWith(MockitoExtension.class)
class InferenceServiceTest {

    @Mock
    private ModelUploader modelUploader;

    @Mock
    private InferencePipelineManager pipelineManager;

    private ESInferenceService inferenceService;

    @BeforeEach
    void setUp() {
        inferenceService = new ESInferenceService(modelUploader, pipelineManager);
    }

    @Test
    void testChainedInference() {
        // Two-stage chain: model1 reads "text" -> "output1", model2 reads
        // "output1" -> "output2".
        List<ChainedInference.ModelConfig> modelChain = Arrays.asList(
            new ChainedInference.ModelConfig(
                "model1", "pipeline1", "index1", "text", "output1"),
            new ChainedInference.ModelConfig(
                "model2", "pipeline2", "index2", "output1", "output2")
        );

        String text = "测试文本";

        // Spy so individual pipeline stages can be stubbed while the real
        // chainedInference orchestration still runs.
        ESInferenceService spyService = spy(inferenceService);

        Map<String, Object> stage1Result = new HashMap<>();
        stage1Result.put("text", text);
        stage1Result.put("output1", "中间结果");
        stage1Result.put("timestamp", System.currentTimeMillis());

        Map<String, Object> stage2Result = new HashMap<>();
        stage2Result.put("text", text);
        stage2Result.put("output1", "中间结果");
        stage2Result.put("output2", "最终结果");
        stage2Result.put("timestamp", System.currentTimeMillis());

        try {
            // doReturn/when (not when/thenReturn) avoids invoking the real
            // method on the spy during stubbing.
            doReturn(stage1Result).when(spyService)
                .runPipelineStage(eq("index1"), eq("pipeline1"), any(), eq("stage_1"));

            doReturn(stage2Result).when(spyService)
                .runPipelineStage(eq("index2"), eq("pipeline2"), any(), eq("stage_2"));

            // Exercise the chained inference.
            Map<String, Object> result = spyService.chainedInference(modelChain, text);

            // The final stage's output should surface in the combined result.
            assertNotNull(result);
            assertEquals("最终结果", result.get("output2"));

            // Each stage must have ensured its pipeline exists with the
            // correct model and field mapping.
            verify(pipelineManager).ensurePipelineExists(
                eq("pipeline1"), eq("model1"), eq("text"), eq("output1"), anyString());

            verify(pipelineManager).ensurePipelineExists(
                eq("pipeline2"), eq("model2"), eq("output1"), eq("output2"), anyString());
        } catch (Exception e) {
            fail("测试抛出异常: " + e.getMessage());
        }
    }

    @Test
    void testProcessBatchesInChunks() throws Exception {
        // 150 inputs so the service must split the work into multiple chunks.
        List<String> texts = new ArrayList<>();
        for (int i = 0; i < 150; i++) {
            texts.add("测试文本 " + i);
        }

        // Spy so inferBatchInternal can be stubbed per chunk.
        ESInferenceService spyService = spy(inferenceService);

        List<Map<String, Object>> batch1Results = new ArrayList<>();
        for (int i = 0; i < 20; i++) {
            Map<String, Object> result = new HashMap<>();
            result.put("text", "测试文本 " + i);
            result.put("prediction", Map.of("score", 0.8));
            batch1Results.add(result);
        }

        List<Map<String, Object>> batch2Results = new ArrayList<>();
        for (int i = 20; i < 40; i++) {
            Map<String, Object> result = new HashMap<>();
            result.put("text", "测试文本 " + i);
            result.put("prediction", Map.of("score", 0.7));
            batch2Results.add(result);
        }

        // Consecutive stubs: chunk 1 OK, chunk 2 OK, chunk 3 throws —
        // exercising the partial-failure path.
        doReturn(batch1Results)
            .doReturn(batch2Results)
            .doThrow(new RuntimeException("模拟批次处理错误"))
            .when(spyService)
            .inferBatchInternal(anyString(), anyString(), anyList());

        // Exercise the chunked batch path.
        List<Map<String, Object>> results = spyService.inferBatch(
            "test-index", "test-pipeline", "test-model", texts);

        // Every input must produce an entry, failures included.
        assertNotNull(results);
        assertEquals(150, results.size());

        // First two chunks carry the stubbed predictions.
        assertEquals(0.8, ((Map<String, Object>)results.get(0).get("prediction")).get("score"));
        assertEquals(0.7, ((Map<String, Object>)results.get(20).get("prediction")).get("score"));

        // The failed chunk is reported via an "error" entry, not an exception.
        assertTrue(results.get(40).containsKey("error"));

        // Pipeline creation happens once for the whole batch.
        verify(pipelineManager).ensurePipelineExists(
            eq("test-pipeline"), eq("test-model"), eq("text"), eq("prediction"), anyString());
    }

    @Test
    void testInferSingleWithInvalidParams() {
        // Each required argument is null-checked individually.
        assertThrows(NullPointerException.class, () ->
            inferenceService.inferSingle(null, "pipeline", "model", "text"));

        assertThrows(NullPointerException.class, () ->
            inferenceService.inferSingle("index", null, "model", "text"));

        assertThrows(NullPointerException.class, () ->
            inferenceService.inferSingle("index", "pipeline", null, "text"));

        assertThrows(NullPointerException.class, () ->
            inferenceService.inferSingle("index", "pipeline", "model", null));
    }
}

/**
 * Unit tests for {@code CircuitBreaker} state transitions: CLOSED -> OPEN on
 * repeated failures, OPEN -> HALF_OPEN after the reset timeout, and
 * HALF_OPEN -> CLOSED/OPEN on success/failure.
 *
 * <p>The original class was annotated with
 * {@code @ExtendWith(MockitoExtension.class)} although no mocks are used;
 * the extension has been removed as a needless dependency.
 */
class CircuitBreakerTest {

    private CircuitBreaker breaker;

    @BeforeEach
    void setUp() {
        // Threshold of 3 failures, 100 ms reset timeout.
        breaker = new CircuitBreaker("test-breaker", 3, 100);
    }

    /** Records enough consecutive failures to trip the breaker open. */
    private void tripBreaker() {
        for (int i = 0; i < 3; i++) {
            breaker.recordFailure();
        }
    }

    @Test
    void testInitialState() {
        // A fresh breaker starts CLOSED and lets requests through.
        assertFalse(breaker.isOpen());
        assertEquals(CircuitBreaker.CircuitState.CLOSED, breaker.getState());
    }

    @Test
    void testOpenAfterFailures() {
        // Stays closed while below the failure threshold.
        for (int i = 0; i < 3; i++) {
            assertFalse(breaker.isOpen());
            breaker.recordFailure();
        }

        // Reaching the threshold opens the breaker.
        assertTrue(breaker.isOpen());
        assertEquals(CircuitBreaker.CircuitState.OPEN, breaker.getState());
    }

    @Test
    void testHalfOpenAfterTimeout() throws InterruptedException {
        tripBreaker();
        assertTrue(breaker.isOpen());

        // Sleep past the 100 ms reset timeout. NOTE: wall-clock based; could
        // be flaky on a heavily loaded machine.
        Thread.sleep(150);

        // isOpen() performs the OPEN -> HALF_OPEN transition lazily.
        assertFalse(breaker.isOpen());
        assertEquals(CircuitBreaker.CircuitState.HALF_OPEN, breaker.getState());
    }

    @Test
    void testSuccessInHalfOpenState() throws InterruptedException {
        tripBreaker();

        // Sleep past the reset timeout, then trigger the HALF_OPEN transition.
        Thread.sleep(150);
        assertFalse(breaker.isOpen());

        // A success in HALF_OPEN fully closes the breaker again.
        breaker.recordSuccess();
        assertFalse(breaker.isOpen());
        assertEquals(CircuitBreaker.CircuitState.CLOSED, breaker.getState());
    }

    @Test
    void testFailureInHalfOpenState() throws InterruptedException {
        tripBreaker();

        // Sleep past the reset timeout, then trigger the HALF_OPEN transition.
        Thread.sleep(150);
        assertFalse(breaker.isOpen());

        // A failure in HALF_OPEN re-opens the breaker immediately.
        breaker.recordFailure();
        assertTrue(breaker.isOpen());
        assertEquals(CircuitBreaker.CircuitState.OPEN, breaker.getState());
    }

    @Test
    void testManualReset() {
        tripBreaker();
        assertTrue(breaker.isOpen());

        // Manual reset returns the breaker to CLOSED without waiting.
        breaker.reset();

        assertFalse(breaker.isOpen());
        assertEquals(CircuitBreaker.CircuitState.CLOSED, breaker.getState());
    }
}

12. 总结

| 核心技术点 | 实现方法 | 注意事项 |
| --- | --- | --- |
| 模型导入 | ONNX 格式转换 + 分块上传 | 验证模型操作集兼容性,使用流式 API 处理大文件 |
| 推理管道 | 处理器定义 + 管道缓存 | 正确映射字段,并配置适当的内存限制 |
| 批量推理 | 分批处理 + 指标收集 | 根据模型复杂度调整批次大小,避免 OOM |
| 故障处理 | 熔断器 + 降级策略 | 设计合理的重试与恢复机制,保障系统弹性 |
| 性能优化 | 模型量化 + 池化管理 | 定期监控资源使用率,根据指标调整配置 |
| 版本管理 | 别名策略 + 蓝绿部署 | 先验证新版本,再平滑切换流量 |
| 监控告警 | 实时指标 + 阈值监控 | 设置合理的告警规则,避免告警风暴 |
| 应用集成 | REST API + 异步处理 | 考虑超时和错误处理,提供优雅降级 |
相关推荐
黎䪽圓8 分钟前
【Java多线程从青铜到王者】阻塞队列(十)
java·开发语言
Roam-G22 分钟前
List ToMap优化优化再优化到极致
java·list
"匠"人27 分钟前
讲解负载均衡
java·运维·负载均衡
C雨后彩虹33 分钟前
行为模式-责任链模式
java·设计模式·责任链模式
写bug写bug37 分钟前
搞懂Spring Cloud Config配置信息自动更新原理
java·后端·架构
异常君1 小时前
Dubbo 高可用性核心机制详解与实战(上)
java·dubbo·设计
trow1 小时前
Web 认证技术的演进:解决无状态与有状态需求的矛盾
java
快乐肚皮1 小时前
快速排序优化技巧详解:提升性能的关键策略
java·算法·性能优化·排序算法
摘取一颗天上星️1 小时前
外部记忆的组织艺术:集合、树、栈与队列的深度解析
深度学习·机器学习·外部记忆