一、AI应用异常处理的特殊挑战
在Spring AI应用中,异常处理面临独特的复杂性,远超过传统Web应用:
多层异常来源:
- 模型服务层:API限额、网络超时、认证失败
- 数据处理层:输入格式错误、Token超限、内容过滤
- 业务逻辑层:上下文过长、响应格式异常、逻辑冲突
- 基础设施层:数据库连接、缓存失效、消息队列堆积
国内模型特有异常模式:
/**
 * Side-by-side comparison of the failure signatures associated with each model vendor.
 *
 * <p>Each constant carries two free-text strings exactly as listed in the table below:
 * a short description of the vendor's characteristic failures, and the error
 * signatures (messages or codes) that go with them.
 */
public enum ProviderExceptionPattern {
    ALIYUN("频率限制、图片审核失败", "QPS超限、内容违规"),
    BYTE_DANCE("会话超时、参数校验", "session_timeout、invalid_param"),
    DEEPSEEK("服务忙、认证失败", "service_busy、auth_failed"),
    ZHIPU("余额不足、模型不可用", "insufficient_balance、model_not_available");

    private final String description;
    private final String errorSignatures;

    /**
     * @param description     human-readable summary of the vendor's typical failures
     * @param errorSignatures error messages or codes listed for the vendor
     */
    ProviderExceptionPattern(String description, String errorSignatures) {
        this.description = description;
        this.errorSignatures = errorSignatures;
    }

    /** @return the human-readable summary of the vendor's typical failures */
    public String getDescription() {
        return description;
    }

    /** @return the error messages or codes listed for the vendor */
    public String getErrorSignatures() {
        return errorSignatures;
    }
}
二、Spring AI异常体系深度解析
2.1 核心异常类层次结构
// Base exception hierarchy for the Spring AI application
/**
 * Holder for the AI-specific exception hierarchy.
 *
 * <p>The nested exception types are declared {@code static} so they can be created
 * without an enclosing {@code AiExceptionHandler} instance, and they expose read
 * accessors for the fields they capture.
 */
@RestControllerAdvice
public class AiExceptionHandler {

    /** Common base for every AI-specific failure: carries an error code, the provider and a creation timestamp. */
    public abstract static class AiException extends RuntimeException {
        private final String errorCode;
        private final String provider;
        private final Instant timestamp;

        /**
         * @param message   human-readable failure description
         * @param errorCode stable error code identifying the failure category
         * @param provider  identifier of the model provider the failure is attributed to
         */
        protected AiException(String message, String errorCode, String provider) {
            super(message);
            this.errorCode = errorCode;
            this.provider = provider;
            this.timestamp = Instant.now();
        }

        /** @return the error code supplied at construction */
        public String getErrorCode() {
            return errorCode;
        }

        /** @return the provider identifier supplied at construction */
        public String getProvider() {
            return provider;
        }

        /** @return the instant at which this exception object was created */
        public Instant getTimestamp() {
            return timestamp;
        }
    }

    /** Failure reported by the model service itself; error code {@code MODEL_SERVICE_ERROR}. */
    public static class ModelServiceException extends AiException {
        private final int statusCode;
        private final Map<String, Object> details;

        /**
         * @param provider   identifier of the model provider
         * @param statusCode status code associated with the failure
         * @param message    failure description
         * @param details    extra diagnostic entries; {@code null} is replaced by an empty map
         */
        public ModelServiceException(String provider, int statusCode,
                                     String message, Map<String, Object> details) {
            super(message, "MODEL_SERVICE_ERROR", provider);
            this.statusCode = statusCode;
            this.details = details != null ? details : Map.of();
        }

        /** @return the status code supplied at construction */
        public int getStatusCode() {
            return statusCode;
        }

        /** @return the diagnostic details; never {@code null} */
        public Map<String, Object> getDetails() {
            return details;
        }
    }

    /** Content rejected by moderation; error code {@code CONTENT_FILTERED}. */
    public static class ContentFilterException extends AiException {
        // Filter category, e.g. sensitive words, illegal content, bias.
        private final String filterType;
        private final String filteredContent;

        /**
         * @param provider        identifier of the model provider
         * @param filterType      category of the filter that rejected the content
         * @param filteredContent the content that was rejected
         */
        public ContentFilterException(String provider, String filterType,
                                      String filteredContent) {
            super("内容审核未通过: " + filterType, "CONTENT_FILTERED", provider);
            this.filterType = filterType;
            this.filteredContent = filteredContent;
        }

        /** @return the filter category supplied at construction */
        public String getFilterType() {
            return filterType;
        }

        /** @return the rejected content supplied at construction */
        public String getFilteredContent() {
            return filteredContent;
        }
    }

    /** Token budget exceeded; error code {@code TOKEN_LIMIT_EXCEEDED}. */
    public static class TokenLimitException extends AiException {
        private final int promptTokens;
        private final int maxTokens;
        // Which limit was hit: input, output, or total length.
        private final String limitType;

        /**
         * @param provider     identifier of the model provider
         * @param promptTokens number of tokens in the prompt
         * @param maxTokens    maximum number of tokens allowed
         * @param limitType    which limit was exceeded
         */
        public TokenLimitException(String provider, int promptTokens,
                                   int maxTokens, String limitType) {
            super(String.format("Token限制超出: 输入%d tokens, 最大%d tokens",
                    promptTokens, maxTokens), "TOKEN_LIMIT_EXCEEDED", provider);
            this.promptTokens = promptTokens;
            this.maxTokens = maxTokens;
            this.limitType = limitType;
        }

        /** @return the prompt token count supplied at construction */
        public int getPromptTokens() {
            return promptTokens;
        }

        /** @return the token ceiling supplied at construction */
        public int getMaxTokens() {
            return maxTokens;
        }

        /** @return the limit category supplied at construction */
        public String getLimitType() {
            return limitType;
        }
    }
}
2.2 异常自动识别与分类
/**
 * Maps raw provider exceptions onto the typed {@code AiException} hierarchy by
 * matching the exception message against a set of case-insensitive patterns.
 */
@Component
public class ExceptionClassifier {
    // Category name -> message pattern. All patterns are compiled case-insensitively,
    // so the message does not need to be lower-cased before matching.
    private final Map<String, Pattern> exceptionPatterns = Map.of(
        "RATE_LIMIT", Pattern.compile("rate.limit|quota.exceeded|请求过于频繁", Pattern.CASE_INSENSITIVE),
        "AUTH_FAILED", Pattern.compile("invalid.api.key|unauthorized|认证失败", Pattern.CASE_INSENSITIVE),
        "SERVICE_UNAVAILABLE", Pattern.compile("service.unavailable|internal.error|服务不可用", Pattern.CASE_INSENSITIVE),
        "CONTENT_FILTER", Pattern.compile("content.violation|sensitive.content|内容违规", Pattern.CASE_INSENSITIVE)
    );

    /**
     * Classifies a raw exception by its message.
     *
     * @param provider     identifier of the model provider the call was made to
     * @param rawException the exception thrown by the provider call
     * @return the typed exception for the first matching category, or a
     *         {@code ModelServiceException} with status 500 when nothing matches
     */
    public AiException classifyException(String provider, Exception rawException) {
        // getMessage() may legitimately be null; match against an empty string in that case
        // instead of failing with a NullPointerException inside the classifier.
        String rawMessage = rawException.getMessage();
        String message = rawMessage != null ? rawMessage : "";
        for (Map.Entry<String, Pattern> entry : exceptionPatterns.entrySet()) {
            if (entry.getValue().matcher(message).find()) {
                return createTypedException(entry.getKey(), provider, rawException);
            }
        }
        // Map.of rejects null values, so fall back to the exception's string form
        // when there is no message to report.
        String reported = rawMessage != null ? rawMessage : String.valueOf(rawException);
        return new ModelServiceException(provider, 500, "未知错误",
            Map.of("original_message", reported));
    }

    /**
     * Builds the typed exception for a matched category. Categories without a
     * dedicated type (including {@code SERVICE_UNAVAILABLE}) become a
     * {@code ModelServiceException} with status 500.
     */
    private AiException createTypedException(String type, String provider, Exception cause) {
        switch (type) {
            case "RATE_LIMIT":
                return new RateLimitException(provider, extractRetryAfter(cause));
            case "AUTH_FAILED":
                return new AuthenticationException(provider, "API密钥无效或过期");
            case "CONTENT_FILTER":
                return new ContentFilterException(provider, "auto_detected", extractFilteredContent(cause));
            default:
                return new ModelServiceException(provider, 500, cause.getMessage(), null);
        }
    }
}
三、多层容错策略实战
3.1 重试机制与退避策略
/**
 * Retry configuration for AI service calls: a shared {@code RetryTemplate} plus a
 * listener that inspects each failure.
 */
@Configuration
@EnableRetry
public class RetryConfig {

    /**
     * Builds the retry template used for AI service calls: up to 3 attempts with
     * exponential backoff (initial 1s, multiplier 2, capped at 5s).
     *
     * <p>Only {@code notRetryOn} is configured. The Spring Retry builder works either as an
     * allow-list ({@code retryOn}) or as a deny-list ({@code notRetryOn}) and rejects a mix
     * of both; in deny-list mode every other exception is retried.
     */
    @Bean
    public RetryTemplate aiServiceRetryTemplate() {
        return RetryTemplate.builder()
            .maxAttempts(3)
            .exponentialBackoff(1000, 2, 5000)
            .notRetryOn(AuthenticationException.class) // authentication failures are never retried
            .withListener(new AiRetryListener())
            .build();
    }

    /** Retry listener that classifies each failure and logs it. */
    @Component
    public class AiRetryListener implements RetryListener {

        /** Logs the start of a retryable operation and always lets it proceed. */
        @Override
        public <T, E extends Throwable> boolean open(RetryContext context,
                                                     RetryCallback<T, E> callback) {
            log.info("开始重试操作,重试上下文: {}", context);
            return true;
        }

        /**
         * Classifies the failure, lets rate-limit failures adjust the backoff, and logs
         * the failed attempt.
         */
        @Override
        public <T, E extends Throwable> void onError(RetryContext context,
                                                     RetryCallback<T, E> callback,
                                                     Throwable throwable) {
            // The callback may fail with an Error; wrap it rather than risk a
            // ClassCastException inside the listener.
            Exception failure = throwable instanceof Exception
                ? (Exception) throwable
                : new RuntimeException(throwable);
            AiException aiException = exceptionClassifier.classifyException(
                getProviderFromContext(context), failure);
            // Adjust the retry strategy according to the exception type.
            if (aiException instanceof RateLimitException) {
                adjustBackoffForRateLimit(context, (RateLimitException) aiException);
            }
            log.warn("AI服务调用失败,正在进行第{}次重试", context.getRetryCount(), throwable);
        }
    }
}
// Annotation-driven retry
/**
 * AI service facade offering two retry styles: annotation-driven
 * ({@code @Retryable} / {@code @Recover}) and programmatic via the retry template.
 */
@Service
public class ResilientAiService {

    /**
     * Calls the model with annotation-driven retry: up to 3 attempts, starting at a
     * 1s delay and doubling each time.
     *
     * <p>{@code @Retryable} belongs on the operation being retried, not on the recovery
     * method, so it is attached to this dedicated entry point.
     *
     * @param prompt the prompt to send
     * @return the model response
     */
    @Retryable(value = {ModelServiceException.class, TimeoutException.class},
               maxAttempts = 3,
               backoff = @Backoff(delay = 1000, multiplier = 2))
    public String callWithAnnotatedRetry(String prompt) {
        return chatClient.call(prompt);
    }

    /**
     * Recovery hook invoked by Spring Retry once the attempts of
     * {@link #callWithAnnotatedRetry(String)} are exhausted. Spring Retry expects the
     * throwable first, followed by the arguments of the failed method.
     */
    @Recover
    public String recoverAiCall(Exception e, String prompt) {
        return fallbackAiCall(prompt, e);
    }

    /**
     * Degraded answer used when retries are exhausted: the cached response for the
     * prompt if there is one, otherwise a fixed "temporarily unavailable" message.
     *
     * @param prompt the prompt that could not be answered
     * @param e      the failure that exhausted the retries
     * @return the cached response or the fixed fallback message
     */
    public String fallbackAiCall(String prompt, Exception e) {
        log.warn("重试失败,启用降级策略", e);
        return getCachedResponse(prompt).orElse("服务暂时不可用,请稍后重试");
    }

    /**
     * Calls the model through the programmatic retry template, choosing the model
     * from the current retry count on every attempt. No recovery callback is
     * supplied, so the last failure propagates once attempts are exhausted.
     *
     * @param prompt the prompt to send
     * @return the model response
     */
    public String callWithRetry(String prompt) {
        return aiServiceRetryTemplate.execute(context -> {
            // Pick a backup model depending on how many retries have happened.
            String model = selectModelBasedOnRetryCount(context.getRetryCount());
            return chatClient.call(prompt, model);
        });
    }
}
3.2 熔断器与降级策略
/**
 * Owns one circuit breaker per model provider, runs operations through them, and
 * publishes their state for monitoring.
 */
@Component
public class CircuitBreakerManager {
    private final Map<String, CircuitBreaker> breakers = new ConcurrentHashMap<>();

    /**
     * Creates an independent circuit breaker for each known provider and registers a
     * state gauge for it.
     *
     * <p>The gauge is registered once, bound to the breaker with a value function, so it
     * reflects the current state whenever it is sampled. Re-registering a gauge with a
     * plain number on every tick does not update an already-registered gauge.
     */
    @PostConstruct
    public void initCircuitBreakers() {
        List<String> providers = Arrays.asList("aliyun", "byte-dance", "deepseek", "zhipu");
        providers.forEach(provider -> {
            CircuitBreakerConfig config = CircuitBreakerConfig.custom()
                .failureRateThreshold(50)        // failure-rate threshold: 50%
                .slidingWindowSize(10)           // evaluate the last 10 calls
                .minimumNumberOfCalls(5)         // need at least 5 calls before evaluating
                .waitDurationInOpenState(Duration.ofSeconds(30))
                .build();
            CircuitBreaker breaker = CircuitBreaker.of(provider + "-cb", config);
            breakers.put(provider, breaker);
            // NOTE(review): ordinal() depends on the declaration order of the library's
            // State enum; any alert rule comparing against a number must match it.
            Metrics.gauge("circuit_breaker_state",
                Tags.of("provider", provider),
                breaker,
                b -> b.getState().ordinal());
        });
    }

    /**
     * Looks up the circuit breaker for a provider.
     *
     * @param provider provider identifier
     * @return the breaker registered for that provider
     * @throws IllegalArgumentException if no breaker is registered for the provider
     */
    public CircuitBreaker getBreaker(String provider) {
        CircuitBreaker breaker = breakers.get(provider);
        if (breaker == null) {
            throw new IllegalArgumentException(
                "No circuit breaker registered for provider: " + provider);
        }
        return breaker;
    }

    /**
     * Runs an operation under the provider's circuit breaker, recording a failure
     * metric before rethrowing when the operation throws.
     *
     * @param provider  provider identifier
     * @param operation the call to protect
     * @return the operation's result
     * @throws IllegalArgumentException if no breaker is registered for the provider
     */
    public String executeWithCircuitBreaker(String provider, Supplier<String> operation) {
        CircuitBreaker breaker = getBreaker(provider);
        return breaker.executeSupplier(() -> {
            try {
                return operation.get();
            } catch (Exception e) {
                // Record the failure metric, then let the breaker see the exception.
                metrics.recordFailure(provider, e);
                throw e;
            }
        });
    }

    /** Every 10 seconds, sends an alert for each breaker that is currently OPEN. */
    @Scheduled(fixedRate = 10000)
    public void monitorCircuitBreakers() {
        breakers.forEach((provider, breaker) -> {
            if (breaker.getState() == CircuitBreaker.State.OPEN) {
                alertService.sendAlert(provider + "服务熔断开启");
            }
        });
    }
}
四、优雅降级与备用方案
4.1 多级降级策略
/**
 * Walks through progressively cheaper ways of answering a prompt until one succeeds.
 */
@Service
public class GracefulDegradationService {

    /** Degradation levels, ordered from least to most degraded. */
    public enum DegradationLevel {
        NONE,            // no degradation
        MODEL_FALLBACK,  // fall back to another model (paid -> free)
        CACHE_ONLY,      // serve from cache only
        STATIC_RESPONSE  // serve a static response
    }

    /**
     * Tries each degradation level up to and including {@code maxLevel}, in order,
     * returning the first answer obtained. A level that throws is logged and skipped.
     *
     * @param prompt   the prompt to answer
     * @param maxLevel the most degraded level that may be used
     * @return the first successful answer, or a fixed "system busy" message when every
     *         permitted level failed
     */
    public String executeWithFallback(String prompt, DegradationLevel maxLevel) {
        List<DegradationLevel> permitted = getDegradationSequence(maxLevel);
        for (int i = 0; i < permitted.size(); i++) {
            DegradationLevel level = permitted.get(i);
            try {
                if (level == DegradationLevel.NONE) {
                    return callPrimaryModel(prompt);
                } else if (level == DegradationLevel.MODEL_FALLBACK) {
                    return callFallbackModel(prompt);
                } else if (level == DegradationLevel.CACHE_ONLY) {
                    return getCachedResponse(prompt)
                        .orElseThrow(() -> new FallbackException("缓存未命中"));
                } else if (level == DegradationLevel.STATIC_RESPONSE) {
                    return getStaticResponse(prompt);
                }
            } catch (Exception e) {
                // Fall through to the next, more degraded level.
                log.warn("降级级别 {} 执行失败", level, e);
            }
        }
        return "系统繁忙,请稍后重试";
    }

    /** Returns the levels from {@code NONE} up to and including {@code maxLevel}, in declaration order. */
    private List<DegradationLevel> getDegradationSequence(DegradationLevel maxLevel) {
        int ceiling = maxLevel.ordinal();
        DegradationLevel[] all = DegradationLevel.values();
        List<DegradationLevel> ordered = new ArrayList<>();
        for (int idx = 0; idx < all.length; idx++) {
            if (idx <= ceiling) {
                ordered.add(all[idx]);
            }
        }
        return ordered;
    }

    /**
     * Tries each fallback model in a fixed order and returns the first response.
     *
     * @throws FallbackException when every fallback model failed
     */
    private String callFallbackModel(String prompt) {
        // Order in which fallback models are attempted.
        String[] candidates = {
            "deepseek-chat",  // lowest cost
            "qwen-turbo",     // fastest
            "ernie-speed"     // most stable
        };
        int next = 0;
        while (next < candidates.length) {
            String model = candidates[next++];
            try {
                return chatClient.call(prompt, model);
            } catch (Exception e) {
                log.warn("降级模型 {} 调用失败", model, e);
            }
        }
        throw new FallbackException("所有降级模型均失败");
    }
}
4.2 响应缓存与兜底数据
/**
 * Response cache for AI answers, backed by {@code cacheStore} and fronted by the
 * Spring cache named {@code ai-responses}.
 */
@Component
@CacheConfig(cacheNames = "ai-responses")
public class ResponseCacheService {

    /**
     * Looks up a previously stored response for the prompt.
     *
     * @param prompt the prompt to look up
     * @return the stored response, if any
     */
    @Cacheable(key = "T(com.example.HashUtil).sha256(#prompt)",
               unless = "#result == null")
    public Optional<String> getCachedResponse(String prompt) {
        // Redis or local cache lookup.
        return cacheStore.get(generateCacheKey(prompt));
    }

    /**
     * Stores a response for the prompt with the given time-to-live.
     *
     * <p>{@code @CachePut} caches the method's return value. On a {@code void} method that
     * value is null, which would replace the cached entry for the prompt with nothing, so
     * the stored response is returned.
     *
     * @param prompt   the prompt the response answers
     * @param response the response to store
     * @param ttl      how long the backing store should keep the entry
     * @return the response that was stored
     */
    @CachePut(key = "T(com.example.HashUtil).sha256(#prompt)")
    public String cacheResponse(String prompt, String response, Duration ttl) {
        cacheStore.put(generateCacheKey(prompt), response, ttl);
        return response;
    }

    /**
     * Pre-populates the cache for the 50 hottest questions every 5 minutes, skipping
     * questions that already have an entry. Failures are logged at debug level and do
     * not stop the remaining questions.
     */
    @Scheduled(fixedRate = 300000)
    public void warmUpCache() {
        List<String> hotQuestions = hotQuestionService.getHotQuestions(50);
        // NOTE(review): parallelStream runs these blocking model calls on the shared
        // common pool — confirm that is intended.
        hotQuestions.parallelStream().forEach(question -> {
            if (!cacheStore.exists(generateCacheKey(question))) {
                try {
                    String response = callPrimaryModel(question);
                    // NOTE(review): this is a self-invocation, so proxy-based cache
                    // annotations on cacheResponse are presumably bypassed here — confirm.
                    cacheResponse(question, response, Duration.ofHours(1));
                } catch (Exception e) {
                    log.debug("预热缓存失败: {}", question, e);
                }
            }
        });
    }
}
五、监控、告警与自愈
5.1 异常监控体系
@Component
public class ExceptionMonitoring {
@EventListener
public void handleAiException(AiException event) {
// 记录详细异常指标
Counter.builder("ai.exceptions")
.tag("provider", event.getProvider())
.tag("error_code", event.getErrorCode())
.tag("severity", event.getSeverity().name())
.register(meterRegistry)
.increment();
// 异常模式检测
detectExceptionPatterns(event);
// 触发相应告警
if (requiresImmediateAlert(event)) {
alertService.sendImmediateAlert(event);
}
}
private void detectExceptionPatterns(AiException exception) {
// 异常频率检测
if (isExceptionFrequencyHigh(exception)) {
adjustCircuitBreaker(exception.getProvider());
}
// 关联异常检测
if (isCorrelatedFailure(exception)) {
escalateToEngineering(exception);
}
}
@Scheduled(fixedRate = 60000) // 每分钟检查一次
public void healthCheck() {
providers.forEach(provider -> {
try {
Health health = performHealthCheck(provider);
updateProviderHealthStatus(provider, health);
if (health.getStatus() == Status.DOWN) {
triggerRecoveryProcedure(provider);
}
} catch (Exception e) {
log.error("健康检查失败: {}", provider, e);
}
});
}
}
5.2 智能告警与自愈
# alert-rules.yml
# Alert rules for the AI service: each rule has a name, a trigger condition,
# a severity and the list of actions to run when it fires.
ai:
  alerts:
    - name: "high-error-rate"
      condition: "rate(ai_exceptions_total[5m]) > 10"
      severity: "warning"
      actions: ["scale-out", "switch-provider"]
    - name: "circuit-breaker-open"
      # NOTE(review): the numeric value must match how the application encodes the
      # breaker state in the circuit_breaker_state gauge — confirm that OPEN maps to 2.
      condition: "circuit_breaker_state == 2" # OPEN state
      severity: "critical"
      actions: ["alert-engineering", "enable-fallback"]
    - name: "response-time-degradation"
      condition: "ai_response_time_seconds{quantile='0.95'} > 10"
      severity: "warning"
      actions: ["adjust-timeout", "enable-cache"]
六、测试策略与质量保障
6.1 异常测试覆盖
@SpringBootTest
class ExceptionHandlingTest {
@Test
void testRateLimitHandling() {
// 模拟频率限制异常
when(chatClient.call(anyString()))
.thenThrow(new RateLimitException("aliyun", 60));
String result = resilientAiService.callWithRetry("test prompt");
assertThat(result).isEqualTo("服务繁忙,请稍后重试");
verify(chatClient, times(3)).call(anyString()); // 验证重试次数
}
@Test
void testCircuitBreakerTransition() {
// 连续失败触发熔断
for (int i = 0; i < 10; i++) {
try {
resilientAiService.callWithCircuitBreaker("aliyun",
() -> { throw new RuntimeException("模拟失败"); });
} catch (Exception ignored) {}
}
CircuitBreaker breaker = circuitBreakerManager.getBreaker("aliyun");
assertThat(breaker.getState()).isEqualTo(CircuitBreaker.State.OPEN);
}
}
总结
Spring AI应用中的异常处理与容错机制,主要包括:
- 分层异常体系:建立针对AI应用的特有异常分类和处理策略
- 智能重试机制:基于异常类型的自适应重试与退避策略
- 熔断降级方案:多级熔断器保护与优雅降级流程
- 全面监控告警:实时异常检测与自愈能力
- 质量保障体系:完整的异常场景测试覆盖
通过实施本文的异常处理方案,可以显著提升AI应用的稳定性和用户体验,确保在部分服务不可用时的业务连续性。