概述
在开发 AI 代理应用时,错误处理是确保应用稳定性和用户体验的关键。本文将介绍完整的错误处理方法、代码示例和优雅处理失败的策略。
为什么错误处理很重要?
想象一下,如果你的 AI 助手在用户提问时突然崩溃,或者显示一堆技术错误信息,用户会有什么感受?
好的错误处理就像给你的应用装上"安全气囊",在出现问题时:
- 保护应用不崩溃
- 给用户友好的提示
- 记录问题以便修复
- 自动恢复或重试
常见的错误类型
1. 网络错误
- API 调用超时
- 网络连接中断
- DNS 解析失败
2. API 错误
- API 密钥无效
- 配额超限
- 速率限制
- 模型不可用
3. 输入错误
- 消息过长
- 格式不正确
- 包含非法内容
4. 系统错误
- 内存不足
- 磁盘空间不足
- 依赖服务不可用
错误处理的基本原则
1. 捕获所有异常
❌ 不好的做法:不处理异常
/// <summary>
/// Sends <paramref name="message"/> to the agent on a new thread and returns the reply content.
/// </summary>
/// <param name="message">The user message to forward to the agent.</param>
/// <returns>The content of the agent's response.</returns>
/// <remarks>
/// Counter-example: nothing here catches exceptions, so any failure in the
/// calls below propagates straight to the caller.
/// </remarks>
public async Task<string> ProcessMessage(string message)
{
    // No try/catch on purpose: an exception thrown below is not handled here.
    var conversation = new AgentThread();
    await conversation.AddUserMessageAsync(message);

    var reply = await _agent.InvokeAsync(conversation);
    return reply.Content;
}
✅ 好的做法:捕获并处理异常
/// <summary>
/// Sends <paramref name="message"/> to the agent and returns the reply content,
/// converting any exception into a generic, user-friendly apology.
/// </summary>
/// <param name="message">The user message to forward to the agent.</param>
/// <returns>The agent's reply, or a fixed fallback sentence when anything throws.</returns>
public async Task<string> ProcessMessage(string message)
{
    // Shown to the user instead of technical details.
    const string fallbackReply = "抱歉,我遇到了一些问题。请稍后再试。";

    try
    {
        var conversation = new AgentThread();
        await conversation.AddUserMessageAsync(message);

        var reply = await _agent.InvokeAsync(conversation);
        return reply.Content;
    }
    catch (Exception error)
    {
        // Record the failure, then answer with the friendly message.
        Console.WriteLine($"错误: {error.Message}");
        return fallbackReply;
    }
}
2. 区分不同类型的错误
/// <summary>
/// Sends <paramref name="message"/> to the agent and maps each category of
/// failure to its own console log line and user-facing reply.
/// </summary>
/// <param name="message">The user message to forward to the agent.</param>
/// <returns>The agent's reply, or a category-specific error sentence.</returns>
public async Task<string> ProcessMessageWithDetailedErrorHandling(string message)
{
    // Writes the diagnostic line to the console and hands back the reply for the user.
    static string Report(string diagnostic, string reply)
    {
        Console.WriteLine(diagnostic);
        return reply;
    }

    try
    {
        var conversation = new AgentThread();
        await conversation.AddUserMessageAsync(message);

        var reply = await _agent.InvokeAsync(conversation);
        return reply.Content;
    }
    catch (HttpRequestException networkError)
    {
        // Network failure.
        return Report($"网络错误: {networkError.Message}", "网络连接出现问题,请检查网络后重试。");
    }
    catch (TaskCanceledException timeoutError)
    {
        // Treated as a timeout.
        return Report($"请求超时: {timeoutError.Message}", "请求超时,请稍后重试。");
    }
    catch (UnauthorizedAccessException authError)
    {
        // Authentication failure.
        return Report($"认证失败: {authError.Message}", "服务认证失败,请联系管理员。");
    }
    catch (ArgumentException inputError)
    {
        // Invalid input; the exception message is echoed back to the user.
        return Report($"输入错误: {inputError.Message}", $"输入有误: {inputError.Message}");
    }
    catch (Exception unknownError)
    {
        // Anything not matched above.
        return Report($"未知错误: {unknownError.Message}", "抱歉,发生了未知错误。我们会尽快修复。");
    }
}
3. 实现重试机制
对于临时性错误(如网络波动),应该自动重试。
/// <summary>
/// Re-runs an asynchronous operation when it fails with a transient error,
/// doubling the wait between attempts (exponential back-off).
/// </summary>
public class RetryPolicy
{
    private readonly int _maxRetries;
    private readonly TimeSpan _initialDelay;

    /// <summary>Creates a retry policy.</summary>
    /// <param name="maxRetries">
    /// Maximum number of retries after the first attempt; zero or less means the
    /// operation is attempted exactly once.
    /// </param>
    /// <param name="initialDelay">Wait before the first retry; defaults to one second.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="initialDelay"/> is negative.</exception>
    public RetryPolicy(int maxRetries = 3, TimeSpan? initialDelay = null)
    {
        TimeSpan delay = initialDelay ?? TimeSpan.FromSeconds(1);
        if (delay < TimeSpan.Zero)
        {
            // Fail fast here: otherwise Task.Delay would throw inside the retry
            // handler and hide the exception that triggered the retry.
            throw new ArgumentOutOfRangeException(
                nameof(initialDelay), delay, "The initial delay must not be negative.");
        }

        _maxRetries = maxRetries;
        _initialDelay = delay;
    }

    /// <summary>
    /// Runs <paramref name="action"/>, retrying with exponential back-off while it
    /// fails with a transient error and retries remain.
    /// </summary>
    /// <typeparam name="T">Result type of the operation.</typeparam>
    /// <param name="action">The operation to run.</param>
    /// <param name="cancellationToken">
    /// Optional token. Once it is cancelled no further retry is started and a
    /// pending back-off wait is aborted.
    /// </param>
    /// <returns>The result of the first successful attempt.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="action"/> is null.</exception>
    /// <remarks>
    /// The last exception is rethrown unchanged when the error is not transient,
    /// the retries are exhausted, or cancellation was requested.
    /// </remarks>
    public async Task<T> ExecuteWithRetryAsync<T>(
        Func<Task<T>> action,
        System.Threading.CancellationToken cancellationToken = default)
    {
        if (action is null)
        {
            throw new ArgumentNullException(nameof(action));
        }

        int attempt = 0;
        TimeSpan delay = _initialDelay;

        while (true)
        {
            try
            {
                return await action();
            }
            catch (Exception ex) when (attempt < _maxRetries
                                       && !cancellationToken.IsCancellationRequested
                                       && IsTransientError(ex))
            {
                attempt++;
                Console.WriteLine($"尝试 {attempt}/{_maxRetries} 失败: {ex.Message}");
                Console.WriteLine($"等待 {delay.TotalSeconds} 秒后重试...");

                // Throws if the caller cancels while we are waiting.
                await Task.Delay(delay, cancellationToken);

                // Exponential back-off: double the wait for the next retry.
                delay = TimeSpan.FromSeconds(delay.TotalSeconds * 2);
            }
            catch (Exception ex)
            {
                // Non-transient error, retries exhausted, or cancellation requested.
                Console.WriteLine($"操作失败: {ex.Message}");
                throw;
            }
        }
    }

    /// <summary>Decides whether <paramref name="ex"/> is worth retrying.</summary>
    private bool IsTransientError(Exception ex)
    {
        // When the HTTP status is known, retry only statuses that can succeed later
        // (408 timeout, 429 rate limit, 5xx); 400/401/403/404 will not fix themselves.
        if (ex is HttpRequestException http && http.StatusCode.HasValue)
        {
            int status = (int)http.StatusCode.Value;
            return status == 408 || status == 429 || status >= 500;
        }

        return ex is HttpRequestException ||               // no status: connection-level failure
               ex is TaskCanceledException ||              // treated as a timeout
               ex is TimeoutException ||
               (ex.Message?.Contains("429") ?? false) ||   // rate limit reported in the message
               (ex.Message?.Contains("503") ?? false);     // service unavailable reported in the message
    }
}
使用重试策略:
/// <summary>
/// Agent front-end that runs every request through a <see cref="RetryPolicy"/>
/// configured for up to three retries.
/// </summary>
public class ResilientAgentService
{
    private readonly ChatCompletionAgent _agent;
    private readonly RetryPolicy _retryPolicy;

    /// <summary>Wraps <paramref name="agent"/> with the retry policy.</summary>
    public ResilientAgentService(ChatCompletionAgent agent)
    {
        _retryPolicy = new RetryPolicy(maxRetries: 3);
        _agent = agent;
    }

    /// <summary>Sends <paramref name="message"/> to the agent under the retry policy.</summary>
    /// <param name="message">The user message.</param>
    /// <returns>The content of the agent's response.</returns>
    public async Task<string> ProcessMessageAsync(string message)
    {
        return await _retryPolicy.ExecuteWithRetryAsync(() => AskAgentAsync(message));
    }

    // One attempt: a fresh thread is created for every try.
    private async Task<string> AskAgentAsync(string message)
    {
        var conversation = new AgentThread();
        await conversation.AddUserMessageAsync(message);

        var reply = await _agent.InvokeAsync(conversation);
        return reply.Content;
    }
}
4. 实现断路器模式
当服务持续失败时,应该暂时停止调用,避免雪崩效应。
/// <summary>
/// Circuit breaker: after a configurable number of consecutive failures it rejects
/// calls for a cool-down period, then lets a probe call through to test recovery.
/// </summary>
public class CircuitBreaker
{
    // Guards the state fields below; calls routinely overlap because the
    // protected action is awaited outside the lock.
    private readonly object _gate = new object();

    private int _failureCount = 0;
    private DateTime _lastFailureTime = DateTime.MinValue;
    private CircuitState _state = CircuitState.Closed;
    private readonly int _failureThreshold;
    private readonly TimeSpan _timeout;

    /// <summary>Creates a circuit breaker.</summary>
    /// <param name="failureThreshold">Failures (without an intervening success) that open the circuit.</param>
    /// <param name="timeout">How long the circuit stays open before a probe is allowed; defaults to one minute.</param>
    public CircuitBreaker(int failureThreshold = 5, TimeSpan? timeout = null)
    {
        _failureThreshold = failureThreshold;
        _timeout = timeout ?? TimeSpan.FromMinutes(1);
    }

    /// <summary>Runs <paramref name="action"/> unless the circuit is open.</summary>
    /// <typeparam name="T">Result type of the operation.</typeparam>
    /// <param name="action">The operation to protect.</param>
    /// <returns>The result of <paramref name="action"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="action"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// The circuit is open and the cool-down has not elapsed; the action is not invoked.
    /// </exception>
    /// <remarks>Exceptions thrown by <paramref name="action"/> are counted and rethrown unchanged.</remarks>
    public async Task<T> ExecuteAsync<T>(Func<Task<T>> action)
    {
        if (action is null)
        {
            throw new ArgumentNullException(nameof(action));
        }

        EnsureCallAllowed();

        T result;
        try
        {
            result = await action();
        }
        catch (Exception ex)
        {
            RecordFailure(ex);
            throw;
        }

        RecordSuccess();
        return result;
    }

    /// <summary>Current state of the circuit.</summary>
    public CircuitState State
    {
        get
        {
            lock (_gate)
            {
                return _state;
            }
        }
    }

    // Rejects the call while the circuit is open; moves to half-open once the cool-down elapsed.
    private void EnsureCallAllowed()
    {
        lock (_gate)
        {
            if (_state != CircuitState.Open)
            {
                return;
            }

            if (DateTime.UtcNow - _lastFailureTime > _timeout)
            {
                _state = CircuitState.HalfOpen;
                Console.WriteLine("断路器进入半开状态,尝试恢复...");
                return;
            }
        }

        throw new InvalidOperationException("服务暂时不可用,请稍后重试");
    }

    // A success closes a half-open circuit and clears the failure counter.
    private void RecordSuccess()
    {
        lock (_gate)
        {
            if (_state == CircuitState.HalfOpen)
            {
                _state = CircuitState.Closed;
                Console.WriteLine("断路器已关闭,服务恢复正常");
            }

            _failureCount = 0;
        }
    }

    // Counts the failure and opens the circuit when warranted.
    private void RecordFailure(Exception ex)
    {
        lock (_gate)
        {
            // A failed probe must reopen the circuit even if the counter was reset
            // in the meantime (for example by a slow call that started before the
            // circuit opened and completed successfully afterwards).
            bool probeFailed = _state == CircuitState.HalfOpen;

            _failureCount++;
            _lastFailureTime = DateTime.UtcNow;
            Console.WriteLine($"操作失败 ({_failureCount}/{_failureThreshold}): {ex.Message}");

            if (probeFailed || _failureCount >= _failureThreshold)
            {
                _state = CircuitState.Open;
                Console.WriteLine($"断路器已打开,将在 {_timeout.TotalSeconds} 秒后尝试恢复");
            }
        }
    }
}

/// <summary>States of a <see cref="CircuitBreaker"/>.</summary>
public enum CircuitState
{
    Closed,   // normal operation
    Open,     // tripped: calls are rejected
    HalfOpen  // probing: calls are let through to test recovery
}
使用断路器:
/// <summary>
/// Agent front-end that routes every request through a <see cref="CircuitBreaker"/>
/// with a failure threshold of five.
/// </summary>
public class ProtectedAgentService
{
    private readonly ChatCompletionAgent _agent;
    private readonly CircuitBreaker _circuitBreaker;

    /// <summary>Wraps <paramref name="agent"/> with the circuit breaker.</summary>
    public ProtectedAgentService(ChatCompletionAgent agent)
    {
        _circuitBreaker = new CircuitBreaker(failureThreshold: 5);
        _agent = agent;
    }

    /// <summary>Sends <paramref name="message"/> to the agent through the circuit breaker.</summary>
    /// <param name="message">The user message.</param>
    /// <returns>
    /// The agent's reply, or a maintenance notice when an
    /// <see cref="InvalidOperationException"/> carrying the "service temporarily
    /// unavailable" text is raised. Other exceptions propagate.
    /// </returns>
    public async Task<string> ProcessMessageAsync(string message)
    {
        try
        {
            return await _circuitBreaker.ExecuteAsync(() => AskAgentAsync(message));
        }
        catch (InvalidOperationException rejected) when (rejected.Message.Contains("服务暂时不可用"))
        {
            // The rejection is recognised by its message text.
            return "服务正在维护中,请稍后再试。";
        }
    }

    // One guarded call on a fresh thread.
    private async Task<string> AskAgentAsync(string message)
    {
        var conversation = new AgentThread();
        await conversation.AddUserMessageAsync(message);

        var reply = await _agent.InvokeAsync(conversation);
        return reply.Content;
    }
}
完整的错误处理框架
1. 自定义异常类型
/// <summary>Base class for all agent-related exceptions.</summary>
public class AgentException : Exception
{
    /// <summary>Creates the exception with a message.</summary>
    public AgentException(string message) : base(message) { }

    /// <summary>Creates the exception with a message and the exception that caused it.</summary>
    public AgentException(string message, Exception innerException)
        : base(message, innerException) { }
}

/// <summary>Failure reported by, or while calling, the remote API.</summary>
public class ApiException : AgentException
{
    /// <summary>HTTP status code of the failed call, when known.</summary>
    public int? StatusCode { get; }

    /// <summary>Creates the exception with a message and an optional status code.</summary>
    public ApiException(string message, int? statusCode = null)
        : base(message)
    {
        StatusCode = statusCode;
    }

    /// <summary>
    /// Creates the exception with a message, a status code and the underlying cause.
    /// </summary>
    /// <remarks>
    /// All three arguments are required so that existing two-argument calls such as
    /// <c>new ApiException("msg", null)</c> keep binding to the original constructor.
    /// </remarks>
    public ApiException(string message, int? statusCode, Exception innerException)
        : base(message, innerException)
    {
        StatusCode = statusCode;
    }
}

/// <summary>Quota or rate limit exceeded; always carries status code 429.</summary>
public class QuotaExceededException : ApiException
{
    /// <summary>Creates the exception with a message.</summary>
    public QuotaExceededException(string message)
        : base(message, 429) { }

    /// <summary>Creates the exception with a message and the underlying cause.</summary>
    public QuotaExceededException(string message, Exception innerException)
        : base(message, 429, innerException) { }
}

/// <summary>Input validation failure.</summary>
public class ValidationException : AgentException
{
    /// <summary>Creates the exception with a message.</summary>
    public ValidationException(string message) : base(message) { }

    /// <summary>Creates the exception with a message and the underlying cause.</summary>
    public ValidationException(string message, Exception innerException)
        : base(message, innerException) { }
}

/// <summary>Configuration problem.</summary>
public class ConfigurationException : AgentException
{
    /// <summary>Creates the exception with a message.</summary>
    public ConfigurationException(string message) : base(message) { }

    /// <summary>Creates the exception with a message and the underlying cause.</summary>
    public ConfigurationException(string message, Exception innerException)
        : base(message, innerException) { }
}
2. 错误处理中间件
/// <summary>
/// Runs an operation and converts any exception into a failed
/// <see cref="Result{T}"/> carrying a user-facing message, logging it on the way.
/// </summary>
public class ErrorHandlingMiddleware
{
    private readonly ILogger _logger;

    /// <summary>Creates the middleware.</summary>
    /// <param name="logger">Receives a warning or error entry for every failure.</param>
    public ErrorHandlingMiddleware(ILogger logger)
    {
        _logger = logger;
    }

    /// <summary>Executes <paramref name="action"/> and wraps the outcome.</summary>
    /// <typeparam name="T">Result type of the operation.</typeparam>
    /// <param name="action">The operation to run.</param>
    /// <returns>A success result with the value, or a failure result with a message.</returns>
    public async Task<Result<T>> ExecuteAsync<T>(Func<Task<T>> action)
    {
        try
        {
            return Result<T>.Success(await action());
        }
        catch (Exception failure)
        {
            return Result<T>.Failure(LogAndDescribe(failure));
        }
    }

    // Logs the exception at the matching level and returns the text for the user.
    // Case order matters: QuotaExceededException derives from ApiException.
    private string LogAndDescribe(Exception failure)
    {
        switch (failure)
        {
            case ValidationException invalid:
                _logger.LogWarning(invalid, "输入验证失败");
                return "输入验证失败: " + invalid.Message;

            case QuotaExceededException quota:
                _logger.LogWarning(quota, "配额超限");
                return "请求过于频繁,请稍后再试";

            case ApiException api:
                _logger.LogError(api, "API 调用失败");
                return $"服务调用失败 (状态码: {api.StatusCode})";

            case ConfigurationException config:
                _logger.LogError(config, "配置错误");
                return "服务配置错误,请联系管理员";

            default:
                _logger.LogError(failure, "未知错误");
                return "发生未知错误,请稍后重试";
        }
    }
}
/// <summary>
/// Outcome of an operation: either a value (success) or an error message (failure).
/// </summary>
/// <typeparam name="T">Type of the value carried on success.</typeparam>
public class Result<T>
{
    // Instances are only created through Success and Failure.
    private Result(bool isSuccess, T value, string error)
    {
        (IsSuccess, Value, Error) = (isSuccess, value, error);
    }

    /// <summary>True when the operation succeeded.</summary>
    public bool IsSuccess { get; }

    /// <summary>The value on success; the default of <typeparamref name="T"/> on failure.</summary>
    public T Value { get; }

    /// <summary>The error message on failure; null on success.</summary>
    public string Error { get; }

    /// <summary>Creates a successful result holding <paramref name="value"/>.</summary>
    public static Result<T> Success(T value)
    {
        return new Result<T>(true, value, null);
    }

    /// <summary>Creates a failed result holding <paramref name="error"/>.</summary>
    public static Result<T> Failure(string error)
    {
        return new Result<T>(false, default(T), error);
    }
}
3. 日志记录
/// <summary>Minimal logging abstraction with three severity levels.</summary>
public interface ILogger
{
    /// <summary>Logs an informational message.</summary>
    /// <param name="message">Text to log.</param>
    void LogInfo(string message);

    /// <summary>Logs a warning together with the exception that caused it.</summary>
    /// <param name="ex">The exception being reported.</param>
    /// <param name="message">Short description of the situation.</param>
    void LogWarning(Exception ex, string message);

    /// <summary>Logs an error together with the exception that caused it.</summary>
    /// <param name="ex">The exception being reported.</param>
    /// <param name="message">Short description of the situation.</param>
    void LogError(Exception ex, string message);
}
/// <summary>
/// <see cref="ILogger"/> implementation that writes timestamped lines to the console.
/// </summary>
public class ConsoleLogger : ILogger
{
    /// <summary>Writes an [INFO] line with the local time and the message.</summary>
    public void LogInfo(string message)
    {
        DateTime now = DateTime.Now;
        Console.WriteLine($"[INFO] {now:yyyy-MM-dd HH:mm:ss} - {message}");
    }

    /// <summary>Writes a [WARN] line followed by the exception message.</summary>
    public void LogWarning(Exception ex, string message)
    {
        DateTime now = DateTime.Now;
        Console.WriteLine($"[WARN] {now:yyyy-MM-dd HH:mm:ss} - {message}");
        Console.WriteLine($" 异常: {ex.Message}");
    }

    /// <summary>Writes an [ERROR] line followed by the exception message and stack trace.</summary>
    public void LogError(Exception ex, string message)
    {
        DateTime now = DateTime.Now;
        Console.WriteLine($"[ERROR] {now:yyyy-MM-dd HH:mm:ss} - {message}");
        Console.WriteLine($" 异常: {ex.Message}");
        Console.WriteLine($" 堆栈: {ex.StackTrace}");
    }
}
4. 完整的错误处理服务
/// <summary>
/// Agent front-end that combines input validation, a circuit breaker, a retry
/// policy, logging and the error-handling middleware.
/// </summary>
public class RobustAgentService
{
    private readonly ChatCompletionAgent _agent;
    private readonly RetryPolicy _retryPolicy;
    private readonly CircuitBreaker _circuitBreaker;
    private readonly ErrorHandlingMiddleware _errorHandler;
    private readonly ILogger _logger;

    /// <summary>Wraps <paramref name="agent"/> with the default resilience components.</summary>
    public RobustAgentService(ChatCompletionAgent agent)
    {
        _agent = agent;
        _retryPolicy = new RetryPolicy(maxRetries: 3);
        _circuitBreaker = new CircuitBreaker(failureThreshold: 5);

        // The middleware needs the logger, so the logger is created first.
        _logger = new ConsoleLogger();
        _errorHandler = new ErrorHandlingMiddleware(_logger);
    }

    /// <summary>Validates and processes <paramref name="message"/>.</summary>
    /// <param name="message">The user message.</param>
    /// <returns>A result holding the agent's reply, or an error message on failure.</returns>
    public async Task<Result<string>> ProcessMessageAsync(string message)
    {
        return await _errorHandler.ExecuteAsync(() => RunGuardedAsync(message));
    }

    // Validation runs outside the circuit breaker; the retry policy runs inside it.
    private async Task<string> RunGuardedAsync(string message)
    {
        ValidateInput(message);

        return await _circuitBreaker.ExecuteAsync(
            () => _retryPolicy.ExecuteWithRetryAsync(() => AskAgentAsync(message)));
    }

    // A single attempt; executed once per retry, so the log lines repeat per attempt.
    private async Task<string> AskAgentAsync(string message)
    {
        // Only the first 50 characters of the message are logged.
        var preview = message.Substring(0, Math.Min(50, message.Length));
        _logger.LogInfo($"处理消息: {preview}...");

        var conversation = new AgentThread();
        await conversation.AddUserMessageAsync(message);
        var reply = await _agent.InvokeAsync(conversation);

        _logger.LogInfo("消息处理成功");
        return reply.Content;
    }

    // Rejects empty/whitespace input and input longer than 4000 characters.
    private void ValidateInput(string message)
    {
        if (string.IsNullOrWhiteSpace(message))
        {
            throw new ValidationException("消息不能为空");
        }

        if (message.Length > 4000)
        {
            throw new ValidationException("消息长度不能超过 4000 字符");
        }
    }
}
使用示例
基本使用
/// <summary>Console chat loop demonstrating <c>RobustAgentService</c>.</summary>
public class Program
{
    /// <summary>
    /// Builds the agent from the AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY
    /// environment variables, then answers console input until the user types
    /// "exit" or the input stream ends.
    /// </summary>
    /// <exception cref="InvalidOperationException">A required environment variable is missing or empty.</exception>
    public static async Task Main(string[] args)
    {
        // Initialise the agent.
        var chatClient = new AzureOpenAIClient(
            new Uri(RequireEnvironmentVariable("AZURE_OPENAI_ENDPOINT")),
            new ApiKeyCredential(RequireEnvironmentVariable("AZURE_OPENAI_API_KEY"))
        ).GetChatClient("gpt-35-turbo");

        var agent = new ChatCompletionAgent(
            chatClient: chatClient,
            name: "RobustAgent",
            instructions: "你是一个可靠的助手"
        );

        var service = new RobustAgentService(agent);

        // Handle user messages.
        Console.WriteLine("请输入消息(输入 'exit' 退出):");
        while (true)
        {
            Console.Write("\n用户: ");
            var input = Console.ReadLine();

            // ReadLine returns null when the input stream is exhausted (Ctrl+Z/Ctrl+D,
            // redirected input); without this check the loop would never terminate.
            if (input is null || string.Equals(input, "exit", StringComparison.OrdinalIgnoreCase))
            {
                break;
            }

            var result = await service.ProcessMessageAsync(input);
            if (result.IsSuccess)
            {
                Console.WriteLine($"AI: {result.Value}");
            }
            else
            {
                Console.WriteLine($"错误: {result.Error}");
            }
        }
    }

    // Reads a mandatory environment variable and fails with a message that names it.
    private static string RequireEnvironmentVariable(string name)
    {
        var value = Environment.GetEnvironmentVariable(name);
        if (string.IsNullOrWhiteSpace(value))
        {
            throw new InvalidOperationException($"Environment variable '{name}' is not set.");
        }

        return value;
    }
}
高级使用:带监控和告警
/// <summary>
/// Decorator around <c>RobustAgentService</c> that counts requests, prints
/// metrics after every call and raises an alert when more than half of all
/// requests so far have failed.
/// </summary>
public class MonitoredAgentService
{
    private readonly RobustAgentService _service;

    // NOTE(review): these counters are updated with plain ++; if one instance is
    // shared by concurrent callers, confirm whether Interlocked is needed.
    private int _totalRequests = 0;
    private int _successfulRequests = 0;
    private int _failedRequests = 0;

    /// <summary>Creates the monitoring wrapper around <paramref name="service"/>.</summary>
    public MonitoredAgentService(RobustAgentService service)
    {
        _service = service;
    }

    /// <summary>Processes <paramref name="message"/> and records the outcome and duration.</summary>
    /// <param name="message">The user message.</param>
    /// <returns>The result returned by the wrapped service, unchanged.</returns>
    public async Task<Result<string>> ProcessMessageAsync(string message)
    {
        _totalRequests++;

        // Stopwatch measures elapsed time independently of system clock
        // adjustments, unlike subtracting two DateTime.UtcNow readings.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        var result = await _service.ProcessMessageAsync(message);
        stopwatch.Stop();
        var duration = stopwatch.Elapsed;

        if (result.IsSuccess)
        {
            _successfulRequests++;
        }
        else
        {
            _failedRequests++;

            // Alert when the overall failure rate exceeds 50%.
            if (GetFailureRate() > 0.5)
            {
                SendAlert($"警告:失败率过高 ({GetFailureRate():P})");
            }
        }

        // Emit the metrics line.
        Console.WriteLine($"[指标] 总请求: {_totalRequests}, " +
            $"成功: {_successfulRequests}, " +
            $"失败: {_failedRequests}, " +
            $"成功率: {GetSuccessRate():P}, " +
            $"耗时: {duration.TotalSeconds:F2}秒");

        return result;
    }

    // Share of successful requests among all requests; 0 before the first request.
    private double GetSuccessRate() =>
        _totalRequests > 0 ? (double)_successfulRequests / _totalRequests : 0;

    // Share of failed requests among all requests; 0 before the first request.
    private double GetFailureRate() =>
        _totalRequests > 0 ? (double)_failedRequests / _totalRequests : 0;

    // Placeholder: prints the alert; a real implementation would send e-mail, SMS or a push notification.
    private void SendAlert(string message)
    {
        Console.WriteLine($"\n🚨 告警: {message}\n");
    }
}
错误处理检查清单
在部署应用之前,使用这个清单检查错误处理:
- [ ] **异常捕获**
  - [ ] 所有 API 调用都有 try-catch
  - [ ] 区分不同类型的异常
  - [ ] 记录所有异常信息
- [ ] 实现了指数退避重试
- [ ] 只对临时性错误重试
- [ ] 实现了断路器模式
- [ ] 配置了恢复超时时间
- [ ] 错误消息友好易懂
- [ ] 提供解决建议
- [ ] 记录所有错误
- [ ] 不记录敏感信息
- [ ] 监控错误率
- [ ] 及时响应告警
-
错误处理是构建可靠应用的基础,关键要点:
- 捕获所有异常:不要让应用崩溃
- 区分错误类型:不同错误不同处理
- 实现重试机制:自动恢复临时性错误
- 使用断路器:防止雪崩效应
- 友好的错误消息:让用户知道发生了什么
- 完善的日志:帮助快速定位问题
- 持续监控:及时发现和解决问题
记住:好的错误处理不是避免错误,而是优雅地处理错误。
