企业级消息中心运维与扩展篇:监控运维、扩展性设计
系列文章第四篇:深入解析消息中心的监控运维体系和扩展性设计
📖 系列文章导读
本系列文章将全面解析企业级消息中心的设计与实现,共分为5篇:
- 架构设计篇:设计哲学、架构演进、技术选型
- 核心实现篇:整体架构设计、核心功能实现
- 存储与可靠性篇:数据存储设计、高可用保障
- 运维与扩展篇(本篇):监控运维、扩展性设计
- 实战总结篇:业务价值、经验总结
📊 监控告警体系建设
监控体系架构设计
text
┌─────────────────────────────────────────────────────────────┐
│ 监控告警体系 │
├─────────────────────────────────────────────────────────────┤
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ 应用监控 │ │ 基础监控 │ │ 业务监控 │ │
│ │ (APM) │ │ (Infrastructure)│ (Business) │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
├─────────────────────────────────────────────────────────────┤
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ Prometheus │ │ Grafana │ │ AlertManager│ │
│ │ (指标收集) │ │ (可视化) │ │ (告警) │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
├─────────────────────────────────────────────────────────────┤
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ ELK │ │ Jaeger │ │ SkyWalking│ │
│ │ (日志) │ │ (链路追踪) │ │ (APM) │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
└─────────────────────────────────────────────────────────────┘
应用性能监控(APM)
1. 核心指标定义
系统层面指标:
java
/**
 * System-level metrics collector.
 *
 * Registers Micrometer gauges for message-queue depth, processing rate,
 * DB connection-pool usage, cache hit ratio and send success rate. The
 * gauges sample lazily through the method references passed to
 * register(), so no scheduled polling is needed here.
 *
 * Fixed: the "datasource.connection.idle" gauge referenced a
 * getIdleConnections method that was never defined (the original could
 * not compile), and an unused Timer.Sample field was removed.
 *
 * NOTE(review): messageQueueService, messageProcessService,
 * dataSourceMonitor, cacheManager and messageStatisticsService are used
 * by the callbacks but are not declared in this excerpt — they must be
 * injected in the full source.
 */
@Component
public class SystemMetricsCollector {

    private final MeterRegistry meterRegistry;

    public SystemMetricsCollector(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
        // Register every custom gauge exactly once at startup.
        registerCustomMetrics();
    }

    /**
     * Registers all custom gauges against the shared registry.
     */
    private void registerCustomMetrics() {
        // 1. Message-processing gauges.
        Gauge.builder("message.queue.size")
                .description("消息队列大小")
                .register(meterRegistry, this, SystemMetricsCollector::getQueueSize);
        Gauge.builder("message.processing.rate")
                .description("消息处理速率")
                .register(meterRegistry, this, SystemMetricsCollector::getProcessingRate);
        // 2. Connection-pool gauges.
        Gauge.builder("datasource.connection.active")
                .description("活跃数据库连接数")
                .register(meterRegistry, this, SystemMetricsCollector::getActiveConnections);
        Gauge.builder("datasource.connection.idle")
                .description("空闲数据库连接数")
                .register(meterRegistry, this, SystemMetricsCollector::getIdleConnections);
        // 3. Cache gauge.
        Gauge.builder("cache.hit.ratio")
                .description("缓存命中率")
                .register(meterRegistry, this, SystemMetricsCollector::getCacheHitRatio);
        // 4. Business gauge.
        Gauge.builder("message.send.success.rate")
                .description("消息发送成功率")
                .register(meterRegistry, this, SystemMetricsCollector::getSendSuccessRate);
    }

    /** Current depth of the message queue. */
    private double getQueueSize(SystemMetricsCollector collector) {
        return messageQueueService.getQueueSize();
    }

    /** Current message-processing throughput. */
    private double getProcessingRate(SystemMetricsCollector collector) {
        return messageProcessService.getProcessingRate();
    }

    /** Active connections in the DB pool. */
    private double getActiveConnections(SystemMetricsCollector collector) {
        return dataSourceMonitor.getActiveConnections();
    }

    /**
     * Idle connections in the DB pool.
     * Fixed: this callback was registered above but the method itself
     * was missing from the original.
     */
    private double getIdleConnections(SystemMetricsCollector collector) {
        return dataSourceMonitor.getIdleConnections();
    }

    /** Cache hit ratio in [0, 1]. */
    private double getCacheHitRatio(SystemMetricsCollector collector) {
        return cacheManager.getHitRatio();
    }

    /** Message send success rate in [0, 1]. */
    private double getSendSuccessRate(SystemMetricsCollector collector) {
        return messageStatisticsService.getSendSuccessRate();
    }
}
/**
 * Business-level metric recorder: message counts, processing latencies
 * and payload-size distribution.
 *
 * Fixed: the constructor received the MeterRegistry but never stored
 * it, while recordMessageProcessing / recordMessageSendResult /
 * recordError all reference a meterRegistry field — the original could
 * not compile.
 */
@Component
public class BusinessMetricsRecorder {

    private final MeterRegistry meterRegistry;
    // Pre-built meters for the untagged totals.
    private final Counter messageCounter;
    private final Timer messageProcessingTimer;
    private final DistributionSummary messageSizeDistribution;

    public BusinessMetricsRecorder(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
        // Overall message counter.
        this.messageCounter = Counter.builder("message.total")
                .description("消息总数")
                .tag("type", "all")
                .register(meterRegistry);
        // Overall processing-time timer.
        this.messageProcessingTimer = Timer.builder("message.processing.time")
                .description("消息处理时间")
                .register(meterRegistry);
        // Payload-size distribution in bytes.
        this.messageSizeDistribution = DistributionSummary.builder("message.size")
                .description("消息大小分布")
                .baseUnit("bytes")
                .register(meterRegistry);
    }

    /**
     * Records one processed message: per-type count, per-type latency
     * and payload size. Micrometer's register() is idempotent per
     * (name, tags), so repeated calls reuse the same meter instance.
     *
     * @param processingTime elapsed time in milliseconds
     * @param messageSize    payload size in bytes
     */
    public void recordMessageProcessing(String messageType, long processingTime, int messageSize) {
        Counter.builder("message.processed")
                .tag("type", messageType)
                .register(meterRegistry)
                .increment();
        Timer.builder("message.processing.time")
                .tag("type", messageType)
                .register(meterRegistry)
                .record(processingTime, TimeUnit.MILLISECONDS);
        messageSizeDistribution.record(messageSize);
    }

    /**
     * Records a per-channel send outcome.
     */
    public void recordMessageSendResult(String channel, boolean success) {
        Counter.builder("message.send.result")
                .tag("channel", channel)
                .tag("result", success ? "success" : "failure")
                .register(meterRegistry)
                .increment();
    }

    /**
     * Records an error occurrence.
     *
     * NOTE(review): tagging with the free-form errorMessage produces
     * unbounded tag cardinality and can exhaust the metrics backend;
     * consider tagging only errorType.
     */
    public void recordError(String errorType, String errorMessage) {
        Counter.builder("message.error")
                .tag("type", errorType)
                .tag("message", errorMessage)
                .register(meterRegistry)
                .increment();
    }
}
2. 性能监控实现
方法级性能监控:
java
/**
 * Method-level performance-monitoring annotation, processed at runtime
 * by PerformanceMonitorAspect.
 */
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
public @interface PerformanceMonitor {
/**
 * Metric name; when blank, the aspect derives
 * "SimpleClassName.methodName" from the join point.
 */
String value() default "";
/**
 * Whether to record the method arguments.
 */
boolean recordArgs() default false;
/**
 * Whether to record the return value.
 */
boolean recordResult() default false;
/**
 * Slow-method threshold in milliseconds; calls slower than this are
 * counted separately and logged as warnings.
 */
long slowThreshold() default 1000;
}
/**
 * AOP aspect implementing @PerformanceMonitor: times annotated methods
 * and records success / slow-call metrics.
 *
 * NOTE(review): this excerpt is truncated and mis-spliced — the helpers
 * recordSuccess / getMethodName / recordMethodDetails are defined in a
 * later fragment of the article, the started Timer.Sample is never
 * stopped here, and the catch block below (with its unrelated
 * "distributed transaction" log message) replaces the fuller
 * catch/finally version that records exception metrics and stops the
 * sample. Reconcile with the later fragment before using this code.
 */
@Aspect
@Component
public class PerformanceMonitorAspect {
@Autowired
private MeterRegistry meterRegistry;
@Autowired
private BusinessMetricsRecorder metricsRecorder;
@Around("@annotation(performanceMonitor)")
public Object around(ProceedingJoinPoint point, PerformanceMonitor performanceMonitor) throws Throwable {
String methodName = getMethodName(point, performanceMonitor);
// Start both a Micrometer sample and a wall-clock timestamp.
Timer.Sample sample = Timer.start(meterRegistry);
long startTime = System.currentTimeMillis();
try {
// Invoke the intercepted method.
Object result = point.proceed();
// Record success metrics (execution count + slow-call detection).
long duration = System.currentTimeMillis() - startTime;
recordSuccess(methodName, duration, performanceMonitor.slowThreshold());
// Optionally capture arguments / return value.
if (performanceMonitor.recordArgs() || performanceMonitor.recordResult()) {
recordMethodDetails(point, result, duration, performanceMonitor);
}
return result;
} catch (Exception e) {
// NOTE(review): this log message looks copy-pasted from another aspect.
log.error("分布式事务执行失败", e);
throw e;
}
}
}
3. 服务治理策略
服务注册与发现:
java
/**
 * Eureka service-registration configuration.
 *
 * NOTE(review): `environment`, getApplicationVersion() and getRegion()
 * are referenced but not declared in this excerpt; they must be
 * provided (e.g. an injected Environment) in the full source.
 */
@Configuration
@EnableEurekaClient
public class ServiceDiscoveryConfig {

    /**
     * Instance settings: short lease intervals for fast failure
     * detection, actuator endpoints for health/status pages, and
     * version/region metadata for consumers.
     */
    @Bean
    public EurekaInstanceConfigBean eurekaInstanceConfig() {
        EurekaInstanceConfigBean config = new EurekaInstanceConfigBean();
        // Identity: register by IP with a host:port instance id.
        config.setInstanceId(getInstanceId());
        config.setPreferIpAddress(true);
        // Heartbeat every 10s; evict after 30s without a renewal.
        config.setLeaseRenewalIntervalInSeconds(10);
        config.setLeaseExpirationDurationInSeconds(30);
        // Actuator endpoints used by the registry for health/status.
        config.setHealthCheckUrlPath("/actuator/health");
        config.setStatusPageUrlPath("/actuator/info");
        // Metadata propagated to service consumers.
        Map<String, String> metadata = new HashMap<>();
        metadata.put("version", getApplicationVersion());
        metadata.put("region", getRegion());
        config.setMetadataMap(metadata);
        return config;
    }

    /**
     * Builds "hostname:port" for the instance id.
     *
     * Fixed: InetAddress.getLocalHost() throws the checked
     * UnknownHostException, which the original never handled (it could
     * not compile); fall back to "localhost" rather than failing
     * registration.
     */
    private String getInstanceId() {
        String port = environment.getProperty("server.port", "8080");
        try {
            return InetAddress.getLocalHost().getHostName() + ":" + port;
        } catch (UnknownHostException e) {
            return "localhost:" + port;
        }
    }
}
/**
 * Aggregate health indicator exposed via Spring Boot Actuator: checks
 * the database, Redis, the message queue and external dependencies,
 * attaching a per-component detail to the health payload.
 *
 * NOTE(review): checkMessageQueue and checkExternalDependencies are
 * called below but are not shown in this excerpt.
 */
@Component
public class ServiceHealthIndicator implements HealthIndicator {
@Autowired
private DataSource dataSource;
@Autowired
private RedisTemplate<String, Object> redisTemplate;
@Override
public Health health() {
Health.Builder builder = Health.up();
try {
// Database connectivity.
checkDatabase(builder);
// Redis round trip.
checkRedis(builder);
// Message queue availability.
checkMessageQueue(builder);
// External dependencies.
checkExternalDependencies(builder);
} catch (Exception e) {
// Any unexpected failure marks the whole service DOWN.
return Health.down(e).build();
}
return builder.build();
}
// Validates a pooled connection with a 3-second timeout.
private void checkDatabase(Health.Builder builder) {
try (Connection connection = dataSource.getConnection()) {
if (connection.isValid(3)) {
builder.withDetail("database", "UP");
} else {
builder.withDetail("database", "DOWN");
builder.down();
}
} catch (Exception e) {
builder.withDetail("database", "DOWN: " + e.getMessage());
builder.down();
}
}
// Write-then-read round trip; the probe key expires after 10 seconds.
private void checkRedis(Health.Builder builder) {
try {
redisTemplate.opsForValue().set("health:check", "ok", Duration.ofSeconds(10));
String result = (String) redisTemplate.opsForValue().get("health:check");
if ("ok".equals(result)) {
builder.withDetail("redis", "UP");
} else {
builder.withDetail("redis", "DOWN");
builder.down();
}
} catch (Exception e) {
builder.withDetail("redis", "DOWN: " + e.getMessage());
builder.down();
}
}
}
容器化部署
1. Docker配置
多阶段构建Dockerfile:
dockerfile
# Dockerfile
# Stage 1: build with Maven. The pom is copied alone first so the
# dependency layer is cached unless pom.xml changes.
FROM maven:3.8.4-openjdk-11 AS builder
WORKDIR /app
COPY pom.xml .
RUN mvn dependency:go-offline -B
# Copy sources and build the jar (tests run in CI, not in the image build).
COPY src ./src
RUN mvn clean package -DskipTests -B
# Stage 2: minimal runtime image.
FROM openjdk:11-jre-slim
# Tools: curl/jq for health-check.sh, netcat for the nc-based
# wait-for-dependency loop in entrypoint.sh (netcat was missing before,
# so the entrypoint's `nc -z` calls failed).
RUN apt-get update && apt-get install -y \
    curl \
    jq \
    netcat-openbsd \
    && rm -rf /var/lib/apt/lists/*
# Run as an unprivileged user.
RUN groupadd -r appuser && useradd -r -g appuser appuser
WORKDIR /app
# Application artifact and helper scripts.
COPY --from=builder /app/target/message-center-*.jar app.jar
COPY docker/entrypoint.sh .
COPY docker/health-check.sh .
RUN chmod +x entrypoint.sh health-check.sh
RUN chown -R appuser:appuser /app
USER appuser
EXPOSE 8080
# Container-level health check, delegated to the bundled script.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD ./health-check.sh
ENTRYPOINT ["./entrypoint.sh"]
启动脚本:
bash
#!/bin/bash
# entrypoint.sh — container entrypoint: waits for backing services to
# accept TCP connections, then launches the app via exec so the JVM
# becomes PID 1 and receives signals directly.
set -e

# Defaults, overridable through the environment. Port defaults were
# added: previously an unset DB_PORT/REDIS_PORT produced a malformed
# `nc -z host` call. All expansions are quoted to survive odd values.
JAVA_OPTS=${JAVA_OPTS:-"-Xms2g -Xmx2g -XX:+UseG1GC"}
SPRING_PROFILES_ACTIVE=${SPRING_PROFILES_ACTIVE:-"prod"}
DB_PORT=${DB_PORT:-3306}
REDIS_PORT=${REDIS_PORT:-6379}

# Wait for dependencies (requires `nc`; the image must install netcat).
echo "等待依赖服务启动..."

# Database.
if [ -n "$DB_HOST" ]; then
    echo "等待数据库: $DB_HOST:$DB_PORT"
    while ! nc -z "$DB_HOST" "$DB_PORT"; do
        sleep 1
    done
    echo "数据库已就绪"
fi

# Redis.
if [ -n "$REDIS_HOST" ]; then
    echo "等待Redis: $REDIS_HOST:$REDIS_PORT"
    while ! nc -z "$REDIS_HOST" "$REDIS_PORT"; do
        sleep 1
    done
    echo "Redis已就绪"
fi

# Start the application. JAVA_OPTS is intentionally unquoted so the
# shell word-splits it into individual JVM flags.
echo "启动消息中心服务..."
exec java $JAVA_OPTS \
    -Dspring.profiles.active="$SPRING_PROFILES_ACTIVE" \
    -Djava.security.egd=file:/dev/./urandom \
    -jar app.jar
2. Kubernetes部署
K8s部署配置:
yaml
# k8s-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: message-center
namespace: production
labels:
app: message-center
version: v1.0.0
spec:
replicas: 3
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 1
maxUnavailable: 0
selector:
matchLabels:
app: message-center
template:
metadata:
labels:
app: message-center
version: v1.0.0
spec:
containers:
- name: message-center
image: message-center:v1.0.0
ports:
- containerPort: 8080
name: http
env:
- name: SPRING_PROFILES_ACTIVE
value: "k8s"
- name: DB_HOST
valueFrom:
secretKeyRef:
name: db-secret
key: host
- name: DB_PASSWORD
valueFrom:
secretKeyRef:
name: db-secret
key: password
resources:
requests:
memory: "2Gi"
cpu: "1000m"
limits:
memory: "4Gi"
cpu: "2000m"
livenessProbe:
httpGet:
path: /actuator/health/liveness
port: 8080
initialDelaySeconds: 60
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 3
readinessProbe:
httpGet:
path: /actuator/health/readiness
port: 8080
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3
volumeMounts:
- name: config-volume
mountPath: /app/config
- name: log-volume
mountPath: /app/logs
volumes:
- name: config-volume
configMap:
name: message-center-config
- name: log-volume
emptyDir: {}
imagePullSecrets:
- name: registry-secret
---
apiVersion: v1
kind: Service
metadata:
name: message-center-service
namespace: production
spec:
selector:
app: message-center
ports:
- name: http
port: 80
targetPort: 8080
type: ClusterIP
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: message-center-hpa
namespace: production
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: message-center
minReplicas: 3
maxReplicas: 10
metrics:
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: 70
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: 80
📈 运维最佳实践
日志管理
1. 结构化日志
日志配置优化:
xml
<!-- logback-spring.xml -->
<configuration>
<springProfile name="prod">
<!-- 生产环境配置 -->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder class="net.logstash.logback.encoder.LoggingEventCompositeJsonEncoder">
<providers>
<timestamp/>
<logLevel/>
<loggerName/>
<message/>
<mdc/>
<arguments/>
<stackTrace/>
</providers>
</encoder>
</appender>
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>/app/logs/message-center.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<fileNamePattern>/app/logs/message-center.%d{yyyy-MM-dd}.%i.log.gz</fileNamePattern>
<maxFileSize>100MB</maxFileSize>
<maxHistory>30</maxHistory>
<totalSizeCap>10GB</totalSizeCap>
</rollingPolicy>
<encoder class="net.logstash.logback.encoder.LoggingEventCompositeJsonEncoder">
<providers>
<timestamp/>
<logLevel/>
<loggerName/>
<message/>
<mdc/>
<arguments/>
<stackTrace/>
</providers>
</encoder>
</appender>
<root level="INFO">
<appender-ref ref="STDOUT"/>
<appender-ref ref="FILE"/>
</root>
</springProfile>
</configuration>
备份恢复
1. 自动化备份
数据备份脚本:
bash
#!/bin/bash
# backup.sh — full backup of the message-center data stores: dumps
# MySQL / MongoDB / Redis plus config files, compresses the set,
# uploads it to S3 and prunes local archives older than RETENTION_DAYS.
set -e

# Configuration.
BACKUP_DIR="/backup/message-center"
DATE=$(date +%Y%m%d_%H%M%S)
RETENTION_DAYS=30

# Per-run backup directory (all path expansions quoted).
mkdir -p "$BACKUP_DIR/$DATE"
echo "开始备份: $DATE"

# 1. MySQL dump. Fixed: the password is passed via the MYSQL_PWD
#    environment variable instead of -p$DB_PASSWORD, so it no longer
#    appears in `ps` output on the backup host.
echo "备份MySQL数据..."
MYSQL_PWD="$DB_PASSWORD" mysqldump -h "$DB_HOST" -u "$DB_USER" \
    --single-transaction \
    --routines \
    --triggers \
    message_center > "$BACKUP_DIR/$DATE/mysql_backup.sql"

# 2. MongoDB dump.
echo "备份MongoDB数据..."
mongodump --host "$MONGO_HOST" \
    --db message_center_logs \
    --out "$BACKUP_DIR/$DATE/mongodb_backup"

# 3. Redis RDB snapshot (connects to localhost unless redis-cli is
#    configured otherwise — TODO confirm target host).
echo "备份Redis数据..."
redis-cli --rdb "$BACKUP_DIR/$DATE/redis_backup.rdb"

# 4. Config files.
echo "备份配置文件..."
cp -r /app/config "$BACKUP_DIR/$DATE/"

# 5. Compress, then drop the uncompressed copy. ${BACKUP_DIR:?} aborts
#    instead of running `rm -rf /...` if the variable is ever empty.
echo "压缩备份文件..."
tar -czf "$BACKUP_DIR/backup_$DATE.tar.gz" -C "$BACKUP_DIR" "$DATE"
rm -rf "${BACKUP_DIR:?}/$DATE"

# 6. Upload to S3.
echo "上传到云存储..."
aws s3 cp "$BACKUP_DIR/backup_$DATE.tar.gz" \
    s3://message-center-backup/daily/

# 7. Prune expired local archives.
echo "清理过期备份..."
find "$BACKUP_DIR" -name "backup_*.tar.gz" \
    -mtime +"$RETENTION_DAYS" -delete

echo "备份完成: $DATE"
📝 本篇总结
本篇《运维与扩展篇》深入解析了消息中心的监控运维体系和扩展性设计:
🎯 核心内容回顾
-
监控告警体系
- 构建了完整的APM监控体系
- 实现了多维度的性能指标收集
- 设计了智能化的告警规则和通知机制
-
性能调优实践
- JVM参数优化和GC调优策略
- 数据库连接池和SQL性能优化
- 多级缓存架构和缓存预热机制
-
系统扩展性设计
- 无状态服务架构设计
- 微服务拆分和服务治理策略
- 容器化部署和K8s编排配置
-
运维最佳实践
- 结构化日志管理
- 自动化备份恢复机制
- 服务健康检查和故障自愈
🚀 技术亮点
- 全方位监控:从系统资源到业务指标的全链路监控
- 智能告警:基于规则引擎的多渠道告警通知
- 弹性扩展:支持水平扩展的无状态架构设计
- 自动化运维:从部署到监控的全自动化流程
📖 系列预告
下一篇《实战总结篇》将为您带来:
- 🎯 业务价值分析:量化展示消息中心带来的业务提升
- 📊 性能数据对比:上线前后的关键指标对比
- 🔍 踩坑经验分享:开发过程中遇到的问题和解决方案
- 🚀 未来规划展望:消息中心的演进方向和技术趋势
- 💡 架构设计心得:从技术选型到架构设计的思考总结
💡 温馨提示:本系列文章基于真实的企业级项目实践,所有代码和配置都经过生产环境验证。如果您在实施过程中遇到问题,欢迎交流讨论!
java
) {
// 记录异常指标
long duration = System.currentTimeMillis() - startTime;
recordException(methodName, duration, e);
throw e;
} finally {
// 停止计时
sample.stop(Timer.builder("method.execution.time")
.tag("method", methodName)
.register(meterRegistry));
}
}
/**
 * Records success metrics for one invocation: increments the
 * per-method execution counter and, when duration exceeds the
 * annotation's slowThreshold, a dedicated slow-call counter plus a
 * warning log.
 */
private void recordSuccess(String methodName, long duration, long slowThreshold) {
// Per-method success counter (register() is idempotent per name+tags).
Counter.builder("method.execution.count")
.tag("method", methodName)
.tag("result", "success")
.register(meterRegistry)
.increment();
// Slow-call detection.
if (duration > slowThreshold) {
Counter.builder("method.slow.count")
.tag("method", methodName)
.register(meterRegistry)
.increment();
log.warn("检测到慢方法: method={}, duration={}ms", methodName, duration);
}
}
/**
 * Records failure metrics for one invocation, tagged with the
 * exception's simple class name, and logs the error with stack trace.
 */
private void recordException(String methodName, long duration, Exception e) {
Counter.builder("method.execution.count")
.tag("method", methodName)
.tag("result", "error")
.tag("exception", e.getClass().getSimpleName())
.register(meterRegistry)
.increment();
log.error("方法执行异常: method={}, duration={}ms", methodName, duration, e);
}
/**
 * Resolves the metric name: the annotation's explicit value when set,
 * otherwise "SimpleClassName.methodName" derived from the join point.
 */
private String getMethodName(ProceedingJoinPoint point, PerformanceMonitor monitor) {
if (StringUtils.isNotBlank(monitor.value())) {
return monitor.value();
}
MethodSignature signature = (MethodSignature) point.getSignature();
return signature.getDeclaringType().getSimpleName() + "." + signature.getName();
}
}
/**
 * Usage example for @PerformanceMonitor.
 *
 * NOTE(review): doSendMessage is not shown in this excerpt.
 */
@Service
public class MessageSendService {
// Single send: flagged as slow above 500 ms.
@PerformanceMonitor(value = "message.send", slowThreshold = 500)
public SendResult sendMessage(MessageRequest request) {
// Delegate to the actual send implementation.
return doSendMessage(request);
}
// Batch send: also records the argument list (recordArgs = true).
@PerformanceMonitor(value = "message.batch.send", recordArgs = true)
public List<SendResult> batchSendMessage(List<MessageRequest> requests) {
// Sends sequentially; each element re-enters the monitored sendMessage.
return requests.stream()
.map(this::sendMessage)
.collect(Collectors.toList());
}
}
告警规则配置
1. 告警规则定义
Prometheus告警规则:
yaml
# prometheus-alerts.yml
groups:
- name: message-center-alerts
rules:
# 系统资源告警
- alert: HighCPUUsage
expr: cpu_usage_percent > 80
for: 5m
labels:
severity: warning
service: message-center
annotations:
summary: "CPU使用率过高"
description: "CPU使用率已超过80%,当前值: {{ $value }}%"
- alert: HighMemoryUsage
expr: memory_usage_percent > 85
for: 5m
labels:
severity: warning
service: message-center
annotations:
summary: "内存使用率过高"
description: "内存使用率已超过85%,当前值: {{ $value }}%"
# 应用性能告警
- alert: HighResponseTime
expr: avg(message_processing_time_seconds) > 2
for: 3m
labels:
severity: warning
service: message-center
annotations:
summary: "响应时间过长"
description: "平均响应时间超过2秒,当前值: {{ $value }}秒"
- alert: LowSuccessRate
expr: rate(message_send_success_total[5m]) / rate(message_send_total[5m]) < 0.95
for: 5m
labels:
severity: critical
service: message-center
annotations:
summary: "消息发送成功率过低"
description: "消息发送成功率低于95%,当前值: {{ $value | humanizePercentage }}"
# 队列积压告警
- alert: MessageQueueBacklog
expr: message_queue_size > 10000
for: 2m
labels:
severity: warning
service: message-center
annotations:
summary: "消息队列积压"
description: "消息队列积压超过10000条,当前值: {{ $value }}"
# 数据库连接告警
- alert: DatabaseConnectionHigh
expr: datasource_connection_active / datasource_connection_max > 0.8
for: 3m
labels:
severity: warning
service: message-center
annotations:
summary: "数据库连接使用率过高"
description: "数据库连接使用率超过80%,当前值: {{ $value | humanizePercentage }}"
# 缓存命中率告警
- alert: LowCacheHitRate
expr: cache_hit_ratio < 0.8
for: 5m
labels:
severity: warning
service: message-center
annotations:
summary: "缓存命中率过低"
description: "缓存命中率低于80%,当前值: {{ $value | humanizePercentage }}"
# 错误率告警
- alert: HighErrorRate
expr: rate(message_error_total[5m]) > 10
for: 2m
labels:
severity: critical
service: message-center
annotations:
summary: "错误率过高"
description: "每分钟错误数超过10个,当前值: {{ $value }}"
2. 告警通知实现
多渠道告警通知:
java
/**
 * Alert notification service.
 *
 * Resolves the rule for an incoming alert event, applies a Redis-based
 * rate limit (at most 3 notifications per rule per 5-minute window),
 * then fans the formatted message out to every channel the rule
 * enables. A failing channel is logged and does not block the others.
 *
 * NOTE(review): redisTemplate is used below but not declared in this
 * excerpt, and recordAlertHistory is implemented elsewhere — both must
 * exist in the full source.
 */
@Service
public class AlertNotificationService {

    @Autowired
    private List<AlertChannel> alertChannels;

    @Autowired
    private AlertRuleService alertRuleService;

    /**
     * Sends an alert through all channels enabled for its rule.
     *
     * @param alertEvent the triggering event; silently dropped when its
     *                   rule is missing/disabled or the rate limit hits
     */
    public void sendAlert(AlertEvent alertEvent) {
        // 1. Resolve the rule; drop events with no active rule.
        AlertRule rule = alertRuleService.getRule(alertEvent.getRuleName());
        if (rule == null || !rule.isEnabled()) {
            return;
        }
        // 2. Rate limiting — avoid alert storms from a flapping rule.
        if (isRateLimited(alertEvent)) {
            log.info("告警被频率限制跳过: {}", alertEvent.getRuleName());
            return;
        }
        // 3. Build the message once and reuse it for every channel.
        AlertMessage message = buildAlertMessage(alertEvent, rule);
        // 4. Fan out to the rule's channels.
        for (AlertChannel channel : alertChannels) {
            if (!rule.getChannels().contains(channel.getChannelType())) {
                continue;
            }
            try {
                channel.sendAlert(message);
                log.info("告警发送成功: channel={}, rule={}",
                        channel.getChannelType(), alertEvent.getRuleName());
            } catch (Exception e) {
                log.error("告警发送失败: channel={}, rule={}",
                        channel.getChannelType(), alertEvent.getRuleName(), e);
            }
        }
        // 5. Persist the alert for auditing.
        recordAlertHistory(alertEvent, message);
    }

    /**
     * Redis-backed rate limit: at most 3 alerts per rule per 5-minute
     * window.
     *
     * Fixed: the original read-then-set/increment sequence was not
     * atomic (concurrent alerts could all pass the check, and the
     * window TTL could be lost) and cast the templated Object value to
     * String. A single atomic INCR fixes both; the TTL is set only by
     * the caller that created the key (increment returned 1).
     */
    private boolean isRateLimited(AlertEvent alertEvent) {
        String key = "alert:rate:" + alertEvent.getRuleName();
        Long count = redisTemplate.opsForValue().increment(key);
        if (count != null && count == 1L) {
            // First alert in this window — start the 5-minute TTL.
            redisTemplate.expire(key, 300, TimeUnit.SECONDS);
        }
        // Fail open (don't suppress alerts) if Redis returned null.
        return count != null && count > 3L;
    }

    /**
     * Builds the channel-agnostic alert message from event + rule.
     */
    private AlertMessage buildAlertMessage(AlertEvent alertEvent, AlertRule rule) {
        return AlertMessage.builder()
                .title(String.format("[%s] %s", alertEvent.getSeverity().toUpperCase(), rule.getTitle()))
                .content(formatAlertContent(alertEvent, rule))
                .severity(alertEvent.getSeverity())
                .timestamp(alertEvent.getTimestamp())
                .tags(alertEvent.getTags())
                .build();
    }

    /**
     * Renders the markdown body shared by all channels.
     */
    private String formatAlertContent(AlertEvent alertEvent, AlertRule rule) {
        StringBuilder content = new StringBuilder();
        content.append("**告警详情**\n");
        content.append("- 服务: ").append(alertEvent.getService()).append("\n");
        content.append("- 规则: ").append(rule.getName()).append("\n");
        content.append("- 级别: ").append(alertEvent.getSeverity()).append("\n");
        content.append("- 时间: ").append(alertEvent.getTimestamp()).append("\n");
        content.append("- 描述: ").append(rule.getDescription()).append("\n");
        if (!alertEvent.getTags().isEmpty()) {
            content.append("- 标签: ");
            alertEvent.getTags().forEach((k, v) ->
                    content.append(k).append("=").append(v).append(" "));
            content.append("\n");
        }
        return content.toString();
    }
}
/**
 * DingTalk alert channel: posts a signed markdown message to a
 * custom-robot webhook.
 */
@Component
public class DingTalkAlertChannel implements AlertChannel {

    @Value("${alert.dingtalk.webhook}")
    private String webhookUrl;

    @Value("${alert.dingtalk.secret}")
    private String secret;

    // RestTemplate is thread-safe; build it once instead of per call.
    private final RestTemplate restTemplate = new RestTemplate();

    @Override
    public String getChannelType() {
        return "dingtalk";
    }

    /**
     * Sends one alert to the webhook.
     *
     * Fixed: the query string previously contained the mojibake
     * "×tamp=" (an HTML-entity-corrupted "&timestamp"); DingTalk
     * requires the literal "&timestamp=...&sign=..." parameters,
     * otherwise every request fails the signature check.
     */
    @Override
    public void sendAlert(AlertMessage message) {
        try {
            // Markdown payload expected by the custom-robot API.
            DingTalkMessage dingMessage = DingTalkMessage.builder()
                    .msgtype("markdown")
                    .markdown(DingTalkMarkdown.builder()
                            .title(message.getTitle())
                            .text(message.getContent())
                            .build())
                    .build();
            // Sign "timestamp\nsecret" with HMAC-SHA256.
            String timestamp = String.valueOf(System.currentTimeMillis());
            String sign = calculateSign(timestamp, secret);
            String url = webhookUrl + "&timestamp=" + timestamp + "&sign=" + sign;
            HttpHeaders headers = new HttpHeaders();
            headers.setContentType(MediaType.APPLICATION_JSON);
            HttpEntity<DingTalkMessage> entity = new HttpEntity<>(dingMessage, headers);
            ResponseEntity<String> response = restTemplate.postForEntity(url, entity, String.class);
            if (response.getStatusCode().is2xxSuccessful()) {
                log.info("钉钉告警发送成功");
            } else {
                log.error("钉钉告警发送失败: {}", response.getBody());
            }
        } catch (Exception e) {
            log.error("钉钉告警发送异常", e);
            throw new RuntimeException("钉钉告警发送失败", e);
        }
    }

    /**
     * Computes the DingTalk robot signature:
     * URL-encode(Base64(HMAC-SHA256(timestamp + "\n" + secret, secret))).
     */
    private String calculateSign(String timestamp, String secret) throws Exception {
        String stringToSign = timestamp + "\n" + secret;
        Mac mac = Mac.getInstance("HmacSHA256");
        mac.init(new SecretKeySpec(secret.getBytes("UTF-8"), "HmacSHA256"));
        byte[] signData = mac.doFinal(stringToSign.getBytes("UTF-8"));
        return URLEncoder.encode(Base64.getEncoder().encodeToString(signData), "UTF-8");
    }
}
/**
 * Email alert channel: delivers alert messages as HTML mail via
 * JavaMailSender to a configured recipient list.
 */
@Component
public class EmailAlertChannel implements AlertChannel {
@Autowired
private JavaMailSender mailSender;
// Sender address, from configuration.
@Value("${alert.email.from}")
private String fromEmail;
// Recipient list, from configuration.
@Value("${alert.email.to}")
private List<String> toEmails;
@Override
public String getChannelType() {
return "email";
}
/**
 * Sends one alert as an HTML email; any failure is wrapped in a
 * RuntimeException so the dispatcher can log it per channel.
 */
@Override
public void sendAlert(AlertMessage message) {
try {
MimeMessage mimeMessage = mailSender.createMimeMessage();
// multipart = true, UTF-8 body.
MimeMessageHelper helper = new MimeMessageHelper(mimeMessage, true, "UTF-8");
helper.setFrom(fromEmail);
helper.setTo(toEmails.toArray(new String[0]));
helper.setSubject(message.getTitle());
helper.setText(formatEmailContent(message), true);
mailSender.send(mimeMessage);
log.info("邮件告警发送成功");
} catch (Exception e) {
log.error("邮件告警发送失败", e);
throw new RuntimeException("邮件告警发送失败", e);
}
}
/**
 * Renders the alert as minimal HTML; the content goes in a <pre>
 * block to preserve its markdown-ish formatting.
 */
private String formatEmailContent(AlertMessage message) {
return String.format(
"<html><body>" +
"<h2>%s</h2>" +
"<pre>%s</pre>" +
"<p><small>发送时间: %s</small></p>" +
"</body></html>",
message.getTitle(),
message.getContent(),
message.getTimestamp()
);
}
}
🚀 性能调优实践
JVM调优
1. JVM参数配置
生产环境JVM配置:
bash
#!/bin/bash
# jvm-config.sh — production JVM options, targeting JDK 11 (the image
# used elsewhere in this series is openjdk:11).

# Heap: fixed size to avoid runtime resizing.
JVM_OPTS="-Xms4g -Xmx4g"

# NOTE(review): an explicit -Xmn fixes the young generation, which
# prevents G1 from adapting to its pause-time goal; kept for parity
# with the original tuning, but consider removing it.
JVM_OPTS="$JVM_OPTS -Xmn1g"

# G1 collector with a 200ms pause target and 16MB regions.
JVM_OPTS="$JVM_OPTS -XX:+UseG1GC"
JVM_OPTS="$JVM_OPTS -XX:MaxGCPauseMillis=200"
JVM_OPTS="$JVM_OPTS -XX:G1HeapRegionSize=16m"

# GC logging. Fixed: -XX:+PrintGC* / -Xloggc / -XX:+UseGCLogFileRotation
# were removed in JDK 9 (unified logging, JEP 158/271); on JDK 11 they
# abort startup with "Unrecognized VM option". The -Xlog line below
# replaces all of them: gc+safepoint logs with timestamps, rotated
# across 5 files of 100MB.
JVM_OPTS="$JVM_OPTS -Xlog:gc*,safepoint:file=/var/log/message-center/gc.log:time,uptime,level,tags:filecount=5,filesize=100m"

# OOM diagnostics: dump the heap for post-mortem analysis.
JVM_OPTS="$JVM_OPTS -XX:+HeapDumpOnOutOfMemoryError"
JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=/var/log/message-center/"

# String deduplication (G1 only). Its statistics flag also moved to
# unified logging (-Xlog:stringdedup*) on JDK 9+.
JVM_OPTS="$JVM_OPTS -XX:+UseStringDeduplication"

# Launch (JVM_OPTS intentionally unquoted so it word-splits into flags).
java $JVM_OPTS -jar message-center.jar
2. GC调优策略
GC监控和分析:
java
/**
 * JVM GC monitoring service built on java.lang.management MXBeans:
 * periodically exports collection counts/times and memory usage, and
 * offers threshold-based anomaly checks.
 *
 * NOTE(review): meterRegistry, log, alertService and the helpers
 * calculateAverageGCTime / calculateGCFrequency are referenced but not
 * declared in this excerpt.
 */
@Service
public class GCMonitorService {
private final List<GarbageCollectorMXBean> gcBeans;
private final MemoryMXBean memoryBean;
public GCMonitorService() {
this.gcBeans = ManagementFactory.getGarbageCollectorMXBeans();
this.memoryBean = ManagementFactory.getMemoryMXBean();
}
/**
 * Collects GC statistics once per minute.
 * NOTE(review): the Gauge.builder calls re-run on every tick;
 * Micrometer registration is idempotent per meter id, but this
 * belongs in the constructor rather than a scheduled method.
 */
@Scheduled(fixedDelay = 60000) // collect once per minute
public void collectGCStats() {
for (GarbageCollectorMXBean gcBean : gcBeans) {
String gcName = gcBean.getName();
long collectionCount = gcBean.getCollectionCount();
long collectionTime = gcBean.getCollectionTime();
// Gauges sample the MXBean lazily via the method references.
Gauge.builder("jvm.gc.collection.count")
.tag("gc", gcName)
.register(meterRegistry, gcBean, GarbageCollectorMXBean::getCollectionCount);
Gauge.builder("jvm.gc.collection.time")
.tag("gc", gcName)
.register(meterRegistry, gcBean, GarbageCollectorMXBean::getCollectionTime);
log.debug("GC统计: name={}, count={}, time={}ms", gcName, collectionCount, collectionTime);
}
// Heap / non-heap usage snapshot.
MemoryUsage heapUsage = memoryBean.getHeapMemoryUsage();
MemoryUsage nonHeapUsage = memoryBean.getNonHeapMemoryUsage();
log.debug("内存使用: heap={}MB, nonHeap={}MB",
heapUsage.getUsed() / 1024 / 1024,
nonHeapUsage.getUsed() / 1024 / 1024);
}
/**
 * Threshold checks: alerts when average GC pause exceeds 100ms or GC
 * frequency exceeds 10 collections/minute.
 */
public void checkGCAnomaly() {
for (GarbageCollectorMXBean gcBean : gcBeans) {
long avgGCTime = calculateAverageGCTime(gcBean);
// Long average pauses (> 100 ms).
if (avgGCTime > 100) {
alertService.sendAlert("GC时间过长",
String.format("GC %s 平均时间: %dms", gcBean.getName(), avgGCTime));
}
// Excessive GC frequency (> 10 per minute).
long gcFrequency = calculateGCFrequency(gcBean);
if (gcFrequency > 10) {
alertService.sendAlert("GC频率过高",
String.format("GC %s 频率: %d次/分钟", gcBean.getName(), gcFrequency));
}
}
}
}
数据库调优
1. 连接池优化
HikariCP连接池配置:
java
/**
 * HikariCP data-source tuning for the message-center MySQL database.
 *
 * NOTE(review): credentials are hard-coded for illustration only; in a
 * real deployment load them from configuration / secret management,
 * never from source code.
 */
@Configuration
public class DataSourceOptimizationConfig {

    /**
     * Primary pooled DataSource.
     */
    @Bean
    @Primary
    public DataSource optimizedDataSource() {
        HikariConfig config = new HikariConfig();
        // Connection coordinates.
        config.setJdbcUrl("jdbc:mysql://localhost:3306/message_center");
        config.setUsername("root");
        config.setPassword("password");
        config.setDriverClassName("com.mysql.cj.jdbc.Driver");
        // Pool sizing.
        config.setMaximumPoolSize(20); // hard cap on connections
        config.setMinimumIdle(5); // warm floor of idle connections
        // Timeouts / lifetimes.
        config.setConnectionTimeout(30000); // 30s to obtain a connection
        config.setIdleTimeout(600000); // retire connections idle > 10 min
        config.setMaxLifetime(1800000); // recycle connections after 30 min
        // Validation. Fixed: connectionTestQuery("SELECT 1") was removed —
        // MySQL Connector/J is JDBC4-compliant, and HikariCP documents
        // that setting a test query forgoes the faster Connection.isValid()
        // validation path.
        config.setValidationTimeout(5000);
        // Diagnostics.
        config.setLeakDetectionThreshold(60000); // warn on 60s checkouts
        config.setRegisterMbeans(true); // expose pool stats over JMX
        // MySQL statement-cache / batching optimizations.
        config.addDataSourceProperty("cachePrepStmts", "true");
        config.addDataSourceProperty("prepStmtCacheSize", "250");
        config.addDataSourceProperty("prepStmtCacheSqlLimit", "2048");
        config.addDataSourceProperty("useServerPrepStmts", "true");
        config.addDataSourceProperty("useLocalSessionState", "true");
        config.addDataSourceProperty("rewriteBatchedStatements", "true");
        config.addDataSourceProperty("cacheResultSetMetadata", "true");
        config.addDataSourceProperty("cacheServerConfiguration", "true");
        config.addDataSourceProperty("elideSetAutoCommits", "true");
        config.addDataSourceProperty("maintainTimeStats", "false");
        return new HikariDataSource(config);
    }
}
/**
 * Periodic HikariCP pool monitor: exports pool gauges and logs
 * warnings on saturation.
 *
 * NOTE(review): meterRegistry is referenced but not declared in this
 * excerpt. Also, Gauge registration is idempotent per meter id, so
 * re-running it on every tick is redundant — ideally register once at
 * startup instead of inside the scheduled method.
 */
@Component
public class ConnectionPoolMonitor {

    @Autowired
    private HikariDataSource dataSource;

    /**
     * Samples the pool every 30 seconds.
     */
    @Scheduled(fixedDelay = 30000)
    public void monitorConnectionPool() {
        HikariPoolMXBean poolBean = dataSource.getHikariPoolMXBean();
        int activeConnections = poolBean.getActiveConnections();
        int totalConnections = poolBean.getTotalConnections();
        int threadsAwaitingConnection = poolBean.getThreadsAwaitingConnection();
        // Gauges sample the MXBean lazily via the method references.
        Gauge.builder("hikari.connections.active")
                .register(meterRegistry, poolBean, HikariPoolMXBean::getActiveConnections);
        Gauge.builder("hikari.connections.idle")
                .register(meterRegistry, poolBean, HikariPoolMXBean::getIdleConnections);
        Gauge.builder("hikari.connections.total")
                .register(meterRegistry, poolBean, HikariPoolMXBean::getTotalConnections);
        // Saturation check. Fixed: guard against division by zero when
        // the pool has not created any connections yet.
        if (totalConnections > 0) {
            double utilizationRate = (double) activeConnections / totalConnections;
            if (utilizationRate > 0.8) {
                log.warn("连接池使用率过高: {}%", utilizationRate * 100);
            }
        }
        if (threadsAwaitingConnection > 0) {
            log.warn("有{}个线程等待数据库连接", threadsAwaitingConnection);
        }
    }
}
2. SQL优化
慢查询监控和优化:
java
/**
 * In-memory SQL performance monitor: aggregates per-statement metrics
 * keyed by a normalized SQL string and periodically logs a top-10
 * slowest report.
 *
 * NOTE(review): meterRegistry, log, SQLMetrics, SQLPerformanceReport
 * and JsonUtil are referenced but not declared in this excerpt.
 */
@Component
public class SQLPerformanceMonitor {
// Normalized SQL -> aggregated metrics; ConcurrentHashMap because
// many query threads update concurrently.
private final Map<String, SQLMetrics> sqlMetricsMap = new ConcurrentHashMap<>();
/**
 * Records one SQL execution and flags slow queries (> 1 second).
 */
public void recordSQLExecution(String sql, long executionTime, boolean success) {
String sqlKey = normalizeSql(sql);
SQLMetrics metrics = sqlMetricsMap.computeIfAbsent(sqlKey, k -> new SQLMetrics());
metrics.recordExecution(executionTime, success);
// Slow-query detection (threshold: 1 second).
if (executionTime > 1000) {
log.warn("检测到慢查询: sql={}, time={}ms", sqlKey, executionTime);
// NOTE(review): tagging the counter with full SQL text creates
// high tag cardinality; consider a hash or statement id instead.
Counter.builder("sql.slow.count")
.tag("sql", sqlKey)
.register(meterRegistry)
.increment();
}
}
/**
 * Normalizes SQL so different literal values aggregate under one key:
 * digits and quoted strings become "?", whitespace is collapsed.
 * NOTE(review): the digit rule also rewrites digits inside
 * identifiers (e.g. table_1 -> table_?).
 */
private String normalizeSql(String sql) {
return sql.replaceAll("\\d+", "?") // numeric literals
.replaceAll("'[^']*'", "?") // string literals
.replaceAll("\\s+", " ") // collapse whitespace
.trim();
}
/**
 * Every 5 minutes, logs the 10 statements with the highest average
 * execution time.
 */
@Scheduled(fixedDelay = 300000) // report every 5 minutes
public void generatePerformanceReport() {
List<SQLPerformanceReport> reports = sqlMetricsMap.entrySet().stream()
.map(entry -> {
String sql = entry.getKey();
SQLMetrics metrics = entry.getValue();
return SQLPerformanceReport.builder()
.sql(sql)
.totalExecutions(metrics.getTotalExecutions())
.averageTime(metrics.getAverageTime())
.maxTime(metrics.getMaxTime())
.errorRate(metrics.getErrorRate())
.build();
})
.sorted((a, b) -> Long.compare(b.getAverageTime(), a.getAverageTime()))
.limit(10) // keep the 10 slowest by average time
.collect(Collectors.toList());
log.info("SQL性能报告: {}", JsonUtil.toJson(reports));
}
}
/**
 * Heuristic SQL optimization advisor.
 *
 * Inspects a statement (plus its observed execution time) and returns
 * rule-of-thumb suggestions: possible missing index, SELECT *,
 * excessive JOINs, ORDER BY without LIMIT.
 */
@Service
public class SQLOptimizationService {

    /**
     * Analyzes one SQL statement.
     *
     * @param sql           the statement text (matched case-insensitively)
     * @param executionTime observed execution time in milliseconds
     * @return possibly-empty list of suggestions
     */
    public List<OptimizationSuggestion> analyzeSql(String sql, long executionTime) {
        List<OptimizationSuggestion> suggestions = new ArrayList<>();
        // Lowercase once instead of once per check.
        String lowerSql = sql.toLowerCase();
        // 1. Slow filtered query — likely missing index.
        if (lowerSql.contains("where") && executionTime > 500) {
            suggestions.add(OptimizationSuggestion.builder()
                    .type("INDEX")
                    .description("考虑在WHERE条件字段上添加索引")
                    .priority("HIGH")
                    .build());
        }
        // 2. SELECT * pulls unneeded columns.
        if (lowerSql.contains("select *")) {
            suggestions.add(OptimizationSuggestion.builder()
                    .type("SELECT")
                    .description("避免使用SELECT *,明确指定需要的字段")
                    .priority("MEDIUM")
                    .build());
        }
        // 3. Too many JOINs. Fixed: the original counted "join" via a
        // per-char stream re-join followed by split("join"), which is
        // O(n) garbage and undercounts a trailing "join" (String.split
        // drops trailing empty strings); a plain indexOf scan is exact.
        int joinCount = countOccurrences(lowerSql, "join");
        if (joinCount > 3) {
            suggestions.add(OptimizationSuggestion.builder()
                    .type("JOIN")
                    .description("考虑减少JOIN的数量或使用子查询")
                    .priority("MEDIUM")
                    .build());
        }
        // 4. ORDER BY without LIMIT sorts the full result set.
        if (lowerSql.contains("order by") && !lowerSql.contains("limit")) {
            suggestions.add(OptimizationSuggestion.builder()
                    .type("LIMIT")
                    .description("ORDER BY查询建议添加LIMIT限制结果集大小")
                    .priority("MEDIUM")
                    .build());
        }
        return suggestions;
    }

    /**
     * Counts non-overlapping occurrences of needle in haystack.
     */
    private int countOccurrences(String haystack, String needle) {
        int count = 0;
        int idx = haystack.indexOf(needle);
        while (idx != -1) {
            count++;
            idx = haystack.indexOf(needle, idx + needle.length());
        }
        return count;
    }
}
缓存优化
1. 多级缓存策略
缓存架构优化:
java
/**
 * Two-level cache: Caffeine (L1, in-process) in front of Redis (L2,
 * shared). Reads fall through L1 -> L2 and back-fill L1 on an L2 hit;
 * writes and evictions go to both levels.
 *
 * NOTE(review): meterRegistry and log are referenced but not declared
 * in this excerpt. Also there is no cross-node L1 invalidation:
 * evict() clears only the local L1, so other instances may serve a
 * stale L1 entry until it expires (bounded by the 5-minute write TTL).
 */
@Component
public class MultiLevelCacheManager {
private final Cache<String, Object> l1Cache; // local (Caffeine)
private final RedisTemplate<String, Object> l2Cache; // distributed (Redis)
public MultiLevelCacheManager(RedisTemplate<String, Object> redisTemplate) {
// L1: bounded size, short TTLs, stats enabled for reporting.
this.l1Cache = Caffeine.newBuilder()
.maximumSize(10000)
.expireAfterWrite(5, TimeUnit.MINUTES)
.expireAfterAccess(2, TimeUnit.MINUTES)
.recordStats()
.removalListener((key, value, cause) -> {
log.debug("L1缓存移除: key={}, cause={}", key, cause);
})
.build();
this.l2Cache = redisTemplate;
}
/**
 * Reads a value: L1 first, then L2 (back-filling L1 on a hit).
 * Returns null on a miss.
 * NOTE(review): the `type` parameter is unused — the cast is
 * unchecked and would only fail later at the call site.
 */
@SuppressWarnings("unchecked")
public <T> T get(String key, Class<T> type) {
// 1. L1 lookup.
Object value = l1Cache.getIfPresent(key);
if (value != null) {
recordCacheHit("L1", key);
return (T) value;
}
// 2. L2 lookup.
value = l2Cache.opsForValue().get(key);
if (value != null) {
// Back-fill L1 so subsequent reads stay local.
l1Cache.put(key, value);
recordCacheHit("L2", key);
return (T) value;
}
recordCacheMiss(key);
return null;
}
/**
 * Writes to both levels; ttl applies to L2 only (L1 uses its own
 * fixed expiry policy set in the constructor).
 */
public void put(String key, Object value, Duration ttl) {
l1Cache.put(key, value);
l2Cache.opsForValue().set(key, value, ttl);
}
/**
 * Removes the key from both levels (local L1 only — see class note).
 */
public void evict(String key) {
l1Cache.invalidate(key);
l2Cache.delete(key);
}
/**
 * Logs and exports L1 statistics once per minute.
 * NOTE(review): re-registering the gauges on every tick is
 * redundant; registration belongs in the constructor.
 */
@Scheduled(fixedDelay = 60000)
public void reportCacheStats() {
CacheStats stats = l1Cache.stats();
// L1 gauges sampled from the Caffeine stats snapshot.
Gauge.builder("cache.l1.hit.ratio")
.register(meterRegistry, stats, CacheStats::hitRate);
Gauge.builder("cache.l1.size")
.register(meterRegistry, l1Cache, cache -> cache.estimatedSize());
log.info("L1缓存统计: hitRate={}, size={}, evictions={}",
stats.hitRate(), l1Cache.estimatedSize(), stats.evictionCount());
}
// Increments the per-level hit counter.
private void recordCacheHit(String level, String key) {
Counter.builder("cache.hit")
.tag("level", level)
.register(meterRegistry)
.increment();
}
// Increments the overall miss counter.
private void recordCacheMiss(String key) {
Counter.builder("cache.miss")
.register(meterRegistry)
.increment();
}
}
/**
 * Cache warm-up service: pre-loads hot data (message templates, user and
 * system configuration) into the multi-level cache at startup and refreshes
 * hot templates on a fixed schedule.
 */
@Service
public class CacheWarmupService {

    @Autowired
    private MultiLevelCacheManager cacheManager;

    @Autowired
    private MessageTemplateService templateService;

    // NOTE(review): the original body called templateAccessService without
    // declaring it; the concrete type should be confirmed against the
    // actual access-statistics service.
    @Autowired
    private TemplateAccessService templateAccessService;

    /**
     * Warms the cache once the application context is fully started.
     * Failures are logged but never abort startup.
     */
    @EventListener(ApplicationReadyEvent.class)
    public void warmupCache() {
        log.info("开始缓存预热");
        try {
            // 1. Message templates.
            warmupMessageTemplates();
            // 2. User configuration.
            warmupUserConfigs();
            // 3. System configuration.
            warmupSystemConfigs();
            log.info("缓存预热完成");
        } catch (Exception e) {
            log.error("缓存预热失败", e);
        }
    }

    /** Pre-loads all active message templates with a 1-hour TTL. */
    private void warmupMessageTemplates() {
        List<MessageTemplate> templates = templateService.getAllActiveTemplates();
        for (MessageTemplate template : templates) {
            String key = "template:" + template.getId();
            cacheManager.put(key, template, Duration.ofHours(1));
        }
        log.info("预热消息模板: count={}", templates.size());
    }

    /** Pre-loads user configuration. */
    private void warmupUserConfigs() {
        // NOTE(review): invoked by warmupCache() but missing from the original
        // source; implement against the user-config service.
    }

    /** Pre-loads system configuration. */
    private void warmupSystemConfigs() {
        // NOTE(review): invoked by warmupCache() but missing from the original
        // source; implement against the system-config store.
    }

    /**
     * Refreshes hot templates every 5 minutes so their TTL never lapses
     * while they are in demand.
     */
    @Scheduled(fixedDelay = 300000)
    public void refreshHotData() {
        List<String> hotTemplateIds = getHotTemplateIds();
        for (String templateId : hotTemplateIds) {
            MessageTemplate template = templateService.getById(templateId);
            if (template != null) {
                String key = "template:" + templateId;
                cacheManager.put(key, template, Duration.ofHours(1));
            }
        }
    }

    /** Returns the top-100 template ids by access frequency. */
    private List<String> getHotTemplateIds() {
        return templateAccessService.getTopAccessedTemplates(100);
    }
}
🔧 系统扩展性设计
水平扩展策略
1. 无状态服务设计
无状态架构实现:
java
/**
 * Configuration for stateless service instances: session state lives in
 * Redis so any node can serve any request.
 */
@Configuration
public class StatelessServiceConfig {

    /**
     * Redis template used for session storage.
     *
     * <p>NOTE(review): the original referenced an undeclared
     * {@code redisConnectionFactory}; it is now injected as a bean-method
     * parameter, which Spring resolves from the context.
     */
    @Bean
    public RedisTemplate<String, Object> sessionRedisTemplate(
            RedisConnectionFactory redisConnectionFactory) {
        RedisTemplate<String, Object> template = new RedisTemplate<>();
        template.setConnectionFactory(redisConnectionFactory);
        // JSON serialization keeps session payloads readable and portable.
        template.setDefaultSerializer(new GenericJackson2JsonRedisSerializer());
        template.setKeySerializer(new StringRedisSerializer());
        return template;
    }

    /**
     * Distributed session repository with a 30-minute idle timeout.
     * The template is injected rather than invoked directly, so this works
     * regardless of configuration-class proxying.
     */
    @Bean
    public SessionRepository<RedisSession> sessionRepository(
            RedisTemplate<String, Object> sessionRedisTemplate) {
        RedisSessionRepository repository = new RedisSessionRepository(sessionRedisTemplate);
        repository.setDefaultMaxInactiveInterval(Duration.ofMinutes(30));
        return repository;
    }
}
/**
 * Externalized request-context store.
 *
 * <p>Keeps per-request state in Redis (rather than in instance fields) so the
 * service itself stays stateless and horizontally scalable. Contexts expire
 * automatically after 10 minutes.
 */
@Service
public class StatelessContextService {

    private static final String KEY_PREFIX = "context:";
    private static final Duration CONTEXT_TTL = Duration.ofMinutes(10);

    @Autowired
    private RedisTemplate<String, Object> redisTemplate;

    /** Persists the context for the given request id with a 10-minute TTL. */
    public void storeContext(String requestId, RequestContext context) {
        redisTemplate.opsForValue().set(contextKey(requestId), context, CONTEXT_TTL);
    }

    /** Returns the stored context, or {@code null} when absent or expired. */
    public RequestContext getContext(String requestId) {
        return (RequestContext) redisTemplate.opsForValue().get(contextKey(requestId));
    }

    /** Removes the stored context for the given request id. */
    public void clearContext(String requestId) {
        redisTemplate.delete(contextKey(requestId));
    }

    /** Builds the Redis key for a request id. */
    private String contextKey(String requestId) {
        return KEY_PREFIX + requestId;
    }
}
2. 负载均衡配置
Nginx负载均衡配置:
nginx
# nginx.conf
# Rate-limit zone definition. FIX: limit_req_zone is only valid in the
# http{} context; the original declared it inside server{}, which nginx
# rejects at configuration load ("directive is not allowed here").
limit_req_zone $binary_remote_addr zone=api:10m rate=100r/s;

upstream message-center-backend {
    # Load-balancing strategy: ip_hash for session affinity.
    ip_hash;
    # Backend servers.
    server 192.168.1.10:8080 weight=3 max_fails=3 fail_timeout=30s;
    server 192.168.1.11:8080 weight=3 max_fails=3 fail_timeout=30s;
    server 192.168.1.12:8080 weight=2 max_fails=3 fail_timeout=30s;
    # Standby server, used only when all primaries are down.
    server 192.168.1.13:8080 backup;
    # Active health checks.
    # NOTE: the check* directives require the upstream_check module
    # (Tengine / nginx_http_upstream_check_module); stock nginx only has
    # passive checks via max_fails/fail_timeout.
    check interval=3000 rise=2 fall=3 timeout=1000 type=http;
    check_http_send "GET /actuator/health HTTP/1.0\r\n\r\n";
    check_http_expect_alive http_2xx http_3xx;
}
server {
    listen 80;
    server_name message-center.example.com;
    # Access/error logs.
    access_log /var/log/nginx/message-center.access.log main;
    error_log /var/log/nginx/message-center.error.log;
    location / {
        # Apply rate limiting (zone declared above in http context).
        limit_req zone=api burst=200 nodelay;
        # Proxy to the backend pool.
        proxy_pass http://message-center-backend;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        # Timeouts.
        proxy_connect_timeout 5s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;
        # Buffering.
        proxy_buffering on;
        proxy_buffer_size 4k;
        proxy_buffers 8 4k;
        # Retry the next upstream on connection/5xx failures.
        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503;
        proxy_next_upstream_tries 3;
        proxy_next_upstream_timeout 10s;
    }
    # Health-check endpoint (not access-logged).
    location /health {
        access_log off;
        proxy_pass http://message-center-backend/actuator/health;
    }
    # Long-lived caching for static assets.
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }
}
微服务拆分策略
1. 服务拆分原则
领域驱动的服务拆分:
markdown
服务拆分策略:
1. 消息接入服务 (message-gateway)
- 职责:API网关、请求路由、限流熔断
- 技术栈:Spring Cloud Gateway
2. 消息处理服务 (message-processor)
- 职责:消息解析、模板渲染、路由决策
- 技术栈:Spring Boot + RocketMQ
3. 通道管理服务 (channel-manager)
- 职责:通道适配、发送执行、状态回调
- 技术栈:Spring Boot + 各种SDK
4. 用户配置服务 (user-config)
- 职责:用户偏好、订阅管理、权限控制
- 技术栈:Spring Boot + MySQL
5. 数据统计服务 (data-analytics)
- 职责:数据收集、统计分析、报表生成
- 技术栈:Spring Boot + ClickHouse
服务间通信设计:
java
/**
 * Inter-service communication interface for the channel-manager service.
 *
 * <p>Declarative Feign client; on failure the calls fall back to
 * {@link ChannelServiceFallback}.
 */
@FeignClient(name = "channel-manager", fallback = ChannelServiceFallback.class)
public interface ChannelService {

    /** Sends a single message through the channel-manager. */
    @PostMapping("/api/channels/send")
    SendResult sendMessage(@RequestBody SendRequest request);

    /** Queries the current status of the given channel. */
    @GetMapping("/api/channels/{channelId}/status")
    ChannelStatus getChannelStatus(@PathVariable String channelId);

    /** Sends a batch of messages; results are returned per request. */
    @PostMapping("/api/channels/batch-send")
    List<SendResult> batchSendMessage(@RequestBody List<SendRequest> requests);
}
/**
 * Degraded implementation of {@link ChannelService}, used by Feign when the
 * channel-manager service is unreachable or the circuit is open.
 */
@Component
public class ChannelServiceFallback implements ChannelService {

    // NOTE(review): the original body used retryQueueService without
    // declaring it; injected here so the class actually compiles.
    @Autowired
    private RetryQueueService retryQueueService;

    /**
     * Fallback for single sends: the request is parked in the retry queue
     * instead of being dropped, and a non-success result is returned.
     */
    @Override
    public SendResult sendMessage(SendRequest request) {
        log.warn("通道服务不可用,执行降级策略: requestId={}", request.getRequestId());
        retryQueueService.addToRetryQueue(request);
        return SendResult.builder()
                .success(false)
                .message("服务暂时不可用,已加入重试队列")
                .build();
    }

    /** Fallback for status queries: report the status as unknown. */
    @Override
    public ChannelStatus getChannelStatus(String channelId) {
        return ChannelStatus.UNKNOWN;
    }

    /** Fallback for batch sends: degrade each request individually. */
    @Override
    public List<SendResult> batchSendMessage(List<SendRequest> requests) {
        return requests.stream()
                .map(this::sendMessage)
                .collect(Collectors.toList());
    }
}
/**
* 分布式事务协调
*/
@Service
public class DistributedTransactionCoordinator {
@Autowired
private MessageProcessorService processorService;
@Autowired
private ChannelService channelService;
@Autowired
private UserConfigService userConfigService;
/**
* 协调消息发送事务
*/
@GlobalTransactional(rollbackFor = Exception.class)
public void coordinateMessageSending(MessageRequest request) {
try {
// 1. 处理消息
ProcessResult processResult = processorService.processMessage(request);
// 2. 检查用户配置
UserConfig userConfig = userConfigService.getUserConfig(request.getUserId());
// 3. 发送消息
SendResult sendResult = channelService.sendMessage(buildSendRequest(processResult, userConfig));
if (!sendResult.isSuccess()) {
throw new MessageSendException("消息发送失败: " + sendResult.getMessage());
}
} catch (Exception e)