订单事件消费者迁移方案 - 幂等性与可靠性设计
1. 整体架构设计
1.1 幂等性层次
- 消息幂等: 防止同一消息被重复消费
- 业务幂等: 防止同一业务操作被重复执行
- 状态机幂等: 防止状态机重复流转
1.2 核心组件
- 幂等表(idempotent_record)
- 分布式锁
- 事务管理
- 重试机制
- 兜底补偿
2. 数据库设计
2.1 幂等记录表
less
CREATE TABLE `idempotent_record` (
`id` BIGINT(20) NOT NULL AUTO_INCREMENT,
`biz_id` VARCHAR(64) NOT NULL COMMENT '业务ID(订单ID)',
`event_type` VARCHAR(64) NOT NULL COMMENT '事件类型',
`message_id` VARCHAR(128) NOT NULL COMMENT '消息ID',
`idempotent_key` VARCHAR(256) NOT NULL COMMENT '幂等键',
`status` TINYINT(2) NOT NULL DEFAULT 0 COMMENT '处理状态:0-处理中,1-成功,2-失败',
`retry_count` INT(11) DEFAULT 0 COMMENT '重试次数',
`error_msg` TEXT COMMENT '错误信息',
`request_data` TEXT COMMENT '请求数据',
`response_data` TEXT COMMENT '响应数据',
`created_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
`updated_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`expired_time` DATETIME NOT NULL COMMENT '过期时间',
PRIMARY KEY (`id`),
UNIQUE KEY `uk_idempotent_key` (`idempotent_key`),
KEY `idx_biz_id` (`biz_id`),
KEY `idx_message_id` (`message_id`),
KEY `idx_status_retry` (`status`, `retry_count`),
KEY `idx_expired_time` (`expired_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='幂等记录表';
-- 状态机流转记录表(可选,用于审计和兜底)
CREATE TABLE `state_machine_transition_log` (
`id` BIGINT(20) NOT NULL AUTO_INCREMENT,
`biz_id` VARCHAR(64) NOT NULL,
`event_type` VARCHAR(64) NOT NULL,
`from_state` VARCHAR(32) NOT NULL,
`to_state` VARCHAR(32) NOT NULL,
`transition_result` TINYINT(2) NOT NULL COMMENT '0-失败,1-成功',
`error_msg` TEXT,
`created_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
KEY `idx_biz_id` (`biz_id`),
KEY `idx_created_time` (`created_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
3. 核心代码实现
3.1 幂等性注解
java
package com.example.userorder.annotation;
import java.lang.annotation.*;
import java.util.concurrent.TimeUnit;
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
@Documented
public @interface Idempotent {
/**
* 幂等键SpEL表达式
*/
String key();
/**
* 过期时间
*/
long expireTime() default 7;
/**
* 时间单位
*/
TimeUnit timeUnit() default TimeUnit.DAYS;
/**
* 是否需要分布式锁
*/
boolean needLock() default true;
/**
* 锁超时时间(秒)
*/
int lockTimeout() default 10;
}
3.2 幂等记录实体
arduino
package com.example.userorder.entity;
import lombok.Data;
import java.time.LocalDateTime;
@Data
public class IdempotentRecord {
private Long id;
private String bizId;
private String eventType;
private String messageId;
private String idempotentKey;
private Integer status; // 0-处理中, 1-成功, 2-失败
private Integer retryCount;
private String errorMsg;
private String requestData;
private String responseData;
private LocalDateTime createdTime;
private LocalDateTime updatedTime;
private LocalDateTime expiredTime;
public enum Status {
PROCESSING(0, "处理中"),
SUCCESS(1, "成功"),
FAILED(2, "失败");
private final int code;
private final String desc;
Status(int code, String desc) {
this.code = code;
this.desc = desc;
}
public int getCode() {
return code;
}
}
}
3.3 幂等服务
java
package com.example.userorder.service;
import com.example.userorder.entity.IdempotentRecord;
import com.example.userorder.mapper.IdempotentRecordMapper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import javax.annotation.Resource;
import java.time.LocalDateTime;
import java.util.concurrent.TimeUnit;
@Slf4j
@Service
public class IdempotentService {
@Resource
private IdempotentRecordMapper idempotentRecordMapper;
/**
* 检查是否已处理(幂等检查)
*/
public IdempotentRecord checkIdempotent(String idempotentKey) {
return idempotentRecordMapper.selectByIdempotentKey(idempotentKey);
}
/**
* 创建幂等记录(处理中状态)
*/
@Transactional(rollbackFor = Exception.class)
public boolean createProcessingRecord(String bizId, String eventType,
String messageId, String idempotentKey,
String requestData, long expireTime,
TimeUnit timeUnit) {
IdempotentRecord record = new IdempotentRecord();
record.setBizId(bizId);
record.setEventType(eventType);
record.setMessageId(messageId);
record.setIdempotentKey(idempotentKey);
record.setStatus(IdempotentRecord.Status.PROCESSING.getCode());
record.setRetryCount(0);
record.setRequestData(requestData);
record.setExpiredTime(LocalDateTime.now().plusSeconds(timeUnit.toSeconds(expireTime)));
try {
int rows = idempotentRecordMapper.insert(record);
return rows > 0;
} catch (Exception e) {
// 唯一键冲突,说明已经有记录在处理
log.warn("创建幂等记录失败,可能已存在: {}", idempotentKey, e);
return false;
}
}
/**
* 更新为成功状态
*/
@Transactional(rollbackFor = Exception.class)
public void updateSuccess(String idempotentKey, String responseData) {
idempotentRecordMapper.updateStatus(
idempotentKey,
IdempotentRecord.Status.SUCCESS.getCode(),
null,
responseData
);
}
/**
* 更新为失败状态
*/
@Transactional(rollbackFor = Exception.class)
public void updateFailed(String idempotentKey, String errorMsg, int retryCount) {
idempotentRecordMapper.updateStatus(
idempotentKey,
IdempotentRecord.Status.FAILED.getCode(),
errorMsg,
null
);
idempotentRecordMapper.incrementRetryCount(idempotentKey, retryCount);
}
/**
* 清理过期记录
*/
public int cleanExpiredRecords() {
return idempotentRecordMapper.deleteExpired(LocalDateTime.now());
}
}
3.4 分布式锁服务
java
package com.example.userorder.service;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
@Slf4j
@Service
public class DistributedLockService {
@Resource
private StringRedisTemplate stringRedisTemplate;
private static final String LOCK_PREFIX = "idempotent:lock:";
/**
* 尝试获取锁
*/
public String tryLock(String key, long timeout, TimeUnit unit) {
String lockKey = LOCK_PREFIX + key;
String lockValue = UUID.randomUUID().toString();
Boolean success = stringRedisTemplate.opsForValue()
.setIfAbsent(lockKey, lockValue, timeout, unit);
if (Boolean.TRUE.equals(success)) {
log.debug("获取分布式锁成功: {}", lockKey);
return lockValue;
}
log.warn("获取分布式锁失败: {}", lockKey);
return null;
}
/**
* 释放锁
*/
public void unlock(String key, String lockValue) {
String lockKey = LOCK_PREFIX + key;
String currentValue = stringRedisTemplate.opsForValue().get(lockKey);
if (lockValue != null && lockValue.equals(currentValue)) {
stringRedisTemplate.delete(lockKey);
log.debug("释放分布式锁成功: {}", lockKey);
}
}
}
3.5 幂等切面
java
package com.example.userorder.aspect;
import com.example.userorder.annotation.Idempotent;
import com.example.userorder.entity.IdempotentRecord;
import com.example.userorder.exception.IdempotentException;
import com.example.userorder.service.DistributedLockService;
import com.example.userorder.service.IdempotentService;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.extern.slf4j.Slf4j;
import org.aspectj.lang.ProceedingJoinPoint;
import org.aspectj.lang.annotation.Around;
import org.aspectj.lang.annotation.Aspect;
import org.aspectj.lang.reflect.MethodSignature;
import org.springframework.core.LocalVariableTableParameterNameDiscoverer;
import org.springframework.expression.EvaluationContext;
import org.springframework.expression.Expression;
import org.springframework.expression.ExpressionParser;
import org.springframework.expression.spel.standard.SpelExpressionParser;
import org.springframework.expression.spel.support.StandardEvaluationContext;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.lang.reflect.Method;
@Slf4j
@Aspect
@Component
public class IdempotentAspect {
@Resource
private IdempotentService idempotentService;
@Resource
private DistributedLockService lockService;
@Resource
private ObjectMapper objectMapper;
private final ExpressionParser parser = new SpelExpressionParser();
private final LocalVariableTableParameterNameDiscoverer discoverer =
new LocalVariableTableParameterNameDiscoverer();
@Around("@annotation(com.example.userorder.annotation.Idempotent)")
public Object around(ProceedingJoinPoint point) throws Throwable {
MethodSignature signature = (MethodSignature) point.getSignature();
Method method = signature.getMethod();
Idempotent idempotent = method.getAnnotation(Idempotent.class);
// 解析幂等键
String idempotentKey = parseKey(idempotent.key(), method, point.getArgs());
log.info("幂等处理开始, key: {}", idempotentKey);
// 幂等检查
IdempotentRecord existRecord = idempotentService.checkIdempotent(idempotentKey);
if (existRecord != null) {
return handleExistRecord(existRecord, idempotentKey);
}
// 需要分布式锁
String lockValue = null;
if (idempotent.needLock()) {
lockValue = lockService.tryLock(idempotentKey,
idempotent.lockTimeout(), java.util.concurrent.TimeUnit.SECONDS);
if (lockValue == null) {
throw new IdempotentException("获取分布式锁失败,请稍后重试: " + idempotentKey);
}
// 双重检查
existRecord = idempotentService.checkIdempotent(idempotentKey);
if (existRecord != null) {
lockService.unlock(idempotentKey, lockValue);
return handleExistRecord(existRecord, idempotentKey);
}
}
try {
// 创建处理中记录
String requestData = objectMapper.writeValueAsString(point.getArgs());
boolean created = idempotentService.createProcessingRecord(
extractBizId(point.getArgs()),
extractEventType(point.getArgs()),
extractMessageId(point.getArgs()),
idempotentKey,
requestData,
idempotent.expireTime(),
idempotent.timeUnit()
);
if (!created) {
throw new IdempotentException("创建幂等记录失败: " + idempotentKey);
}
// 执行业务逻辑
Object result = point.proceed();
// 更新为成功
String responseData = objectMapper.writeValueAsString(result);
idempotentService.updateSuccess(idempotentKey, responseData);
log.info("幂等处理成功, key: {}", idempotentKey);
return result;
} catch (Exception e) {
// 更新为失败
idempotentService.updateFailed(idempotentKey, e.getMessage(), 0);
log.error("幂等处理失败, key: {}", idempotentKey, e);
throw e;
} finally {
if (lockValue != null) {
lockService.unlock(idempotentKey, lockValue);
}
}
}
private Object handleExistRecord(IdempotentRecord record, String key) {
if (record.getStatus().equals(IdempotentRecord.Status.SUCCESS.getCode())) {
log.info("消息已成功处理,跳过: {}", key);
// 返回之前的结果或者返回幂等标识
return parseResponse(record.getResponseData());
} else if (record.getStatus().equals(IdempotentRecord.Status.PROCESSING.getCode())) {
log.warn("消息正在处理中,请勿重复提交: {}", key);
throw new IdempotentException("消息正在处理中: " + key);
} else {
log.warn("消息之前处理失败,将重新处理: {}", key);
// 可以选择重新处理或抛异常
throw new IdempotentException("消息之前处理失败: " + key);
}
}
private String parseKey(String keyExpression, Method method, Object[] args) {
String[] paraNameArr = discoverer.getParameterNames(method);
EvaluationContext context = new StandardEvaluationContext();
if (paraNameArr != null) {
for (int i = 0; i < paraNameArr.length; i++) {
context.setVariable(paraNameArr[i], args[i]);
}
}
Expression expression = parser.parseExpression(keyExpression);
return expression.getValue(context, String.class);
}
private String extractBizId(Object[] args) {
// 从参数中提取业务ID(订单ID)
// 这里需要根据实际参数结构实现
return ""; // TODO: 实现提取逻辑
}
private String extractEventType(Object[] args) {
// 从参数中提取事件类型
return ""; // TODO: 实现提取逻辑
}
private String extractMessageId(Object[] args) {
// 从参数中提取消息ID
return ""; // TODO: 实现提取逻辑
}
private Object parseResponse(String responseData) {
// 解析响应数据
return null;
}
}
3.6 新的消费者实现
typescript
package com.example.userorder.consumer;
import com.example.userorder.annotation.Idempotent;
import com.example.userorder.dto.OrderEventDTO;
import com.example.userorder.service.OrderEventHandlerService;
import com.example.userorder.service.OrderStateMachineService;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.kafka.support.Acknowledgment;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
import javax.annotation.Resource;
@Slf4j
@Component
public class UserOrderEventStrategyConsumer {
@Resource
private OrderEventHandlerService eventHandlerService;
@Resource
private OrderStateMachineService stateMachineService;
/**
* 消费订单事件
*/
@KafkaListener(
topics = "${kafka.topic.order-events}",
groupId = "${kafka.group.user-order}",
containerFactory = "kafkaListenerContainerFactory"
)
public void consumeOrderEvent(ConsumerRecord<String, String> record,
Acknowledgment ack) {
try {
log.info("接收订单事件: offset={}, key={}, value={}",
record.offset(), record.key(), record.value());
// 解析事件
OrderEventDTO event = parseEvent(record.value());
// 处理事件(带幂等)
processEventWithIdempotent(event);
// 手动提交offset
ack.acknowledge();
log.info("订单事件处理成功: orderId={}, eventType={}",
event.getOrderId(), event.getEventType());
} catch (Exception e) {
log.error("订单事件处理失败: offset={}", record.offset(), e);
// 不提交offset,等待重试
// 可以根据异常类型决定是否需要人工介入
throw new RuntimeException("事件处理失败", e);
}
}
/**
* 带幂等的事件处理
*/
@Idempotent(
key = "'order:event:' + #event.orderId + ':' + #event.eventType + ':' + #event.messageId",
expireTime = 7,
timeUnit = java.util.concurrent.TimeUnit.DAYS,
needLock = true,
lockTimeout = 10
)
@Transactional(rollbackFor = Exception.class)
public void processEventWithIdempotent(OrderEventDTO event) {
// 1. 业务处理(新逻辑)
eventHandlerService.handleEvent(event);
// 2. 调用老的状态机接口(保持兼容)
fireStateMachine(event);
}
/**
* 触发状态机流转(调用老接口)
*/
private void fireStateMachine(OrderEventDTO event) {
try {
// 状态机本身应该有幂等保护
stateMachineService.fireEvent(
event.getOrderId(),
event.getEventType(),
event.getEventData()
);
log.info("状态机流转成功: orderId={}, event={}",
event.getOrderId(), event.getEventType());
} catch (Exception e) {
log.error("状态机流转失败: orderId={}, event={}",
event.getOrderId(), event.getEventType(), e);
// 记录状态机流转失败,但不影响业务处理
// 可以通过补偿任务重试
throw e;
}
}
private OrderEventDTO parseEvent(String eventJson) {
// 解析JSON
return null; // TODO: 实现JSON解析
}
}
3.7 事件处理服务
kotlin
package com.example.userorder.service;
import com.example.userorder.dto.OrderEventDTO;
import com.example.userorder.strategy.OrderEventStrategy;
import com.example.userorder.strategy.OrderEventStrategyFactory;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import javax.annotation.Resource;
@Slf4j
@Service
public class OrderEventHandlerService {
@Resource
private OrderEventStrategyFactory strategyFactory;
/**
* 处理订单事件
*/
@Transactional(rollbackFor = Exception.class)
public void handleEvent(OrderEventDTO event) {
log.info("开始处理订单事件: orderId={}, eventType={}",
event.getOrderId(), event.getEventType());
// 获取对应的处理策略
OrderEventStrategy strategy = strategyFactory.getStrategy(event.getEventType());
if (strategy == null) {
log.warn("未找到事件处理策略: eventType={}", event.getEventType());
return;
}
// 执行策略
strategy.handle(event);
log.info("订单事件处理完成: orderId={}, eventType={}",
event.getOrderId(), event.getEventType());
}
}
3.8 状态机服务接口(调用老系统)
typescript
package com.example.userorder.service;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
/**
* 状态机服务 - 调用老的orders系统的状态机接口
*/
@Slf4j
@Service
public class OrderStateMachineService {
// 这里注入老系统的状态机服务(通过RPC/HTTP/Dubbo等)
// @Resource
// private OldOrderStateMachineService oldStateMachineService;
/**
* 触发状态机事件
* 注意:老的状态机应该自己实现了幂等
*/
@Transactional(rollbackFor = Exception.class)
public void fireEvent(String orderId, String eventType, Object eventData) {
log.info("调用老状态机接口: orderId={}, eventType={}", orderId, eventType);
try {
// 调用老系统的状态机接口
// oldStateMachineService.fireEvent(orderId, eventType, eventData);
// 或者通过HTTP调用
// restTemplate.postForObject(url, request, Response.class);
log.info("老状态机调用成功: orderId={}, eventType={}", orderId, eventType);
} catch (Exception e) {
log.error("老状态机调用失败: orderId={}, eventType={}", orderId, eventType, e);
throw new RuntimeException("状态机流转失败", e);
}
}
}
4. 重试机制
4.1 消费者重试配置
typescript
package com.example.userorder.config;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.config.ConcurrentKafkaListenerContainerFactory;
import org.springframework.kafka.core.ConsumerFactory;
import org.springframework.kafka.listener.ContainerProperties;
import org.springframework.retry.backoff.ExponentialBackOffPolicy;
import org.springframework.retry.policy.SimpleRetryPolicy;
import org.springframework.retry.support.RetryTemplate;
@Slf4j
@Configuration
public class KafkaConsumerConfig {
@Bean
public ConcurrentKafkaListenerContainerFactory<String, String>
kafkaListenerContainerFactory(ConsumerFactory<String, String> consumerFactory) {
ConcurrentKafkaListenerContainerFactory<String, String> factory =
new ConcurrentKafkaListenerContainerFactory<>();
factory.setConsumerFactory(consumerFactory);
// 手动提交offset
factory.getContainerProperties().setAckMode(ContainerProperties.AckMode.MANUAL);
// 配置重试
factory.setCommonErrorHandler(new DefaultErrorHandler((record, exception) -> {
log.error("消息消费失败达到最大重试次数: offset={}, exception={}",
record.offset(), exception.getMessage());
// 发送到死信队列或记录到数据库
// sendToDeadLetterQueue(record);
}, new FixedBackOff(1000L, 3L))); // 1秒间隔,重试3次
return factory;
}
/**
* 业务重试模板
*/
@Bean
public RetryTemplate retryTemplate() {
RetryTemplate retryTemplate = new RetryTemplate();
// 重试策略: 最多重试3次
SimpleRetryPolicy retryPolicy = new SimpleRetryPolicy();
retryPolicy.setMaxAttempts(3);
retryTemplate.setRetryPolicy(retryPolicy);
// 退避策略: 指数退避
ExponentialBackOffPolicy backOffPolicy = new ExponentialBackOffPolicy();
backOffPolicy.setInitialInterval(1000L); // 初始1秒
backOffPolicy.setMaxInterval(10000L); // 最大10秒
backOffPolicy.setMultiplier(2.0); // 每次翻倍
retryTemplate.setBackOffPolicy(backOffPolicy);
return retryTemplate;
}
}
4.2 异步重试任务
java
package com.example.userorder.task;
import com.example.userorder.entity.IdempotentRecord;
import com.example.userorder.mapper.IdempotentRecordMapper;
import com.example.userorder.service.OrderEventHandlerService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.time.LocalDateTime;
import java.util.List;
/**
* 失败消息重试任务
*/
@Slf4j
@Component
public class FailedMessageRetryTask {
@Resource
private IdempotentRecordMapper idempotentRecordMapper;
@Resource
private OrderEventHandlerService eventHandlerService;
private static final int MAX_RETRY_COUNT = 5;
/**
* 每5分钟扫描失败的消息进行重试
*/
@Scheduled(cron = "0 */5 * * * ?")
public void retryFailedMessages() {
log.info("开始扫描失败消息进行重试");
try {
// 查询失败且未超过最大重试次数的记录
List<IdempotentRecord> failedRecords =
idempotentRecordMapper.selectFailedRecords(MAX_RETRY_COUNT, 100);
log.info("查询到{}条失败消息待重试", failedRecords.size());
for (IdempotentRecord record : failedRecords) {
retryRecord(record);
}
} catch (Exception e) {
log.error("失败消息重试任务异常", e);
}
}
private void retryRecord(IdempotentRecord record) {
try {
log.info("重试失败消息: bizId={}, eventType={}, retryCount={}",
record.getBizId(), record.getEventType(), record.getRetryCount());
// 解析请求数据并重新处理
// OrderEventDTO event = parseRequestData(record.getRequestData());
// eventHandlerService.handleEvent(event);
// 更新为成功
idempotentRecordMapper.updateStatus(
record.getIdempotentKey(),
IdempotentRecord.Status.SUCCESS.getCode(),
null,
null
);
log.info("失败消息重试成功: bizId={}", record.getBizId());
} catch (Exception e) {
log.error("失败消息重试失败: bizId={}", record.getBizId(), e);
// 增加重试次数
int newRetryCount = record.getRetryCount() + 1;
idempotentRecordMapper.incrementRetryCount(
record.getIdempotentKey(),
newRetryCount
);
// 如果达到最大重试次数,发送告警
if (newRetryCount >= MAX_RETRY_COUNT) {
sendAlert(record);
}
}
}
private void sendAlert(IdempotentRecord record) {
log.error("消息重试达到最大次数,需要人工介入: bizId={}, eventType={}",
record.getBizId(), record.getEventType());
// TODO: 发送告警(钉钉/邮件/短信等)
}
}
5. 兜底处理
5.1 状态机流转兜底
java
package com.example.userorder.task;
import com.example.userorder.entity.IdempotentRecord;
import com.example.userorder.mapper.IdempotentRecordMapper;
import com.example.userorder.service.OrderStateMachineService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.util.List;
/**
* 状态机流转兜底任务
*/
@Slf4j
@Component
public class StateMachineCompensationTask {
@Resource
private IdempotentRecordMapper idempotentRecordMapper;
@Resource
private OrderStateMachineService stateMachineService;
/**
* 每小时检查一次业务处理成功但状态机可能未流转的情况
*/
@Scheduled(cron = "0 0 * * * ?")
public void compensateStateMachine() {
log.info("开始执行状态机流转兜底任务");
try {
// 查询业务处理成功的记录
List<IdempotentRecord> successRecords =
idempotentRecordMapper.selectSuccessRecords(100);
for (IdempotentRecord record : successRecords) {
compensateRecord(record);
}
log.info("状态机流转兜底任务完成");
} catch (Exception e) {
log.error("状态机流转兜底任务异常", e);
}
}
private void compensateRecord(IdempotentRecord record) {
try {
// 检查状态机是否已流转
// 如果未流转,重新触发
log.info("检查并补偿状态机流转: bizId={}, eventType={}",
record.getBizId(), record.getEventType());
// TODO: 实现状态检查和补偿逻辑
// 1. 查询当前订单状态
// 2. 判断是否需要触发状态机
// 3. 如果需要,重新调用状态机接口
} catch (Exception e) {
log.error("状态机补偿失败: bizId={}", record.getBizId(), e);
}
}
}
5.2 数据一致性校验
typescript
package com.example.userorder.task;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
/**
* 数据一致性校验任务
*/
@Slf4j
@Component
public class DataConsistencyCheckTask {
/**
* 每天凌晨2点执行数据一致性校验
*/
@Scheduled(cron = "0 0 2 * * ?")
public void checkConsistency() {
log.info("开始执行数据一致性校验");
try {
// 1. 对比新老系统的订单状态
checkOrderStatus();
// 2. 检查是否有遗漏的事件
checkMissingEvents();
// 3. 检查状态机状态是否一致
checkStateMachineStatus();
log.info("数据一致性校验完成");
} catch (Exception e) {
log.error("数据一致性校验异常", e);
}
}
private void checkOrderStatus() {
// TODO: 实现订单状态对比
}
private void checkMissingEvents() {
// TODO: 实现事件遗漏检查
}
private void checkStateMachineStatus() {
// TODO: 实现状态机状态检查
}
}
6. Mapper接口
less
package com.example.userorder.mapper;
import com.example.userorder.entity.IdempotentRecord;
import org.apache.ibatis.annotations.*;
import java.time.LocalDateTime;
import java.util.List;
@Mapper
public interface IdempotentRecordMapper {
@Select("SELECT * FROM idempotent_record WHERE idempotent_key = #{idempotentKey}")
IdempotentRecord selectByIdempotentKey(@Param("idempotentKey") String idempotentKey);
@Insert("INSERT INTO idempotent_record (biz_id, event_type, message_id, " +
"idempotent_key, status, retry_count, request_data, expired_time) " +
"VALUES (#{bizId}, #{eventType}, #{messageId}, #{idempotentKey}, " +
"#{status}, #{retryCount}, #{requestData}, #{expiredTime})")
@Options(useGeneratedKeys = true, keyProperty = "id")
int insert(IdempotentRecord record);
@Update("UPDATE idempotent_record SET status = #{status}, " +
"error_msg = #{errorMsg}, response_data = #{responseData}, " +
"updated_time = NOW() WHERE idempotent_key = #{idempotentKey}")
int updateStatus(@Param("idempotentKey") String idempotentKey,
@Param("status") Integer status,
@Param("errorMsg") String errorMsg,
@Param("responseData") String responseData);
@Update("UPDATE idempotent_record SET retry_count = #{retryCount}, " +
"updated_time = NOW() WHERE idempotent_key = #{idempotentKey}")
int incrementRetryCount(@Param("idempotentKey") String idempotentKey,
@Param("retryCount") Integer retryCount);
@Select("SELECT * FROM idempotent_record WHERE status = 2 " +
"AND retry_count < #{maxRetryCount} " +
"AND updated_time < DATE_SUB(NOW(), INTERVAL 5 MINUTE) " +
"LIMIT #{limit}")
List<IdempotentRecord> selectFailedRecords(@Param("maxRetryCount") Integer maxRetryCount,
@Param("limit") Integer limit);
@Select("SELECT * FROM idempotent_record WHERE status = 1 " +
"AND created_time > DATE_SUB(NOW(), INTERVAL 1 HOUR) " +
"LIMIT #{limit}")
List<IdempotentRecord> selectSuccessRecords(@Param("limit") Integer limit);
@Delete("DELETE FROM idempotent_record WHERE expired_time < #{now}")
int deleteExpired(@Param("now") LocalDateTime now);
}
7. 配置文件
yaml
# application.yml
spring:
kafka:
bootstrap-servers: localhost:9092
consumer:
group-id: user-order-consumer
auto-offset-reset: earliest
enable-auto-commit: false # 手动提交
key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
value-deserializer: org.apache.kafka.common.serialization.StringDeserializer
max-poll-records: 10
properties:
session.timeout.ms: 30000
max.poll.interval.ms: 300000
redis:
host: localhost
port: 6379
database: 0
timeout: 3000
jedis:
pool:
max-active: 8
max-idle: 8
min-idle: 0
datasource:
url: jdbc:mysql://localhost:3306/user_order?useUnicode=true&characterEncoding=utf8
username: root
password: password
driver-class-name: com.mysql.cj.jdbc.Driver
task:
scheduling:
pool:
size: 5
kafka:
topic:
order-events: order-events-topic
group:
user-order: user-order-consumer-group
# 幂等配置
idempotent:
default-expire-days: 7
lock-timeout-seconds: 10
clean-expired-cron: 0 0 3 * * ? # 每天凌晨3点清理过期记录
8. 迁移步骤
8.1 准备阶段
- 创建幂等表 : 执行SQL创建
idempotent_record表 - 部署新服务 : 部署
user-order服务但不启动消费 - 配置灰度: 配置流量灰度规则
8.2 灰度阶段
- 启动新消费者: 使用不同的消费者组并行消费
- 监控对比: 对比新老系统处理结果
- 修复问题: 发现问题及时修复
8.3 切换阶段
- 停止老消费者 : 停止
orders的OrderEventTransparentBroadcastConsumer - 全量切换: 新消费者接管所有流量
- 监控告警: 加强监控和告警
8.4 收尾阶段
- 数据校验: 执行数据一致性校验
- 下线老代码: 确认无误后下线老代码
- 文档更新: 更新相关文档
9. 监控告警
9.1 监控指标
- 消息消费延迟
- 消息消费TPS
- 幂等拦截率
- 失败重试次数
- 状态机流转成功率
9.2 告警规则
arduino
package com.example.userorder.monitor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
@Slf4j
@Component
public class MonitorService {
/**
* 消费延迟告警
*/
public void alertConsumerLag(String topic, long lag) {
if (lag > 10000) {
log.error("消费延迟过高: topic={}, lag={}", topic, lag);
// 发送告警
}
}
/**
* 重试次数告警
*/
public void alertRetryCount(String bizId, int retryCount) {
if (retryCount > 3) {
log.error("重试次数过多: bizId={}, retryCount={}", bizId, retryCount);
// 发送告警
}
}
/**
* 状态机流转失败告警
*/
public void alertStateMachineFailed(String orderId, String event) {
log.error("状态机流转失败: orderId={}, event={}", orderId, event);
// 发送告警
}
}
10. 总结
10.1 幂等保证
✅ 消息幂等 : 通过唯一约束的幂等表 + 分布式锁 ✅ 业务幂等 : 通过事务 + 幂等记录状态 ✅ 状态机幂等: 调用老接口,老接口自身保证幂等
10.2 可靠性保证
✅ 重试机制 : Kafka重试 + 数据库记录重试 + 定时任务重试 ✅ 兜底补偿 : 定时任务检查并补偿失败的流转 ✅ 监控告警: 多维度监控 + 及时告警
10.3 注意事项
⚠️ 幂等键设计要合理,包含订单ID+事件类型+消息ID ⚠️ 分布式锁超时时间要大于业务处理时间 ⚠️ 重试次数要合理,避免无限重试 ⚠️ 定期清理过期的幂等记录 ⚠️ 灰度期间要做好数据对比和监控
11. 扩展优化
11.1 性能优化
- 幂等表分库分表
- Redis缓存热点幂等键
- 异步处理非核心逻辑
11.2 功能增强
- 支持消息延迟消费
- 支持消息优先级
- 支持动态路由策略
完整方案已编写完成,包含:
- 数据库设计
- 完整代码实现
- 重试机制
- 兜底处理
- 监控告警
- 迁移步骤