Order Event Consumer Migration Plan: Idempotency and Reliability Design


1. Overall Architecture

1.1 Idempotency Layers

  • Message idempotency: prevent the same message from being consumed more than once
  • Business idempotency: prevent the same business operation from being executed more than once
  • State-machine idempotency: prevent duplicate state-machine transitions

1.2 Core Components

  • Idempotency table (idempotent_record)
  • Distributed lock
  • Transaction management
  • Retry mechanism
  • Fallback compensation

2. Database Design

2.1 Idempotency Record Table

sql
CREATE TABLE `idempotent_record` (
  `id` BIGINT(20) NOT NULL AUTO_INCREMENT,
  `biz_id` VARCHAR(64) NOT NULL COMMENT 'Business ID (order ID)',
  `event_type` VARCHAR(64) NOT NULL COMMENT 'Event type',
  `message_id` VARCHAR(128) NOT NULL COMMENT 'Message ID',
  `idempotent_key` VARCHAR(256) NOT NULL COMMENT 'Idempotency key',
  `status` TINYINT(2) NOT NULL DEFAULT 0 COMMENT 'Processing status: 0=processing, 1=success, 2=failed',
  `retry_count` INT(11) DEFAULT 0 COMMENT 'Retry count',
  `error_msg` TEXT COMMENT 'Error message',
  `request_data` TEXT COMMENT 'Request payload',
  `response_data` TEXT COMMENT 'Response payload',
  `created_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `updated_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  `expired_time` DATETIME NOT NULL COMMENT 'Expiration time',
  PRIMARY KEY (`id`),
  UNIQUE KEY `uk_idempotent_key` (`idempotent_key`),
  KEY `idx_biz_id` (`biz_id`),
  KEY `idx_message_id` (`message_id`),
  KEY `idx_status_retry` (`status`, `retry_count`),
  KEY `idx_expired_time` (`expired_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='Idempotency record table';

-- State-machine transition log table (optional; used for auditing and fallback compensation)
CREATE TABLE `state_machine_transition_log` (
  `id` BIGINT(20) NOT NULL AUTO_INCREMENT,
  `biz_id` VARCHAR(64) NOT NULL,
  `event_type` VARCHAR(64) NOT NULL,
  `from_state` VARCHAR(32) NOT NULL,
  `to_state` VARCHAR(32) NOT NULL,
  `transition_result` TINYINT(2) NOT NULL COMMENT '0=failed, 1=success',
  `error_msg` TEXT,
  `created_time` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`),
  KEY `idx_biz_id` (`biz_id`),
  KEY `idx_created_time` (`created_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

3. Core Code Implementation

3.1 Idempotency Annotation

java
package com.example.userorder.annotation;
​
import java.lang.annotation.*;
import java.util.concurrent.TimeUnit;
​
@Target(ElementType.METHOD)
@Retention(RetentionPolicy.RUNTIME)
@Documented
public @interface Idempotent {
    /**
     * SpEL expression for the idempotency key
     */
    String key();
    
    /**
     * Expiration time of the idempotency record
     */
    long expireTime() default 7;
    
    /**
     * Time unit for expireTime
     */
    TimeUnit timeUnit() default TimeUnit.DAYS;
    
    /**
     * Whether a distributed lock is required
     */
    boolean needLock() default true;
    
    /**
     * Lock timeout in seconds
     */
    int lockTimeout() default 10;
}

3.2 Idempotency Record Entity

java
package com.example.userorder.entity;
​
import lombok.Data;
import java.time.LocalDateTime;
​
@Data
public class IdempotentRecord {
    private Long id;
    private String bizId;
    private String eventType;
    private String messageId;
    private String idempotentKey;
    private Integer status; // 0=processing, 1=success, 2=failed
    private Integer retryCount;
    private String errorMsg;
    private String requestData;
    private String responseData;
    private LocalDateTime createdTime;
    private LocalDateTime updatedTime;
    private LocalDateTime expiredTime;
    
    public enum Status {
        PROCESSING(0, "processing"),
        SUCCESS(1, "success"),
        FAILED(2, "failed");
        
        private final int code;
        private final String desc;
        
        Status(int code, String desc) {
            this.code = code;
            this.desc = desc;
        }
        
        public int getCode() {
            return code;
        }
    }
}

3.3 Idempotency Service

java
package com.example.userorder.service;
​
import com.example.userorder.entity.IdempotentRecord;
import com.example.userorder.mapper.IdempotentRecordMapper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.dao.DuplicateKeyException;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import javax.annotation.Resource;
import java.time.LocalDateTime;
import java.util.concurrent.TimeUnit;
​
@Slf4j
@Service
public class IdempotentService {
    
    @Resource
    private IdempotentRecordMapper idempotentRecordMapper;
    
    /**
     * Check whether this key has already been handled (idempotency check)
     */
    public IdempotentRecord checkIdempotent(String idempotentKey) {
        return idempotentRecordMapper.selectByIdempotentKey(idempotentKey);
    }
    
    /**
     * Create an idempotency record in PROCESSING state
     */
    @Transactional(rollbackFor = Exception.class)
    public boolean createProcessingRecord(String bizId, String eventType, 
                                         String messageId, String idempotentKey,
                                         String requestData, long expireTime, 
                                         TimeUnit timeUnit) {
        IdempotentRecord record = new IdempotentRecord();
        record.setBizId(bizId);
        record.setEventType(eventType);
        record.setMessageId(messageId);
        record.setIdempotentKey(idempotentKey);
        record.setStatus(IdempotentRecord.Status.PROCESSING.getCode());
        record.setRetryCount(0);
        record.setRequestData(requestData);
        record.setExpiredTime(LocalDateTime.now().plusSeconds(timeUnit.toSeconds(expireTime)));
        
        try {
            int rows = idempotentRecordMapper.insert(record);
            return rows > 0;
        } catch (DuplicateKeyException e) {
            // Unique-key conflict: a record for this key already exists and is being handled
            log.warn("Failed to create idempotency record, it probably already exists: {}", idempotentKey, e);
            return false;
        }
    }
    
    /**
     * Mark the record as successful
     */
    @Transactional(rollbackFor = Exception.class)
    public void updateSuccess(String idempotentKey, String responseData) {
        idempotentRecordMapper.updateStatus(
            idempotentKey, 
            IdempotentRecord.Status.SUCCESS.getCode(),
            null,
            responseData
        );
    }
    
    /**
     * Mark the record as failed
     */
    @Transactional(rollbackFor = Exception.class)
    public void updateFailed(String idempotentKey, String errorMsg, int retryCount) {
        idempotentRecordMapper.updateStatus(
            idempotentKey,
            IdempotentRecord.Status.FAILED.getCode(),
            errorMsg,
            null
        );
        idempotentRecordMapper.incrementRetryCount(idempotentKey, retryCount);
    }
    
    /**
     * Clean up expired records
     */
    public int cleanExpiredRecords() {
        return idempotentRecordMapper.deleteExpired(LocalDateTime.now());
    }
}

3.4 Distributed Lock Service

java
package com.example.userorder.service;
​
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
​
@Slf4j
@Service
public class DistributedLockService {
    
    @Resource
    private StringRedisTemplate stringRedisTemplate;
    
    private static final String LOCK_PREFIX = "idempotent:lock:";
    
    /**
     * Try to acquire the lock
     */
    public String tryLock(String key, long timeout, TimeUnit unit) {
        String lockKey = LOCK_PREFIX + key;
        String lockValue = UUID.randomUUID().toString();
        
        Boolean success = stringRedisTemplate.opsForValue()
            .setIfAbsent(lockKey, lockValue, timeout, unit);
        
        if (Boolean.TRUE.equals(success)) {
            log.debug("获取分布式锁成功: {}", lockKey);
            return lockValue;
        }
        
        log.warn("获取分布式锁失败: {}", lockKey);
        return null;
    }
    
    /**
     * Release the lock
     */
    public void unlock(String key, String lockValue) {
        String lockKey = LOCK_PREFIX + key;
        String currentValue = stringRedisTemplate.opsForValue().get(lockKey);
        
        if (lockValue != null && lockValue.equals(currentValue)) {
            stringRedisTemplate.delete(lockKey);
            log.debug("释放分布式锁成功: {}", lockKey);
        }
    }
}
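
The unlock above reads the value and then deletes the key in two separate steps, which leaves a small window in which the lock could expire and be re-acquired by another client between the check and the delete. A minimal sketch of an atomic compare-and-delete using a Lua script through the same StringRedisTemplate (the method name unlockAtomically is illustrative, not part of the original design):

java
import java.util.Collections;
import org.springframework.data.redis.core.script.DefaultRedisScript;

// Additional members for DistributedLockService: the script compares and deletes on the Redis server in one step
private static final String UNLOCK_SCRIPT =
    "if redis.call('get', KEYS[1]) == ARGV[1] then return redis.call('del', KEYS[1]) else return 0 end";

public void unlockAtomically(String key, String lockValue) {
    String lockKey = LOCK_PREFIX + key;
    Long deleted = stringRedisTemplate.execute(
        new DefaultRedisScript<>(UNLOCK_SCRIPT, Long.class),
        Collections.singletonList(lockKey),
        lockValue);
    if (deleted != null && deleted > 0) {
        log.debug("Released distributed lock: {}", lockKey);
    }
}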

3.5 Idempotency Aspect

java
package com.example.userorder.aspect;
​
import com.example.userorder.annotation.Idempotent;
import com.example.userorder.dto.OrderEventDTO;
import com.example.userorder.entity.IdempotentRecord;
import com.example.userorder.exception.IdempotentException;
import com.example.userorder.service.DistributedLockService;
import com.example.userorder.service.IdempotentService;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.extern.slf4j.Slf4j;
import org.aspectj.lang.ProceedingJoinPoint;
import org.aspectj.lang.annotation.Around;
import org.aspectj.lang.annotation.Aspect;
import org.aspectj.lang.reflect.MethodSignature;
import org.springframework.core.LocalVariableTableParameterNameDiscoverer;
import org.springframework.expression.EvaluationContext;
import org.springframework.expression.Expression;
import org.springframework.expression.ExpressionParser;
import org.springframework.expression.spel.standard.SpelExpressionParser;
import org.springframework.expression.spel.support.StandardEvaluationContext;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.lang.reflect.Method;
​
@Slf4j
@Aspect
@Component
public class IdempotentAspect {
    
    @Resource
    private IdempotentService idempotentService;
    
    @Resource
    private DistributedLockService lockService;
    
    @Resource
    private ObjectMapper objectMapper;
    
    private final ExpressionParser parser = new SpelExpressionParser();
    private final LocalVariableTableParameterNameDiscoverer discoverer = 
        new LocalVariableTableParameterNameDiscoverer();
    
    @Around("@annotation(com.example.userorder.annotation.Idempotent)")
    public Object around(ProceedingJoinPoint point) throws Throwable {
        MethodSignature signature = (MethodSignature) point.getSignature();
        Method method = signature.getMethod();
        Idempotent idempotent = method.getAnnotation(Idempotent.class);
        
        // Resolve the idempotency key
        String idempotentKey = parseKey(idempotent.key(), method, point.getArgs());
        log.info("幂等处理开始, key: {}", idempotentKey);
        
        // Idempotency check
        IdempotentRecord existRecord = idempotentService.checkIdempotent(idempotentKey);
        if (existRecord != null) {
            return handleExistRecord(existRecord, idempotentKey);
        }
        
        // Acquire a distributed lock if required
        String lockValue = null;
        if (idempotent.needLock()) {
            lockValue = lockService.tryLock(idempotentKey, 
                idempotent.lockTimeout(), java.util.concurrent.TimeUnit.SECONDS);
            if (lockValue == null) {
                throw new IdempotentException("Failed to acquire distributed lock, please retry later: " + idempotentKey);
            }
            
            // Double-check after acquiring the lock
            existRecord = idempotentService.checkIdempotent(idempotentKey);
            if (existRecord != null) {
                lockService.unlock(idempotentKey, lockValue);
                return handleExistRecord(existRecord, idempotentKey);
            }
        }
        
        try {
            // Create a record in PROCESSING state
            String requestData = objectMapper.writeValueAsString(point.getArgs());
            boolean created = idempotentService.createProcessingRecord(
                extractBizId(point.getArgs()),
                extractEventType(point.getArgs()),
                extractMessageId(point.getArgs()),
                idempotentKey,
                requestData,
                idempotent.expireTime(),
                idempotent.timeUnit()
            );
            
            if (!created) {
                throw new IdempotentException("Failed to create idempotency record: " + idempotentKey);
            }
            
            // Execute the business logic
            Object result = point.proceed();
            
            // Mark as success
            String responseData = objectMapper.writeValueAsString(result);
            idempotentService.updateSuccess(idempotentKey, responseData);
            
            log.info("Idempotent processing succeeded, key: {}", idempotentKey);
            return result;
            
        } catch (Exception e) {
            // Mark as failed
            idempotentService.updateFailed(idempotentKey, e.getMessage(), 0);
            log.error("Idempotent processing failed, key: {}", idempotentKey, e);
            throw e;
        } finally {
            if (lockValue != null) {
                lockService.unlock(idempotentKey, lockValue);
            }
        }
    }
    
    private Object handleExistRecord(IdempotentRecord record, String key) {
        if (record.getStatus().equals(IdempotentRecord.Status.SUCCESS.getCode())) {
            log.info("Message already processed successfully, skipping: {}", key);
            // Return the previously stored result (or an idempotent marker)
            return parseResponse(record.getResponseData());
        } else if (record.getStatus().equals(IdempotentRecord.Status.PROCESSING.getCode())) {
            log.warn("Message is currently being processed, do not resubmit: {}", key);
            throw new IdempotentException("Message is being processed: " + key);
        } else {
            log.warn("Message failed previously and needs reprocessing: {}", key);
            // Either reprocess here or throw; throwing lets the consumer/retry task handle it
            throw new IdempotentException("Message failed previously: " + key);
        }
    }
    
    private String parseKey(String keyExpression, Method method, Object[] args) {
        String[] paraNameArr = discoverer.getParameterNames(method);
        EvaluationContext context = new StandardEvaluationContext();
        
        if (paraNameArr != null) {
            for (int i = 0; i < paraNameArr.length; i++) {
                context.setVariable(paraNameArr[i], args[i]);
            }
        }
        
        Expression expression = parser.parseExpression(keyExpression);
        return expression.getValue(context, String.class);
    }
    
    private String extractBizId(Object[] args) {
        // Extract the business ID (order ID) from the OrderEventDTO argument
        OrderEventDTO event = findEventArg(args);
        return event != null ? event.getOrderId() : "";
    }
    
    private String extractEventType(Object[] args) {
        // Extract the event type from the OrderEventDTO argument
        OrderEventDTO event = findEventArg(args);
        return event != null ? event.getEventType() : "";
    }
    
    private String extractMessageId(Object[] args) {
        // Extract the message ID from the OrderEventDTO argument
        OrderEventDTO event = findEventArg(args);
        return event != null ? event.getMessageId() : "";
    }
    
    private OrderEventDTO findEventArg(Object[] args) {
        for (Object arg : args) {
            if (arg instanceof OrderEventDTO) {
                return (OrderEventDTO) arg;
            }
        }
        return null;
    }
    
    private Object parseResponse(String responseData) {
        // Deserialize the stored response; null means the original call returned nothing
        if (responseData == null || responseData.isEmpty()) {
            return null;
        }
        try {
            return objectMapper.readValue(responseData, Object.class);
        } catch (Exception e) {
            log.warn("Failed to parse stored response data", e);
            return null;
        }
    }
}
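
OrderEventDTO is referenced throughout but never shown in the original; a minimal sketch based on the accessors used above and in the consumer below (the field types, in particular eventData, are assumptions):

java
package com.example.userorder.dto;

import lombok.Data;

@Data
public class OrderEventDTO {
    /** Order ID (business ID) */
    private String orderId;
    /** Event type, e.g. an order state event name */
    private String eventType;
    /** Unique message ID carried by the MQ message */
    private String messageId;
    /** Event payload; the concrete type is an assumption */
    private Object eventData;
}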

3.6 New Consumer Implementation

java
package com.example.userorder.consumer;
​
import com.example.userorder.annotation.Idempotent;
import com.example.userorder.dto.OrderEventDTO;
import com.example.userorder.service.OrderEventHandlerService;
import com.example.userorder.service.OrderStateMachineService;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.kafka.support.Acknowledgment;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
import javax.annotation.Resource;
​
@Slf4j
@Component
public class UserOrderEventStrategyConsumer {
    
    @Resource
    private OrderEventHandlerService eventHandlerService;
    
    @Resource
    private OrderStateMachineService stateMachineService;
    
    @Resource
    private ObjectMapper objectMapper;
    
    /**
     * Consume order events
     */
    @KafkaListener(
        topics = "${kafka.topic.order-events}",
        groupId = "${kafka.group.user-order}",
        containerFactory = "kafkaListenerContainerFactory"
    )
    public void consumeOrderEvent(ConsumerRecord<String, String> record, 
                                  Acknowledgment ack) {
        try {
            log.info("接收订单事件: offset={}, key={}, value={}", 
                record.offset(), record.key(), record.value());
            
            // 解析事件
            OrderEventDTO event = parseEvent(record.value());
            
            // 处理事件(带幂等)
            processEventWithIdempotent(event);
            
            // 手动提交offset
            ack.acknowledge();
            log.info("订单事件处理成功: orderId={}, eventType={}", 
                event.getOrderId(), event.getEventType());
            
        } catch (Exception e) {
            log.error("订单事件处理失败: offset={}", record.offset(), e);
            // 不提交offset,等待重试
            // 可以根据异常类型决定是否需要人工介入
            throw new RuntimeException("事件处理失败", e);
        }
    }
    
    /**
     * Event processing with idempotency
     */
    @Idempotent(
        key = "'order:event:' + #event.orderId + ':' + #event.eventType + ':' + #event.messageId",
        expireTime = 7,
        timeUnit = java.util.concurrent.TimeUnit.DAYS,
        needLock = true,
        lockTimeout = 10
    )
    @Transactional(rollbackFor = Exception.class)
    public void processEventWithIdempotent(OrderEventDTO event) {
        // 1. Business processing (new logic)
        eventHandlerService.handleEvent(event);
        
        // 2. Call the legacy state-machine interface (for compatibility)
        fireStateMachine(event);
    }
    
    /**
     * Trigger the state-machine transition (calls the legacy interface)
     */
    private void fireStateMachine(OrderEventDTO event) {
        try {
            // The state machine itself is expected to enforce its own idempotency
            stateMachineService.fireEvent(
                event.getOrderId(), 
                event.getEventType(),
                event.getEventData()
            );
            log.info("State-machine transition succeeded: orderId={}, event={}", 
                event.getOrderId(), event.getEventType());
        } catch (Exception e) {
            log.error("State-machine transition failed: orderId={}, event={}", 
                event.getOrderId(), event.getEventType(), e);
            // Log the failure and rethrow so the whole message is retried;
            // the compensation task in section 5 can also pick it up
            throw e;
        }
    }
    
    private OrderEventDTO parseEvent(String eventJson) {
        try {
            // Parse the JSON payload into an OrderEventDTO
            return objectMapper.readValue(eventJson, OrderEventDTO.class);
        } catch (Exception e) {
            throw new IllegalArgumentException("Failed to parse order event JSON: " + eventJson, e);
        }
    }
}
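
Note: as written, consumeOrderEvent calls processEventWithIdempotent on this, i.e. by self-invocation, so neither the @Idempotent aspect nor @Transactional is applied to that call. One option is to move processEventWithIdempotent into a separate Spring bean; another is to call it through the AOP proxy, sketched below (assumes proxy exposure is enabled, e.g. @EnableAspectJAutoProxy(exposeProxy = true), and an import of org.springframework.aop.framework.AopContext):

java
// Inside consumeOrderEvent, instead of the direct method call:
((UserOrderEventStrategyConsumer) AopContext.currentProxy()).processEventWithIdempotent(event);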

3.7 Event Handler Service

java
package com.example.userorder.service;
​
import com.example.userorder.dto.OrderEventDTO;
import com.example.userorder.strategy.OrderEventStrategy;
import com.example.userorder.strategy.OrderEventStrategyFactory;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import javax.annotation.Resource;
​
@Slf4j
@Service
public class OrderEventHandlerService {
    
    @Resource
    private OrderEventStrategyFactory strategyFactory;
    
    /**
     * Handle an order event
     */
    @Transactional(rollbackFor = Exception.class)
    public void handleEvent(OrderEventDTO event) {
        log.info("开始处理订单事件: orderId={}, eventType={}", 
            event.getOrderId(), event.getEventType());
        
        // 获取对应的处理策略
        OrderEventStrategy strategy = strategyFactory.getStrategy(event.getEventType());
        
        if (strategy == null) {
            log.warn("未找到事件处理策略: eventType={}", event.getEventType());
            return;
        }
        
        // 执行策略
        strategy.handle(event);
        
        log.info("订单事件处理完成: orderId={}, eventType={}", 
            event.getOrderId(), event.getEventType());
    }
}
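
OrderEventStrategy and OrderEventStrategyFactory are imported above but not defined anywhere in this document; a minimal sketch of what they might look like (the constructor-injection wiring of all strategy beans is an assumption):

java
// OrderEventStrategy.java
package com.example.userorder.strategy;

import com.example.userorder.dto.OrderEventDTO;

public interface OrderEventStrategy {
    
    /** Event type this strategy handles */
    String eventType();
    
    /** Handle a single order event */
    void handle(OrderEventDTO event);
}

// OrderEventStrategyFactory.java
package com.example.userorder.strategy;

import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

@Component
public class OrderEventStrategyFactory {
    
    private final Map<String, OrderEventStrategy> strategies;
    
    // Spring injects every OrderEventStrategy bean; index them by event type
    public OrderEventStrategyFactory(List<OrderEventStrategy> strategyBeans) {
        this.strategies = strategyBeans.stream()
            .collect(Collectors.toMap(OrderEventStrategy::eventType, Function.identity()));
    }
    
    /** Returns null when no strategy is registered for the event type */
    public OrderEventStrategy getStrategy(String eventType) {
        return strategies.get(eventType);
    }
}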

3.8 State-Machine Service Interface (Calling the Legacy System)

java
package com.example.userorder.service;
​
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
​
/**
 * State-machine service: calls the state-machine interface of the legacy orders system
 */
@Slf4j
@Service
public class OrderStateMachineService {
    
    // Inject the legacy system's state-machine service here (via RPC/HTTP/Dubbo, etc.)
    // @Resource
    // private OldOrderStateMachineService oldStateMachineService;
    
    /**
     * Fire a state-machine event.
     * Note: the legacy state machine is expected to enforce its own idempotency
     */
    @Transactional(rollbackFor = Exception.class)
    public void fireEvent(String orderId, String eventType, Object eventData) {
        log.info("调用老状态机接口: orderId={}, eventType={}", orderId, eventType);
        
        try {
            // Call the legacy system's state-machine interface
            // oldStateMachineService.fireEvent(orderId, eventType, eventData);
            
            // Or call it over HTTP
            // restTemplate.postForObject(url, request, Response.class);
            
            log.info("Legacy state-machine call succeeded: orderId={}, eventType={}", orderId, eventType);
        } catch (Exception e) {
            log.error("Legacy state-machine call failed: orderId={}, eventType={}", orderId, eventType, e);
            throw new RuntimeException("State-machine transition failed", e);
        }
    }
}

4. Retry Mechanism

4.1 Consumer Retry Configuration

java
package com.example.userorder.config;
​
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.config.ConcurrentKafkaListenerContainerFactory;
import org.springframework.kafka.core.ConsumerFactory;
import org.springframework.kafka.listener.ContainerProperties;
import org.springframework.kafka.listener.DefaultErrorHandler;
import org.springframework.retry.backoff.ExponentialBackOffPolicy;
import org.springframework.retry.policy.SimpleRetryPolicy;
import org.springframework.retry.support.RetryTemplate;
import org.springframework.util.backoff.FixedBackOff;
​
@Slf4j
@Configuration
public class KafkaConsumerConfig {
    
    @Bean
    public ConcurrentKafkaListenerContainerFactory<String, String> 
        kafkaListenerContainerFactory(ConsumerFactory<String, String> consumerFactory) {
        
        ConcurrentKafkaListenerContainerFactory<String, String> factory = 
            new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(consumerFactory);
        
        // Commit offsets manually
        factory.getContainerProperties().setAckMode(ContainerProperties.AckMode.MANUAL);
        
        // Configure retry / error handling
        factory.setCommonErrorHandler(new DefaultErrorHandler((record, exception) -> {
            log.error("Message consumption failed after max retries: offset={}, exception={}", 
                record.offset(), exception.getMessage());
            // Send to a dead-letter queue or persist to the database
            // sendToDeadLetterQueue(record);
        }, new FixedBackOff(1000L, 3L))); // 1s interval, 3 retries
        
        return factory;
    }
    
    /**
     * Business-level retry template
     */
    @Bean
    public RetryTemplate retryTemplate() {
        RetryTemplate retryTemplate = new RetryTemplate();
        
        // Retry policy: at most 3 attempts
        SimpleRetryPolicy retryPolicy = new SimpleRetryPolicy();
        retryPolicy.setMaxAttempts(3);
        retryTemplate.setRetryPolicy(retryPolicy);
        
        // Back-off policy: exponential back-off
        ExponentialBackOffPolicy backOffPolicy = new ExponentialBackOffPolicy();
        backOffPolicy.setInitialInterval(1000L); // initial interval 1s
        backOffPolicy.setMaxInterval(10000L); // max interval 10s
        backOffPolicy.setMultiplier(2.0); // double each time
        retryTemplate.setBackOffPolicy(backOffPolicy);
        
        return retryTemplate;
    }
}
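
The retryTemplate bean above is not referenced anywhere else in this document; a hedged sketch of how it could be used, for example to wrap the legacy state-machine call from section 3.6 (assumes a RetryTemplate field is injected into the consumer):

java
// e.g. inside UserOrderEventStrategyConsumer, with "@Resource private RetryTemplate retryTemplate;"
private void fireStateMachine(OrderEventDTO event) {
    // The callback is re-invoked up to maxAttempts times, with exponential back-off between failures
    retryTemplate.execute(context -> {
        stateMachineService.fireEvent(event.getOrderId(), event.getEventType(), event.getEventData());
        return null;
    });
}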

4.2 Asynchronous Retry Task

java
package com.example.userorder.task;
​
import com.example.userorder.dto.OrderEventDTO;
import com.example.userorder.entity.IdempotentRecord;
import com.example.userorder.mapper.IdempotentRecordMapper;
import com.example.userorder.service.OrderEventHandlerService;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.time.LocalDateTime;
import java.util.List;
​
/**
 * Retry task for failed messages
 */
@Slf4j
@Component
public class FailedMessageRetryTask {
    
    @Resource
    private IdempotentRecordMapper idempotentRecordMapper;
    
    @Resource
    private OrderEventHandlerService eventHandlerService;
    
    @Resource
    private ObjectMapper objectMapper;
    
    private static final int MAX_RETRY_COUNT = 5;
    
    /**
     * Every 5 minutes, scan failed messages and retry them
     */
    @Scheduled(cron = "0 */5 * * * ?")
    public void retryFailedMessages() {
        log.info("开始扫描失败消息进行重试");
        
        try {
            // 查询失败且未超过最大重试次数的记录
            List<IdempotentRecord> failedRecords = 
                idempotentRecordMapper.selectFailedRecords(MAX_RETRY_COUNT, 100);
            
            log.info("查询到{}条失败消息待重试", failedRecords.size());
            
            for (IdempotentRecord record : failedRecords) {
                retryRecord(record);
            }
            
        } catch (Exception e) {
            log.error("失败消息重试任务异常", e);
        }
    }
    
    private void retryRecord(IdempotentRecord record) {
        try {
            log.info("Retrying failed message: bizId={}, eventType={}, retryCount={}", 
                record.getBizId(), record.getEventType(), record.getRetryCount());
            
            // Parse the stored request data and reprocess the event;
            // the aspect stores the method arguments as a JSON array whose first element is the OrderEventDTO
            OrderEventDTO[] args = objectMapper.readValue(record.getRequestData(), OrderEventDTO[].class);
            eventHandlerService.handleEvent(args[0]);
            
            // Mark as success
            idempotentRecordMapper.updateStatus(
                record.getIdempotentKey(),
                IdempotentRecord.Status.SUCCESS.getCode(),
                null,
                null
            );
            
            log.info("Failed message retried successfully: bizId={}", record.getBizId());
            
        } catch (Exception e) {
            log.error("Retry of failed message failed again: bizId={}", record.getBizId(), e);
            
            // Increment the retry count
            int newRetryCount = record.getRetryCount() + 1;
            idempotentRecordMapper.incrementRetryCount(
                record.getIdempotentKey(), 
                newRetryCount
            );
            
            // If the max retry count is reached, send an alert
            if (newRetryCount >= MAX_RETRY_COUNT) {
                sendAlert(record);
            }
        }
    }
    
    private void sendAlert(IdempotentRecord record) {
        log.error("消息重试达到最大次数,需要人工介入: bizId={}, eventType={}", 
            record.getBizId(), record.getEventType());
        // TODO: 发送告警(钉钉/邮件/短信等)
    }
}

5. Fallback Handling

5.1 State-Machine Transition Fallback

java
package com.example.userorder.task;
​
import com.example.userorder.entity.IdempotentRecord;
import com.example.userorder.mapper.IdempotentRecordMapper;
import com.example.userorder.service.OrderStateMachineService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.util.List;
​
/**
 * State-machine transition compensation task
 */
@Slf4j
@Component
public class StateMachineCompensationTask {
    
    @Resource
    private IdempotentRecordMapper idempotentRecordMapper;
    
    @Resource
    private OrderStateMachineService stateMachineService;
    
    /**
     * Runs hourly to check for cases where business processing succeeded but the state machine may not have transitioned
     */
    @Scheduled(cron = "0 0 * * * ?")
    public void compensateStateMachine() {
        log.info("开始执行状态机流转兜底任务");
        
        try {
            // 查询业务处理成功的记录
            List<IdempotentRecord> successRecords = 
                idempotentRecordMapper.selectSuccessRecords(100);
            
            for (IdempotentRecord record : successRecords) {
                compensateRecord(record);
            }
            
            log.info("状态机流转兜底任务完成");
        } catch (Exception e) {
            log.error("状态机流转兜底任务异常", e);
        }
    }
    
    private void compensateRecord(IdempotentRecord record) {
        try {
            // Check whether the state machine has already transitioned;
            // if not, fire it again
            log.info("Checking and compensating state-machine transition: bizId={}, eventType={}", 
                record.getBizId(), record.getEventType());
            
            // TODO: implement the check-and-compensate logic
            // 1. Query the current order status
            // 2. Decide whether the state machine needs to be fired
            // 3. If so, call the state-machine interface again
            
        } catch (Exception e) {
            log.error("State-machine compensation failed: bizId={}", record.getBizId(), e);
        }
    }
}

5.2 Data Consistency Check

java
package com.example.userorder.task;
​
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
​
/**
 * Data consistency check task
 */
@Slf4j
@Component
public class DataConsistencyCheckTask {
    
    /**
     * Runs the data consistency check every day at 2 AM
     */
    @Scheduled(cron = "0 0 2 * * ?")
    public void checkConsistency() {
        log.info("开始执行数据一致性校验");
        
        try {
            // 1. 对比新老系统的订单状态
            checkOrderStatus();
            
            // 2. 检查是否有遗漏的事件
            checkMissingEvents();
            
            // 3. 检查状态机状态是否一致
            checkStateMachineStatus();
            
            log.info("数据一致性校验完成");
        } catch (Exception e) {
            log.error("数据一致性校验异常", e);
        }
    }
    
    private void checkOrderStatus() {
        // TODO: implement order status comparison between the systems
    }
    
    private void checkMissingEvents() {
        // TODO: implement missed-event detection
    }
    
    private void checkStateMachineStatus() {
        // TODO: implement state-machine state check
    }
}

6. Mapper Interface

java
package com.example.userorder.mapper;
​
import com.example.userorder.entity.IdempotentRecord;
import org.apache.ibatis.annotations.*;
import java.time.LocalDateTime;
import java.util.List;
​
@Mapper
public interface IdempotentRecordMapper {
    
    @Select("SELECT * FROM idempotent_record WHERE idempotent_key = #{idempotentKey}")
    IdempotentRecord selectByIdempotentKey(@Param("idempotentKey") String idempotentKey);
    
    @Insert("INSERT INTO idempotent_record (biz_id, event_type, message_id, " +
            "idempotent_key, status, retry_count, request_data, expired_time) " +
            "VALUES (#{bizId}, #{eventType}, #{messageId}, #{idempotentKey}, " +
            "#{status}, #{retryCount}, #{requestData}, #{expiredTime})")
    @Options(useGeneratedKeys = true, keyProperty = "id")
    int insert(IdempotentRecord record);
    
    @Update("UPDATE idempotent_record SET status = #{status}, " +
            "error_msg = #{errorMsg}, response_data = #{responseData}, " +
            "updated_time = NOW() WHERE idempotent_key = #{idempotentKey}")
    int updateStatus(@Param("idempotentKey") String idempotentKey,
                     @Param("status") Integer status,
                     @Param("errorMsg") String errorMsg,
                     @Param("responseData") String responseData);
    
    @Update("UPDATE idempotent_record SET retry_count = #{retryCount}, " +
            "updated_time = NOW() WHERE idempotent_key = #{idempotentKey}")
    int incrementRetryCount(@Param("idempotentKey") String idempotentKey,
                           @Param("retryCount") Integer retryCount);
    
    @Select("SELECT * FROM idempotent_record WHERE status = 2 " +
            "AND retry_count < #{maxRetryCount} " +
            "AND updated_time < DATE_SUB(NOW(), INTERVAL 5 MINUTE) " +
            "LIMIT #{limit}")
    List<IdempotentRecord> selectFailedRecords(@Param("maxRetryCount") Integer maxRetryCount,
                                               @Param("limit") Integer limit);
    
    @Select("SELECT * FROM idempotent_record WHERE status = 1 " +
            "AND created_time > DATE_SUB(NOW(), INTERVAL 1 HOUR) " +
            "LIMIT #{limit}")
    List<IdempotentRecord> selectSuccessRecords(@Param("limit") Integer limit);
    
    @Delete("DELETE FROM idempotent_record WHERE expired_time < #{now}")
    int deleteExpired(@Param("now") LocalDateTime now);
}

7. Configuration

yaml
# application.yml
spring:
  kafka:
    bootstrap-servers: localhost:9092
    consumer:
      group-id: user-order-consumer
      auto-offset-reset: earliest
      enable-auto-commit: false  # manual offset commit
      key-deserializer: org.apache.kafka.common.serialization.StringDeserializer
      value-deserializer: org.apache.kafka.common.serialization.StringDeserializer
      max-poll-records: 10
      properties:
        session.timeout.ms: 30000
        max.poll.interval.ms: 300000
    
  redis:
    host: localhost
    port: 6379
    database: 0
    timeout: 3000
    jedis:
      pool:
        max-active: 8
        max-idle: 8
        min-idle: 0
  datasource:
    url: jdbc:mysql://localhost:3306/user_order?useUnicode=true&characterEncoding=utf8
    username: root
    password: password
    driver-class-name: com.mysql.cj.jdbc.Driver
    
  task:
    scheduling:
      pool:
        size: 5
kafka:
  topic:
    order-events: order-events-topic
  group:
    user-order: user-order-consumer-group

# Idempotency settings
idempotent:
  default-expire-days: 7
  lock-timeout-seconds: 10
  clean-expired-cron: 0 0 3 * * ?  # clean expired records daily at 3 AM
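
The cleanExpiredRecords method in section 3.3 and the idempotent.clean-expired-cron property above imply a scheduled cleanup job that the original does not show; a minimal sketch, using a hypothetical IdempotentRecordCleanTask component:

java
package com.example.userorder.task;

import com.example.userorder.service.IdempotentService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;

/**
 * Periodically deletes expired idempotency records (hypothetical; not part of the original design)
 */
@Slf4j
@Component
public class IdempotentRecordCleanTask {
    
    @Resource
    private IdempotentService idempotentService;
    
    // Cron expression comes from the idempotent.clean-expired-cron property defined above
    @Scheduled(cron = "${idempotent.clean-expired-cron}")
    public void cleanExpiredRecords() {
        int deleted = idempotentService.cleanExpiredRecords();
        log.info("Cleaned {} expired idempotency records", deleted);
    }
}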

8. Migration Steps

8.1 Preparation Phase

  1. Create the idempotency table: run the SQL to create idempotent_record
  2. Deploy the new service: deploy the user-order service without starting consumption
  3. Configure the canary: set up traffic gray-release rules

8.2 Canary Phase

  1. Start the new consumer: consume in parallel with a separate consumer group
  2. Monitor and compare: compare processing results between the new and legacy systems
  3. Fix issues: fix problems promptly as they are found

8.3 Cutover Phase

  1. Stop the legacy consumer: stop ordersOrderEventTransparentBroadcastConsumer
  2. Full cutover: the new consumer takes over all traffic
  3. Monitoring and alerting: strengthen monitoring and alerting

8.4 Wrap-Up Phase

  1. Data verification: run the data consistency check
  2. Retire the legacy code: take the old code offline once everything is confirmed
  3. Update documentation: update the related documents

9. Monitoring and Alerting

9.1 Monitoring Metrics

  • Message consumption lag
  • Message consumption TPS
  • Idempotent interception rate
  • Failure retry count
  • State-machine transition success rate
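
One way to expose these indicators is through Micrometer counters; a sketch, assuming Micrometer is on the classpath (e.g. via Spring Boot Actuator) and using illustrative metric names:

java
package com.example.userorder.monitor;

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import org.springframework.stereotype.Component;

/**
 * Hypothetical Micrometer metrics for the indicators listed above
 */
@Component
public class OrderEventMetrics {
    
    private final Counter consumedCounter;
    private final Counter idempotentHitCounter;
    private final Counter stateMachineFailureCounter;
    
    public OrderEventMetrics(MeterRegistry registry) {
        this.consumedCounter = Counter.builder("order.event.consumed").register(registry);
        this.idempotentHitCounter = Counter.builder("order.event.idempotent.hit").register(registry);
        this.stateMachineFailureCounter = Counter.builder("order.event.statemachine.failure").register(registry);
    }
    
    public void recordConsumed() { consumedCounter.increment(); }
    
    public void recordIdempotentHit() { idempotentHitCounter.increment(); }
    
    public void recordStateMachineFailure() { stateMachineFailureCounter.increment(); }
}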

9.2 Alerting Rules

java
package com.example.userorder.monitor;
​
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
​
@Slf4j
@Component
public class MonitorService {
    
    /**
     * Consumer lag alert
     */
    public void alertConsumerLag(String topic, long lag) {
        if (lag > 10000) {
            log.error("Consumer lag too high: topic={}, lag={}", topic, lag);
            // Send an alert
        }
    }
    
    /**
     * Retry count alert
     */
    public void alertRetryCount(String bizId, int retryCount) {
        if (retryCount > 3) {
            log.error("Too many retries: bizId={}, retryCount={}", bizId, retryCount);
            // Send an alert
        }
    }
    
    /**
     * State-machine transition failure alert
     */
    public void alertStateMachineFailed(String orderId, String event) {
        log.error("State-machine transition failed: orderId={}, event={}", orderId, event);
        // Send an alert
    }
}

10. Summary

10.1 Idempotency Guarantees

  ✅ Message idempotency: idempotency table with a unique constraint + distributed lock
  ✅ Business idempotency: transactions + idempotency record status
  ✅ State-machine idempotency: delegate to the legacy interface, which enforces its own idempotency

10.2 Reliability Guarantees

  ✅ Retry mechanism: Kafka retries + database-recorded retries + scheduled-task retries
  ✅ Fallback compensation: scheduled tasks that detect and compensate failed transitions
  ✅ Monitoring and alerting: multi-dimensional monitoring + timely alerts

10.3 Caveats

  ⚠️ Design the idempotency key carefully; it should combine order ID + event type + message ID
  ⚠️ The distributed-lock timeout must be longer than the business processing time
  ⚠️ Keep the retry limit reasonable to avoid infinite retries
  ⚠️ Clean up expired idempotency records periodically
  ⚠️ During the canary phase, do thorough data comparison and monitoring


11. Further Optimizations

11.1 Performance Optimizations

  • Shard the idempotency table across databases/tables
  • Cache hot idempotency keys in Redis
  • Handle non-critical logic asynchronously

11.2 Feature Enhancements

  • Support delayed message consumption
  • Support message priorities
  • Support dynamic routing strategies

The complete plan above covers:

  • Database design
  • Full code implementation
  • Retry mechanism
  • Fallback handling
  • Monitoring and alerting
  • Migration steps