一、问题场景分析
1.1 常见的订单超时场景
- 未支付订单超时取消(电商购物车)
- 超时自动确认收货(物流订单)
- 优惠券过期失效
- 团购订单超时退款
- 抢购订单支付超时
1.2 核心挑战
- 高并发:海量订单同时超时
- 实时性:精确控制超时时间
- 可靠性:确保超时处理不丢失
- 可扩展:支持业务增长
二、解决方案对比
| 方案 | 实时性 | 可靠性 | 复杂度 | 适用场景 |
|---|---|---|---|---|
| 定时任务扫表 | 低 | 中 | 低 | 小规模系统 |
| 延迟队列 | 高 | 高 | 中 | 中等规模 |
| 时间轮算法 | 高 | 高 | 高 | 高并发系统 |
| Redis过期监听 | 中 | 中 | 低 | 快速验证 |
| 分布式调度 | 高 | 高 | 高 | 大规模分布式 |
三、具体实现方案
3.1 方案一:定时任务扫表(传统方案)
实现原理
java
@Component
@Slf4j
public class OrderTimeoutScanner {
@Autowired
private OrderService orderService;
/**
* 每分钟扫描一次超时订单
*/
@Scheduled(fixedDelay = 60000) // 每分钟执行
public void scanTimeoutOrders() {
try {
// 1. 查询待处理的超时订单(分批查询)
List<Order> timeoutOrders = orderService.findTimeoutOrders(
OrderStatus.PENDING_PAYMENT,
LocalDateTime.now().minusMinutes(30), // 30分钟未支付
0,
100 // 每次处理100条
);
// 2. 处理每条订单
for (Order order : timeoutOrders) {
processTimeoutOrder(order);
}
} catch (Exception e) {
log.error("扫描超时订单失败", e);
}
}
private void processTimeoutOrder(Order order) {
// 使用乐观锁防止重复处理
int updated = orderService.cancelTimeoutOrder(
order.getId(),
order.getVersion()
);
if (updated > 0) {
log.info("成功取消超时订单: {}", order.getId());
// 触发后续操作:释放库存、发送通知等
afterCancelOrder(order);
}
}
}
优化策略
java
/**
 * Shard-by-shard batch scanner that cancels timed-out orders in bulk.
 *
 * <p>NOTE(review): {@code orderDao}, {@code notificationService} and {@code log} are not
 * declared in this class as shown — confirm they are injected/provided.
 */
@Service
public class OptimizedOrderScanner {
    /** Number of table shards to scan. */
    private static final int SHARD_COUNT = 10;
    /** Number of order ids fetched per query. */
    private static final int PAGE_SIZE = 100;
    /** Minutes an order may remain unpaid before it is considered timed out. */
    private static final int PAYMENT_TIMEOUT_MINUTES = 30;

    /**
     * Scans every shard and cancels all orders that timed out before this call started.
     *
     * <p>The cut-off time is computed once so that every page of every shard is evaluated
     * against the same deadline.
     */
    public void batchScanTimeoutOrders() {
        LocalDateTime deadline = LocalDateTime.now().minusMinutes(PAYMENT_TIMEOUT_MINUTES);
        for (int shard = 0; shard < SHARD_COUNT; shard++) {
            scanShard(shard, deadline);
        }
    }

    /**
     * Drains one shard page by page.
     *
     * <p>The offset only advances past ids that were NOT cancelled. Cancelled rows are
     * presumed to drop out of later query results (TODO confirm the DAO filters by status);
     * advancing by a full page after cancelling would then skip the rows that slide forward.
     * If the very same page comes back again, nothing left the result set, so it is stepped
     * over to guarantee termination.
     */
    private void scanShard(int shard, LocalDateTime deadline) {
        int offset = 0;
        List<Long> lastBatch = null;
        while (true) {
            List<Long> orderIds = orderDao.findTimeoutOrderIds(shard, deadline, offset, PAGE_SIZE);
            if (orderIds.isEmpty()) {
                return;
            }
            if (orderIds.equals(lastBatch)) {
                offset += orderIds.size();
                lastBatch = null;
                continue;
            }
            // Processed synchronously: the next query must observe the result of this cancel.
            int cancelled = cancelBatch(orderIds);
            offset += Math.max(0, orderIds.size() - cancelled);
            lastBatch = orderIds;
        }
    }

    /**
     * Cancels a batch of orders and sends the batch notification.
     *
     * <p>Declared {@code @Async}; note that the annotation only takes effect for calls that
     * go through the Spring proxy, not for calls made from inside this class.
     *
     * @param orderIds ids of the orders to cancel
     * @return an already-completed future
     */
    @Async("orderTimeoutExecutor")
    public CompletableFuture<Void> batchProcessOrders(List<Long> orderIds) {
        cancelBatch(orderIds);
        return CompletableFuture.completedFuture(null);
    }

    /**
     * Issues one bulk cancel for the given ids, logs the affected count and notifies.
     *
     * @return the number of rows reported as affected by the bulk cancel
     */
    private int cancelBatch(List<Long> orderIds) {
        int affected = orderDao.batchCancelOrders(orderIds);
        log.info("批量取消订单数量: {}", affected);
        notificationService.sendBatchCancelNotification(orderIds);
        return affected;
    }
}
3.2 方案二:延迟队列(RabbitMQ实现)
架构设计
java
创建订单 → 发送消息到延迟队列(TTL 30min,无消费者) → 消息过期后经死信交换机路由 → 取消队列 → 消费处理
具体实现
java
/**
 * Declares the RabbitMQ topology for delayed order cancellation: one direct exchange, a TTL
 * queue whose expired messages are dead-lettered back to the exchange, and the queue that
 * receives those dead-lettered messages.
 */
@Configuration
public class RabbitMQDelayConfig {
    private static final String ORDER_EXCHANGE = "order.exchange";
    private static final String DELAY_QUEUE = "order.delay.queue";
    private static final String CANCEL_QUEUE = "order.cancel.queue";
    private static final String CREATE_ROUTING_KEY = "order.create";
    private static final String CANCEL_ROUTING_KEY = "order.cancel";
    /** Message time-to-live in the delay queue: 30 minutes, in milliseconds. */
    private static final int DELAY_TTL_MILLIS = 30 * 60 * 1000;

    /** The direct exchange used for both the create and the cancel routing keys. */
    @Bean
    public DirectExchange orderExchange() {
        return new DirectExchange(ORDER_EXCHANGE);
    }

    /**
     * Durable, non-exclusive, non-auto-delete queue; messages that outlive the TTL are
     * dead-lettered to the order exchange with the cancel routing key.
     */
    @Bean
    public Queue orderDelayQueue() {
        Map<String, Object> arguments = new HashMap<>();
        arguments.put("x-message-ttl", DELAY_TTL_MILLIS);
        arguments.put("x-dead-letter-routing-key", CANCEL_ROUTING_KEY);
        arguments.put("x-dead-letter-exchange", ORDER_EXCHANGE);
        return new Queue(DELAY_QUEUE, true, false, false, arguments);
    }

    /** Durable queue that receives the dead-lettered (expired) messages. */
    @Bean
    public Queue orderCancelQueue() {
        return new Queue(CANCEL_QUEUE, true);
    }

    /** Routes {@code order.create} messages into the delay queue. */
    @Bean
    public Binding delayBinding() {
        Queue delayQueue = orderDelayQueue();
        DirectExchange exchange = orderExchange();
        return BindingBuilder.bind(delayQueue).to(exchange).with(CREATE_ROUTING_KEY);
    }

    /** Routes {@code order.cancel} messages into the cancel queue. */
    @Bean
    public Binding cancelBinding() {
        Queue cancelQueue = orderCancelQueue();
        DirectExchange exchange = orderExchange();
        return BindingBuilder.bind(cancelQueue).to(exchange).with(CANCEL_ROUTING_KEY);
    }
}
@Service
@Slf4j
public class OrderDelayService {
@Autowired
private RabbitTemplate rabbitTemplate;
/**
* 创建订单时发送延迟消息
*/
public void sendOrderTimeoutMessage(Order order) {
OrderTimeoutMessage message = new OrderTimeoutMessage();
message.setOrderId(order.getId());
message.setCreateTime(LocalDateTime.now());
message.setTimeoutMinutes(30);
rabbitTemplate.convertAndSend(
"order.exchange",
"order.create",
message,
msg -> {
// 设置消息持久化
msg.getMessageProperties().setDeliveryMode(MessageDeliveryMode.PERSISTENT);
return msg;
}
);
log.info("发送订单超时延迟消息: {}", order.getId());
}
/**
* 消费超时订单消息
*/
@RabbitListener(queues = "order.cancel.queue")
public void handleOrderTimeout(OrderTimeoutMessage message) {
try {
Order order = orderService.getOrder(message.getOrderId());
// 检查订单状态,防止重复处理
if (order.getStatus() == OrderStatus.PENDING_PAYMENT) {
boolean success = orderService.cancelOrder(order.getId());
if (success) {
log.info("处理超时订单成功: {}", order.getId());
// 发送取消通知
notificationService.sendOrderCancelNotification(order);
// 释放库存
inventoryService.releaseStock(order.getItems());
}
}
// 手动ACK
channel.basicAck(deliveryTag, false);
} catch (Exception e) {
log.error("处理超时订单失败: {}", message.getOrderId(), e);
// 重试机制
if (retryCount < 3) {
channel.basicNack(deliveryTag, false, true);
} else {
// 记录到死信队列
channel.basicNack(deliveryTag, false, false);
}
}
}
/**
* 订单支付成功时,删除延迟消息
*/
public void cancelOrderTimeout(Long orderId) {
// 方案1:使用Redis记录已支付订单,消费时过滤
redisTemplate.opsForSet().add("paid_orders", orderId.toString());
// 方案2:发送取消消息,延迟队列消费时处理
rabbitTemplate.convertAndSend(
"order.exchange",
"order.paid",
orderId
);
}
}
3.3 方案三:时间轮算法(Time Wheel)
Netty HashedWheelTimer实现
java
@Component
public class OrderTimeoutWheel {
// 时间轮:60秒一轮,每1秒一个刻度
private final HashedWheelTimer timer = new HashedWheelTimer(
1, TimeUnit.SECONDS, 60
);
// 存储任务引用,用于取消
private final ConcurrentHashMap<Long, Timeout> timeoutTasks =
new ConcurrentHashMap<>();
@Autowired
private RedisTemplate<String, String> redisTemplate;
/**
* 添加订单超时任务
*/
public void addOrderTimeoutTask(Order order) {
long orderId = order.getId();
long delaySeconds = 30 * 60; // 30分钟
// 存储到Redis,防止重启丢失
redisTemplate.opsForValue().set(
"order:timeout:" + orderId,
String.valueOf(System.currentTimeMillis() + delaySeconds * 1000),
delaySeconds + 60, TimeUnit.SECONDS // 多加60秒缓冲
);
// 创建时间轮任务
Timeout timeout = timer.newTimeout(new TimerTask() {
@Override
public void run(Timeout timeout) throws Exception {
processOrderTimeout(orderId);
}
}, delaySeconds, TimeUnit.SECONDS);
timeoutTasks.put(orderId, timeout);
}
/**
* 处理订单超时
*/
private void processOrderTimeout(Long orderId) {
// 双重检查:从数据库确认订单状态
Order order = orderService.getOrder(orderId);
if (order != null && order.getStatus() == OrderStatus.PENDING_PAYMENT) {
// 使用分布式锁,防止集群多节点重复处理
String lockKey = "lock:order:cancel:" + orderId;
boolean locked = redisTemplate.opsForValue()
.setIfAbsent(lockKey, "1", 30, TimeUnit.SECONDS);
if (locked) {
try {
boolean success = orderService.cancelOrder(orderId);
if (success) {
log.info("时间轮处理超时订单: {}", orderId);
// 清理任务
timeoutTasks.remove(orderId);
redisTemplate.delete("order:timeout:" + orderId);
}
} finally {
redisTemplate.delete(lockKey);
}
}
}
}
/**
* 取消超时任务(用户已支付)
*/
public void cancelOrderTimeoutTask(Long orderId) {
// 移除时间轮任务
Timeout timeout = timeoutTasks.remove(orderId);
if (timeout != null) {
timeout.cancel();
}
// 清理Redis记录
redisTemplate.delete("order:timeout:" + orderId);
}
/**
* 应用重启时恢复任务
*/
@PostConstruct
public void recoverTimeoutTasks() {
// 从Redis恢复未处理的超时任务
Set<String> keys = redisTemplate.keys("order:timeout:*");
for (String key : keys) {
String orderIdStr = key.substring("order:timeout:".length());
Long orderId = Long.parseLong(orderIdStr);
String expireTimeStr = redisTemplate.opsForValue().get(key);
long expireTime = Long.parseLong(expireTimeStr);
long delay = expireTime - System.currentTimeMillis();
if (delay > 0) {
// 重新添加到时间轮
Timeout timeout = timer.newTimeout(new TimerTask() {
@Override
public void run(Timeout timeout) throws Exception {
processOrderTimeout(orderId);
}
}, delay, TimeUnit.MILLISECONDS);
timeoutTasks.put(orderId, timeout);
} else {
// 立即处理已超时订单
processOrderTimeout(orderId);
}
}
}
}
3.4 方案四:Redis过期监听
配置实现
java
/**
 * Registers the Redis listener container that forwards key-expiry events to the
 * order-timeout listener.
 */
@Configuration
public class RedisConfig {
    /**
     * Builds the listener container and subscribes the listener to the
     * {@code __keyevent@*__:expired} pattern, i.e. expiry events of every database index.
     *
     * @param connectionFactory the Redis connection factory to listen on
     * @param listener          the handler for expired-key events
     * @return the configured container
     */
    @Bean
    public RedisMessageListenerContainer container(
            RedisConnectionFactory connectionFactory,
            OrderTimeoutListener listener) {
        PatternTopic expiredEvents = new PatternTopic("__keyevent@*__:expired");

        RedisMessageListenerContainer listenerContainer = new RedisMessageListenerContainer();
        listenerContainer.setConnectionFactory(connectionFactory);
        listenerContainer.addMessageListener(listener, expiredEvents);
        return listenerContainer;
    }
}
@Component
@Slf4j
public class OrderTimeoutListener implements MessageListener {
@Autowired
private OrderService orderService;
@Override
public void onMessage(Message message, byte[] pattern) {
String expiredKey = new String(message.getBody());
// 只处理订单超时key
if (expiredKey.startsWith("order:timeout:")) {
String orderIdStr = expiredKey.substring("order:timeout:".length());
Long orderId = Long.parseLong(orderIdStr);
// 异步处理,避免阻塞监听线程
CompletableFuture.runAsync(() -> {
processOrderTimeout(orderId);
});
}
}
private void processOrderTimeout(Long orderId) {
// 使用分布式锁
String lockKey = "lock:order:expire:" + orderId;
boolean locked = redisTemplate.opsForValue()
.setIfAbsent(lockKey, "1", 10, TimeUnit.SECONDS);
if (locked) {
try {
Order order = orderService.getOrder(orderId);
if (order != null && order.getStatus() == OrderStatus.PENDING_PAYMENT) {
orderService.cancelOrder(orderId);
log.info("Redis过期监听处理订单: {}", orderId);
}
} finally {
redisTemplate.delete(lockKey);
}
}
}
}
/**
 * Creates and removes the expiring Redis keys ({@code order:timeout:<orderId>}) that drive
 * the expiry-event based timeout handling.
 */
@Service
public class OrderRedisTimeoutService {
    @Autowired
    private RedisTemplate<String, String> redisTemplate;

    /**
     * Stores a JSON snapshot (order id and current time) under the order's timeout key with
     * the given time-to-live.
     *
     * @param orderId        id of the order
     * @param timeoutMinutes time-to-live of the key, in minutes
     */
    public void setOrderTimeout(Long orderId, int timeoutMinutes) {
        OrderInfo snapshot = new OrderInfo();
        snapshot.setOrderId(orderId);
        snapshot.setCreateTime(LocalDateTime.now());
        String payload = JSON.toJSONString(snapshot);

        redisTemplate.opsForValue().set(timeoutKey(orderId), payload, timeoutMinutes, TimeUnit.MINUTES);
    }

    /**
     * Deletes the order's timeout key.
     *
     * @param orderId id of the order
     */
    public void cancelOrderTimeout(Long orderId) {
        redisTemplate.delete(timeoutKey(orderId));
    }

    /** Builds the Redis key that represents the timeout of the given order. */
    private static String timeoutKey(Long orderId) {
        return "order:timeout:" + orderId;
    }
}
3.5 方案五:RocketMQ延迟消息
实现方案
java
/**
 * Order-timeout handling on top of RocketMQ delayed messages: sends a delayed message when an
 * order is created and cancels the order on consumption if it is still unpaid.
 */
@Service
@Slf4j
public class RocketMQOrderTimeoutService {
    private static final String TIMEOUT_TOPIC = "order-timeout-topic";
    private static final String PAID_TOPIC = "order-paid-topic";
    /** Delay level used for the timeout message (level 16, i.e. 30 minutes per the original note). */
    private static final int TIMEOUT_DELAY_LEVEL = 16;
    /** Send timeout for the synchronous send, in milliseconds. */
    private static final long SEND_TIMEOUT_MILLIS = 3000;

    @Autowired
    private RocketMQTemplate rocketMQTemplate;

    /**
     * Synchronously sends the delayed timeout message for an order, keyed by the order id.
     *
     * @param order the order that was just created
     */
    public void sendOrderTimeoutMessage(Order order) {
        OrderTimeoutMessage payload = new OrderTimeoutMessage();
        payload.setOrderId(order.getId());
        payload.setTimeoutLevel(TIMEOUT_DELAY_LEVEL);

        Message<OrderTimeoutMessage> rocketMsg = MessageBuilder
            .withPayload(payload)
            .setHeader(MessageConst.PROPERTY_KEYS, order.getId().toString())
            .build();

        rocketMQTemplate.syncSend(TIMEOUT_TOPIC, rocketMsg, SEND_TIMEOUT_MILLIS, TIMEOUT_DELAY_LEVEL);
        log.info("发送RocketMQ延迟消息: {}", order.getId());
    }

    /**
     * Consumer of the delayed timeout messages.
     */
    @RocketMQMessageListener(
        topic = "order-timeout-topic",
        consumerGroup = "order-timeout-consumer"
    )
    public class OrderTimeoutConsumer implements RocketMQListener<OrderTimeoutMessage> {
        /**
         * Re-reads the order and cancels it only when it is still awaiting payment. Any
         * exception is logged and rethrown to the caller.
         */
        @Override
        public void onMessage(OrderTimeoutMessage message) {
            Long orderId = message.getOrderId();
            try {
                Order current = orderService.getOrder(orderId);
                if (current == null) {
                    log.warn("订单不存在: {}", orderId);
                    return;
                }
                if (current.getStatus() != OrderStatus.PENDING_PAYMENT) {
                    log.info("订单状态已变更,无需处理: {}", orderId);
                    return;
                }
                if (orderService.cancelOrder(orderId)) {
                    log.info("RocketMQ处理超时订单: {}", orderId);
                    notificationService.sendOrderCancelNotification(current);
                }
            } catch (Exception e) {
                log.error("处理超时订单异常: {}", orderId, e);
                throw e;
            }
        }
    }

    /**
     * Called when an order has been paid. An already-sent delayed message is not withdrawn
     * here; instead a 2-hour marker key is written to Redis and the order id is published to
     * the paid topic, and the consumer relies on its own status check.
     *
     * @param orderId id of the paid order
     */
    public void onOrderPaid(Long orderId) {
        String paidMarkerKey = "order:paid:" + orderId;
        redisTemplate.opsForValue().set(paidMarkerKey, "1", 2, TimeUnit.HOURS);
        rocketMQTemplate.syncSend(PAID_TOPIC, orderId);
    }
}
四、高级优化方案
4.1 分布式延迟调度框架
架构设计
java
/**
* 基于Redis ZSet的分布式延迟队列
*/
@Component
@Slf4j
public class DistributedDelayQueue {
private static final String DELAY_QUEUE_KEY = "delay:queue:order";
@Autowired
private RedisTemplate<String, String> redisTemplate;
@Autowired
private ThreadPoolTaskExecutor delayQueueExecutor;
/**
* 添加延迟任务
*/
public void addDelayTask(Long orderId, long delaySeconds) {
double score = System.currentTimeMillis() + delaySeconds * 1000;
redisTemplate.opsForZSet().add(
DELAY_QUEUE_KEY,
orderId.toString(),
score
);
log.info("添加延迟任务,订单ID: {}, 延迟: {}秒", orderId, delaySeconds);
}
/**
* 启动延迟队列处理器
*/
@PostConstruct
public void startDelayQueueProcessor() {
delayQueueExecutor.execute(() -> {
while (!Thread.currentThread().isInterrupted()) {
try {
// 获取到期的任务
Set<String> expiredTasks = getExpiredTasks();
if (!expiredTasks.isEmpty()) {
// 批量处理
batchProcessTasks(expiredTasks);
} else {
// 无任务,短暂休眠
Thread.sleep(100);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break;
} catch (Exception e) {
log.error("延迟队列处理器异常", e);
}
}
});
}
/**
* 获取到期任务
*/
private Set<String> getExpiredTasks() {
long now = System.currentTimeMillis();
// 使用原子操作获取并移除到期任务
String luaScript =
"local expired = redis.call('zrangebyscore', KEYS[1], 0, ARGV[1], 'limit', 0, 100) " +
"if #expired > 0 then " +
" redis.call('zremrangebyrank', KEYS[1], 0, #expired-1) " +
"end " +
"return expired";
List<String> expired = redisTemplate.execute(
new DefaultRedisScript<>(luaScript, List.class),
Collections.singletonList(DELAY_QUEUE_KEY),
String.valueOf(now)
);
return new HashSet<>(expired);
}
/**
* 批量处理任务
*/
private void batchProcessTasks(Set<String> orderIds) {
// 分组处理,提高并发度
orderIds.parallelStream().forEach(orderIdStr -> {
Long orderId = Long.parseLong(orderIdStr);
// 使用分布式锁防止重复处理
String lockKey = "lock:delay:task:" + orderId;
boolean locked = redisTemplate.opsForValue()
.setIfAbsent(lockKey, "1", 30, TimeUnit.SECONDS);
if (locked) {
try {
processOrderTimeout(orderId);
} finally {
redisTemplate.delete(lockKey);
}
}
});
}
}
4.2 混合方案:分级超时处理
java
/**
* 分级超时处理策略
* 1. 15分钟:发送提醒
* 2. 30分钟:自动取消
* 3. 60分钟:强制清理
*/
/**
 * Schedules three escalating timer tasks per order: a payment reminder, an automatic cancel
 * and a final cleanup.
 *
 * <p>NOTE(review): {@code orderService} and {@code log} are used but not declared in this
 * class as shown — confirm they are provided.
 */
@Component
public class MultiLevelTimeoutStrategy {
    private static final long REMINDER_DELAY_MINUTES = 15;
    private static final long CANCEL_DELAY_MINUTES = 30;
    private static final long CLEANUP_DELAY_MINUTES = 60;

    @Autowired
    private HashedWheelTimer timer;
    @Autowired
    private NotificationService notificationService;

    /**
     * Schedules the reminder (15 min), cancel (30 min) and cleanup (60 min) tasks.
     *
     * @param order the order to watch
     */
    public void setupMultiLevelTimeout(Order order) {
        long orderId = order.getId();
        timer.newTimeout(timeout -> sendReminder(orderId),
            REMINDER_DELAY_MINUTES, TimeUnit.MINUTES);
        timer.newTimeout(timeout -> cancelOrderIfNotPaid(orderId),
            CANCEL_DELAY_MINUTES, TimeUnit.MINUTES);
        timer.newTimeout(timeout -> cleanupOrder(orderId),
            CLEANUP_DELAY_MINUTES, TimeUnit.MINUTES);
    }

    /**
     * Sends a payment reminder when the order still exists and is awaiting payment; an order
     * that can no longer be found is skipped.
     */
    private void sendReminder(Long orderId) {
        Order order = orderService.getOrder(orderId);
        if (order != null && order.getStatus() == OrderStatus.PENDING_PAYMENT) {
            notificationService.sendPaymentReminder(order);
        }
    }

    /** Delegates the conditional cancel to the order service and logs a successful cancel. */
    private void cancelOrderIfNotPaid(Long orderId) {
        boolean canceled = orderService.cancelIfNotPaid(orderId);
        if (canceled) {
            log.info("30分钟超时取消订单: {}", orderId);
        }
    }

    /** Delegates the cleanup of the order's data to the order service. */
    private void cleanupOrder(Long orderId) {
        orderService.cleanupExpiredOrder(orderId);
    }
}
五、生产环境建议
5.1 选择合适的方案
- 初创公司:使用Redis ZSet或RabbitMQ延迟队列
- 中等规模:时间轮 + Redis持久化
- 大型电商:自研分布式延迟调度系统
- 云服务用户:使用云厂商提供的消息队列延迟功能
5.2 监控与告警
java
/**
 * Publishes order-timeout metrics, raises alerts when thresholds are crossed, and hosts an
 * aspect that flags slow timeout processing.
 */
@Component
@Slf4j
public class OrderTimeoutMonitor {
    /** Backlog size above which an alert is sent. */
    private static final long BACKLOG_ALERT_THRESHOLD = 10000;
    /** Success rate below which an alert is sent. */
    private static final double MIN_SUCCESS_RATE = 0.95;
    /** Processing time above which a call counts as slow, in milliseconds. */
    private static final long SLOW_PROCESS_MILLIS = 1000;

    @Autowired
    private MetricsService metricsService;

    /**
     * Collects and reports the backlog size, the processing delay and the success rate, then
     * evaluates the alert thresholds.
     */
    public void monitorTimeoutMetrics() {
        long backlog = orderDao.countTimeoutOrders();
        metricsService.gauge("order.timeout.count", backlog);

        long delay = calculateProcessDelay();
        metricsService.gauge("order.timeout.process.delay", delay);

        double rate = calculateProcessSuccessRate();
        metricsService.gauge("order.timeout.success.rate", rate);

        raiseAlerts(backlog, rate);
    }

    /** Sends the backlog alert and/or the success-rate alert when their limits are crossed. */
    private void raiseAlerts(long backlog, double rate) {
        if (backlog > BACKLOG_ALERT_THRESHOLD) {
            alertService.sendAlert("超时订单积压警告", "数量: " + backlog);
        }
        if (rate < MIN_SUCCESS_RATE) {
            alertService.sendAlert("超时处理失败率过高", "成功率: " + rate);
        }
    }

    /**
     * Times every {@code processOrderTimeout} execution matched by the pointcut and records
     * the ones that take longer than one second.
     */
    @Aspect
    @Component
    public class TimeoutProcessMonitorAspect {
        @Around("execution(* com..*.processOrderTimeout(..))")
        public Object monitorProcessTime(ProceedingJoinPoint joinPoint) throws Throwable {
            final long begin = System.currentTimeMillis();
            try {
                return joinPoint.proceed();
            } finally {
                long elapsed = System.currentTimeMillis() - begin;
                boolean slow = elapsed > SLOW_PROCESS_MILLIS;
                if (slow) {
                    log.warn("订单超时处理缓慢,耗时: {}ms", elapsed);
                    metricsService.counter("order.timeout.slow.process").increment();
                }
            }
        }
    }
}
5.3 故障恢复机制
java
@Component
public class OrderTimeoutRecovery {
/**
* 定时补偿任务
*/
@Scheduled(cron = "0 */5 * * * *") // 每5分钟执行
public void recoverFailedTimeoutOrders() {
log.info("开始补偿处理失败的超时订单");
// 查找处理失败的订单
List<Long> failedOrders = findFailedTimeoutOrders();
// 分批重试
int batchSize = 50;
for (int i = 0; i < failedOrders.size(); i += batchSize) {
List<Long> batch = failedOrders.subList(i,
Math.min(i + batchSize, failedOrders.size()));
retryBatchOrders(batch);
}
}
/**
* 幂等性保证
*/
@Transactional
public boolean cancelOrderWithIdempotent(Long orderId, String requestId) {
// 检查是否已处理
boolean processed = checkIfProcessed(requestId);
if (processed) {
return true;
}
// 使用数据库乐观锁
int updated = orderDao.cancelOrderWithVersion(
orderId,
OrderStatus.PENDING_PAYMENT,
OrderStatus.CANCELLED_TIMEOUT
);
if (updated > 0) {
// 记录处理标记
recordProcessed(requestId);
return true;
}
return false;
}
}
六、总结建议
6.1 技术选型决策树
java
订单量 < 1万/天 → 定时任务扫表
1万 < 订单量 < 10万/天 → RabbitMQ延迟队列
10万 < 订单量 < 100万/天 → 时间轮 + Redis
订单量 > 100万/天 → 自研分布式延迟调度
6.2 最佳实践组合
- 主方案:时间轮算法处理大部分订单
- 备份方案:Redis ZSet作为持久化存储
- 补偿机制:定时任务扫描兜底
- 监控告警:实时监控处理状态
6.3 关键注意事项
- 幂等性设计:所有方案必须支持重复处理
- 分布式锁:集群环境下防止重复执行
- 可观测性:完善监控、日志、追踪
- 优雅降级:单点故障时自动切换方案
- 数据一致性:确保订单状态变更的原子性
根据业务规模、团队技术栈和SLA要求,选择合适的方案组合,并建立完善的监控和故障恢复机制。