延时消息是消息队列的重要特性,RocketMQ 通过巧妙的设计实现了 18 个延时级别。本文深入分析其底层实现机制,帮你理解延时消息的工作原理。
延时消息的应用场景
延时消息在实际业务中应用广泛:
- 订单超时取消:用户下单后 30 分钟未支付自动取消
- 消息重试:处理失败的消息延时后重新投递
- 定时任务:替代传统定时器实现任务调度
- 延时通知:会员到期前 7 天发送续费提醒
RocketMQ 延时级别设计
RocketMQ 4.x 默认预设了 18 个延时级别(可通过 Broker 配置项 messageDelayLevel 调整),不支持任意时间延时(5.0 起才提供任意时间的定时消息):
java
/**
 * Default RocketMQ delay levels. The level number is the 1-based position of
 * the entry in the space-separated string below:
 * level 1  -> 1 second
 * level 2  -> 5 seconds
 * level 3  -> 10 seconds
 * level 4  -> 30 seconds
 * level 5  -> 1 minute
 * level 6  -> 2 minutes
 * level 7  -> 3 minutes
 * level 8  -> 4 minutes
 * level 9  -> 5 minutes
 * level 10 -> 6 minutes
 * level 11 -> 7 minutes
 * level 12 -> 8 minutes
 * level 13 -> 9 minutes
 * level 14 -> 10 minutes
 * level 15 -> 20 minutes
 * level 16 -> 30 minutes
 * level 17 -> 1 hour
 * level 18 -> 2 hours
 */
private String messageDelayLevel = "1s 5s 10s 30s 1m 2m 3m 4m 5m 6m 7m 8m 9m 10m 20m 30m 1h 2h";
核心实现原理
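1. 延时消息存储机制
Broker 收到延时消息后不会直接写入目标 Topic,而是先把原始 Topic 和 QueueId 备份到消息属性(REAL_TOPIC、REAL_QID)中,再把消息改写到内部延时 Topic(SCHEDULE_TOPIC_XXXX),队列 ID 为"延时级别 - 1";每个延时级别对应一个队列,由定时任务扫描到期的消息,恢复原始 Topic 和 QueueId 后重新写入存储,此时消费者才能消费到该消息。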
2. 源码实现分析
消息接收处理
java
/**
 * Excerpt of the store write path: shows how a delayed message is redirected
 * to the internal schedule topic before it is persisted.
 */
public class CommitLog {
    private static final Logger log = LoggerFactory.getLogger(CommitLog.class);

    /**
     * Stores a message. When the message carries a positive delay level it is
     * rewritten to the schedule topic first, keeping the original topic and
     * queue id as message properties so they can be restored on delivery.
     *
     * @param msg the message to store; mutated in place when it is delayed
     * @return the result returned by the underlying store
     */
    public PutMessageResult putMessage(final MessageExtBrokerInner msg) {
        if (msg.getDelayTimeLevel() > 0) {
            // Clamp levels above the configured maximum instead of rejecting the message.
            final int maxDelayLevel =
                this.defaultMessageStore.getScheduleMessageService().getMaxDelayLevel();
            if (msg.getDelayTimeLevel() > maxDelayLevel) {
                msg.setDelayTimeLevel(maxDelayLevel);
            }

            // The original snippet assigned these two names without declaring them.
            final String topic = TopicValidator.RMQ_SYS_SCHEDULE_TOPIC;
            final int queueId = ScheduleMessageService.delayLevel2QueueId(msg.getDelayTimeLevel());

            // Back up the real destination before overwriting it.
            MessageAccessor.putProperty(msg, MessageConst.PROPERTY_REAL_TOPIC, msg.getTopic());
            MessageAccessor.putProperty(msg, MessageConst.PROPERTY_REAL_QUEUE_ID,
                String.valueOf(msg.getQueueId()));
            msg.setPropertiesString(MessageDecoder.messageProperties2String(msg.getProperties()));

            msg.setTopic(topic);
            msg.setQueueId(queueId);
        }

        // Persist the message and warn when the write is slow (> 500 ms).
        long beginTime = this.defaultMessageStore.getSystemClock().now();
        PutMessageResult result = this.commitLog.putMessage(msg);
        long elapsedTime = this.defaultMessageStore.getSystemClock().now() - beginTime;
        if (elapsedTime > 500) {
            log.warn("putMessage cost time(ms)={}, bodyLength={}", elapsedTime, msg.getBody().length);
        }
        return result;
    }
}
延时任务调度
java
/**
 * Excerpt of the schedule service: on start it creates one delivery timer task
 * per configured delay level and a periodic task that persists progress.
 */
public class ScheduleMessageService extends ConfigManager {
    private static final Logger log = LoggerFactory.getLogger(ScheduleMessageService.class);

    // Delay level -> delay value of that level (read as "timeDelay" in start()).
    private final ConcurrentMap<Integer, Long> delayLevelTable =
        new ConcurrentHashMap<>(32);

    // Timer that runs the delivery tasks and the periodic persist task.
    private final Timer timer = new Timer("ScheduleMessageTimerThread", true);

    // Thread count setting; not referenced by the code shown in this excerpt.
    private int deliverThreadPoolNums = Runtime.getRuntime().availableProcessors();

    /**
     * Starts the service once: loads the configuration, schedules a delivery
     * task for every delay level and schedules the periodic persist task.
     * Later calls are no-ops because of the compare-and-set on {@code started}.
     */
    public void start() {
        if (!started.compareAndSet(false, true)) {
            return;
        }

        this.load();

        for (Map.Entry<Integer, Long> levelEntry : this.delayLevelTable.entrySet()) {
            Integer level = levelEntry.getKey();
            Long timeDelay = levelEntry.getValue();

            // Resume from the stored offset of this level, or from 0 when none is stored.
            Long storedOffset = this.offsetTable.get(level);
            Long startOffset = (storedOffset != null) ? storedOffset : 0L;

            if (timeDelay == null) {
                continue;
            }
            this.timer.schedule(new DeliverDelayedMessageTimerTask(level, startOffset),
                FIRST_DELAY_TIME);
        }

        // Persist the delivery progress at a fixed rate while the service is started.
        TimerTask persistTask = new TimerTask() {
            @Override
            public void run() {
                try {
                    if (!started.get()) {
                        return;
                    }
                    ScheduleMessageService.this.persist();
                } catch (Throwable e) {
                    log.error("scheduleAtFixedRate flush exception", e);
                }
            }
        };
        this.timer.scheduleAtFixedRate(persistTask, 10000,
            this.defaultMessageStore.getMessageStoreConfig().getFlushDelayOffsetInterval());
    }
}
消息投递任务
java
/**
 * Timer task that scans the consume queue of one delay level, starting at a
 * given queue offset, and re-delivers the messages whose deliver time has passed.
 * It references {@code ScheduleMessageService.this}, so it is an inner class of
 * that service.
 */
class DeliverDelayedMessageTimerTask extends TimerTask {
private static final Logger log = LoggerFactory.getLogger(DeliverDelayedMessageTimerTask.class);
// Delay level whose queue this task scans.
private final int delayLevel;
// Consume-queue offset at which this run starts scanning.
private final long offset;
/**
 * Runs one scan when the service is started. If the scan throws, the error is
 * logged and a new task for the same level and offset is scheduled after
 * DELAY_FOR_A_PERIOD.
 */
@Override
public void run() {
try {
if (isStarted()) {
this.executeOnTimeup();
}
} catch (Exception e) {
log.error("DeliverDelayedMessageTimerTask exception [delayLevel={}]", delayLevel, e);
// Reschedule after a delay so one failure does not stop delivery for this level.
ScheduleMessageService.this.timer.schedule(
new DeliverDelayedMessageTimerTask(this.delayLevel, this.offset),
DELAY_FOR_A_PERIOD);
}
}
/**
 * Scans the delay queue from {@code offset}: delivers every due message and
 * schedules the follow-up task, either for the first message that is not due
 * yet, after a failed put, or at the end of the scanned buffer.
 */
public void executeOnTimeup() {
// Look up the consume queue of the schedule topic for this delay level.
ConsumeQueue cq = ScheduleMessageService.this.defaultMessageStore
.findConsumeQueue(TopicValidator.RMQ_SYS_SCHEDULE_TOPIC,
delayLevel2QueueId(delayLevel));
if (cq == null) {
// No queue yet for this level: retry later from the same offset.
this.scheduleNextTimerTask(this.offset, DELAY_FOR_A_WHILE);
return;
}
// Fetch the index entries starting at the current offset.
SelectMappedBufferResult bufferCQ = cq.getIndexBuffer(this.offset);
if (bufferCQ != null) {
try {
long nextOffset = this.offset;
int i = 0;
ConsumeQueueExt.CqExtUnit cqExtUnit = new ConsumeQueueExt.CqExtUnit();
// i advances in bytes, one CQ_STORE_UNIT_SIZE per index entry.
for (; i < bufferCQ.getSize() && isStarted(); i += ConsumeQueue.CQ_STORE_UNIT_SIZE) {
// Read one index entry: a long, an int and a long, in that order.
long offsetPy = bufferCQ.getByteBuffer().getLong();
int sizePy = bufferCQ.getByteBuffer().getInt();
long tagsCode = bufferCQ.getByteBuffer().getLong();
// Check whether the message is due; tagsCode is passed in as the deliver
// timestamp candidate (correctDeliverTimestamp is not shown here — TODO confirm).
long now = System.currentTimeMillis();
long deliverTimestamp = this.correctDeliverTimestamp(now, tagsCode);
// Queue offset of the entry currently being processed.
nextOffset = this.offset + (i / ConsumeQueue.CQ_STORE_UNIT_SIZE);
long countdown = deliverTimestamp - now;
if (countdown > 0) {
// Not due yet: come back for this entry once the remaining time has elapsed.
this.scheduleNextTimerTask(nextOffset, countdown);
return;
}
// Load the full message using the values read from the index entry.
MessageExt msgExt = ScheduleMessageService.this.defaultMessageStore
.lookMessageByOffset(offsetPy, sizePy);
if (msgExt != null) {
try {
// Build the message to deliver, restoring the original topic and queue id.
MessageExtBrokerInner msgInner =
this.messageTimeup(msgExt);
if (TopicValidator.NOT_ALLOWED_SEND_TOPIC.equals(msgInner.getTopic())) {
log.error("[BUG] the topic {} of schedule msg is not allowed to send",
msgInner.getTopic());
// Skip this message and move on to the next entry.
continue;
}
// Write the message back to the store under its real topic.
PutMessageResult putMessageResult =
ScheduleMessageService.this.writeMessageStore
.putMessage(msgInner);
if (putMessageResult != null &&
putMessageResult.getPutMessageStatus() == PutMessageStatus.PUT_OK) {
// Count the successful delivery when schedule statistics are enabled.
if (ScheduleMessageService.this.defaultMessageStore
.getMessageStoreConfig().isEnableScheduleMessageStats()) {
ScheduleMessageService.this.defaultMessageStore
.getBrokerStatsManager().incQueueGetNums(
MixAll.SCHEDULE_CONSUMER_GROUP,
TopicValidator.RMQ_SYS_SCHEDULE_TOPIC,
delayLevel - 1, 1);
}
} else {
log.error("ScheduleMessageService put message failed [topic={}, msgId={}]",
msgInner.getTopic(), msgInner.getMsgId());
// Put failed: retry later starting from this same entry.
this.scheduleNextTimerTask(nextOffset, DELAY_FOR_A_WHILE);
return;
}
} catch (Exception e) {
// The exception is only logged; the loop then continues with the next entry.
log.error("ScheduleMessageService put message exception", e);
}
}
}
// Advance the progress past the entries scanned in this run.
nextOffset = this.offset + (i / ConsumeQueue.CQ_STORE_UNIT_SIZE);
this.scheduleNextTimerTask(nextOffset, DELAY_FOR_A_WHILE);
} finally {
bufferCQ.release();
}
} else {
// Nothing readable at this offset: retry later from the same offset.
this.scheduleNextTimerTask(this.offset, DELAY_FOR_A_WHILE);
}
}
/**
 * Builds the message to deliver from a stored delayed message, restoring the
 * original topic and queue id from the backed-up properties.
 *
 * @param msgExt the delayed message read from the store
 * @return a new message addressed to the original topic and queue
 */
private MessageExtBrokerInner messageTimeup(MessageExt msgExt) {
MessageExtBrokerInner msgInner = new MessageExtBrokerInner();
msgInner.setBody(msgExt.getBody());
msgInner.setFlag(msgExt.getFlag());
// Restore the original topic.
msgInner.setTopic(msgExt.getProperty(MessageConst.PROPERTY_REAL_TOPIC));
// Restore the original queue id; parseInt throws if the property is missing or not a number.
String queueIdStr = msgExt.getProperty(MessageConst.PROPERTY_REAL_QUEUE_ID);
int queueId = Integer.parseInt(queueIdStr);
msgInner.setQueueId(queueId);
// Copy the serialized properties of the stored message.
msgInner.setPropertiesString(
MessageDecoder.messageProperties2String(msgExt.getProperties()));
msgInner.setTagsCode(MessageExtBrokerInner.tagsString2tagsCode(msgInner.getTags()));
// Reset the delay level to 0.
msgInner.setDelayTimeLevel(0);
// Carry over born timestamp, hosts and reconsume count from the stored message.
msgInner.setBornTimestamp(msgExt.getBornTimestamp());
msgInner.setBornHost(msgExt.getBornHost());
msgInner.setStoreHost(msgExt.getStoreHost());
msgInner.setReconsumeTimes(msgExt.getReconsumeTimes());
// Set waitStoreMsgOK to false and drop the delay-level property.
msgInner.setWaitStoreMsgOK(false);
MessageAccessor.clearProperty(msgInner, MessageConst.PROPERTY_DELAY_TIME_LEVEL);
return msgInner;
}
}
延时消息的限制与优化
1. 系统限制说明
java
/**
 * Broker-side settings related to delayed messages, bound from properties
 * under the {@code rocketmq.broker} prefix. Accessors are provided because
 * JavaBean-style property binding needs setters to populate private fields.
 */
@Configuration
@ConfigurationProperties(prefix = "rocketmq.broker")
public class BrokerConfiguration {
    // Size of the thread pool used for delayed messages.
    private int scheduleThreadPoolNums = 16;
    // Interval in milliseconds between two persists of the delay-queue progress.
    private int flushDelayOffsetInterval = 10000;
    // Whether delivery statistics for delayed messages are recorded.
    private boolean enableScheduleMessageStats = true;
    // Maximum number of messages per delay queue (unlimited by default).
    private long maxDelayQueueSize = Long.MAX_VALUE;
    // Backlog size above which a delay queue should raise a warning.
    private long delayQueueWarnThreshold = 100000;

    public int getScheduleThreadPoolNums() {
        return scheduleThreadPoolNums;
    }

    public void setScheduleThreadPoolNums(int scheduleThreadPoolNums) {
        this.scheduleThreadPoolNums = scheduleThreadPoolNums;
    }

    public int getFlushDelayOffsetInterval() {
        return flushDelayOffsetInterval;
    }

    public void setFlushDelayOffsetInterval(int flushDelayOffsetInterval) {
        this.flushDelayOffsetInterval = flushDelayOffsetInterval;
    }

    public boolean isEnableScheduleMessageStats() {
        return enableScheduleMessageStats;
    }

    public void setEnableScheduleMessageStats(boolean enableScheduleMessageStats) {
        this.enableScheduleMessageStats = enableScheduleMessageStats;
    }

    public long getMaxDelayQueueSize() {
        return maxDelayQueueSize;
    }

    public void setMaxDelayQueueSize(long maxDelayQueueSize) {
        this.maxDelayQueueSize = maxDelayQueueSize;
    }

    public long getDelayQueueWarnThreshold() {
        return delayQueueWarnThreshold;
    }

    public void setDelayQueueWarnThreshold(long delayQueueWarnThreshold) {
        this.delayQueueWarnThreshold = delayQueueWarnThreshold;
    }
}
2. 性能影响分析
java
/**
 * Publishes the size of every delay queue as a gauge once a minute and raises
 * an alert when a queue grows past the warning threshold.
 */
@Component
public class DelayMessagePerformanceMonitor {
    private static final Logger log = LoggerFactory.getLogger(DelayMessagePerformanceMonitor.class);

    private final MeterRegistry meterRegistry;
    private final long delayQueueWarnThreshold = 100000L;

    // One strongly-held value holder per delay level. Micrometer keeps only a weak
    // reference to the gauged object, so gauging an autoboxed long loses its value
    // as soon as the box is collected and never picks up later updates.
    private final Map<Integer, java.util.concurrent.atomic.AtomicLong> queueSizeGauges =
        new ConcurrentHashMap<>();

    /**
     * @param meterRegistry registry the gauges are registered with
     */
    public DelayMessagePerformanceMonitor(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
    }

    /**
     * Samples the delay-queue sizes, updates the per-level gauges and sends an
     * alert for every level above {@code delayQueueWarnThreshold}.
     */
    @Scheduled(fixedDelay = 60000)
    public void monitorPerformance() {
        Map<Integer, Long> delayQueueSize = getDelayQueueSize();
        for (Map.Entry<Integer, Long> entry : delayQueueSize.entrySet()) {
            int level = entry.getKey();
            long size = entry.getValue();

            // Register the gauge once per level, then only update its holder.
            queueSizeGauges.computeIfAbsent(level, key -> meterRegistry.gauge(
                "rocketmq.delay.queue.size",
                Tags.of("level", String.valueOf(key)),
                new java.util.concurrent.atomic.AtomicLong())).set(size);

            if (size > delayQueueWarnThreshold) {
                log.warn("延时队列消息堆积告警 [level={}, size={}, threshold={}]",
                    level, size, delayQueueWarnThreshold);
                alertService.sendAlert("延时队列堆积",
                    String.format("级别%d的延时队列堆积%d条消息", level, size));
            }
        }

        // NOTE(review): if getAverageDeliveryLatency() returns a freshly boxed number,
        // this gauge has the same weak-reference problem as above — confirm and hold
        // the value in a field if so.
        meterRegistry.gauge("rocketmq.delay.delivery.latency",
            getAverageDeliveryLatency());
    }

    /**
     * Reads the number of entries in the consume queue of each of the 18 delay
     * levels.
     *
     * @return delay level -> queue size; empty when the message store is not
     *         available, partial when a lookup fails midway
     */
    private Map<Integer, Long> getDelayQueueSize() {
        if (defaultMessageStore == null) {
            log.warn("defaultMessageStore is not initialized");
            return Collections.emptyMap();
        }
        Map<Integer, Long> result = new ConcurrentHashMap<>();
        try {
            for (int level = 1; level <= 18; level++) {
                // Queue id is the delay level minus one.
                ConsumeQueue cq = defaultMessageStore.findConsumeQueue(
                    TopicValidator.RMQ_SYS_SCHEDULE_TOPIC, level - 1);
                if (cq != null) {
                    result.put(level, cq.getMaxOffsetInQueue() - cq.getMinOffsetInQueue());
                }
            }
        } catch (Exception e) {
            log.error("获取延时队列大小失败", e);
        }
        return result;
    }
}
实战案例:订单超时取消
1. 完整实现方案
java
/**
 * Creates orders and schedules the delayed "cancel if still unpaid" check.
 */
@Component
public class OrderService {
    private static final Logger log = LoggerFactory.getLogger(OrderService.class);

    // Topic consumed by the order-cancel listener.
    private static final String ORDER_CANCEL_TOPIC = "order-cancel-topic";
    // Delay level 16 is 30 minutes with the default level table.
    private static final int ORDER_CANCEL_DELAY_LEVEL = 16;

    @Autowired
    private RocketMQTemplate rocketMQTemplate;
    @Autowired
    private OrderRepository orderRepository;
    @Autowired
    private CompensationService compensationService;

    /**
     * Creates an order and sends a delayed message that triggers the unpaid
     * check 30 minutes later (RocketMQ 4.9.0+, Spring Boot starter 2.2.0+).
     *
     * <p>NOTE(review): the method is transactional and rethrows after recording
     * the compensation entry, so both the order and (if it joins this
     * transaction) the compensation record may be rolled back although the error
     * message says the order was created — confirm the intended propagation.
     *
     * @param request the order request
     * @return a success response carrying the new order id
     * @throws OrderProcessException when the order cannot be saved or the delayed
     *         message cannot be sent
     */
    @Transactional
    public CreateOrderResponse createOrder(CreateOrderRequest request) {
        Order order = null;
        if (request.getUserId() != null) {
            MDC.put("userId", request.getUserId());
        }
        try {
            // 1. Persist the order.
            order = new Order();
            order.setId(generateOrderId());
            order.setUserId(request.getUserId());
            order.setAmount(request.getAmount());
            order.setStatus(OrderStatus.UNPAID);
            order.setCreateTime(new Date());
            orderRepository.save(order);
            MDC.put("orderId", order.getId());

            // 2. Build the delayed cancel-check message.
            OrderCancelMessage message = new OrderCancelMessage();
            message.setOrderId(order.getId());
            message.setUserId(order.getUserId());
            message.setCreateTime(System.currentTimeMillis());
            message.setMessageId(UUID.randomUUID().toString());

            Message<OrderCancelMessage> msg = MessageBuilder
                .withPayload(message)
                .setHeader(RocketMQHeaders.KEYS, order.getId())
                .build();

            // The delay level is an argument of the send call, not a message header.
            SendResult sendResult = rocketMQTemplate.syncSend(ORDER_CANCEL_TOPIC, msg,
                rocketMQTemplate.getProducer().getSendMsgTimeout(), ORDER_CANCEL_DELAY_LEVEL);
            if (sendResult == null || sendResult.getSendStatus() != SendStatus.SEND_OK) {
                throw new MessagingException("延时消息发送失败");
            }

            log.info("订单创建成功 [orderId={}, delayLevel={}, msgId={}, topic={}]",
                order.getId(), ORDER_CANCEL_DELAY_LEVEL, sendResult.getMsgId(), ORDER_CANCEL_TOPIC);
            return CreateOrderResponse.success(order.getId());
        } catch (MessagingException e) {
            log.error("发送延时消息失败 [orderId={}]",
                order != null ? order.getId() : "null", e);
            // Record the failed send so the compensation job can retry it.
            if (order != null) {
                compensationService.recordFailedDelayMessage(order.getId(),
                    ORDER_CANCEL_TOPIC, ORDER_CANCEL_DELAY_LEVEL);
            }
            throw new OrderProcessException("订单创建成功但延时消息发送失败", e);
        } catch (Exception e) {
            log.error("创建订单失败", e);
            throw new OrderProcessException("订单创建失败", e);
        } finally {
            MDC.clear();
        }
    }
}
/**
 * Consumer of the delayed order-cancel messages: cancels the order when it is
 * still unpaid at delivery time.
 */
@Component
@RocketMQMessageListener(
    topic = "order-cancel-topic",
    consumerGroup = "order-cancel-consumer",
    consumeMode = ConsumeMode.CONCURRENTLY,
    consumeThreadMax = 20,
    maxReconsumeTimes = 3
)
public class OrderCancelConsumer implements RocketMQListener<OrderCancelMessage> {
    private static final Logger log = LoggerFactory.getLogger(OrderCancelConsumer.class);

    @Autowired
    private OrderRepository orderRepository;
    @Autowired
    private RedisTemplate<String, String> redisTemplate;
    @Autowired
    private InventoryService inventoryService;
    @Autowired
    private DelayMessageCancellationService cancellationService;

    /**
     * Handles one cancel message. The message is skipped when it was cancelled,
     * when another consumer already holds its idempotency key, or when the order
     * is missing or no longer unpaid.
     *
     * @param message the delayed cancel message
     * @throws RuntimeException when processing fails, so that the message is retried
     */
    @Override
    public void onMessage(OrderCancelMessage message) {
        String messageId = message.getMessageId();
        String orderId = message.getOrderId();
        String lockKey = "order:cancel:lock:" + messageId;
        // Tracks whether THIS invocation set the key, so a failure never removes
        // a key that belongs to another consumer.
        boolean lockAcquired = false;

        MDC.put("messageId", messageId);
        MDC.put("orderId", orderId);
        if (message.getUserId() != null) {
            MDC.put("userId", message.getUserId());
        }
        try {
            // Skip messages that were cancelled at the business level.
            if (cancellationService.isCancelled(messageId)) {
                log.info("消息已被取消,跳过处理 [messageId={}, orderId={}]", messageId, orderId);
                return;
            }

            // Idempotency check. setIfAbsent may return null (pipeline/transaction),
            // so compare against TRUE instead of unboxing.
            lockAcquired = Boolean.TRUE.equals(redisTemplate.opsForValue()
                .setIfAbsent(lockKey, "1", 24, TimeUnit.HOURS));
            if (!lockAcquired) {
                log.info("消息已被消费,跳过 [messageId={}, orderId={}]", messageId, orderId);
                return;
            }

            Order order = orderRepository.findById(orderId)
                .orElse(null);
            if (order == null) {
                log.warn("订单不存在 [orderId={}]", orderId);
                return;
            }

            // Only orders that are still unpaid are cancelled.
            if (order.getStatus() != OrderStatus.UNPAID) {
                log.info("订单状态已变更,无需取消 [orderId={}, status={}]",
                    orderId, order.getStatus());
                return;
            }

            long actualDelay = System.currentTimeMillis() - message.getCreateTime();
            log.info("处理订单超时取消 [orderId={}, actualDelay={}ms]", orderId, actualDelay);

            cancelOrder(order);
            recordConsumeSuccess(messageId, orderId);
        } catch (Exception e) {
            log.error("处理订单超时取消失败 [orderId={}]", orderId, e);
            // Release the key only if we set it, so the retry can acquire it again.
            if (lockAcquired) {
                redisTemplate.delete(lockKey);
            }
            throw new RuntimeException("订单取消失败", e);
        } finally {
            MDC.clear();
        }
    }

    /**
     * Marks the order as cancelled, restores its inventory and sends the
     * cancellation notification.
     *
     * <p>NOTE(review): with proxy-based Spring transactions, {@code @Transactional}
     * on a private, self-invoked method is not applied — confirm whether these
     * steps are meant to run in one transaction and, if so, move them to a
     * separate bean or use a programmatic transaction.
     */
    @Transactional
    private void cancelOrder(Order order) {
        // 1. Update the order state.
        order.setStatus(OrderStatus.CANCELLED);
        order.setCancelTime(new Date());
        order.setCancelReason("超时未支付");
        orderRepository.save(order);
        // 2. Give the reserved stock back.
        inventoryService.restoreInventory(order.getOrderItems());
        // 3. Notify about the cancellation.
        sendCancelNotification(order);
        log.info("订单超时未支付,已自动取消 [orderId={}, userId={}]",
            order.getId(), order.getUserId());
    }

    /** Stores a consume record with status SUCCESS for monitoring and auditing. */
    private void recordConsumeSuccess(String messageId, String orderId) {
        ConsumeRecord record = new ConsumeRecord();
        record.setMessageId(messageId);
        record.setOrderId(orderId);
        record.setConsumeTime(new Date());
        record.setStatus("SUCCESS");
        consumeRecordRepository.save(record);
    }
}
2. 补偿机制实现
java
/**
 * Records delayed messages that could not be sent and retries them
 * periodically.
 */
@Component
public class DelayMessageCompensationService {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageCompensationService.class);

    @Autowired
    private RocketMQTemplate rocketMQTemplate;
    @Autowired
    private JdbcTemplate jdbcTemplate;

    /**
     * Stores a PENDING compensation record for a delayed message that failed to send.
     *
     * @param businessId business key of the message (the order id in this example)
     * @param topic      destination topic
     * @param delayLevel delay level the message was meant to use
     */
    @Transactional
    public void recordFailedDelayMessage(String businessId, String topic, int delayLevel) {
        String sql = "INSERT INTO delay_message_compensation " +
            "(business_id, topic, delay_level, status, create_time, retry_count) " +
            "VALUES (?, ?, ?, 'PENDING', ?, 0)";
        jdbcTemplate.update(sql, businessId, topic, delayLevel, new Date());
    }

    /**
     * Every five minutes, retries up to 100 PENDING records of the last 24 hours
     * that have been retried fewer than three times.
     */
    @Scheduled(fixedDelay = 300000)
    public void compensateFailedMessages() {
        String sql = "SELECT * FROM delay_message_compensation " +
            "WHERE status = 'PENDING' AND retry_count < 3 " +
            "AND create_time > ? ORDER BY create_time LIMIT 100";
        Date startTime = new Date(System.currentTimeMillis() - 24L * 3600 * 1000);
        // Varargs overload; the Object[]-based overload is deprecated.
        List<CompensationRecord> records = jdbcTemplate.query(sql,
            new BeanPropertyRowMapper<>(CompensationRecord.class), startTime);
        for (CompensationRecord record : records) {
            try {
                resendDelayMessage(record);
                updateCompensationStatus(record.getId(), "SUCCESS");
            } catch (Exception e) {
                log.error("补偿延时消息失败 [id={}, businessId={}]",
                    record.getId(), record.getBusinessId(), e);
                incrementRetryCount(record.getId());
            }
        }
    }

    /**
     * Re-sends the delayed message for a record when the order is still unpaid;
     * cancels the order directly when the original delay has already elapsed.
     *
     * @throws IllegalStateException when the broker does not acknowledge the
     *         send, so that the caller counts the attempt as failed
     */
    private void resendDelayMessage(CompensationRecord record) {
        Order order = orderRepository.findById(record.getBusinessId())
            .orElseThrow(() -> new BusinessException("订单不存在"));
        // Only unpaid orders still need the cancel check.
        if (order.getStatus() != OrderStatus.UNPAID) {
            return;
        }

        long elapsed = System.currentTimeMillis() - record.getCreateTime().getTime();
        long remainingDelay = getDelayMillis(record.getDelayLevel()) - elapsed;
        if (remainingDelay <= 0) {
            // The delay is already over: handle the timeout right away.
            orderCancelService.cancelOrder(order.getId());
            return;
        }

        // Pick a delay level for the time that is left.
        int newDelayLevel = selectDelayLevel(remainingDelay);
        OrderCancelMessage message = new OrderCancelMessage();
        message.setOrderId(order.getId());
        message.setUserId(order.getUserId());
        message.setCreateTime(record.getCreateTime().getTime());
        message.setMessageId(UUID.randomUUID().toString());
        Message<OrderCancelMessage> msg = MessageBuilder
            .withPayload(message)
            .build();

        // The delay level is an argument of the send call, not a message header.
        SendResult sendResult = rocketMQTemplate.syncSend(record.getTopic(), msg,
            rocketMQTemplate.getProducer().getSendMsgTimeout(), newDelayLevel);
        if (sendResult == null || sendResult.getSendStatus() != SendStatus.SEND_OK) {
            // Without this check a rejected send would be marked SUCCESS by the caller.
            throw new IllegalStateException(
                "补偿延时消息发送失败 [businessId=" + record.getBusinessId() + "]");
        }
        log.info("补偿延时消息成功 [businessId={}, newDelayLevel={}]",
            record.getBusinessId(), newDelayLevel);
    }
}
延时消息高级特性
1. 延时消息取消机制
java
/**
 * Business-level cancellation of delayed messages: a cancelled message is
 * still delivered, but consumers check the marker and skip it.
 */
@Component
public class DelayMessageCancellationService {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageCancellationService.class);

    // Prefix of the Redis key that marks a message as cancelled.
    private static final String CANCEL_KEY_PREFIX = "delay:cancel:";

    @Autowired
    private JdbcTemplate jdbcTemplate;
    @Autowired
    private RedisTemplate<String, String> redisTemplate;

    /**
     * Marks an already sent delayed message as cancelled, first in the database
     * and then in Redis.
     *
     * @param businessId business key of the message
     * @param messageId  id of the message to cancel
     * @return {@code true} when a matching record was updated and the marker
     *         was written, {@code false} when no record matched
     */
    public boolean cancelDelayMessage(String businessId, String messageId) {
        // 1. Mark the record as cancelled in the database.
        int updated = jdbcTemplate.update(
            "UPDATE delay_message_record SET status = 'CANCELLED' WHERE business_id = ? AND message_id = ?",
            businessId, messageId
        );
        if (updated <= 0) {
            return false;
        }
        // 2. Write the marker that consumers check; it expires after 25 hours.
        redisTemplate.opsForValue().set(CANCEL_KEY_PREFIX + messageId, "1", 25, TimeUnit.HOURS);
        log.info("延时消息已标记为取消 [businessId={}, messageId={}]", businessId, messageId);
        return true;
    }

    /**
     * Tells whether the cancel marker exists for a message.
     *
     * @param messageId id of the message
     * @return {@code true} only when Redis reports that the marker exists
     */
    public boolean isCancelled(String messageId) {
        // hasKey returns a nullable Boolean (null inside a pipeline/transaction);
        // unboxing it directly could throw a NullPointerException.
        return Boolean.TRUE.equals(redisTemplate.hasKey(CANCEL_KEY_PREFIX + messageId));
    }
}
2. 批量延时消息优化
java
/**
 * Sends many delayed messages concurrently.
 *
 * <p>RocketMQ batch sends cannot carry a delay level, so every payload is sent
 * as its own delayed message; the sends are parallelised with a bounded pool.
 */
@Component
public class BatchDelayMessageService {
    private static final Logger log = LoggerFactory.getLogger(BatchDelayMessageService.class);

    // Upper bound of concurrent sender threads.
    private static final int MAX_SENDER_THREADS = 10;

    @Autowired
    private RocketMQTemplate rocketMQTemplate;

    /**
     * Sends every payload as a delayed message to {@code topic}.
     *
     * @param topic      destination topic
     * @param payloads   payloads to send; {@code null} or empty yields an empty result
     * @param delayLevel delay level applied to every message
     * @return the collected outcome, one success or failure per payload
     */
    public BatchSendResult sendDelayMessageBatchOptimized(
            String topic, List<?> payloads, int delayLevel) {
        BatchSendResult result = new BatchSendResult();
        // Executors.newFixedThreadPool(0) throws, so handle "nothing to send" first.
        if (payloads == null || payloads.isEmpty()) {
            return result;
        }

        long sendTimeout = rocketMQTemplate.getProducer().getSendMsgTimeout();
        ExecutorService executor = Executors.newFixedThreadPool(
            Math.min(payloads.size(), MAX_SENDER_THREADS));
        try {
            // 1. Submit one delayed send per payload.
            List<Future<SendResult>> futures = new ArrayList<>(payloads.size());
            for (Object payload : payloads) {
                futures.add(executor.submit(() -> rocketMQTemplate.syncSend(
                    topic, MessageBuilder.withPayload(payload).build(), sendTimeout, delayLevel)));
            }
            // 2. Collect the outcomes.
            for (Future<SendResult> future : futures) {
                try {
                    result.addSuccess(future.get(30, TimeUnit.SECONDS));
                } catch (Exception e) {
                    if (e instanceof InterruptedException) {
                        // Keep the interrupt visible to the caller.
                        Thread.currentThread().interrupt();
                    }
                    result.addFailure(e);
                    log.error("批量发送延时消息失败", e);
                }
            }
        } finally {
            executor.shutdown();
            try {
                if (!executor.awaitTermination(60, TimeUnit.SECONDS)) {
                    executor.shutdownNow();
                }
            } catch (InterruptedException e) {
                executor.shutdownNow();
                Thread.currentThread().interrupt();
            }
        }
        return result;
    }
}
3. 延时消息降级策略
java
/**
 * Sends delayed messages with fallbacks: immediate delivery when the system is
 * degraded, and a local queue or the database when sending fails.
 */
@Component
public class DelayMessageDegradeService {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageDegradeService.class);

    @Value("${rocketmq.delay.degrade.enabled:false}")
    private boolean degradeEnabled;
    @Autowired
    private DelayMessageTemplate delayMessageTemplate;
    @Autowired
    private RocketMQTemplate rocketMQTemplate;
    @Autowired
    private LocalDelayQueue localDelayQueue;

    /**
     * Sends a delayed message, degrading when necessary.
     *
     * <p>NOTE(review): in degraded mode the message is delivered WITHOUT delay;
     * confirm that consumers of the topic tolerate early delivery.
     *
     * @param topic      destination topic
     * @param payload    message payload
     * @param delayLevel delay level for the normal path
     */
    public void sendWithDegrade(String topic, Object payload, int delayLevel) {
        try {
            // 1. Degraded: deliver immediately, without delay.
            if (shouldDegrade()) {
                log.warn("延时消息服务降级,改为直接投递 [topic={}]", topic);
                // send(...) only accepts a Message; an arbitrary payload goes
                // through convertAndSend.
                rocketMQTemplate.convertAndSend(topic, payload);
                return;
            }
            // 2. Normal path.
            delayMessageTemplate.sendDelayMessage(topic, payload, delayLevel);
        } catch (Exception e) {
            log.error("发送延时消息失败,尝试降级处理", e);
            if (!degradeEnabled) {
                // Best-effort behaviour is kept, but the drop is made explicit.
                log.error("延时消息发送失败且未启用降级,消息未投递 [topic={}, delayLevel={}]",
                    topic, delayLevel);
                return;
            }
            // 3. Fallback after a failed send.
            try {
                localDelayQueue.offer(new DelayedMessage(topic, payload, delayLevel));
                log.info("延时消息已加入本地队列");
            } catch (Exception ex) {
                log.error("本地队列也失败,消息丢失风险", ex);
                // Last resort: store the message in the database.
                saveToDatabase(topic, payload, delayLevel);
            }
        }
    }

    /**
     * Decides whether to degrade: RocketMQ unhealthy, delay queues overloaded,
     * or this process overloaded (checked in that order).
     */
    private boolean shouldDegrade() {
        return !checkRocketMQHealth()
            || isDelayQueueOverloaded()
            || isSystemOverloaded();
    }

    /**
     * Reports overload when process CPU load exceeds 80% or heap usage exceeds
     * 85% of the maximum heap.
     */
    private boolean isSystemOverloaded() {
        // getProcessCpuLoad() is declared on the com.sun.management extension of
        // the MXBean, not on java.lang.management.OperatingSystemMXBean.
        java.lang.management.OperatingSystemMXBean osBean =
            ManagementFactory.getOperatingSystemMXBean();
        if (osBean instanceof com.sun.management.OperatingSystemMXBean) {
            double cpuUsage = ((com.sun.management.OperatingSystemMXBean) osBean).getProcessCpuLoad();
            // A negative value means "not available" and never exceeds the limit.
            if (cpuUsage > 0.8) {
                return true;
            }
        }

        java.lang.management.MemoryUsage heapUsage =
            ManagementFactory.getMemoryMXBean().getHeapMemoryUsage();
        long maxHeap = heapUsage.getMax();
        if (maxHeap <= 0) {
            // getMax() is -1 when the maximum is undefined; no ratio can be computed.
            return false;
        }
        return (double) heapUsage.getUsed() / maxHeap > 0.85;
    }
}
4. 消息追踪机制
java
/**
 * Stores a trace entry for every delayed message that is sent and reports how
 * its delivery compares with the expected delivery time.
 */
@Component
@Slf4j
public class DelayMessageTracer {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageTracer.class);

    @Autowired
    private TraceRepository traceRepository;
    @Autowired
    private ApplicationEventPublisher eventPublisher;

    /**
     * Saves a trace for a sent delayed message and publishes a
     * {@code DelayMessageSentEvent} for it.
     *
     * @param messageId  id of the sent message
     * @param topic      destination topic
     * @param delayLevel delay level used for the send
     */
    public void traceDelayMessage(String messageId, String topic, int delayLevel) {
        DelayMessageTrace sentTrace = new DelayMessageTrace();
        sentTrace.setMessageId(messageId);
        sentTrace.setTopic(topic);
        sentTrace.setDelayLevel(delayLevel);
        sentTrace.setSendTime(System.currentTimeMillis());
        sentTrace.setExpectedDeliveryTime(calculateExpectedDeliveryTime(delayLevel));

        traceRepository.save(sentTrace);
        eventPublisher.publishEvent(new DelayMessageSentEvent(sentTrace));
    }

    /**
     * Looks up the trace of a message.
     *
     * @param messageId id of the message
     * @return the trace info with status DELIVERED (including the deviation
     *         from the expected time) or PENDING, or {@code null} when no trace
     *         is stored for the id
     */
    public DelayMessageTraceInfo getTraceInfo(String messageId) {
        DelayMessageTrace trace = traceRepository.findByMessageId(messageId);
        if (trace == null) {
            return null;
        }

        DelayMessageTraceInfo info = new DelayMessageTraceInfo();
        info.setMessageId(messageId);
        info.setSendTime(new Date(trace.getSendTime()));
        info.setExpectedDeliveryTime(new Date(trace.getExpectedDeliveryTime()));

        // A consume record means the message has been delivered.
        ConsumeRecord consumed = consumeRecordRepository.findByMessageId(messageId);
        if (consumed == null) {
            info.setDeliveryStatus("PENDING");
            return info;
        }

        info.setActualDeliveryTime(consumed.getConsumeTime());
        info.setDeliveryStatus("DELIVERED");
        // Absolute difference between actual and expected delivery time.
        long actualMillis = consumed.getConsumeTime().getTime();
        long deviation = Math.abs(actualMillis - trace.getExpectedDeliveryTime());
        info.setDeviationMillis(deviation);
        return info;
    }
}
监控与运维
1. 延时消息监控指标
java
/**
 * Micrometer metrics for delayed messages: send counters, delay accuracy,
 * delivery details and delay-queue size.
 */
@Component
public class DelayMessageMetrics {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageMetrics.class);

    private final MeterRegistry meterRegistry;

    // Strongly-held gauge values per delay level. Micrometer references the gauged
    // object weakly, so gauging an autoboxed long neither survives garbage
    // collection nor reflects later updates.
    private final Map<Integer, java.util.concurrent.atomic.AtomicLong> deviationGauges =
        new java.util.concurrent.ConcurrentHashMap<>();
    private final Map<Integer, java.util.concurrent.atomic.AtomicLong> lagGauges =
        new java.util.concurrent.ConcurrentHashMap<>();

    @Autowired
    private DefaultMQAdminExt mqAdminExt;

    /**
     * @param meterRegistry registry all meters are registered with
     */
    public DelayMessageMetrics(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;
    }

    /** Counts one sent delayed message, tagged by level and topic. */
    public void recordDelayMessageSend(int delayLevel, String topic) {
        meterRegistry.counter("rocketmq.delay.message.send",
            "level", String.valueOf(delayLevel),
            "topic", topic).increment();
    }

    /**
     * Records the absolute deviation between actual and expected delay and
     * warns when it exceeds ten seconds.
     */
    public void recordDelayAccuracy(int delayLevel, long actualDelay, long expectedDelay) {
        long deviation = Math.abs(actualDelay - expectedDelay);
        updateLevelGauge(deviationGauges, "rocketmq.delay.accuracy.deviation", delayLevel, deviation);
        if (deviation > 10000) {
            log.warn("延时消息精度偏差过大 [level={}, expected={}ms, actual={}ms, deviation={}ms]",
                delayLevel, expectedDelay, actualDelay, deviation);
        }
    }

    /**
     * Records detailed metrics of one delayed-message event: level
     * distribution, message size, delivery latency, retries and failures.
     */
    public void recordDetailedMetrics(DelayMessageEvent event) {
        String level = String.valueOf(event.getDelayLevel());

        // 1. Delay level distribution.
        meterRegistry.counter("delay.message.level.distribution", "level", level).increment();

        // 2. Message size distribution. MeterRegistry has no histogram(...) factory;
        //    a DistributionSummary is the meter for size-like values.
        meterRegistry.summary("delay.message.size.bytes")
            .record(event.getMessageSize());

        // 3. Delivery latency distribution.
        meterRegistry.timer("delay.message.delivery.latency", "level", level)
            .record(event.getActualDelay(), TimeUnit.MILLISECONDS);

        // 4. Retries.
        if (event.getRetryCount() > 0) {
            meterRegistry.counter("delay.message.retry",
                "level", level,
                "retry_count", String.valueOf(event.getRetryCount()))
                .increment();
        }

        // 5. Failures by reason; a missing reason is tagged "unknown" because
        //    tag values must not be null.
        if (event.isFailed()) {
            String reason = event.getFailureReason();
            meterRegistry.counter("delay.message.failure",
                "level", level,
                "reason", reason == null ? "unknown" : reason)
                .increment();
        }
    }

    /**
     * Once a minute, publishes max offset minus min offset of every queue of
     * the schedule topic and alerts when the value exceeds 10000.
     */
    @Scheduled(fixedDelay = 60000)
    public void monitorDelayQueueLag() {
        try {
            TopicStatsTable topicStats = mqAdminExt.examineTopicStats(
                TopicValidator.RMQ_SYS_SCHEDULE_TOPIC);
            for (Map.Entry<MessageQueue, TopicOffset> entry : topicStats.getOffsetTable().entrySet()) {
                MessageQueue mq = entry.getKey();
                TopicOffset offset = entry.getValue();
                long lag = offset.getMaxOffset() - offset.getMinOffset();
                // Queue id is the delay level minus one.
                int delayLevel = mq.getQueueId() + 1;

                updateLevelGauge(lagGauges, "rocketmq.delay.queue.lag", delayLevel, lag);

                if (lag > 10000) {
                    String alertMsg = String.format(
                        "延时消息堆积告警 [level=%d, lag=%d, queue=%s]",
                        delayLevel, lag, mq);
                    log.warn(alertMsg);
                    alertService.sendAlert("延时消息堆积", alertMsg);
                }
            }
        } catch (Exception e) {
            log.error("监控延时消息堆积失败", e);
        }
    }

    /** Registers the per-level gauge on first use and then updates its value. */
    private void updateLevelGauge(Map<Integer, java.util.concurrent.atomic.AtomicLong> holders,
                                  String name, int delayLevel, long value) {
        holders.computeIfAbsent(delayLevel, key -> meterRegistry.gauge(
            name,
            Tags.of("level", String.valueOf(key)),
            new java.util.concurrent.atomic.AtomicLong())).set(value);
    }
}
2. 延时消息健康检查
java
/**
 * HTTP endpoint that exposes the delayed-message health check under
 * {@code /health/delay-message}.
 */
@RestController
@RequestMapping("/health")
public class DelayMessageHealthController {

    @Autowired
    private DelayMessageHealthChecker healthChecker;

    /**
     * Runs the health check and returns its report.
     *
     * @return the report produced by the health checker
     */
    @GetMapping("/delay-message")
    public HealthCheckResponse checkDelayMessageHealth() {
        final HealthCheckResponse report = healthChecker.check();
        return report;
    }
}
/**
 * Health check for delayed messages: sends a probe message, inspects the
 * delay queues and evaluates the delay accuracy.
 */
@Component
public class DelayMessageHealthChecker {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageHealthChecker.class);

    // Delay level of the probe message; level 1 is 1 second with the default table.
    private static final int HEALTH_CHECK_DELAY_LEVEL = 1;

    @Autowired
    private RocketMQTemplate rocketMQTemplate;
    @Autowired
    private RedisTemplate<String, String> redisTemplate;
    @Autowired
    private DefaultMQAdminExt mqAdminExt;

    /**
     * Runs all checks and combines them into one response.
     *
     * @return the health report; marked unhealthy with the error message when
     *         any check throws
     */
    public HealthCheckResponse check() {
        HealthCheckResponse response = new HealthCheckResponse();
        response.setCheckTime(new Date());
        try {
            // 1. Sending a delayed message works.
            boolean sendHealthy = checkDelayMessageSend();
            response.setSendHealthy(sendHealthy);
            // 2. State of the delay queues.
            Map<Integer, QueueStatus> queueStatus = checkDelayQueueStatus();
            response.setQueueStatus(queueStatus);
            // 3. Delay accuracy.
            DelayAccuracyStatus accuracyStatus = checkDelayAccuracy();
            response.setAccuracyStatus(accuracyStatus);
            // 4. Overall result.
            response.setHealthy(sendHealthy && isQueueHealthy(queueStatus)
                && accuracyStatus.isAcceptable());
        } catch (Exception e) {
            log.error("延时消息健康检查失败", e);
            response.setHealthy(false);
            response.setErrorMessage(e.getMessage());
        }
        return response;
    }

    /**
     * Sends a probe message with a short delay and stores its send time in
     * Redis for five minutes.
     *
     * @return {@code true} when the broker acknowledged the send with SEND_OK
     */
    private boolean checkDelayMessageSend() {
        try {
            String testId = "health_check_" + System.currentTimeMillis();
            Message<String> testMsg = MessageBuilder
                .withPayload(testId)
                .build();
            // The delay level is an argument of the send call, not a message header.
            SendResult result = rocketMQTemplate.syncSend("health-check-topic", testMsg,
                rocketMQTemplate.getProducer().getSendMsgTimeout(), HEALTH_CHECK_DELAY_LEVEL);
            // Remember when the probe was sent.
            redisTemplate.opsForValue().set(
                "delay_health_check:" + testId,
                String.valueOf(System.currentTimeMillis()),
                5, TimeUnit.MINUTES);
            return result != null && result.getSendStatus() == SendStatus.SEND_OK;
        } catch (Exception e) {
            log.error("延时消息发送测试失败", e);
            return false;
        }
    }

    /**
     * Builds a status entry per queue of the schedule topic; a queue counts as
     * healthy while max offset minus min offset stays below 10000.
     *
     * @return delay level -> queue status
     * @throws Exception when the topic statistics cannot be read
     */
    private Map<Integer, QueueStatus> checkDelayQueueStatus() throws Exception {
        Map<Integer, QueueStatus> statusMap = new HashMap<>();
        TopicStatsTable topicStats = mqAdminExt.examineTopicStats(
            TopicValidator.RMQ_SYS_SCHEDULE_TOPIC);
        for (Map.Entry<MessageQueue, TopicOffset> entry : topicStats.getOffsetTable().entrySet()) {
            MessageQueue mq = entry.getKey();
            TopicOffset offset = entry.getValue();
            // Queue id is the delay level minus one.
            int delayLevel = mq.getQueueId() + 1;
            long lag = offset.getMaxOffset() - offset.getMinOffset();
            QueueStatus status = new QueueStatus();
            status.setDelayLevel(delayLevel);
            status.setLag(lag);
            status.setHealthy(lag < 10000);
            status.setLastUpdateTime(offset.getLastUpdateTimestamp());
            statusMap.put(delayLevel, status);
        }
        return statusMap;
    }
}
生产环境最佳实践
1. 延时消息配置优化
java
/**
 * Spring configuration that registers the delayed-message beans and enables
 * binding of {@code DelayMessageProperties}.
 *
 * NOTE(review): DelayMessageTemplate and DelayMessageConfigValidator are also
 * annotated with @Component elsewhere in this article; if component scanning
 * picks them up as well, each would be defined twice — confirm which
 * registration is intended.
 */
@Configuration
@EnableConfigurationProperties(DelayMessageProperties.class)
public class DelayMessageAutoConfiguration {
// Template used by application code to send delayed messages.
@Bean
public DelayMessageTemplate delayMessageTemplate(
RocketMQTemplate rocketMQTemplate,
DelayMessageProperties properties) {
return new DelayMessageTemplate(rocketMQTemplate, properties);
}
// Monitor bean, created only when rocketmq.delay.monitor.enabled=true.
@Bean
@ConditionalOnProperty(name = "rocketmq.delay.monitor.enabled", havingValue = "true")
public DelayMessageMonitor delayMessageMonitor() {
return new DelayMessageMonitor();
}
// Validator that is given the broker configuration and the delay properties.
@Bean
public DelayMessageConfigValidator delayMessageConfigValidator(
BrokerConfig brokerConfig,
DelayMessageProperties properties) {
return new DelayMessageConfigValidator(brokerConfig, properties);
}
}
/**
 * Settings for delayed messages, bound from properties under the
 * {@code rocketmq.delay} prefix.
 */
@ConfigurationProperties(prefix = "rocketmq.delay")
@Data
public class DelayMessageProperties {
    // Whether sent delayed messages are persisted.
    private boolean enablePersistence = true;
    // Maximum number of retries for a delayed message.
    private int maxRetryTimes = 3;
    // Backlog size (number of messages) above which an alert is raised.
    private long alertThreshold = 10000;
    // Named delays mapped to RocketMQ delay levels, e.g. "30min" -> 16.
    private Map<String, Integer> customDelayMapping = new HashMap<>();
    // Whether delay accuracy is monitored.
    private boolean enableAccuracyMonitor = true;
    // Accuracy deviation in milliseconds above which an alert is raised.
    private long accuracyAlertThreshold = 10000;

    /**
     * Adds the built-in named delays (levels per the default level table).
     * Property binding has already run when this callback executes, so
     * {@code putIfAbsent} is used: a mapping configured by the user for one of
     * these names must not be overwritten by the default.
     */
    @PostConstruct
    public void init() {
        customDelayMapping.putIfAbsent("5min", 9);
        customDelayMapping.putIfAbsent("10min", 14);
        customDelayMapping.putIfAbsent("30min", 16);
        customDelayMapping.putIfAbsent("1hour", 17);
        customDelayMapping.putIfAbsent("2hour", 18);
    }
}
/**
 * Validates the delayed-message configuration at startup.
 */
@Component
public class DelayMessageConfigValidator {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageConfigValidator.class);

    private final BrokerConfig brokerConfig;
    private final DelayMessageProperties properties;

    /**
     * @param brokerConfig broker configuration that holds the delay level string
     * @param properties   delayed-message properties with the custom mapping
     */
    public DelayMessageConfigValidator(BrokerConfig brokerConfig, DelayMessageProperties properties) {
        this.brokerConfig = brokerConfig;
        this.properties = properties;
    }

    /**
     * Checks that delay levels are configured and that every custom mapping
     * points to an existing level.
     *
     * @throws IllegalStateException when the delay level string is blank or a
     *         mapping value is missing or outside {@code 1..number of levels}
     */
    @PostConstruct
    public void validateConfig() {
        String delayLevels = brokerConfig.getMessageDelayLevel();
        if (StringUtils.isBlank(delayLevels)) {
            throw new IllegalStateException("延时级别配置不能为空");
        }

        // The highest valid level equals the number of configured entries
        // (18 with the default level string), so customised tables are honoured.
        int maxDelayLevel = delayLevels.trim().split("\\s+").length;

        Map<String, Integer> customMapping = properties.getCustomDelayMapping();
        for (Map.Entry<String, Integer> entry : customMapping.entrySet()) {
            Integer level = entry.getValue();
            // A null value would otherwise fail with a NullPointerException on unboxing.
            if (level == null || level < 1 || level > maxDelayLevel) {
                throw new IllegalStateException(
                    "非法的延时级别映射: " + entry.getKey() + " -> " + level);
            }
        }
        log.info("延时消息配置验证通过");
    }
}
2. 延时消息工具类
java
/**
 * Sends RocketMQ delay messages, addressed either by a configured alias or by delay level.
 *
 * <h3>Usage</h3>
 * <pre>{@code
 * // 1. By configured alias
 * delayMessageTemplate.sendDelayMessage("topic", payload, "30min");
 *
 * // 2. By delay level
 * delayMessageTemplate.sendDelayMessage("topic", payload, 16);
 *
 * // 3. Several payloads with the same level
 * delayMessageTemplate.sendDelayMessageBatch("topic", payloads, 16);
 * }</pre>
 *
 * <h3>What this class enforces</h3>
 * <ul>
 *   <li>delay level must be within 1..18</li>
 *   <li>the JSON form of the payload must not exceed 4 MB</li>
 *   <li>system topics are rejected</li>
 * </ul>
 *
 * <h3>Notes from the original author (not enforced or measured by this class)</h3>
 * <ul>
 *   <li>delivery time may deviate by 1-2 seconds</li>
 *   <li>many messages on the same level may cause performance problems</li>
 *   <li>consumers need to be idempotent</li>
 *   <li>quoted figures: 5000-10000 TPS per node, 99.99% reliability with persistence on</li>
 * </ul>
 *
 * @since 1.0.0
 * @see DelayMessageProperties
 * @see DelayMessageMonitor
 */
@Component
public class DelayMessageTemplate {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageTemplate.class);

    private static final int MIN_DELAY_LEVEL = 1;
    private static final int MAX_DELAY_LEVEL = 18;
    private static final int MAX_MESSAGE_BYTES = 4 * 1024 * 1024;

    private final RocketMQTemplate rocketMQTemplate;
    private final DelayMessageProperties properties;
    private final DelayMessageMetrics metrics;

    /**
     * Creates the template. The fields are final, so the collaborators are constructor-injected.
     */
    public DelayMessageTemplate(RocketMQTemplate rocketMQTemplate,
                                DelayMessageProperties properties,
                                DelayMessageMetrics metrics) {
        this.rocketMQTemplate = rocketMQTemplate;
        this.properties = properties;
        this.metrics = metrics;
    }

    /**
     * Sends a delay message using an alias from the custom delay mapping.
     *
     * @param delayTime alias such as {@code "30min"}
     * @return the broker's send result
     * @throws IllegalArgumentException if the alias is not configured or the message is invalid
     * @throws DelayMessageException    if the send itself fails
     */
    public SendResult sendDelayMessage(String topic, Object payload, String delayTime) {
        Integer delayLevel = properties.getCustomDelayMapping().get(delayTime);
        if (delayLevel == null) {
            throw new IllegalArgumentException("不支持的延时时间: " + delayTime);
        }
        return sendDelayMessage(topic, payload, delayLevel);
    }

    /**
     * Sends a delay message with an explicit delay level.
     *
     * <p>Only a failure of the send is reported to the caller. Metrics and persistence run after
     * the broker has accepted the message; a failure there is logged but not rethrown, because
     * reporting it as a send failure would make callers retry and deliver the message twice.
     *
     * @return the broker's send result
     * @throws IllegalArgumentException if topic, payload or delay level is invalid
     * @throws DelayMessageException    if the send itself fails
     */
    public SendResult sendDelayMessage(String topic, Object payload, int delayLevel) {
        validateDelayMessage(topic, payload, delayLevel);

        SendResult result;
        try {
            Message<?> message = MessageBuilder
                    .withPayload(payload)
                    .setHeader(RocketMQHeaders.DELAY_LEVEL, delayLevel)
                    .setHeader("SEND_TIME", System.currentTimeMillis())
                    .build();
            result = rocketMQTemplate.syncSend(topic, message);
        } catch (Exception e) {
            log.error("发送延时消息失败 [topic={}, delayLevel={}]", topic, delayLevel, e);
            throw new DelayMessageException("发送延时消息失败", e);
        }

        recordAfterSend(topic, payload, delayLevel, result);
        log.info("发送延时消息成功 [topic={}, delayLevel={}, msgId={}]",
                topic, delayLevel, result.getMsgId());
        return result;
    }

    /**
     * Sends each payload as its own delay message, on a best-effort basis.
     *
     * <p>A payload that fails is logged and skipped, so the returned list can be shorter than
     * {@code payloads} and its indexes do not line up with the input.
     *
     * @return results of the successful sends, in input order; empty for null or empty input
     */
    public List<SendResult> sendDelayMessageBatch(String topic, List<?> payloads, int delayLevel) {
        if (payloads == null || payloads.isEmpty()) {
            return Collections.emptyList();
        }
        List<SendResult> results = new ArrayList<>(payloads.size());
        int failed = 0;
        // Every payload is sent as an individual message, so chunking the list adds nothing.
        for (Object payload : payloads) {
            try {
                results.add(sendDelayMessage(topic, payload, delayLevel));
            } catch (Exception e) {
                failed++;
                log.error("批量发送延时消息失败", e);
                // keep going with the remaining payloads
            }
        }
        if (failed > 0) {
            log.warn("批量发送延时消息部分失败 [topic={}, delayLevel={}, total={}, failed={}]",
                    topic, delayLevel, payloads.size(), failed);
        }
        return results;
    }

    /** Records metrics and, when enabled, persists the message; never fails the send. */
    private void recordAfterSend(String topic, Object payload, int delayLevel, SendResult result) {
        try {
            metrics.recordDelayMessageSend(delayLevel, topic);
            if (properties.isEnablePersistence()) {
                // NOTE(review): persistDelayMessage is not declared in this class; it has to be
                // provided before this compiles.
                persistDelayMessage(topic, payload, delayLevel, result);
            }
        } catch (Exception e) {
            log.error("延时消息已发送但后置记录失败 [topic={}, delayLevel={}, msgId={}]",
                    topic, delayLevel, result.getMsgId(), e);
        }
    }

    /**
     * Rejects messages that cannot be sent as a delay message.
     *
     * @throws IllegalArgumentException on a blank or system topic, a null payload, a delay level
     *                                  outside 1..18, or a JSON payload larger than 4 MB
     */
    private void validateDelayMessage(String topic, Object payload, int delayLevel) {
        if (StringUtils.isBlank(topic)) {
            throw new IllegalArgumentException("Topic不能为空");
        }
        if (payload == null) {
            throw new IllegalArgumentException("消息体不能为空");
        }
        if (delayLevel < MIN_DELAY_LEVEL || delayLevel > MAX_DELAY_LEVEL) {
            throw new IllegalArgumentException("非法的延时级别: " + delayLevel);
        }
        // The JSON size is an estimate; the template's own message converter produces the real
        // bytes and may encode the payload differently.
        int messageSize = JSON.toJSONString(payload).getBytes(StandardCharsets.UTF_8).length;
        if (messageSize > MAX_MESSAGE_BYTES) {
            throw new IllegalArgumentException("消息体过大: " + messageSize);
        }
        if (TopicValidator.isSystemTopic(topic)) {
            throw new IllegalArgumentException("不能发送到系统Topic: " + topic);
        }
    }
}
3. 性能优化与熔断
java
/**
 * Start-up warm-up of the delay consume queues and the executor used for delay messages.
 */
@Component
public class DelayMessagePerformanceOptimizer {
    private static final Logger log = LoggerFactory.getLogger(DelayMessagePerformanceOptimizer.class);

    private static final int MAX_DELAY_LEVEL = 18;

    // The primary key matches the YAML layout (rocketmq.delay.performance.warmup-enabled);
    // the previous key is kept as a fallback so existing deployments keep working.
    @Value("${rocketmq.delay.performance.warmup-enabled:${rocketmq.delay.warmup.enabled:true}}")
    private boolean warmUpEnabled;

    // Optional: the message store only exists inside a broker process.
    @Autowired(required = false)
    private DefaultMessageStore defaultMessageStore;

    /**
     * Touches the consume queue of every delay level so each one exists before traffic arrives.
     * A failure on one level is logged and does not stop the remaining levels.
     */
    @PostConstruct
    public void warmUp() {
        if (!warmUpEnabled) {
            return;
        }
        if (defaultMessageStore == null) {
            log.warn("未找到 DefaultMessageStore,跳过延时队列预热");
            return;
        }
        log.info("开始预热延时消息服务");
        for (int level = 1; level <= MAX_DELAY_LEVEL; level++) {
            try {
                ConsumeQueue cq = defaultMessageStore.findConsumeQueue(
                        TopicValidator.RMQ_SYS_SCHEDULE_TOPIC,
                        ScheduleMessageService.delayLevel2QueueId(level));
                if (cq != null) {
                    // NOTE(review): confirm load() is safe on a queue the store has already
                    // loaded; it re-reads the queue's files from disk.
                    cq.load();
                }
            } catch (Exception e) {
                log.error("预热延时队列失败 [level={}]", level, e);
            }
        }
        // The former "pre-allocate memory" step created a 1 MB direct buffer and dropped it
        // straight away; nothing kept a reference, so it was removed.
        log.info("延时消息服务预热完成");
    }

    /**
     * Executor for delay-message work: core size equals the CPU count, max size is twice that,
     * with a 10000-task queue. When saturated, the submitting thread runs the task itself.
     * Shutdown waits up to 60 seconds for queued tasks.
     */
    @Bean
    public ThreadPoolTaskExecutor delayMessageExecutor() {
        int cpus = Runtime.getRuntime().availableProcessors();
        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
        executor.setCorePoolSize(cpus);
        executor.setMaxPoolSize(cpus * 2);
        executor.setQueueCapacity(10000);
        executor.setThreadNamePrefix("delay-msg-");
        executor.setRejectedExecutionHandler(new CallerRunsPolicy());
        executor.setWaitForTasksToCompleteOnShutdown(true);
        executor.setAwaitTerminationSeconds(60);
        executor.initialize();
        return executor;
    }
}
/**
 * Wraps delay-message sending in a circuit breaker and falls back to a compensation record
 * when the send fails or the breaker is open.
 */
@Component
public class DelayMessageCircuitBreaker {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageCircuitBreaker.class);

    private final CircuitBreaker circuitBreaker;

    @Autowired
    private DelayMessageTemplate delayMessageTemplate;

    // NOTE(review): handleFallback uses a "compensationService" that is not declared in this
    // class; its type is not visible here, so it still has to be added.

    /**
     * Builds the breaker: opens at a 50% failure rate over a sliding window of 10 calls and
     * stays open for one second. State transitions are logged.
     */
    public DelayMessageCircuitBreaker() {
        CircuitBreakerConfig config = CircuitBreakerConfig.custom()
                .failureRateThreshold(50)
                .waitDurationInOpenState(Duration.ofMillis(1000))
                .slidingWindowSize(10)
                .build();
        this.circuitBreaker = CircuitBreaker.of("delayMessage", config);
        circuitBreaker.getEventPublisher()
                .onStateTransition(event ->
                        log.warn("延时消息熔断器状态变更: {}", event));
    }

    /**
     * Sends a delay message through the breaker.
     *
     * @return the real send result, or a synthetic fallback result (msgId prefixed with
     *         {@code FALLBACK_}) when the send failed or the breaker rejected the call
     */
    public SendResult sendWithCircuitBreaker(String topic, Object payload, int delayLevel) {
        try {
            // executeSupplier takes only the supplier; failures, including the breaker's own
            // rejection while open, surface as exceptions and are handled below.
            return circuitBreaker.executeSupplier(
                    () -> delayMessageTemplate.sendDelayMessage(topic, payload, delayLevel));
        } catch (Exception e) {
            log.error("延时消息发送触发熔断", e);
            return handleFallback(topic, payload, delayLevel);
        }
    }

    /** Records the message for later compensation and returns a synthetic result. */
    private SendResult handleFallback(String topic, Object payload, int delayLevel) {
        compensationService.recordFailedMessage(topic, payload, delayLevel);
        SendResult result = new SendResult();
        result.setSendStatus(SendStatus.SLAVE_NOT_AVAILABLE);
        result.setMsgId("FALLBACK_" + UUID.randomUUID());
        return result;
    }
}
4. 消息去重与背压处理
java
/**
 * Detects repeated deliveries of the same message, keyed by {@code messageId:businessKey}.
 *
 * <p>Redis is the source of truth: the first caller to create the key wins and everyone else is
 * a duplicate. The in-memory Bloom filter is local to this instance and is reset daily, so on
 * its own it cannot prove a message is new; it is kept for the statistics in {@link #getStats()}.
 */
@Component
public class DelayMessageDeduplicationService {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageDeduplicationService.class);

    private static final String DEDUPE_KEY_PREFIX = "delay:dedupe:";
    private static final long DEDUPE_TTL_HOURS = 24;
    private static final int EXPECTED_INSERTIONS = 1000000;
    private static final double FALSE_POSITIVE_RATE = 0.01;
    private static final int MAX_RESTORED_KEYS = 100000;

    // The reference lets resetBloomFilter() swap in a fresh filter atomically.
    private final AtomicReference<BloomFilter<String>> bloomFilterRef;

    @Autowired
    private RedisTemplate<String, String> redisTemplate;

    public DelayMessageDeduplicationService() {
        this.bloomFilterRef = new AtomicReference<>(createBloomFilter());
    }

    /** Creates an empty filter sized for 1,000,000 entries at a 1% false-positive rate. */
    private BloomFilter<String> createBloomFilter() {
        return BloomFilter.create(
                Funnels.stringFunnel(StandardCharsets.UTF_8),
                EXPECTED_INSERTIONS,
                FALSE_POSITIVE_RATE);
    }

    /**
     * Reports whether this message/business-key pair has been seen in the last 24 hours, and
     * registers it if it has not.
     *
     * <p>The Redis key is written on the first sighting. Previously the first sighting only
     * touched the Bloom filter, so the second delivery found no Redis key and was also reported
     * as new.
     *
     * @return {@code false} only for the caller that created the Redis key; {@code true}
     *         otherwise, including when Redis returns no answer
     */
    public boolean isDuplicate(String messageId, String businessKey) {
        String dedupeKey = messageId + ":" + businessKey;

        // Track the key locally for statistics only.
        bloomFilterRef.get().put(dedupeKey);

        // SET-if-absent is atomic, so exactly one caller sees "created".
        Boolean created = redisTemplate.opsForValue()
                .setIfAbsent(DEDUPE_KEY_PREFIX + dedupeKey, "1", DEDUPE_TTL_HOURS, TimeUnit.HOURS);
        return !Boolean.TRUE.equals(created);
    }

    /** Replaces the Bloom filter with an empty one every day at midnight. */
    @Scheduled(cron = "0 0 0 * * ?")
    public void resetBloomFilter() {
        BloomFilter<String> oldFilter = bloomFilterRef.getAndSet(createBloomFilter());
        log.info("布隆过滤器已重置,旧过滤器包含约 {} 个元素",
                oldFilter.approximateElementCount());
        // Optional: re-seed the new filter from Redis via restoreRecentDataToBloomFilter(...).
    }

    /** Returns the approximate element count and expected false-positive rate of the filter. */
    public BloomFilterStats getStats() {
        BloomFilter<String> currentFilter = bloomFilterRef.get();
        return BloomFilterStats.builder()
                .approximateElementCount(currentFilter.approximateElementCount())
                .expectedFpp(currentFilter.expectedFpp())
                .build();
    }

    /**
     * Optional helper: copies up to 100,000 dedupe keys from Redis into {@code newFilter}.
     * Failures are logged and otherwise ignored.
     *
     * <p>NOTE(review): {@code keys(pattern)} walks the whole keyspace in one blocking call;
     * switch to an incremental SCAN before enabling this against a production Redis.
     */
    private void restoreRecentDataToBloomFilter(BloomFilter<String> newFilter) {
        try {
            Set<String> keys = redisTemplate.keys(DEDUPE_KEY_PREFIX + "*");
            if (keys == null || keys.isEmpty()) {
                return;
            }
            int restored = 0;
            for (String key : keys) {
                // Strip the Redis prefix to get back the original dedupe key.
                newFilter.put(key.substring(DEDUPE_KEY_PREFIX.length()));
                restored++;
                if (restored >= MAX_RESTORED_KEYS) {
                    log.warn("恢复数据达到上限,停止恢复");
                    break;
                }
            }
            log.info("从Redis恢复了 {} 个去重键到布隆过滤器", restored);
        } catch (Exception e) {
            log.error("恢复数据到布隆过滤器失败", e);
        }
    }

    /** Snapshot of the Bloom filter's statistics. */
    @Data
    @Builder
    public static class BloomFilterStats {
        private long approximateElementCount;
        private double expectedFpp;
    }
}
/**
 * Tracks how many delay messages are in flight and slows down or rejects producers when the
 * backlog grows too large.
 *
 * <p>Callers report progress through {@link #messageAccepted()} and {@link #messageCompleted()};
 * without those calls the pending counter stays at zero and back pressure never triggers.
 */
@Component
public class DelayMessageBackPressureHandler {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageBackPressureHandler.class);

    private final AtomicLong pendingMessages = new AtomicLong(0);
    private final long maxPendingMessages = 100000;
    // New messages are refused above 1.5x the limit (integer form of maxPendingMessages * 1.5).
    private final long rejectThreshold = maxPendingMessages + maxPendingMessages / 2;

    // Bounded so an overloaded producer cannot exhaust the heap; the capacity is the gap
    // between the back-pressure threshold and the rejection threshold.
    private final java.util.concurrent.BlockingQueue<DelayedMessage> overflowQueue =
            new java.util.concurrent.LinkedBlockingQueue<>((int) (maxPendingMessages / 2));

    /**
     * Registers one more in-flight delay message.
     *
     * @return the pending count after the increment
     */
    public long messageAccepted() {
        return pendingMessages.incrementAndGet();
    }

    /**
     * Registers that one in-flight delay message has finished; never drops below zero.
     *
     * @return the pending count after the decrement
     */
    public long messageCompleted() {
        return pendingMessages.updateAndGet(current -> current > 0 ? current - 1 : 0);
    }

    /**
     * Takes the oldest message parked by {@link #applyBackPressure}, if any.
     *
     * @return the parked message, or {@code null} when the overflow queue is empty
     */
    public DelayedMessage pollOverflow() {
        return overflowQueue.poll();
    }

    /**
     * Tells whether the pending count exceeds the limit of 100000.
     */
    public boolean shouldApplyBackPressure() {
        long pending = pendingMessages.get();
        if (pending > maxPendingMessages) {
            log.warn("延时消息堆积过多,启动背压机制 [pending={}]", pending);
            return true;
        }
        return false;
    }

    /**
     * Applies back pressure to one message: rejects it when the backlog is severe, otherwise
     * pauses the caller for 100 ms and parks the message in the overflow queue.
     *
     * @throws BackPressureException if the backlog exceeds 1.5x the limit or the overflow queue
     *                               is full
     */
    public void applyBackPressure(String topic, Object payload, int delayLevel) {
        // Reject first: there is no point holding the caller for 100 ms only to refuse it.
        if (pendingMessages.get() > rejectThreshold) {
            throw new BackPressureException("延时消息队列已满,请稍后重试");
        }
        try {
            Thread.sleep(100); // crude rate limiting
        } catch (InterruptedException e) {
            // Preserve the interrupt for the caller and carry on parking the message.
            Thread.currentThread().interrupt();
        }
        // offer() returns false instead of blocking when the queue is full; surface that
        // rather than dropping the message silently.
        if (!overflowQueue.offer(new DelayedMessage(topic, payload, delayLevel))) {
            throw new BackPressureException("延时消息队列已满,请稍后重试");
        }
    }
}
5. 任意时间延时实现
java
/**
 * Emulates arbitrary delays on top of the 18 fixed delay levels by chaining several standard
 * delays ("hops") one after another.
 *
 * <p>A delay that equals a standard level is sent directly. Any other delay is split into a
 * list of standard delays whose sum is exactly the requested delay. The first hop is sent
 * immediately; the remaining hops travel inside a {@code MultiLevelDelayMessage} on
 * {@code MULTI_LEVEL_TOPIC}, whose consumer is expected to forward the next hop.
 */
@Component
public class ArbitraryDelayMessageService {
    private static final Logger log = LoggerFactory.getLogger(ArbitraryDelayMessageService.class);

    private static final String MULTI_LEVEL_TOPIC = "MULTI_LEVEL_TOPIC";

    // Standard levels, longest first; DELAYS_DESC[i] is the delay in seconds of LEVELS_DESC[i].
    private static final int[] LEVELS_DESC =
            {18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
    private static final long[] DELAYS_DESC =
            {7200, 3600, 1800, 1200, 600, 540, 480, 420, 360, 300, 240, 180, 120, 60, 30, 10, 5, 1};

    // Upper bound on the number of 2-hour hops (512 hops is roughly 42 days). Keeps the
    // segment list, and therefore the wrapper payload, bounded for absurdly large delays.
    private static final long MAX_LONGEST_HOPS = 512;

    @Autowired
    private RocketMQTemplate rocketMQTemplate;
    @Autowired
    private DelayMessageTemplate delayMessageTemplate;

    /**
     * Sends a message that should be delivered after {@code delaySeconds}.
     *
     * @param delaySeconds delay in seconds; zero or negative sends immediately
     * @return the send result of the first (or only) message
     * @throws IllegalArgumentException if the delay is too long to be decomposed
     */
    public SendResult sendArbitraryDelayMessage(String topic, Object payload, long delaySeconds) {
        if (delaySeconds <= 0) {
            return rocketMQTemplate.syncSend(topic, payload);
        }
        Integer standardLevel = findMatchingLevel(delaySeconds);
        if (standardLevel != null) {
            return delayMessageTemplate.sendDelayMessage(topic, payload, standardLevel);
        }
        return sendMultiLevelDelayMessage(topic, payload, delaySeconds);
    }

    /** Sends the first hop; wraps the payload with the remaining hops when there are any. */
    private SendResult sendMultiLevelDelayMessage(String topic, Object payload, long delaySeconds) {
        List<DelaySegment> segments = decomposeDelay(delaySeconds);
        if (segments.isEmpty()) {
            throw new IllegalArgumentException("无法分解延时时间: " + delaySeconds);
        }
        DelaySegment firstSegment = segments.get(0);
        if (segments.size() == 1) {
            return delayMessageTemplate.sendDelayMessage(topic, payload, firstSegment.getLevel());
        }
        MultiLevelDelayMessage wrapper = new MultiLevelDelayMessage();
        wrapper.setOriginalTopic(topic);
        wrapper.setOriginalPayload(payload);
        // Copy the tail: subList() is only a view backed by the local list.
        wrapper.setRemainingSegments(new ArrayList<>(segments.subList(1, segments.size())));
        return delayMessageTemplate.sendDelayMessage(MULTI_LEVEL_TOPIC, wrapper, firstSegment.getLevel());
    }

    /**
     * Splits a delay into standard-level segments whose delays add up to exactly
     * {@code delaySeconds}.
     *
     * <p>Greedy, longest level first, and a level may repeat. The 1-second level guarantees an
     * exact fit. The earlier version used each level at most once and booked the remainder on
     * level 1, which only waits one second, so the total delay came out short.
     *
     * @return the segments in sending order; empty when the delay would need more than
     *         {@code MAX_LONGEST_HOPS} two-hour hops
     */
    private List<DelaySegment> decomposeDelay(long delaySeconds) {
        List<DelaySegment> segments = new ArrayList<>();
        if (delaySeconds / DELAYS_DESC[0] > MAX_LONGEST_HOPS) {
            return segments;
        }
        long remaining = delaySeconds;
        for (int i = 0; i < LEVELS_DESC.length && remaining > 0; i++) {
            while (remaining >= DELAYS_DESC[i]) {
                segments.add(new DelaySegment(LEVELS_DESC[i], DELAYS_DESC[i]));
                remaining -= DELAYS_DESC[i];
            }
        }
        return segments;
    }

    /**
     * Looks up the standard level whose delay equals {@code delaySeconds}.
     *
     * @return the level (1..18), or {@code null} when no level matches exactly
     */
    private Integer findMatchingLevel(long delaySeconds) {
        for (int i = 0; i < DELAYS_DESC.length; i++) {
            if (DELAYS_DESC[i] == delaySeconds) {
                return LEVELS_DESC[i];
            }
        }
        return null;
    }
}
6. 延时消息诊断工具
java
/**
 * REST endpoint that collects what is known about one delay message (stored record, queue,
 * consumption, retries) and turns it into issues and suggestions.
 */
@RestController
@RequestMapping("/delay-message/diagnostic")
public class DelayMessageDiagnosticController {
    private static final Logger log = LoggerFactory.getLogger(DelayMessageDiagnosticController.class);

    private static final int MIN_DELAY_LEVEL = 1;
    private static final int MAX_DELAY_LEVEL = 18;

    @Autowired
    private DelayMessageRepository repository;
    @Autowired
    private ConsumeRecordRepository consumeRepository;
    @Autowired
    private RetryRecordRepository retryRepository;
    @Autowired
    private DefaultMessageStore defaultMessageStore;

    /**
     * Diagnoses a single delay message.
     *
     * @param messageId id of the message to inspect
     * @return the diagnostic result; when the message has no stored record the result carries
     *         only that issue, and any exception during diagnosis is reported as an issue
     *         instead of being propagated
     */
    @GetMapping("/diagnose/{messageId}")
    public DiagnosticResult diagnoseMessage(@PathVariable String messageId) {
        DiagnosticResult result = new DiagnosticResult();
        result.setMessageId(messageId);
        result.setDiagnoseTime(new Date());
        try {
            // 1. The message must have a stored record.
            DelayMessageRecord record = repository.findByMessageId(messageId);
            if (record == null) {
                result.addIssue("消息不存在于数据库");
                return result;
            }

            // 2. Status.
            result.setMessageStatus(record.getStatus());
            if ("CANCELLED".equals(record.getStatus())) {
                result.addIssue("消息已被取消");
            }

            // 3. Delay queue of the message's level.
            result.setInDelayQueue(checkInDelayQueue(messageId, record.getDelayLevel()));

            // 4. Consumption.
            ConsumeRecord consumeRecord = consumeRepository.findByMessageId(messageId);
            if (consumeRecord != null) {
                result.setConsumed(true);
                result.setConsumeTime(consumeRecord.getConsumeTime());
            }

            // 5. Delivery accuracy: only computable when both timestamps are present.
            if (consumeRecord != null
                    && consumeRecord.getConsumeTime() != null
                    && record.getExpectedDeliveryTime() != null) {
                long deviation = Math.abs(consumeRecord.getConsumeTime().getTime()
                        - record.getExpectedDeliveryTime());
                result.setDeviationMillis(deviation);
                if (deviation > 10000) {
                    result.addIssue(String.format("延时精度偏差过大: %d ms", deviation));
                }
            }

            // 6. Retries. The last list element is taken as the most recent retry.
            // NOTE(review): that assumes the repository returns retries in time order.
            List<RetryRecord> retries = retryRepository.findByMessageId(messageId);
            result.setRetryCount(retries.size());
            if (!retries.isEmpty()) {
                result.setLastRetryTime(retries.get(retries.size() - 1).getRetryTime());
            }

            // 7. Suggestions derived from the findings above.
            generateSuggestions(result, record);
        } catch (Exception e) {
            log.error("诊断消息失败 [messageId={}]", messageId, e);
            result.addIssue("诊断过程出错: " + e.getMessage());
        }
        return result;
    }

    /**
     * Adds suggestions for messages unconsumed for over 24 hours, retried more than 3 times, or
     * delivered more than 30 seconds off schedule.
     */
    private void generateSuggestions(DiagnosticResult result, DelayMessageRecord record) {
        if (!result.isConsumed() && record.getCreateTime() != null) {
            long age = System.currentTimeMillis() - record.getCreateTime().getTime();
            if (age > TimeUnit.HOURS.toMillis(24)) {
                result.addSuggestion("消息超过24小时未消费,建议检查消费者状态");
            }
        }
        if (result.getRetryCount() > 3) {
            result.addSuggestion("重试次数过多,建议检查消费逻辑是否存在问题");
        }
        if (result.getDeviationMillis() > 30000) {
            result.addSuggestion("延时精度严重偏差,建议检查Broker负载情况");
        }
    }

    /**
     * Checks whether the delay queue for {@code delayLevel} can be found.
     *
     * <p>This only confirms the queue of that level exists; it does not search the queue for
     * {@code messageId}, so {@code true} means "could be in the queue", not "is in the queue".
     *
     * @return {@code false} for a level outside 1..18 or when the lookup fails
     */
    private boolean checkInDelayQueue(String messageId, int delayLevel) {
        // Level 0 (not a delay message) or a corrupt level would map to queue id -1 or beyond
        // the schedule topic's queues; do not hand such ids to the store.
        if (delayLevel < MIN_DELAY_LEVEL || delayLevel > MAX_DELAY_LEVEL) {
            return false;
        }
        try {
            ConsumeQueue cq = defaultMessageStore.findConsumeQueue(
                    TopicValidator.RMQ_SYS_SCHEDULE_TOPIC,
                    ScheduleMessageService.delayLevel2QueueId(delayLevel));
            // Locating the message itself would require scanning the queue.
            return cq != null;
        } catch (Exception e) {
            log.error("检查延时队列失败", e);
        }
        return false;
    }
}
延时消息与事务消息对比
特性 | 延时消息 | 事务消息 |
---|---|---|
使用场景 | 定时任务、超时处理 | 分布式事务 |
时间精度 | 秒级(有 1-2 秒误差) | 立即投递 |
可靠性 | 高 | 非常高 |
性能影响 | 中等 | 较大 |
实现复杂度 | 简单 | 复杂 |
支持回查 | 不支持 | 支持 |
总结

生产环境部署建议
- 资源配置:
  - Broker 内存建议 8GB 以上
  - 延时消息线程池大小设置为 CPU 核数的 2 倍
  - 预留足够的磁盘空间存储延时消息
- 监控告警:
  - 设置延时队列堆积告警阈值
  - 监控延时消息投递精度
  - 定期检查补偿任务执行情况
- 优化建议:
  - 避免大量同级别延时消息
  - 合理使用批量发送
  - 实现消费端幂等性
核心要点总结
关键点 | 建议 |
---|---|
延时级别选择 | 根据业务需求选择最接近的级别,避免过度延时 |
消息可靠性 | 关键业务消息需要持久化记录,实现故障恢复 |
消费幂等性 | 使用分布式锁或业务状态检查确保幂等 |
监控告警 | 监控延时消息堆积情况,及时发现问题 |
性能优化 | 避免同一时间大量相同级别的延时消息 |
容量规划 | 评估延时消息对 Broker 内存的影响,合理规划资源 |
安全控制 | 实施访问控制和限流策略,防止滥用 |
故障处理 | 建立完善的补偿机制和降级策略 |
完整的生产环境配置示例
yaml
# application-prod.yml
# All "spring" settings live under one top-level key: a YAML document must not repeat a key,
# and Spring Boot rejects the file with a duplicate-key error if it does.
spring:
  application:
    name: delay-message-service
  # Data source
  # NOTE(review): the fallback credentials (root / password) and useSSL=false are unsafe
  # defaults for production; consider dropping the defaults so a missing variable fails fast.
  datasource:
    url: jdbc:mysql://192.168.1.200:3306/delay_message?useSSL=false&serverTimezone=Asia/Shanghai
    username: ${DB_USERNAME:root}
    password: ${DB_PASSWORD:password}
    hikari:
      maximum-pool-size: 20
      minimum-idle: 5
      connection-timeout: 30000
      idle-timeout: 600000
      max-lifetime: 1800000
  # Redis
  redis:
    cluster:
      nodes:
        - 192.168.1.110:6379
        - 192.168.1.111:6379
        - 192.168.1.112:6379
    password: ${REDIS_PASSWORD:password}
    lettuce:
      pool:
        max-active: 20
        max-idle: 10
        min-idle: 5
        max-wait: -1ms
      cluster:
        refresh:
          adaptive: true
          period: 60000ms

rocketmq:
  name-server: 192.168.1.100:9876;192.168.1.101:9876;192.168.1.102:9876
  producer:
    group: delay-message-producer-group
    send-message-timeout: 5000
    compress-message-body-threshold: 4096
    max-message-size: 4194304
    retry-times-when-send-failed: 3
    retry-times-when-send-async-failed: 3
    retry-next-server: true
  # Delay-message settings
  delay:
    # Persist delay messages
    enable-persistence: true
    # Retry attempts for a delay message
    max-retry-times: 3
    # Backlog alert threshold
    alert-threshold: 10000
    # Monitor delivery accuracy
    enable-accuracy-monitor: true
    # Delivery-accuracy alert threshold (milliseconds)
    accuracy-alert-threshold: 10000
    # Degradation
    degrade:
      enabled: true
      # CPU usage threshold
      cpu-threshold: 0.8
      # Memory usage threshold
      memory-threshold: 0.85
      # Queue lag threshold
      queue-lag-threshold: 100000
    # Access control
    acl:
      enabled: true
      # Global QPS limit
      global-qps-limit: 10000
      # Per-user QPS limit
      user-qps-limit: 100
    # Monitoring
    monitor:
      enabled: true
      # Health-check interval
      health-check-interval: 60000
      # Metrics collection interval
      metrics-collect-interval: 30000
    # Performance
    performance:
      # Warm up on start-up
      warmup-enabled: true
      # Object pool size
      object-pool-size: 1000
      # Batch send size
      batch-size: 100
      # Async thread pool size
      async-thread-pool-size: 20

# Actuator / metrics
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus
  metrics:
    export:
      prometheus:
        enabled: true
    tags:
      application: ${spring.application.name}
      environment: production

# Logging
logging:
  level:
    root: INFO
    com.example.delaymessage: INFO
    org.apache.rocketmq: WARN
  pattern:
    console: "%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n"
  file:
    name: /var/log/delay-message/app.log
    max-size: 100MB
    max-history: 30
常见问题处理
- 延时消息未按时投递
bash
# 1. Check the delay-queue statistics
curl http://localhost:8080/delay-message/admin/statistics
# 2. Inspect one message (replace {messageId} with the real message id)
curl http://localhost:8080/delay-message/diagnostic/diagnose/{messageId}
# 3. Trigger a manual redelivery
curl -X POST http://localhost:8080/delay-message/admin/redeliver \
-H "Content-Type: application/json" \
-d '{"messageIds":["xxx"]}'
- 内存溢出处理
bash
# 1. Write a heap dump of the running JVM
jmap -dump:format=b,file=heap.dump <pid>
# 2. Analyse the heap dump
# NOTE(review): jhat was removed from the JDK in Java 9; on newer JDKs open heap.dump in a
# heap analyser such as Eclipse MAT or VisualVM instead.
jhat -port 7000 heap.dump
# 3. Temporarily raise the maximum heap size
export JAVA_OPTS="-Xmx6g"
- 消息堆积处理
bash
# 1. Temporarily raise the consumer thread count
# The body is JSON, so say so: with -d alone curl sends
# application/x-www-form-urlencoded.
curl -X POST http://localhost:8080/delay-message/admin/consumer/threads \
-H "Content-Type: application/json" \
-d '{"delayLevel":16,"threadCount":50}'
# 2. Enable parallel consumption
curl -X POST http://localhost:8080/delay-message/admin/consumer/parallel \
-H "Content-Type: application/json" \
-d '{"delayLevel":16,"enabled":true}'