一、消息轨迹核心概念
1.1 轨迹数据模型
text
复制
下载
消息轨迹三要素:
┌─────────────────────────────────────┐
│ 轨迹点(TracePoint) │
├─────────────────────────────────────┤
│ 1. 生产者轨迹点 │
│ • 消息发送开始时间 │
│ • 消息发送结束时间 │
│ • 发送状态(成功/失败) │
│ • 消息ID、Topic、Tags │
│ • 生产者地址 │
│ │
│ 2. Broker轨迹点 │
│ • 消息存储开始时间 │
│ • 消息存储结束时间 │
│ • 存储状态 │
│ • 存储消息位置(CommitLog偏移) │
│ • Broker地址 │
│ │
│ 3. 消费者轨迹点 │
│ • 消息拉取时间 │
│ • 消息消费开始时间 │
│ • 消息消费结束时间 │
│ • 消费状态(成功/失败/重试) │
│ • 消费者组、消费实例 │
└─────────────────────────────────────┘
二、消息轨迹数据采集
2.1 生产者轨迹采集
java
复制
下载
/**
 * Producer-side trace hook: records a Pub trace point around every message send
 * and hands it to the async {@link TraceDispatcher} so tracing never blocks the
 * send path.
 *
 * Thread model: RocketMQ invokes sendMessageBefore/After on the same thread for
 * one send, so a ThreadLocal carries the context between the two callbacks.
 */
public class ProducerTraceHook implements SendMessageHook {
    private final TraceDispatcher traceDispatcher;
    // Links the "before" and "after" halves of a single send on this thread.
    private final ThreadLocal<TraceContext> traceContext = new ThreadLocal<>();

    @Override
    public String hookName() {
        return "ProducerTraceHook";
    }

    /** Captures message metadata and the start timestamp before the send. */
    @Override
    public void sendMessageBefore(SendMessageContext context) {
        // 1. Create the trace context for this send attempt
        TraceContext ctx = new TraceContext();
        ctx.setTraceType(TraceType.Pub);
        ctx.setRequestId(UUID.randomUUID().toString());
        ctx.setTimeStamp(System.currentTimeMillis());
        // 2. Snapshot message info
        TraceBean traceBean = new TraceBean();
        traceBean.setTopic(context.getMessage().getTopic());
        traceBean.setMsgId(context.getMessage().getMsgId());
        traceBean.setTags(context.getMessage().getTags());
        traceBean.setKeys(context.getMessage().getKeys());
        traceBean.setStoreHost(context.getBrokerAddr());
        traceBean.setBodyLength(context.getMessage().getBody().length);
        traceBean.setTransactionId(context.getTransactionId());
        ctx.setTraceBeans(Arrays.asList(traceBean));
        this.traceContext.set(ctx);
        // 3. Start timing
        ctx.setStartTime(System.currentTimeMillis());
    }

    /**
     * Records the send outcome and dispatches the trace asynchronously.
     *
     * FIX: the original dereferenced {@code context.getSendResult()}
     * unconditionally. When the send fails with an exception the SendResult is
     * typically null, so the hook threw NPE and dropped exactly the failure
     * traces tracing exists to capture. The result is now null-checked.
     */
    @Override
    public void sendMessageAfter(SendMessageContext context) {
        TraceContext ctx = this.traceContext.get();
        if (ctx == null) {
            return; // no matching "before" on this thread; nothing to record
        }
        try {
            // 1. Close out timing
            ctx.setTimeStamp(System.currentTimeMillis());
            ctx.setCostTime(ctx.getTimeStamp() - ctx.getStartTime());
            // 2. Record the send outcome (sendResult may be null on failure)
            TraceBean traceBean = ctx.getTraceBeans().get(0);
            traceBean.setMsgId(context.getMessage().getMsgId());
            traceBean.setStoreTime(ctx.getTimeStamp());
            SendResult sendResult = context.getSendResult();
            if (sendResult != null) {
                traceBean.setOffsetMsgId(sendResult.getOffsetMsgId());
                traceBean.setStatus(sendResult.getSendStatus().name());
                if (sendResult.getSendStatus() == SendStatus.SEND_OK) {
                    traceBean.setSuccess(true);
                } else {
                    traceBean.setSuccess(false);
                    traceBean.setException(context.getException());
                }
            } else {
                // Send aborted before a result existed (e.g. remoting exception).
                traceBean.setSuccess(false);
                traceBean.setStatus("SEND_EXCEPTION");
                traceBean.setException(context.getException());
            }
            // 3. Broker that handled (or was targeted by) the send
            traceBean.setBrokerAddr(context.getBrokerAddr());
            // 4. Hand off asynchronously; never block the business send path
            traceDispatcher.append(ctx);
        } finally {
            // Always clear so state cannot leak into the next send on this thread.
            this.traceContext.remove();
        }
    }

    /** Trace data holder shared by the before/after hooks. */
    public static class TraceContext {
        private String requestId;        // correlation id for this trace
        private TraceType traceType;     // Pub / Consume / ...
        private long timeStamp;          // last-updated wall-clock time (ms)
        private long startTime;          // send start, for cost computation
        private long costTime;           // end - start, ms
        private List<TraceBean> traceBeans;
        private Map<String, String> context = new HashMap<>();
        // getters and setters
    }

    /** One trace point: a single message's data at one pipeline stage. */
    public static class TraceBean {
        private String topic;
        private String msgId;
        private String offsetMsgId;
        private String tags;
        private String keys;
        private String storeHost;
        private String brokerAddr;
        private long storeTime;
        private int bodyLength;
        private boolean success;
        private String status;
        private String transactionId;
        private Throwable exception;
        // getters and setters
    }
}
2.2 Broker轨迹采集
java
复制
下载
/**
 * Broker-side trace collection: wraps the store path (processRequest) and the
 * pull path (processPullRequest), emitting a before/after trace pair for each,
 * correlated by a shared requestId.
 *
 * NOTE(review): TraceType.SubBefore/SubAfter conventionally denote the
 * subscribe (consume) side; using them for the broker STORE path here looks
 * misapplied — confirm against the TraceType definition.
 * NOTE(review): traceBean.clone() requires TraceBean to expose a public
 * clone() (Cloneable); the TraceBean shown elsewhere in this file does not —
 * verify it is implemented in the real project.
 */
public class BrokerTraceHook extends SendMessageProcessor {
    private final TraceDispatcher traceDispatcher;

    /**
     * Handles a producer send request, recording a trace point before and
     * after the message is stored. Rethrows any storage exception after
     * recording a STORE_FAILED trace.
     */
    @Override
    public RemotingCommand processRequest(ChannelHandlerContext ctx,
        RemotingCommand request) throws Exception {
        // 1. Parse the send request header
        SendMessageRequestHeader requestHeader = parseRequestHeader(request);
        // 2. Create the "before store" trace context
        TraceContext traceContext = new TraceContext();
        traceContext.setTraceType(TraceType.SubBefore);
        traceContext.setRequestId(generateRequestId());
        traceContext.setTimeStamp(System.currentTimeMillis());
        // 3. Record broker-side request info (client address, payload size)
        TraceBean traceBean = new TraceBean();
        traceBean.setTopic(requestHeader.getTopic());
        traceBean.setMsgId(generateMsgId(ctx, request));
        traceBean.setStoreHost(ctx.channel().remoteAddress().toString());
        traceBean.setBodyLength(request.getBody().length);
        traceContext.setTraceBeans(Arrays.asList(traceBean));
        // 4. Emit the pre-store trace point
        traceDispatcher.append(traceContext);
        try {
            // 5. Delegate the actual message storage to the parent processor
            RemotingCommand response = super.processRequest(ctx, request);
            // 6. Emit the post-store trace point
            if (response != null) {
                SendMessageResponseHeader responseHeader =
                    (SendMessageResponseHeader) response.decodeCommandCustomHeader(
                        SendMessageResponseHeader.class);
                TraceContext afterContext = new TraceContext();
                afterContext.setTraceType(TraceType.SubAfter);
                // Same requestId links the before/after pair.
                afterContext.setRequestId(traceContext.getRequestId());
                afterContext.setTimeStamp(System.currentTimeMillis());
                afterContext.setCostTime(afterContext.getTimeStamp() - traceContext.getTimeStamp());
                TraceBean afterBean = traceBean.clone();
                afterBean.setOffsetMsgId(responseHeader.getMsgId());
                afterBean.setStoreTime(System.currentTimeMillis());
                // NOTE(review): marked success whenever a response exists —
                // the response code is not checked here; confirm intended.
                afterBean.setSuccess(true);
                afterBean.setStatus("STORE_SUCCESS");
                afterContext.setTraceBeans(Arrays.asList(afterBean));
                // 7. Emit the store-success trace
                traceDispatcher.append(afterContext);
            }
            return response;
        } catch (Exception e) {
            // 8. Storage failed: record a failure trace, then rethrow
            TraceContext errorContext = new TraceContext();
            errorContext.setTraceType(TraceType.SubAfter);
            errorContext.setRequestId(traceContext.getRequestId());
            errorContext.setTimeStamp(System.currentTimeMillis());
            errorContext.setCostTime(errorContext.getTimeStamp() - traceContext.getTimeStamp());
            TraceBean errorBean = traceBean.clone();
            errorBean.setSuccess(false);
            errorBean.setStatus("STORE_FAILED");
            errorBean.setException(e);
            errorContext.setTraceBeans(Arrays.asList(errorBean));
            traceDispatcher.append(errorContext);
            throw e;
        }
    }

    /**
     * Intercepts consumer pull requests, recording a trace point before and
     * after the pull. Failure traces are emitted before rethrowing.
     */
    @Override
    public RemotingCommand processPullRequest(ChannelHandlerContext ctx,
        PullMessageRequestHeader requestHeader) {
        // 1. Create the "before pull" trace context
        TraceContext traceContext = new TraceContext();
        traceContext.setTraceType(TraceType.ConsumeBefore);
        traceContext.setRequestId(generateRequestId());
        traceContext.setTimeStamp(System.currentTimeMillis());
        // 2. Record consumer identity and target queue position
        TraceBean traceBean = new TraceBean();
        traceBean.setTopic(requestHeader.getTopic());
        traceBean.setConsumerGroup(requestHeader.getConsumerGroup());
        traceBean.setClientHost(ctx.channel().remoteAddress().toString());
        traceContext.setTraceBeans(Arrays.asList(traceBean));
        traceContext.getContext().put("queueId", String.valueOf(requestHeader.getQueueId()));
        traceContext.getContext().put("queueOffset", String.valueOf(requestHeader.getQueueOffset()));
        // 3. Emit the pre-pull trace point
        traceDispatcher.append(traceContext);
        try {
            // 4. Delegate the actual pull to the parent processor
            RemotingCommand response = super.processPullRequest(ctx, requestHeader);
            // 5. Emit the pull-result trace point (success path only)
            if (response != null && response.getCode() == ResponseCode.SUCCESS) {
                PullMessageResponseHeader responseHeader =
                    (PullMessageResponseHeader) response.decodeCommandCustomHeader(
                        PullMessageResponseHeader.class);
                TraceContext afterContext = new TraceContext();
                afterContext.setTraceType(TraceType.ConsumeAfter);
                afterContext.setRequestId(traceContext.getRequestId());
                afterContext.setTimeStamp(System.currentTimeMillis());
                afterContext.setCostTime(afterContext.getTimeStamp() - traceContext.getTimeStamp());
                TraceBean afterBean = traceBean.clone();
                afterBean.setMsgId(responseHeader.getMsgId());
                afterBean.setOffsetMsgId(responseHeader.getOffsetMsgId());
                afterBean.setSuccess(true);
                afterBean.setStatus("PULL_SUCCESS");
                afterBean.setSuggestWhichBrokerId(responseHeader.getSuggestWhichBrokerId());
                afterContext.setTraceBeans(Arrays.asList(afterBean));
                traceDispatcher.append(afterContext);
            }
            return response;
        } catch (Exception e) {
            // 6. Pull failed: record a failure trace, then rethrow
            TraceContext errorContext = new TraceContext();
            errorContext.setTraceType(TraceType.ConsumeAfter);
            errorContext.setRequestId(traceContext.getRequestId());
            errorContext.setTimeStamp(System.currentTimeMillis());
            errorContext.setCostTime(errorContext.getTimeStamp() - traceContext.getTimeStamp());
            TraceBean errorBean = traceBean.clone();
            errorBean.setSuccess(false);
            errorBean.setStatus("PULL_FAILED");
            errorBean.setException(e);
            errorContext.setTraceBeans(Arrays.asList(errorBean));
            traceDispatcher.append(errorContext);
            throw e;
        }
    }
}
2.3 消费者轨迹采集
java
复制
下载
/**
 * Consumer-side trace hook: records a trace point per message around batch
 * consumption, plus an explicit trace for local transaction resolution.
 *
 * Thread model: consumeMessageBefore/After run on the same consumer thread for
 * one batch, so a ThreadLocal carries the context between them.
 */
public class ConsumerTraceHook implements ConsumeMessageHook {
    private final TraceDispatcher traceDispatcher;
    // Links the "before" and "after" halves of one consumed batch on this thread.
    private final ThreadLocal<TraceContext> traceContext = new ThreadLocal<>();

    @Override
    public String hookName() {
        return "ConsumerTraceHook";
    }

    /** Captures one TraceBean per message in the batch before consumption. */
    @Override
    public void consumeMessageBefore(ConsumeMessageContext context) {
        // 1. Create the consume trace context
        TraceContext ctx = new TraceContext();
        ctx.setTraceType(TraceType.Consume);
        ctx.setRequestId(generateRequestId());
        ctx.setTimeStamp(System.currentTimeMillis());
        ctx.setStartTime(System.currentTimeMillis());
        // 2. Snapshot every message in the batch
        List<TraceBean> traceBeans = new ArrayList<>();
        for (MessageExt msg : context.getMsgList()) {
            TraceBean traceBean = new TraceBean();
            traceBean.setTopic(msg.getTopic());
            traceBean.setMsgId(msg.getMsgId());
            traceBean.setOffsetMsgId(msg.getOffsetMsgId());
            traceBean.setTags(msg.getTags());
            traceBean.setKeys(msg.getKeys());
            traceBean.setStoreHost(msg.getStoreHost());
            traceBean.setBodyLength(msg.getBody().length);
            traceBean.setQueueId(msg.getQueueId());
            traceBean.setQueueOffset(msg.getQueueOffset());
            traceBeans.add(traceBean);
        }
        ctx.setTraceBeans(traceBeans);
        ctx.getContext().put("consumerGroup", context.getConsumerGroup());
        ctx.getContext().put("consumeMode", context.getConsumeMode().name());
        this.traceContext.set(ctx);
    }

    /**
     * Records the per-message consume outcome and dispatches the trace.
     *
     * FIX: the original marked a message successful with
     * {@code i < getSuccessMsgIndexList().size()}, i.e. the FIRST n messages of
     * the batch — regardless of WHICH messages actually succeeded. We now test
     * membership of the message's index in that list.
     * (Assumes getSuccessMsgIndexList() holds the indices of successfully
     * consumed messages — TODO confirm against the hook's caller.)
     */
    @Override
    public void consumeMessageAfter(ConsumeMessageContext context) {
        TraceContext ctx = this.traceContext.get();
        if (ctx == null) {
            return; // no matching "before"; nothing to record
        }
        try {
            // 1. Close out timing
            ctx.setTimeStamp(System.currentTimeMillis());
            ctx.setCostTime(ctx.getTimeStamp() - ctx.getStartTime());
            // 2. Per-message result
            List<Integer> successIndexes = context.getSuccessMsgIndexList();
            List<TraceBean> traceBeans = ctx.getTraceBeans();
            for (int i = 0; i < traceBeans.size(); i++) {
                TraceBean traceBean = traceBeans.get(i);
                if (successIndexes != null && successIndexes.contains(i)) {
                    // Consumed successfully
                    traceBean.setSuccess(true);
                    traceBean.setStatus("CONSUME_SUCCESS");
                } else {
                    // Consumption failed; a positive delay level means it will
                    // be redelivered, so mark it as a retry instead.
                    traceBean.setSuccess(false);
                    traceBean.setStatus("CONSUME_FAILED");
                    if (context.getDelayLevelWhenNextConsume() > 0) {
                        traceBean.setStatus("CONSUME_RETRY");
                        ctx.getContext().put("delayLevel",
                            String.valueOf(context.getDelayLevelWhenNextConsume()));
                    }
                }
                // Record when consumption finished
                traceBean.setStoreTime(ctx.getTimeStamp());
            }
            // 3. Identify the consuming queue instance
            ctx.getContext().put("consumerInstance",
                context.getMq().getBrokerName() + "@" + context.getMq().getQueueId());
            // 4. Attach the exception message, if any
            if (context.getException() != null) {
                ctx.getContext().put("exception",
                    context.getException().getMessage());
            }
            // 5. Hand off asynchronously
            traceDispatcher.append(ctx);
        } finally {
            // Always clear to avoid leaking state into the next batch.
            this.traceContext.remove();
        }
    }

    /**
     * Records a trace point for a local transaction resolution
     * (commit / rollback / unknown) of a transactional message.
     */
    public void traceTransactionMessage(MessageExt msg,
        LocalTransactionState transactionState,
        Throwable exception) {
        TraceContext ctx = new TraceContext();
        ctx.setTraceType(TraceType.Transaction);
        ctx.setRequestId(generateRequestId());
        ctx.setTimeStamp(System.currentTimeMillis());
        TraceBean traceBean = new TraceBean();
        traceBean.setTopic(msg.getTopic());
        traceBean.setMsgId(msg.getMsgId());
        traceBean.setOffsetMsgId(msg.getOffsetMsgId());
        traceBean.setTransactionId(msg.getTransactionId());
        traceBean.setStoreTime(System.currentTimeMillis());
        traceBean.setStatus(transactionState.name());
        // Only a committed transaction counts as success.
        traceBean.setSuccess(transactionState == LocalTransactionState.COMMIT_MESSAGE);
        if (exception != null) {
            traceBean.setException(exception);
        }
        ctx.setTraceBeans(Arrays.asList(traceBean));
        traceDispatcher.append(ctx);
    }
}
三、轨迹数据存储与传输
3.1 轨迹数据分发器
java
复制
下载
/**
 * Asynchronous trace pipeline: hooks enqueue TraceContexts into a bounded
 * queue; a dispatcher loop drains them in batches and sends them via
 * {@link TraceTransferService}, with bounded delayed retries and a local-file
 * fallback.
 */
public class TraceDispatcher {
    private final TraceTransferService traceTransferService;
    private final ThreadPoolExecutor asyncTraceExecutor;
    private final LinkedBlockingQueue<TraceContext> traceQueue;
    // Counts contexts dropped on a full queue; used to sample warnings.
    private final AtomicLong droppedCount = new AtomicLong(0);
    private volatile boolean stopped = false;

    public TraceDispatcher() {
        // 1. Bounded queue: tracing must never exhaust memory under load.
        this.traceQueue = new LinkedBlockingQueue<>(10000);
        // 2. Worker pool for the dispatcher loop and batch sends.
        this.asyncTraceExecutor = new ThreadPoolExecutor(
            2,  // core threads
            4,  // max threads
            60, TimeUnit.SECONDS,
            new LinkedBlockingQueue<>(1000),
            new ThreadFactory() {
                private final AtomicInteger threadIndex = new AtomicInteger(0);

                @Override
                public Thread newThread(Runnable r) {
                    return new Thread(r, "TraceDispatcherThread_" +
                        threadIndex.incrementAndGet());
                }
            },
            // Caller-runs keeps back-pressure inside the trace pipeline.
            new ThreadPoolExecutor.CallerRunsPolicy()
        );
        // 3. Transport toward the trace topic.
        this.traceTransferService = new TraceTransferService();
        // 4. Start draining the queue.
        startDispatcher();
    }

    /**
     * Enqueues a trace context. Waits at most 10ms — nearly (not strictly)
     * non-blocking for the business path — and drops the context, with sampled
     * logging, if the queue stays full.
     *
     * FIX: the original sampled the "queue full" warning with
     * {@code System.currentTimeMillis() % 100 == 0}, which fires essentially
     * at random; we now count drops and log every 100th deterministically.
     */
    public void append(TraceContext context) {
        if (stopped) {
            return;
        }
        try {
            boolean success = traceQueue.offer(context, 10, TimeUnit.MILLISECONDS);
            if (!success) {
                if (droppedCount.incrementAndGet() % 100 == 1) {
                    log.warn("Trace queue is full, dropped trace context: {}",
                        context.getRequestId());
                }
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /** Starts the long-running loop that drains the queue in batches of ≤100. */
    private void startDispatcher() {
        asyncTraceExecutor.submit(() -> {
            while (!stopped) {
                try {
                    // 1. Block briefly for the next context
                    TraceContext context = traceQueue.poll(100, TimeUnit.MILLISECONDS);
                    if (context != null) {
                        // 2. Greedily collect up to 99 more for one batch
                        List<TraceContext> batchContexts = new ArrayList<>();
                        batchContexts.add(context);
                        traceQueue.drainTo(batchContexts, 99);
                        // 3. Send the batch off-thread so polling continues
                        asyncTraceExecutor.submit(() -> processBatchTrace(batchContexts));
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    break;
                } catch (Exception e) {
                    log.error("Trace dispatcher error", e);
                }
            }
        });
    }

    /**
     * Serializes and sends one batch; throws on failure. Extracted so the
     * retry path can call the send WITHOUT re-entering the retry scheduler.
     */
    private void doSendBatch(List<TraceContext> contexts) throws Exception {
        List<String> traceDataList = new ArrayList<>(contexts.size());
        for (TraceContext context : contexts) {
            traceDataList.add(serializeTraceContext(context));
        }
        if (!traceDataList.isEmpty()) {
            traceTransferService.batchSend(traceDataList);
        }
    }

    /** First-attempt batch send; on failure hands the batch to the retry path. */
    private void processBatchTrace(List<TraceContext> contexts) {
        try {
            doSendBatch(contexts);
        } catch (Exception e) {
            log.error("Process batch trace error", e);
            retryFailedTraces(contexts);
        }
    }

    /** Serializes one TraceContext (and its TraceBeans) to a JSON string. */
    private String serializeTraceContext(TraceContext context) {
        JSONObject json = new JSONObject();
        // Envelope fields
        json.put("requestId", context.getRequestId());
        json.put("traceType", context.getTraceType().name());
        json.put("timestamp", context.getTimeStamp());
        json.put("costTime", context.getCostTime());
        // Per-message trace points
        JSONArray traceBeans = new JSONArray();
        for (TraceBean bean : context.getTraceBeans()) {
            JSONObject beanJson = new JSONObject();
            beanJson.put("topic", bean.getTopic());
            beanJson.put("msgId", bean.getMsgId());
            beanJson.put("offsetMsgId", bean.getOffsetMsgId());
            beanJson.put("tags", bean.getTags());
            beanJson.put("keys", bean.getKeys());
            beanJson.put("storeHost", bean.getStoreHost());
            beanJson.put("brokerAddr", bean.getBrokerAddr());
            beanJson.put("storeTime", bean.getStoreTime());
            beanJson.put("bodyLength", bean.getBodyLength());
            beanJson.put("success", bean.isSuccess());
            beanJson.put("status", bean.getStatus());
            beanJson.put("transactionId", bean.getTransactionId());
            if (bean.getException() != null) {
                // Only the message — a full stack trace would bloat the payload.
                beanJson.put("exception", bean.getException().getMessage());
            }
            traceBeans.add(beanJson);
        }
        json.put("traceBeans", traceBeans);
        // Free-form key/value context
        if (context.getContext() != null && !context.getContext().isEmpty()) {
            json.put("context", context.getContext());
        }
        return json.toJSONString();
    }

    /**
     * Schedules up to five delayed retries (1s/5s/10s/30s/60s); after the last
     * failure the batch is persisted to a local file as the final safety net.
     *
     * FIX (two bugs in the original):
     * 1. All five retries were scheduled and ALL executed even when an earlier
     *    attempt had succeeded, duplicating trace data up to 5x. A done-flag
     *    now short-circuits the remaining attempts.
     * 2. Each retry invoked processBatchTrace, whose failure path spawned a
     *    brand-new five-attempt retry chain — exponential retry explosion.
     *    Retries now call the throwing doSendBatch directly.
     */
    private void retryFailedTraces(List<TraceContext> contexts) {
        ScheduledExecutorService retryExecutor = Executors.newSingleThreadScheduledExecutor(
            r -> new Thread(r, "TraceRetryThread")
        );
        long[] delays = {1000, 5000, 10000, 30000, 60000};
        AtomicBoolean done = new AtomicBoolean(false);
        for (int i = 0; i < delays.length; i++) {
            final int attempt = i + 1;
            retryExecutor.schedule(() -> {
                if (done.get()) {
                    return; // an earlier attempt already delivered this batch
                }
                try {
                    log.info("Retry trace data, attempt: {}", attempt);
                    doSendBatch(contexts);
                    done.set(true);
                } catch (Exception e) {
                    log.error("Trace retry failed, attempt: {}", attempt, e);
                    if (attempt == delays.length) {
                        // Final attempt failed — persist locally.
                        writeToLocalFile(contexts);
                    }
                }
            }, delays[i], TimeUnit.MILLISECONDS);
        }
        // Already-scheduled delayed tasks still run after shutdown().
        retryExecutor.shutdown();
    }

    /** Appends serialized traces to a timestamped file under the user's home. */
    private void writeToLocalFile(List<TraceContext> contexts) {
        String logDir = System.getProperty("user.home") + "/logs/rocketmq_trace/";
        File dir = new File(logDir);
        if (!dir.exists()) {
            dir.mkdirs();
        }
        String fileName = logDir + "trace_failed_" +
            System.currentTimeMillis() + ".log";
        // FIX: write UTF-8 explicitly — serialized traces may contain non-ASCII
        // text (topics, exception messages); the platform default could mangle it.
        try (FileWriter writer = new FileWriter(fileName, StandardCharsets.UTF_8, true)) {
            for (TraceContext context : contexts) {
                writer.write(serializeTraceContext(context));
                writer.write("\n");
            }
            log.info("Trace data written to local file: {}", fileName);
        } catch (IOException e) {
            log.error("Write trace to local file failed", e);
        }
    }

    /** Stops accepting new traces and shuts down the worker pool. */
    public void shutdown() {
        stopped = true;
        asyncTraceExecutor.shutdown();
    }
}
篇幅限制,下面只能给大家展示小册的部分内容。整理了一份核心面试笔记,包括:Java 面试、Spring、JVM、MyBatis、Redis、MySQL、并发编程、微服务、Linux、Spring Boot、Spring Cloud、MQ、Kafka 等。
需要全套面试笔记及答案
【点击此处即可/免费获取】
3.2 轨迹数据传输服务
java
复制
下载
/**
 * Transports serialized trace data to the dedicated trace topic via a private
 * DefaultMQProducer, and periodically logs send statistics.
 *
 * NOTE(review): the registered hook only TAGS trace messages with
 * _IS_TRACE_MSG_; recursion is actually avoided only if the tracing hooks
 * elsewhere skip messages carrying that property — confirm they do.
 */
public class TraceTransferService {
    private final DefaultMQProducer traceProducer;
    private final String traceTopic;
    // Cumulative counters for the monitor thread.
    private final AtomicLong sendCounter = new AtomicLong(0);
    private final AtomicLong failCounter = new AtomicLong(0);

    public TraceTransferService() {
        try {
            // 1. Build the dedicated trace producer (topic overridable via system property)
            this.traceTopic = System.getProperty("rocketmq.trace.topic", "RMQ_SYS_TRACE_TOPIC");
            this.traceProducer = new DefaultMQProducer("TRACE_PRODUCER_GROUP");
            traceProducer.setNamesrvAddr(System.getProperty("rocketmq.namesrv.addr"));
            traceProducer.setSendMsgTimeout(3000);
            traceProducer.setRetryTimesWhenSendFailed(2);
            traceProducer.setCompressMsgBodyOverHowmuch(1024 * 4); // compress bodies over 4KB
            // 2. Tag outgoing trace messages so they can be excluded from tracing
            traceProducer.getDefaultMQProducerImpl().registerSendMessageHook(
                new SendMessageHook() {
                    @Override
                    public String hookName() {
                        return "TraceProducerExcludeHook";
                    }

                    @Override
                    public void sendMessageBefore(SendMessageContext context) {
                        // Mark as a trace message so it is not traced itself
                        context.getMessage().putUserProperty("_IS_TRACE_MSG_", "true");
                    }

                    @Override
                    public void sendMessageAfter(SendMessageContext context) {
                        // do nothing
                    }
                }
            );
            // 3. Start the producer
            traceProducer.start();
            // 4. Start the statistics monitor
            startMonitor();
        } catch (Exception e) {
            throw new RuntimeException("Init trace transfer service failed", e);
        }
    }

    /**
     * Sends a batch of serialized trace records as one batched message send.
     * Throws when the broker reports a non-OK status.
     *
     * NOTE(review): if traceProducer.send() itself throws, neither counter is
     * updated, so the logged failure rate undercounts transport exceptions.
     */
    public void batchSend(List<String> traceDataList) throws Exception {
        if (traceDataList == null || traceDataList.isEmpty()) {
            return;
        }
        // 1. Wrap each JSON payload in a Message
        List<Message> messageList = new ArrayList<>();
        for (String traceData : traceDataList) {
            Message message = new Message(traceTopic,
                traceData.getBytes(StandardCharsets.UTF_8));
            // Tag with type and send time
            message.putUserProperty("TRACE_TYPE", "TRACE_DATA");
            message.putUserProperty("TIMESTAMP", String.valueOf(System.currentTimeMillis()));
            messageList.add(message);
        }
        // 2. Batched send
        SendResult sendResult = traceProducer.send(messageList);
        // 3. Update statistics
        sendCounter.addAndGet(messageList.size());
        if (sendResult.getSendStatus() != SendStatus.SEND_OK) {
            failCounter.addAndGet(messageList.size());
            throw new RuntimeException("Send trace data failed: " + sendResult.getSendStatus());
        }
    }

    /** Single-record send (compatibility path); same semantics as batchSend. */
    public void send(String traceData) throws Exception {
        Message message = new Message(traceTopic,
            traceData.getBytes(StandardCharsets.UTF_8));
        message.putUserProperty("TRACE_TYPE", "TRACE_DATA");
        SendResult sendResult = traceProducer.send(message);
        sendCounter.incrementAndGet();
        if (sendResult.getSendStatus() != SendStatus.SEND_OK) {
            failCounter.incrementAndGet();
            throw new RuntimeException("Send trace data failed: " + sendResult.getSendStatus());
        }
    }

    /**
     * Logs send/fail counts and success rate every 30 seconds.
     *
     * NOTE(review): the "daily reset" below triggers whenever a tick lands in
     * the first 30s after UTC midnight (epoch-based, not local time); with a
     * 30s period it may fire zero, one, or two times — fragile heuristic.
     */
    private void startMonitor() {
        ScheduledExecutorService monitorExecutor = Executors.newSingleThreadScheduledExecutor(
            r -> new Thread(r, "TraceTransferMonitor")
        );
        // Report statistics every 30 seconds
        monitorExecutor.scheduleAtFixedRate(() -> {
            try {
                long sent = sendCounter.get();
                long failed = failCounter.get();
                long successRate = sent > 0 ? (sent - failed) * 100 / sent : 100;
                log.info("Trace transfer statistics - Sent: {}, Failed: {}, Success Rate: {}%",
                    sent, failed, successRate);
                // Reset counters roughly once per (UTC) day — see note above
                if (System.currentTimeMillis() % 86400000 < 30000) {
                    sendCounter.set(0);
                    failCounter.set(0);
                }
            } catch (Exception e) {
                log.error("Trace monitor error", e);
            }
        }, 30, 30, TimeUnit.SECONDS);
    }

    /** Shuts down the underlying trace producer. */
    public void shutdown() {
        if (traceProducer != null) {
            traceProducer.shutdown();
        }
    }
}
四、轨迹数据存储方案
4.1 轨迹Topic设计
java
复制
下载
/**
 * Trace-topic design: configuration constants, topic creation across all
 * brokers, and a pull-based consumer that moves trace messages into storage.
 */
public class TraceTopicDesign {
    /** Static configuration for the trace topic. */
    public static class TraceTopicConfig {
        // Topic name
        public static final String TRACE_TOPIC = "RMQ_SYS_TRACE_TOPIC";
        // Queue count (tune to cluster size)
        public static final int TRACE_QUEUE_NUM = 16;
        // Storage sizing
        public static final int TRACE_FILE_SIZE = 1024 * 1024 * 1024; // 1GB
        public static final int TRACE_FILE_NUM = 10;
        // Retention: 7 days, in milliseconds
        public static final long TRACE_EXPIRE_TIME = 7 * 24 * 3600 * 1000L;
    }

    /**
     * Creates (or updates) the trace topic on every master broker of the
     * cluster, with 7-day retention (168h) and a 04:00 delete window.
     */
    public void createTraceTopic(DefaultMQAdminExt adminExt) throws Exception {
        // 1. Topic configuration: read+write, unordered
        TopicConfig topicConfig = new TopicConfig();
        topicConfig.setTopicName(TraceTopicConfig.TRACE_TOPIC);
        topicConfig.setWriteQueueNums(TraceTopicConfig.TRACE_QUEUE_NUM);
        topicConfig.setReadQueueNums(TraceTopicConfig.TRACE_QUEUE_NUM);
        topicConfig.setPerm(PermName.PERM_READ | PermName.PERM_WRITE);
        topicConfig.setTopicSysFlag(0);
        topicConfig.setOrder(false);
        // 2. Broker-side attributes (retention in hours, deletion schedule)
        Map<String, String> attributes = new HashMap<>();
        attributes.put("fileReservedTime", "168"); // 7 days
        attributes.put("deleteWhen", "04");
        attributes.put("cleanResourcePolicy", "DELETE");
        topicConfig.setAttributes(attributes);
        // 3. Apply on the master of every broker group
        ClusterInfo clusterInfo = adminExt.examineBrokerClusterInfo();
        for (BrokerData brokerData : clusterInfo.getBrokerAddrTable().values()) {
            String brokerAddr = brokerData.getBrokerAddrs().get(MixAll.MASTER_ID);
            if (brokerAddr != null) {
                adminExt.createAndUpdateTopicConfig(brokerAddr, topicConfig);
            }
        }
    }

    /**
     * Pull-based consumer: one thread per queue, each pulling trace messages
     * and batch-inserting them into TraceDataStorage.
     *
     * NOTE(review): non-static inner class — it captures a hidden reference to
     * the enclosing TraceTopicDesign; consider making it static/top-level.
     */
    public class TraceDataConsumer {
        private final DefaultMQPullConsumer traceConsumer;
        private final TraceDataStorage storage;
        // In-memory offset cache per queue, seeded from the broker on first use.
        private final Map<MessageQueue, Long> offsetTable = new ConcurrentHashMap<>();

        public TraceDataConsumer() throws Exception {
            // 1. Configure the pull consumer
            this.traceConsumer = new DefaultMQPullConsumer("TRACE_CONSUMER_GROUP");
            traceConsumer.setNamesrvAddr(System.getProperty("rocketmq.namesrv.addr"));
            traceConsumer.setBrokerSuspendMaxTimeMillis(20000);
            traceConsumer.setConsumerTimeoutMillisWhenSuspend(30000);
            traceConsumer.setConsumerPullTimeoutMillis(30000);
            // 2. Storage backend
            this.storage = new TraceDataStorage();
            // 3. Start the consumer
            traceConsumer.start();
            // 4. Spawn one consume thread per queue
            startConsumeThreads();
        }

        /** Starts one dedicated consume thread per message queue of the topic. */
        private void startConsumeThreads() {
            try {
                // Discover all queues of the trace topic
                Set<MessageQueue> mqs = traceConsumer.fetchSubscribeMessageQueues(
                    TraceTopicConfig.TRACE_TOPIC);
                for (MessageQueue mq : mqs) {
                    // One long-lived thread per queue
                    Thread consumeThread = new Thread(() -> {
                        consumeTraceData(mq);
                    }, "TraceConsumeThread-" + mq.getQueueId());
                    consumeThread.start();
                }
            } catch (Exception e) {
                throw new RuntimeException("Start trace consumer failed", e);
            }
        }

        /**
         * Pull loop for one queue: pull, store, advance offset.
         *
         * NOTE(review): while(true) with no stop flag — these threads cannot be
         * shut down cleanly; a volatile running flag would be safer.
         * NOTE(review): this pullBlockIfNotFound overload takes 5 arguments;
         * verify the signature against the client version in use.
         */
        private void consumeTraceData(MessageQueue mq) {
            while (true) {
                try {
                    // 1. Resolve the current offset (cache, then broker)
                    long offset = getMessageQueueOffset(mq);
                    // 2. Pull a batch, blocking if the queue is empty
                    PullResult pullResult = traceConsumer.pullBlockIfNotFound(
                        mq,
                        "*", // all tags
                        offset,
                        32, // batch size
                        5000 // timeout ms
                    );
                    // 3. Dispatch by pull status
                    switch (pullResult.getPullStatus()) {
                        case FOUND:
                            // Store the batch
                            processMessages(pullResult.getMsgFoundList());
                            // Advance the local offset cache
                            long nextOffset = pullResult.getNextBeginOffset();
                            putMessageQueueOffset(mq, nextOffset);
                            // Persist the consume progress to the broker
                            traceConsumer.updateConsumeOffset(mq, nextOffset);
                            break;
                        case NO_NEW_MSG:
                            // Nothing new; back off briefly
                            Thread.sleep(1000);
                            break;
                        case NO_MATCHED_MSG:
                        case OFFSET_ILLEGAL:
                            // Drop the cached offset and re-resolve next round
                            handlePullException(mq, pullResult.getPullStatus());
                            break;
                    }
                } catch (Exception e) {
                    log.error("Consume trace data error, mq: {}", mq, e);
                    try {
                        Thread.sleep(5000);
                    } catch (InterruptedException ex) {
                        Thread.currentThread().interrupt();
                        break;
                    }
                }
            }
        }

        /** Parses a batch of trace messages and bulk-inserts the valid ones. */
        private void processMessages(List<MessageExt> messages) {
            if (messages == null || messages.isEmpty()) {
                return;
            }
            // Parse individually so one bad record does not sink the batch
            List<TraceRecord> records = new ArrayList<>();
            for (MessageExt msg : messages) {
                try {
                    TraceRecord record = parseTraceData(msg);
                    if (record != null) {
                        records.add(record);
                    }
                } catch (Exception e) {
                    log.error("Parse trace data error, msgId: {}", msg.getMsgId(), e);
                }
            }
            // Bulk insert into the storage backend
            if (!records.isEmpty()) {
                storage.batchInsert(records);
            }
        }

        /** Deserializes one trace message body (UTF-8 JSON) into a TraceRecord. */
        private TraceRecord parseTraceData(MessageExt msg) {
            try {
                String body = new String(msg.getBody(), StandardCharsets.UTF_8);
                JSONObject json = JSON.parseObject(body);
                TraceRecord record = new TraceRecord();
                record.setRequestId(json.getString("requestId"));
                record.setTraceType(json.getString("traceType"));
                record.setTimestamp(json.getLong("timestamp"));
                record.setCostTime(json.getLong("costTime"));
                record.setMsgId(msg.getMsgId());
                record.setStoreTime(System.currentTimeMillis());
                // Per-message trace points
                JSONArray traceBeans = json.getJSONArray("traceBeans");
                List<TracePoint> tracePoints = new ArrayList<>();
                for (int i = 0; i < traceBeans.size(); i++) {
                    JSONObject beanJson = traceBeans.getJSONObject(i);
                    TracePoint point = new TracePoint();
                    point.setTopic(beanJson.getString("topic"));
                    point.setMsgId(beanJson.getString("msgId"));
                    point.setOffsetMsgId(beanJson.getString("offsetMsgId"));
                    point.setTags(beanJson.getString("tags"));
                    point.setKeys(beanJson.getString("keys"));
                    point.setStoreHost(beanJson.getString("storeHost"));
                    point.setBrokerAddr(beanJson.getString("brokerAddr"));
                    point.setStoreTime(beanJson.getLong("storeTime"));
                    point.setBodyLength(beanJson.getInteger("bodyLength"));
                    point.setSuccess(beanJson.getBoolean("success"));
                    point.setStatus(beanJson.getString("status"));
                    point.setTransactionId(beanJson.getString("transactionId"));
                    tracePoints.add(point);
                }
                record.setTracePoints(tracePoints);
                // Optional free-form context map
                JSONObject contextJson = json.getJSONObject("context");
                if (contextJson != null) {
                    Map<String, String> context = new HashMap<>();
                    for (String key : contextJson.keySet()) {
                        context.put(key, contextJson.getString(key));
                    }
                    record.setContext(context);
                }
                return record;
            } catch (Exception e) {
                throw new RuntimeException("Parse trace data failed", e);
            }
        }

        /**
         * Returns the offset for a queue: local cache first, then the
         * broker-persisted progress.
         */
        private long getMessageQueueOffset(MessageQueue mq) {
            Long offset = offsetTable.get(mq);
            if (offset != null) {
                return offset;
            }
            try {
                // Ask the broker for the persisted consume progress
                long brokerOffset = traceConsumer.fetchConsumeOffset(mq, false);
                if (brokerOffset >= 0) {
                    offsetTable.put(mq, brokerOffset);
                    return brokerOffset;
                }
            } catch (Exception e) {
                log.error("Get consume offset error", e);
            }
            // Fallback: offset 0, i.e. consume from the EARLIEST position.
            // (The original comment claimed "latest position" — 0 is not that;
            // a true "latest" fallback would query the queue's max offset.)
            return 0;
        }

        /** Caches the next offset to pull for a queue. */
        private void putMessageQueueOffset(MessageQueue mq, long offset) {
            offsetTable.put(mq, offset);
        }

        /** On NO_MATCHED_MSG / OFFSET_ILLEGAL: drop the cached offset so the next loop re-fetches it from the broker. */
        private void handlePullException(MessageQueue mq, PullStatus status) {
            log.warn("Pull trace data exception, mq: {}, status: {}", mq, status);
            offsetTable.remove(mq);
        }
    }
}
4.2 Elasticsearch轨迹存储
java
复制
下载
public class ElasticsearchTraceStorage {
private final RestHighLevelClient elasticsearchClient;
private final String indexPrefix;
private final ObjectMapper objectMapper;
public ElasticsearchTraceStorage() {
// 1. 初始化ES客户端
this.elasticsearchClient = new RestHighLevelClient(
RestClient.builder(
new HttpHost("localhost", 9200, "http")
)
);
// 2. 索引前缀(按日期分片)
this.indexPrefix = "rocketmq-trace-";
// 3. JSON映射器
this.objectMapper = new ObjectMapper();
objectMapper.setDateFormat(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSZ"));
// 4. 创建索引模板
createIndexTemplate();
}
// 创建ES索引模板
private void createIndexTemplate() {
try {
// 索引模板定义
String templateSource = """
{
"index_patterns": ["rocketmq-trace-*"],
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1,
"refresh_interval": "5s",
"analysis": {
"analyzer": {
"trace_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": ["lowercase"]
}
}
}
},
"mappings": {
"properties": {
"requestId": {
"type": "keyword"
},
"traceType": {
"type": "keyword"
},
"timestamp": {
"type": "date",
"format": "epoch_millis"
},
"costTime": {
"type": "long"
},
"msgId": {
"type": "keyword"
},
"offsetMsgId": {
"type": "keyword"
},
"topic": {
"type": "keyword"
},
"tags": {
"type": "text",
"analyzer": "trace_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"keys": {
"type": "keyword"
},
"storeHost": {
"type": "keyword"
},
"brokerAddr": {
"type": "keyword"
},
"success": {
"type": "boolean"
},
"status": {
"type": "keyword"
},
"consumerGroup": {
"type": "keyword"
},
"transactionId": {
"type": "keyword"
},
"exception": {
"type": "text"
},
"tracePoints": {
"type": "nested",
"properties": {
"msgId": {"type": "keyword"},
"offsetMsgId": {"type": "keyword"},
"topic": {"type": "keyword"},
"status": {"type": "keyword"},
"storeTime": {"type": "date"}
}
},
"context": {
"type": "object",
"enabled": true
}
}
},
"aliases": {
"rocketmq-trace": {}
}
}
""";
PutIndexTemplateRequest request = new PutIndexTemplateRequest("rocketmq-trace-template");
request.source(templateSource, XContentType.JSON);
request.patterns(Arrays.asList("rocketmq-trace-*"));
elasticsearchClient.indices().putTemplate(request, RequestOptions.DEFAULT);
log.info("Elasticsearch index template created");
} catch (Exception e) {
log.error("Create index template failed", e);
}
}
// 批量插入轨迹数据
public void batchInsert(List<TraceRecord> records) {
if (records == null || records.isEmpty()) {
return;
}
try {
// 1. 创建批量请求
BulkRequest bulkRequest = new BulkRequest();
// 2. 为每条记录创建索引请求
for (TraceRecord record : records) {
// 按日期创建索引名
String indexName = getIndexName(record.getTimestamp());
// 创建文档ID
String docId = record.getRequestId() + "_" + record.getTraceType();
// 转换为JSON
String json = objectMapper.writeValueAsString(record);
// 添加索引请求
IndexRequest indexRequest = new IndexRequest(indexName);
indexRequest.id(docId);
indexRequest.source(json, XContentType.JSON);
bulkRequest.add(indexRequest);
}
// 3. 执行批量插入
BulkResponse bulkResponse = elasticsearchClient.bulk(bulkRequest, RequestOptions.DEFAULT);
// 4. 检查执行结果
if (bulkResponse.hasFailures()) {
log.error("Bulk insert has failures: {}", bulkResponse.buildFailureMessage());
}
} catch (Exception e) {
log.error("Batch insert trace records failed", e);
// 5. 失败重试
retryFailedRecords(records);
}
}
// 根据时间戳获取索引名
private String getIndexName(long timestamp) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
String dateStr = sdf.format(new Date(timestamp));
return indexPrefix + dateStr;
}
// 查询轨迹数据
public List<TraceRecord> queryTrace(String msgId, String topic,
long startTime, long endTime) {
try {
// 1. 构建查询请求
SearchRequest searchRequest = new SearchRequest("rocketmq-trace-*");
// 2. 构建查询条件
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
if (msgId != null) {
boolQuery.must(QueryBuilders.termQuery("msgId", msgId));
}
if (topic != null) {
boolQuery.must(QueryBuilders.termQuery("topic", topic));
}
boolQuery.must(QueryBuilders.rangeQuery("timestamp")
.gte(startTime)
.lte(endTime));
// 3. 构建搜索源
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
sourceBuilder.query(boolQuery);
sourceBuilder.from(0);
sourceBuilder.size(100); // 最多返回100条
sourceBuilder.sort("timestamp", SortOrder.DESC);
// 4. 添加高亮
HighlightBuilder highlightBuilder = new HighlightBuilder();
highlightBuilder.field("msgId");
highlightBuilder.field("status");
sourceBuilder.highlighter(highlightBuilder);
// 5. 执行查询
searchRequest.source(sourceBuilder);
SearchResponse response = elasticsearchClient.search(searchRequest, RequestOptions.DEFAULT);
// 6. 解析结果
return parseSearchResponse(response);
} catch (Exception e) {
log.error("Query trace data failed", e);
return Collections.emptyList();
}
}
/**
 * Counts trace records per status value for one topic within a time window,
 * using a terms aggregation (no documents are fetched). Returns an empty map
 * on failure; errors are logged.
 */
public Map<String, Long> aggregateByStatus(String topic, long startTime, long endTime) {
    try {
        BoolQueryBuilder conditions = QueryBuilders.boolQuery()
                .must(QueryBuilders.termQuery("topic", topic))
                .must(QueryBuilders.rangeQuery("timestamp")
                        .gte(startTime)
                        .lte(endTime));

        // Group by "status", keeping at most 100 distinct buckets.
        TermsAggregationBuilder byStatus = AggregationBuilders.terms("status_count")
                .field("status")
                .size(100);

        SearchSourceBuilder source = new SearchSourceBuilder()
                .query(conditions)
                .size(0) // aggregation only — skip document hits
                .aggregation(byStatus);

        SearchRequest request = new SearchRequest("rocketmq-trace-*").source(source);
        SearchResponse response =
                elasticsearchClient.search(request, RequestOptions.DEFAULT);

        Terms statusTerms = response.getAggregations().get("status_count");
        Map<String, Long> counts = new HashMap<>();
        for (Terms.Bucket bucket : statusTerms.getBuckets()) {
            counts.put(bucket.getKeyAsString(), bucket.getDocCount());
        }
        return counts;
    } catch (Exception e) {
        log.error("Aggregate trace data failed", e);
        return Collections.emptyMap();
    }
}
// Failure retry: re-attempts a failed batch insert once, after a 5-second delay.
// NOTE(review): batchInsert() calls this method again whenever the retry fails,
// so a persistently failing batch is retried forever with no attempt cap or
// backoff growth — confirm whether a max-retry limit is needed.
// NOTE(review): a fresh single-thread scheduler is created per failure; the
// shutdown() below lets the already-scheduled task run and then terminates the
// thread, but a shared scheduler would be cheaper under repeated failures.
private void retryFailedRecords(List<TraceRecord> records) {
// Retry implementation: schedule one delayed re-insert of the same records.
ScheduledExecutorService retryExecutor = Executors.newSingleThreadScheduledExecutor();
retryExecutor.schedule(() -> {
try {
log.info("Retry inserting {} trace records", records.size());
batchInsert(records);
} catch (Exception e) {
log.error("Retry insert failed", e);
}
}, 5, TimeUnit.SECONDS);
// Graceful shutdown: pending task still runs, then the thread exits.
retryExecutor.shutdown();
}
/**
 * Releases the Elasticsearch client. Safe to call when the client was never
 * created; close failures are logged, not propagated.
 */
public void close() {
    if (elasticsearchClient == null) {
        return;
    }
    try {
        elasticsearchClient.close();
    } catch (IOException e) {
        log.error("Close elasticsearch client failed", e);
    }
}
}
篇幅限制下面就只能给大家展示小册部分内容了。整理了一份核心面试笔记包括了:Java面试、Spring、JVM、MyBatis、Redis、MySQL、并发编程、微服务、Linux、Springboot、SpringCloud、MQ、Kafka
需要全套面试笔记及答案
【点击此处即可/免费获取】
五、轨迹数据查询分析
5.1 轨迹查询服务
java
复制
下载
@RestController
@RequestMapping("/api/trace")
public class TraceQueryController {

    @Autowired
    private TraceQueryService traceQueryService;

    @Autowired
    private TraceStatisticsService statisticsService;

    /** Upper bound on rows returned by /export, to cap memory and response size. */
    private static final int EXPORT_LIMIT = 10000;

    /**
     * 1. Query the complete trace (produce / store / consume) of one message.
     */
    @GetMapping("/message/{msgId}")
    public ApiResponse<TraceDetail> getMessageTrace(@PathVariable String msgId) {
        try {
            TraceDetail traceDetail = traceQueryService.getMessageTrace(msgId);
            return ApiResponse.success(traceDetail);
        } catch (Exception e) {
            return ApiResponse.error(500, "Query message trace failed: " + e.getMessage());
        }
    }

    /**
     * 2. Query traces by business key. A zero startTime/endTime means "unset";
     * the window then defaults to the last 24 hours.
     */
    @GetMapping("/key")
    public ApiResponse<List<TraceRecord>> queryByKey(
            @RequestParam String key,
            @RequestParam(required = false) String topic,
            @RequestParam(defaultValue = "0") Long startTime,
            @RequestParam(defaultValue = "0") Long endTime) {
        try {
            long[] window = resolveTimeWindow(startTime, endTime, 24 * 3600 * 1000L);
            List<TraceRecord> records =
                    traceQueryService.queryByKey(key, topic, window[0], window[1]);
            return ApiResponse.success(records);
        } catch (Exception e) {
            return ApiResponse.error(500, "Query trace by key failed: " + e.getMessage());
        }
    }

    /**
     * 3. Paged query of traces within an explicit time range.
     */
    @GetMapping("/time-range")
    public ApiResponse<PageResult<TraceRecord>> queryByTimeRange(
            @RequestParam Long startTime,
            @RequestParam Long endTime,
            @RequestParam(defaultValue = "1") Integer page,
            @RequestParam(defaultValue = "20") Integer size) {
        try {
            PageResult<TraceRecord> result = traceQueryService.queryByTimeRange(
                    startTime, endTime, page, size);
            return ApiResponse.success(result);
        } catch (Exception e) {
            return ApiResponse.error(500, "Query trace by time range failed: " + e.getMessage());
        }
    }

    /**
     * 4. Trace statistics. Zero startTime/endTime means "unset";
     * the window then defaults to the last hour.
     */
    @GetMapping("/statistics")
    public ApiResponse<TraceStatistics> getStatistics(
            @RequestParam(required = false) String topic,
            @RequestParam(defaultValue = "0") Long startTime,
            @RequestParam(defaultValue = "0") Long endTime) {
        try {
            long[] window = resolveTimeWindow(startTime, endTime, 3600 * 1000L);
            TraceStatistics statistics =
                    statisticsService.getStatistics(topic, window[0], window[1]);
            return ApiResponse.success(statistics);
        } catch (Exception e) {
            return ApiResponse.error(500, "Get trace statistics failed: " + e.getMessage());
        }
    }

    /**
     * 5. Full-link trace lookup by request id.
     */
    @GetMapping("/link/{requestId}")
    public ApiResponse<TraceLink> getTraceLink(@PathVariable String requestId) {
        try {
            TraceLink traceLink = traceQueryService.getTraceLink(requestId);
            return ApiResponse.success(traceLink);
        } catch (Exception e) {
            return ApiResponse.error(500, "Get trace link failed: " + e.getMessage());
        }
    }

    /**
     * 6. Export trace data as CSV (default) or JSON; at most EXPORT_LIMIT rows.
     */
    @GetMapping("/export")
    public void exportTraceData(
            HttpServletResponse response,
            @RequestParam Long startTime,
            @RequestParam Long endTime,
            @RequestParam(required = false) String format) throws IOException {
        try {
            String exportFormat = format != null ? format : "csv";
            List<TraceRecord> records = traceQueryService.queryByTimeRange(
                    startTime, endTime, 1, EXPORT_LIMIT).getData();
            if ("csv".equals(exportFormat)) {
                exportToCsv(response, records);
            } else if ("json".equals(exportFormat)) {
                exportToJson(response, records);
            } else {
                response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Unsupported format");
            }
        } catch (Exception e) {
            // NOTE(review): if the response is already committed (e.g. the CSV
            // writer flushed before failing), sendError throws IllegalStateException.
            response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
                    "Export trace data failed: " + e.getMessage());
        }
    }

    private void exportToCsv(HttpServletResponse response, List<TraceRecord> records)
            throws IOException {
        response.setContentType("text/csv");
        response.setHeader("Content-Disposition",
                "attachment; filename=\"trace_data_" + System.currentTimeMillis() + ".csv\"");
        try (PrintWriter writer = response.getWriter()) {
            // CSV header row.
            writer.println("RequestId,TraceType,Timestamp,MsgId,Topic,Status,CostTime,Success");
            for (TraceRecord record : records) {
                // Fields are quoted per RFC 4180 so embedded commas, quotes or
                // newlines (e.g. in topic names or status text) cannot corrupt
                // rows — bug fix: the previous String.format produced broken
                // CSV for such values.
                writer.println(String.join(",",
                        csvEscape(record.getRequestId()),
                        csvEscape(record.getTraceType()),
                        csvEscape(record.getTimestamp()),
                        csvEscape(record.getMsgId()),
                        csvEscape(record.getTopic()),
                        csvEscape(record.getStatus()),
                        csvEscape(record.getCostTime()),
                        csvEscape(record.isSuccess())));
            }
        }
    }

    private void exportToJson(HttpServletResponse response, List<TraceRecord> records)
            throws IOException {
        response.setContentType("application/json");
        response.setHeader("Content-Disposition",
                "attachment; filename=\"trace_data_" + System.currentTimeMillis() + ".json\"");
        ObjectMapper mapper = new ObjectMapper();
        mapper.writeValue(response.getOutputStream(), records);
    }

    /**
     * Resolves a [start, end] window where 0 means "unset": end defaults to now,
     * start defaults to (end - defaultSpanMillis). Shared by /key and /statistics.
     */
    private static long[] resolveTimeWindow(long startTime, long endTime,
            long defaultSpanMillis) {
        long end = endTime == 0 ? System.currentTimeMillis() : endTime;
        long start = startTime == 0 ? end - defaultSpanMillis : startTime;
        return new long[] { start, end };
    }

    /**
     * RFC 4180 field escaping: quote the field and double inner quotes when it
     * contains a comma, quote, or line break; otherwise pass it through as-is.
     */
    private static String csvEscape(Object value) {
        String text = String.valueOf(value);
        if (text.contains(",") || text.contains("\"")
                || text.contains("\n") || text.contains("\r")) {
            return "\"" + text.replace("\"", "\"\"") + "\"";
        }
        return text;
    }
}
5.2 轨迹可视化服务
java
复制
下载
@Service
public class TraceVisualizationService {

    @Autowired
    private TraceQueryService traceQueryService;

    @Autowired
    private ElasticsearchTraceStorage esStorage;

    /**
     * 1. Builds a node/edge graph of one message's journey:
     * Producer -> Broker -> one node per distinct consumer group.
     */
    public TraceGraph generateMessageTraceGraph(String msgId) {
        TraceGraph graph = new TraceGraph();
        TraceDetail traceDetail = traceQueryService.getMessageTrace(msgId);

        List<GraphNode> nodes = new ArrayList<>();
        Map<String, GraphNode> nodeMap = new HashMap<>();

        // Producer node.
        // NOTE(review): Map.of rejects null values — if any trace attribute
        // (host/time/status/cost) can be null this throws NPE; confirm upstream.
        GraphNode producerNode = new GraphNode();
        producerNode.setId("producer");
        producerNode.setLabel("Producer");
        producerNode.setType(NodeType.PRODUCER);
        producerNode.setProperties(Map.of(
                "host", traceDetail.getProducerHost(),
                "time", traceDetail.getSendTime()
        ));
        nodes.add(producerNode);
        nodeMap.put("producer", producerNode);

        // Broker node.
        GraphNode brokerNode = new GraphNode();
        brokerNode.setId("broker");
        brokerNode.setLabel("Broker");
        brokerNode.setType(NodeType.BROKER);
        brokerNode.setProperties(Map.of(
                "host", traceDetail.getBrokerHost(),
                "storeTime", traceDetail.getStoreTime()
        ));
        nodes.add(brokerNode);
        nodeMap.put("broker", brokerNode);

        // Consumer nodes — deduplicated by consumer group.
        for (ConsumerTrace consumerTrace : traceDetail.getConsumerTraces()) {
            String consumerId = "consumer_" + consumerTrace.getConsumerGroup();
            if (!nodeMap.containsKey(consumerId)) {
                GraphNode consumerNode = new GraphNode();
                consumerNode.setId(consumerId);
                consumerNode.setLabel(consumerTrace.getConsumerGroup());
                consumerNode.setType(NodeType.CONSUMER);
                consumerNode.setProperties(Map.of(
                        "group", consumerTrace.getConsumerGroup(),
                        "instance", consumerTrace.getInstanceHost()
                ));
                nodes.add(consumerNode);
                nodeMap.put(consumerId, consumerNode);
            }
        }

        // Edges: one SEND edge, then one CONSUME edge per consumer trace.
        List<GraphEdge> edges = new ArrayList<>();
        GraphEdge produceEdge = new GraphEdge();
        produceEdge.setId("edge_produce");
        produceEdge.setSource("producer");
        produceEdge.setTarget("broker");
        produceEdge.setLabel("SEND");
        produceEdge.setProperties(Map.of(
                "status", traceDetail.getSendStatus(),
                "cost", traceDetail.getSendCost()
        ));
        edges.add(produceEdge);

        for (ConsumerTrace consumerTrace : traceDetail.getConsumerTraces()) {
            String consumerId = "consumer_" + consumerTrace.getConsumerGroup();
            GraphEdge consumeEdge = new GraphEdge();
            consumeEdge.setId("edge_consume_" + consumerId);
            consumeEdge.setSource("broker");
            consumeEdge.setTarget(consumerId);
            consumeEdge.setLabel("CONSUME");
            consumeEdge.setProperties(Map.of(
                    "status", consumerTrace.getConsumeStatus(),
                    "cost", consumerTrace.getConsumeCost(),
                    "retryTimes", consumerTrace.getRetryTimes()
            ));
            edges.add(consumeEdge);
        }

        graph.setNodes(nodes);
        graph.setEdges(edges);
        return graph;
    }

    /**
     * 2. Builds an aggregated producer/broker/consumer topology over a time
     * window; node counts and link values accumulate per trace record.
     * NOTE(review): the topic parameter is never used — the query below returns
     * records of every topic; confirm whether topic filtering was intended.
     */
    public TopologyGraph generateTopologyGraph(String topic, long startTime, long endTime) {
        TopologyGraph topology = new TopologyGraph();
        List<TraceRecord> records = traceQueryService.queryByTimeRange(
                startTime, endTime, 1, 1000).getData();

        Map<String, TopologyNode> nodes = new HashMap<>();
        Map<String, TopologyLink> links = new HashMap<>();

        for (TraceRecord record : records) {
            // Producer node + producer->broker link.
            String producerKey = "producer_" + record.getProducerHost();
            topologyNode(nodes, producerKey, record.getProducerHost(), NodeType.PRODUCER)
                    .incrementCount();

            String brokerKey = "broker_" + record.getBrokerAddr();
            topologyNode(nodes, brokerKey, record.getBrokerAddr(), NodeType.BROKER)
                    .incrementCount();

            topologyLink(links, producerKey, brokerKey).incrementValue();

            // Consumer nodes + broker->consumer links.
            for (TracePoint point : record.getTracePoints()) {
                if (point.getConsumerGroup() == null) {
                    continue;
                }
                String consumerKey = "consumer_" + point.getConsumerGroup();
                topologyNode(nodes, consumerKey, point.getConsumerGroup(), NodeType.CONSUMER)
                        .incrementCount();
                topologyLink(links, brokerKey, consumerKey).incrementValue();
            }
        }

        topology.setNodes(new ArrayList<>(nodes.values()));
        topology.setLinks(new ArrayList<>(links.values()));
        return topology;
    }

    /** Gets or lazily creates a topology node (count starts at 0). */
    private static TopologyNode topologyNode(Map<String, TopologyNode> nodes,
            String key, String name, NodeType type) {
        return nodes.computeIfAbsent(key, k -> {
            TopologyNode node = new TopologyNode();
            node.setId(k);
            node.setName(name);
            node.setType(type);
            node.setCount(0);
            return node;
        });
    }

    /** Gets or lazily creates a directed link (value starts at 0). */
    private static TopologyLink topologyLink(Map<String, TopologyLink> links,
            String source, String target) {
        return links.computeIfAbsent(source + "->" + target, k -> {
            TopologyLink link = new TopologyLink();
            link.setSource(source);
            link.setTarget(target);
            link.setValue(0);
            return link;
        });
    }

    /**
     * 3. Builds a chronological timeline (send / store / consume events) for
     * one message, including the total end-to-end cost.
     */
    public TimelineView generateTimelineView(String msgId) {
        TimelineView timeline = new TimelineView();
        TraceDetail traceDetail = traceQueryService.getMessageTrace(msgId);
        List<TimelineEvent> events = new ArrayList<>();

        // Send event.
        TimelineEvent sendEvent = new TimelineEvent();
        sendEvent.setId("send");
        sendEvent.setTime(traceDetail.getSendTime());
        sendEvent.setType(EventType.SEND);
        sendEvent.setTitle("消息发送");
        sendEvent.setDescription(String.format("生产者: %s", traceDetail.getProducerHost()));
        sendEvent.setDuration(traceDetail.getSendCost());
        sendEvent.setStatus(traceDetail.getSendStatus());
        events.add(sendEvent);

        // Store event (status hard-coded SUCCESS: a stored trace implies success).
        TimelineEvent storeEvent = new TimelineEvent();
        storeEvent.setId("store");
        storeEvent.setTime(traceDetail.getStoreTime());
        storeEvent.setType(EventType.STORE);
        storeEvent.setTitle("消息存储");
        storeEvent.setDescription(String.format("Broker: %s", traceDetail.getBrokerHost()));
        storeEvent.setStatus("SUCCESS");
        events.add(storeEvent);

        // Consume events — possibly one per consumer group.
        for (ConsumerTrace consumerTrace : traceDetail.getConsumerTraces()) {
            TimelineEvent consumeEvent = new TimelineEvent();
            consumeEvent.setId("consume_" + consumerTrace.getConsumerGroup());
            consumeEvent.setTime(consumerTrace.getConsumeTime());
            consumeEvent.setType(EventType.CONSUME);
            consumeEvent.setTitle("消息消费");
            consumeEvent.setDescription(String.format("消费者组: %s",
                    consumerTrace.getConsumerGroup()));
            consumeEvent.setDuration(consumerTrace.getConsumeCost());
            consumeEvent.setStatus(consumerTrace.getConsumeStatus());
            events.add(consumeEvent);
        }

        // Chronological order, then total cost = last - first event time.
        events.sort(Comparator.comparing(TimelineEvent::getTime));
        timeline.setMsgId(msgId);
        timeline.setTopic(traceDetail.getTopic());
        timeline.setEvents(events);
        if (!events.isEmpty()) {
            long firstTime = events.get(0).getTime();
            long lastTime = events.get(events.size() - 1).getTime();
            timeline.setTotalCost(lastTime - firstTime);
        }
        return timeline;
    }

    /**
     * 4. Produces chart data for a given chart type; throws
     * IllegalArgumentException on unknown types.
     */
    public ChartData generateStatisticsChart(String topic, String chartType,
            long startTime, long endTime) {
        ChartData chartData = new ChartData();
        switch (chartType) {
            case "send_status":
                // Send-status distribution from the ES terms aggregation.
                Map<String, Long> sendStats = esStorage.aggregateByStatus(topic, startTime, endTime);
                chartData.setLabels(new ArrayList<>(sendStats.keySet()));
                chartData.setValues(new ArrayList<>(sendStats.values()));
                chartData.setTitle("消息发送状态统计");
                break;
            case "time_distribution":
                chartData = generateTimeDistributionChart(topic, startTime, endTime);
                break;
            case "topic_distribution":
                chartData = generateTopicDistributionChart(startTime, endTime);
                break;
            default:
                throw new IllegalArgumentException("Unsupported chart type: " + chartType);
        }
        return chartData;
    }

    /** Message counts per one-hour bucket across [startTime, endTime]. */
    private ChartData generateTimeDistributionChart(String topic, long startTime, long endTime) {
        ChartData chartData = new ChartData();
        chartData.setTitle("消息时间分布");
        long interval = 3600 * 1000L; // one-hour buckets
        int bucketCount = (int) ((endTime - startTime) / interval) + 1;
        // Hoisted out of the loop (perf fix): SimpleDateFormat construction is
        // expensive and the instance is confined to this call, so reuse is safe.
        SimpleDateFormat hourFormat = new SimpleDateFormat("HH:mm");
        List<String> labels = new ArrayList<>();
        List<Long> values = new ArrayList<>();
        for (int i = 0; i < bucketCount; i++) {
            long bucketStart = startTime + i * interval;
            long bucketEnd = bucketStart + interval;
            labels.add(hourFormat.format(new Date(bucketStart)));
            // One count query per bucket — O(bucketCount) round-trips.
            values.add(traceQueryService.countByTimeRange(topic, bucketStart, bucketEnd));
        }
        chartData.setLabels(labels);
        chartData.setValues(values);
        return chartData;
    }

    /** Top-10 topics by message count within the window. */
    private ChartData generateTopicDistributionChart(long startTime, long endTime) {
        ChartData chartData = new ChartData();
        chartData.setTitle("Topic消息量分布");
        // Simplified implementation: fetch up to 10000 records and group by
        // topic in memory (an ES terms aggregation would avoid the transfer).
        List<TraceRecord> records = traceQueryService.queryByTimeRange(
                startTime, endTime, 1, 10000).getData();
        Map<String, Long> topicCountMap = new HashMap<>();
        for (TraceRecord record : records) {
            topicCountMap.merge(record.getTopic(), 1L, Long::sum);
        }
        // Keep the 10 largest topics, descending by count.
        List<Map.Entry<String, Long>> sortedEntries = topicCountMap.entrySet().stream()
                .sorted(Map.Entry.<String, Long>comparingByValue().reversed())
                .limit(10)
                .collect(Collectors.toList());
        List<String> labels = new ArrayList<>();
        List<Long> values = new ArrayList<>();
        for (Map.Entry<String, Long> entry : sortedEntries) {
            labels.add(entry.getKey());
            values.add(entry.getValue());
        }
        chartData.setLabels(labels);
        chartData.setValues(values);
        return chartData;
    }
}
篇幅限制下面就只能给大家展示小册部分内容了。整理了一份核心面试笔记包括了:Java面试、Spring、JVM、MyBatis、Redis、MySQL、并发编程、微服务、Linux、Springboot、SpringCloud、MQ、Kafka
需要全套面试笔记及答案
【点击此处即可/免费获取】
六、性能优化与监控
6.1 轨迹采集性能优化
java
复制
下载
public class TracePerformanceOptimizer {

    /** Sampling granularity: decisions are made on buckets of 1/10000. */
    private static final int SAMPLING_MOD = 10000;

    // 1. Sampling strategy — by default 1% of messages carry trace data.
    private volatile double samplingRate = 0.01;

    /**
     * Decides whether a message should be traced, using a stable hash of the
     * message id so every component makes the same decision for one message.
     */
    public boolean shouldTrace(String msgId) {
        int bucket = Math.abs(msgId.hashCode() % SAMPLING_MOD);
        return bucket < (int) (samplingRate * SAMPLING_MOD);
    }

    /**
     * Adapts the sampling rate to system load: halves it above 80% load
     * (floor 0.1%), doubles it below 30% load (cap 100%).
     */
    public void adjustSamplingRate(double systemLoad) {
        if (systemLoad > 0.8) {
            samplingRate = Math.max(0.001, samplingRate * 0.5);
        } else if (systemLoad < 0.3) {
            samplingRate = Math.min(1.0, samplingRate * 2);
        }
    }

    // 2. Batch processing: drain queued contexts into size-bounded batches.
    public class BatchTraceProcessor {

        private final LinkedBlockingQueue<TraceContext> queue;
        private final int batchSize;
        private final long maxWaitTime; // ms to wait for the first element

        public BatchTraceProcessor(int capacity, int batchSize, long maxWaitTime) {
            this.queue = new LinkedBlockingQueue<>(capacity);
            this.batchSize = batchSize;
            this.maxWaitTime = maxWaitTime;
        }

        /**
         * Takes one batch from the queue and processes it. Blocks up to
         * maxWaitTime for the first element; returns without work on timeout.
         */
        public void processBatch() throws InterruptedException {
            List<TraceContext> batch = new ArrayList<>(batchSize);
            TraceContext first = queue.poll(maxWaitTime, TimeUnit.MILLISECONDS);
            if (first != null) {
                batch.add(first);
                // Grab whatever else is immediately available, up to batchSize.
                queue.drainTo(batch, batchSize - 1);
                processBatchInternal(batch);
            }
        }

        private void processBatchInternal(List<TraceContext> batch) {
            // NOTE(review): serializeTrace, getTotalSize, compressBatch and
            // batchSendToStorage are not defined in this listing — confirm
            // they exist on this class elsewhere.
            List<String> traceDataList = batch.stream()
                    .map(this::serializeTrace)
                    .collect(Collectors.toList());
            // Compress only when the serialized batch exceeds 4KB.
            if (getTotalSize(traceDataList) > 1024 * 4) {
                traceDataList = compressBatch(traceDataList);
            }
            batchSendToStorage(traceDataList);
        }
    }

    // 3. Async processing: bounded thread pool with caller-runs backpressure.
    public class AsyncTraceExecutor {

        private final ThreadPoolExecutor executor;
        private final TraceBuffer buffer;

        public AsyncTraceExecutor() {
            this.executor = new ThreadPoolExecutor(
                    2, 4, 60, TimeUnit.SECONDS,
                    new LinkedBlockingQueue<>(1000),
                    new ThreadFactory() {
                        private final AtomicInteger counter = new AtomicInteger(0);

                        @Override
                        public Thread newThread(Runnable r) {
                            Thread thread = new Thread(r,
                                    "TraceAsyncThread-" + counter.incrementAndGet());
                            thread.setDaemon(true);
                            return thread;
                        }
                    },
                    // On saturation the submitting thread runs the task itself.
                    new ThreadPoolExecutor.CallerRunsPolicy()
            );
            // Compile fix: the blank final 'buffer' field was never assigned.
            this.buffer = new TraceBuffer();
            // Start periodic thread-pool monitoring.
            monitorExecutorStats();
        }

        public Future<?> submitTraceTask(Runnable task) {
            return executor.submit(task);
        }

        /** Publishes executor metrics every 10s and resizes the pool on backlog. */
        private void monitorExecutorStats() {
            // Daemon thread so the monitor cannot keep the JVM alive on shutdown
            // (fix: previously a non-daemon scheduler that was never shut down).
            ScheduledExecutorService monitor = Executors.newSingleThreadScheduledExecutor(r -> {
                Thread t = new Thread(r, "TraceExecutorMonitor");
                t.setDaemon(true);
                return t;
            });
            monitor.scheduleAtFixedRate(() -> {
                int activeCount = executor.getActiveCount();
                long completedTasks = executor.getCompletedTaskCount();
                int queueSize = executor.getQueue().size();
                // Export metrics.
                Metrics.record("trace.executor.active", activeCount);
                Metrics.record("trace.executor.completed", completedTasks);
                Metrics.record("trace.executor.queue", queueSize);
                // React to backlog.
                adjustThreadPool(activeCount, queueSize);
            }, 10, 10, TimeUnit.SECONDS);
        }

        /** Grows the core pool on heavy backlog, shrinks it when mostly idle. */
        private void adjustThreadPool(int activeCount, int queueSize) {
            int corePoolSize = executor.getCorePoolSize();
            if (queueSize > 500 && activeCount >= corePoolSize) {
                int newSize = Math.min(corePoolSize * 2, executor.getMaximumPoolSize());
                executor.setCorePoolSize(newSize);
            } else if (queueSize < 100 && activeCount < corePoolSize / 2) {
                int newSize = Math.max(2, corePoolSize / 2);
                executor.setCorePoolSize(newSize);
            }
        }
    }

    // 4. Memory optimization: pool and reuse TraceContext instances.
    public class TraceBuffer {

        // Compile fix: declared as GenericObjectPool (not the ObjectPool
        // interface) because setMaxTotal/setMaxIdle only exist on the
        // GenericObjectPool implementation.
        private final GenericObjectPool<TraceContext> objectPool;

        // Compile fix: the blank final field was never initialized.
        // NOTE(review): nothing reads this cache yet — confirm it is needed.
        private final SoftReference<List<TraceContext>> cache =
                new SoftReference<>(new ArrayList<>());

        public TraceBuffer() {
            this.objectPool = new GenericObjectPool<>(
                    new BasePooledObjectFactory<TraceContext>() {
                        @Override
                        public TraceContext create() {
                            return new TraceContext();
                        }

                        @Override
                        public PooledObject<TraceContext> wrap(TraceContext obj) {
                            return new DefaultPooledObject<>(obj);
                        }
                    }
            );
            // Pool limits: at most 1000 live objects, 100 kept idle.
            objectPool.setMaxTotal(1000);
            objectPool.setMaxIdle(100);
        }

        /** Borrows a pooled context, cleared of any previous data. */
        public TraceContext borrowObject() throws Exception {
            TraceContext context = objectPool.borrowObject();
            context.clear();
            return context;
        }

        /** Returns a context to the pool; return failures are best-effort ignored. */
        public void returnObject(TraceContext context) {
            try {
                objectPool.returnObject(context);
            } catch (Exception ignored) {
                // Intentionally ignored: losing one pooled object is harmless.
            }
        }
    }
}
6.2 轨迹系统监控
java
复制
下载
public class TraceSystemMonitor {

    private final MetricsCollector metricsCollector;
    private final AlertManager alertManager;
    private final HealthChecker healthChecker;

    public TraceSystemMonitor() {
        this.metricsCollector = new MetricsCollector();
        this.alertManager = new AlertManager();
        this.healthChecker = new HealthChecker();
        // Start the periodic monitoring jobs.
        startMonitoring();
    }

    /** Schedules the three periodic monitoring jobs on daemon threads. */
    private void startMonitoring() {
        // Daemon threads so the monitor never blocks JVM shutdown (fix: the
        // previous pool used non-daemon threads and was never shut down).
        ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(3, r -> {
            Thread t = new Thread(r, "TraceSystemMonitor");
            t.setDaemon(true);
            return t;
        });
        // 1. Performance metrics every 10 seconds. collectPerformanceMetrics
        // divides the counter by 10.0, so it assumes this exact interval.
        scheduler.scheduleAtFixedRate(this::collectPerformanceMetrics,
                10, 10, TimeUnit.SECONDS);
        // 2. Health check every 30 seconds.
        scheduler.scheduleAtFixedRate(this::checkSystemHealth,
                30, 30, TimeUnit.SECONDS);
        // 3. Capacity check every 5 minutes.
        scheduler.scheduleAtFixedRate(this::checkCapacity,
                5, 5, TimeUnit.MINUTES);
    }

    private void collectPerformanceMetrics() {
        try {
            // 1. Trace throughput and success rate.
            // NOTE(review): throughput math assumes the counters reset every
            // interval; if they are cumulative this is wrong — confirm the
            // MetricsCollector contract.
            long traceProcessed = metricsCollector.getCounter("trace.processed.count");
            long traceFailed = metricsCollector.getCounter("trace.failed.count");
            double throughput = traceProcessed / 10.0; // per-10s window
            metricsCollector.recordGauge("trace.throughput", throughput);
            metricsCollector.recordGauge("trace.success.rate",
                    traceProcessed > 0
                            ? (traceProcessed - traceFailed) * 100.0 / traceProcessed
                            : 100);
            // 2. Processing latency percentiles. The cast indices are always
            // strictly below size, so no bounds check is needed.
            List<Long> latencies = metricsCollector.getHistogram("trace.processing.latency");
            if (!latencies.isEmpty()) {
                Collections.sort(latencies);
                metricsCollector.recordGauge("trace.latency.p50",
                        latencies.get(latencies.size() / 2));
                metricsCollector.recordGauge("trace.latency.p95",
                        latencies.get((int) (latencies.size() * 0.95)));
                metricsCollector.recordGauge("trace.latency.p99",
                        latencies.get((int) (latencies.size() * 0.99)));
            }
            // 3. Queue depth, with an overflow alert above 1000 entries.
            int queueSize = metricsCollector.getGauge("trace.queue.size");
            metricsCollector.recordGauge("trace.queue.size.current", queueSize);
            if (queueSize > 1000) {
                alertManager.sendAlert("TRACE_QUEUE_OVERFLOW",
                        "Trace queue size exceeds threshold: " + queueSize);
            }
        } catch (Exception e) {
            log.error("Collect performance metrics failed", e);
        }
    }

    private void checkSystemHealth() {
        try {
            // 1. Storage connectivity (gauge: 1 healthy, 0 unhealthy).
            boolean storageHealthy = healthChecker.checkStorageHealth();
            metricsCollector.recordGauge("storage.health", storageHealthy ? 1 : 0);
            if (!storageHealthy) {
                alertManager.sendAlert("STORAGE_UNHEALTHY",
                        "Trace storage system is unhealthy");
            }
            // 2. Queue-processing liveness.
            boolean queueProcessingHealthy = healthChecker.checkQueueProcessing();
            metricsCollector.recordGauge("queue.processing.health",
                    queueProcessingHealthy ? 1 : 0);
            // 3. Thread-pool stats.
            // NOTE(review): the unchecked Integer casts throw ClassCastException
            // if the checker returns other numeric types — confirm the map schema.
            Map<String, Object> threadPoolStats = healthChecker.checkThreadPools();
            metricsCollector.recordGauge("threadpool.active.count",
                    (Integer) threadPoolStats.get("activeCount"));
            metricsCollector.recordGauge("threadpool.queue.size",
                    (Integer) threadPoolStats.get("queueSize"));
        } catch (Exception e) {
            log.error("Check system health failed", e);
            alertManager.sendAlert("HEALTH_CHECK_FAILED",
                    "Health check failed: " + e.getMessage());
        }
    }

    private void checkCapacity() {
        try {
            // 1. Disk usage, alert above 90%.
            DiskUsage diskUsage = healthChecker.checkDiskUsage();
            metricsCollector.recordGauge("disk.usage.percent", diskUsage.getUsagePercent());
            if (diskUsage.getUsagePercent() > 90) {
                alertManager.sendAlert("DISK_USAGE_HIGH",
                        String.format("Disk usage is %d%%", diskUsage.getUsagePercent()));
            }
            // 2. Memory usage, alert above 80%.
            MemoryUsage memoryUsage = healthChecker.checkMemoryUsage();
            metricsCollector.recordGauge("memory.usage.percent", memoryUsage.getUsagePercent());
            if (memoryUsage.getUsagePercent() > 80) {
                alertManager.sendAlert("MEMORY_USAGE_HIGH",
                        String.format("Memory usage is %d%%", memoryUsage.getUsagePercent()));
            }
            // 3. Index size, alert above 100GB.
            IndexStats indexStats = healthChecker.checkIndexStats();
            metricsCollector.recordGauge("index.document.count", indexStats.getDocumentCount());
            metricsCollector.recordGauge("index.store.size", indexStats.getStoreSize());
            if (indexStats.getStoreSize() > 100 * 1024 * 1024 * 1024L) {
                alertManager.sendAlert("INDEX_SIZE_LARGE",
                        String.format("Index size is %dGB",
                                indexStats.getStoreSize() / (1024 * 1024 * 1024)));
            }
        } catch (Exception e) {
            log.error("Check capacity failed", e);
        }
    }

    /**
     * Trace-quality monitoring: alerts when fewer than 95% of traces are
     * complete or when trace ingestion lags by more than one minute.
     */
    public void monitorTraceQuality() {
        // 1. Completeness = complete / total (100% when no traces yet).
        long totalTraces = metricsCollector.getCounter("trace.total");
        long completeTraces = metricsCollector.getCounter("trace.complete");
        double completenessRate = totalTraces > 0
                ? completeTraces * 100.0 / totalTraces : 100;
        metricsCollector.recordGauge("trace.completeness.rate", completenessRate);
        if (completenessRate < 95) {
            alertManager.sendAlert("TRACE_COMPLETENESS_LOW",
                    String.format("Trace completeness rate is %.2f%%", completenessRate));
        }
        // 2. Ingestion delay, alert above 60s.
        long traceDelay = metricsCollector.getGauge("trace.delay");
        metricsCollector.recordGauge("trace.delay.current", traceDelay);
        if (traceDelay > 60000) {
            alertManager.sendAlert("TRACE_DELAY_HIGH",
                    String.format("Trace delay is %dms", traceDelay));
        }
    }
}
七、最佳实践与配置
7.1 配置优化建议
properties
复制
下载
# RocketMQ轨迹配置
rocketmq.trace.topic=RMQ_SYS_TRACE_TOPIC
rocketmq.trace.enabled=true
rocketmq.trace.samplingRate=0.01
rocketmq.trace.batchSize=100
rocketmq.trace.maxQueueSize=10000
rocketmq.trace.asyncDispatch=true
rocketmq.trace.compressThreshold=4096
# 存储配置
trace.storage.type=elasticsearch
trace.storage.elasticsearch.hosts=localhost:9200
trace.storage.elasticsearch.index.prefix=rocketmq-trace-
trace.storage.elasticsearch.index.shards=3
trace.storage.elasticsearch.index.replicas=1
# 保留策略
trace.retention.days=7
trace.retention.cleanup.enabled=true
trace.retention.cleanup.schedule="0 0 2 * * ?" # 每天凌晨2点执行
# 监控配置
trace.monitor.enabled=true
trace.monitor.metrics.export=prometheus
trace.monitor.alert.enabled=true
trace.monitor.alert.webhook=http://alert-system/webhook
7.2 部署架构
text
复制
下载
高可用轨迹系统架构:
┌─────────────────────────────────────────────────────┐
│ 应用层 │
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │ 生产者 │ │ Broker │ │ 消费者 │ │
│ │ Trace │ │ Trace │ │ Trace │ │
│ │ Hook │ │ Hook │ │ Hook │ │
│ └──────────┘ └──────────┘ └──────────┘ │
│ ↓ ↓ ↓ │
└─────────────────────────────────────────────────────┘
↓
┌─────────────────────────────────────────────────────┐
│ 轨迹收集层 │
│ ┌────────────────────────────────────────────┐ │
│ │ Trace Dispatcher │ │
│ │ • 异步队列缓冲 │ │
│ │ • 批量处理优化 │ │
│ │ • 失败重试机制 │ │
│ └────────────────────────────────────────────┘ │
│ ↓ │
└─────────────────────────────────────────────────────┘
↓
┌─────────────────────────────────────────────────────┐
│ 轨迹存储层 │
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │ RocketMQ │ │ Elastic │ │ MySQL │ │
│ │ Trace │ │ search │ │ (元数据) │ │
│ │ Topic │ │ 集群 │ │ │ │
│ └──────────┘ └──────────┘ └──────────┘ │
└─────────────────────────────────────────────────────┘
↓
┌─────────────────────────────────────────────────────┐
│ 查询分析层 │
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │ REST │ │ Web │ │ 监控 │ │
│ │ API │ │ UI │ │ 告警 │ │
│ │ 服务 │ │ │ │ 系统 │ │
│ └──────────┘ └──────────┘ └──────────┘ │
└─────────────────────────────────────────────────────┘
7.3 故障处理策略
text
复制
下载
1. 轨迹数据丢失处理:
• 本地文件备份
• 失败重试机制
• 补全日志分析
2. 存储系统故障:
• 降级到文件存储
• 数据异步恢复
• 监控告警
3. 性能问题处理:
• 动态采样调整
• 队列流量控制
• 资源自动扩缩容
4. 数据一致性保证:
• 轨迹ID全局唯一
• 时序性保证
• 最终一致性模型
通过以上完整的消息轨迹追踪实现,可以构建一个高性能、高可用的RocketMQ消息轨迹系统,实现全链路消息追踪、问题定位和性能分析。