背景
最近接了一个活:公司要求全面接入OpenTelemetry(简称OTEL),不再使用之前的SkyWalking,而且必须接入。但我们用的dubbo版本不支持OTEL接入,这把我们难住了:难道要因为这个全面升级老旧的dubbo?风险太大,有点得不偿失,只能另辟蹊径了。
难点细节
项目都比较老,dubbo基本用的都是com.alibaba 包名的2.6.2及2.5.*版本,不兼容OTEL,并且应用很多
升级到com.alibaba 包名的2.8.4版本难度大,也无法直接升级到org.apache.dubbo包名的版本
无法直接接入OTEL。
方案整体思路
使用dubbo SPI:dubbo服务消费端从OTEL获取trace信息,放到RpcContext中。
然后dubbo服务端从 RpcContext获取trace信息,写入OTEL 。
上代码
因为我们采用的是java agent的方式,并设置好了相应的环境变量,所以无需主动配置OTEL;引入OTEL包只是为了在Dubbo SPI中使用。
maven依赖
xml
<dependencies>
    <!-- OpenTelemetry API / SDK / OTLP exporter -->
    <dependency>
        <groupId>io.opentelemetry</groupId>
        <artifactId>opentelemetry-api</artifactId>
        <version>1.28.0</version>
    </dependency>
    <dependency>
        <groupId>io.opentelemetry</groupId>
        <artifactId>opentelemetry-sdk</artifactId>
        <version>1.28.0</version>
    </dependency>
    <dependency>
        <groupId>io.opentelemetry</groupId>
        <artifactId>opentelemetry-exporter-otlp</artifactId>
        <version>1.28.0</version>
    </dependency>
    <!-- Newly added dependencies -->
    <dependency>
        <groupId>io.opentelemetry</groupId>
        <artifactId>opentelemetry-extension-trace-propagators</artifactId>
        <version>1.28.0</version>
    </dependency>
    <dependency>
        <groupId>io.opentelemetry.instrumentation</groupId>
        <artifactId>opentelemetry-jdbc</artifactId>
        <version>1.28.0-alpha</version>
    </dependency>
    <!-- Legacy com.alibaba Dubbo, needed to compile the SPI filter -->
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>dubbo</artifactId>
        <version>2.6.2</version>
    </dependency>
    <!-- slf4j-api is declared once; a second identical declaration only triggers a Maven duplicate-dependency warning -->
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-api</artifactId>
        <version>2.0.17</version>
        <scope>compile</scope>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>2.0.52</version>
        <scope>compile</scope>
    </dependency>
</dependencies>
dubbo spi filter
java
package com.haier.hsi.dubbo;
import com.alibaba.dubbo.common.extension.Activate;
import com.alibaba.dubbo.rpc.*;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import io.opentelemetry.api.GlobalOpenTelemetry;
import io.opentelemetry.api.trace.*;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.Scope;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Map;
/**
 * Dubbo {@link Filter} that carries OpenTelemetry trace context across legacy
 * {@code com.alibaba} Dubbo calls by way of {@link RpcContext} attachments.
 *
 * <ul>
 *   <li>Consumer side: the identifiers of the current span (or of a new CLIENT span when no
 *       valid span is current) are written to the attachments {@code trace_id},
 *       {@code span_id}, {@code interface}, {@code trace_flags} and {@code trace_state}.</li>
 *   <li>Provider side: when {@code trace_id} and {@code span_id} are present, a SERVER span is
 *       started as a child of the remote parent rebuilt from those attachments.</li>
 *   <li>Otherwise: if a valid span is already current the call is wrapped in an INTERNAL span,
 *       else the call is passed through untraced.</li>
 * </ul>
 *
 * <p>Tracing is best-effort. A failure in the tracing code is logged and the call is still
 * performed (or its already obtained result is returned); a failure thrown by the downstream
 * invoker is logged and rethrown unchanged.
 */
@Activate(group = {"consumer", "provider"})
public class TraceContextFilter implements Filter {
    private static final Logger log = LoggerFactory.getLogger(TraceContextFilter.class);

    // Tracer (instrumentation scope) name: environment variable APP_NAME, or "com.example.dubbo" when unset.
    private static final Tracer tracer =
            GlobalOpenTelemetry.getTracer(System.getenv().getOrDefault("APP_NAME", "com.example.dubbo"));

    /** Per-call bookkeeping that lets the outer error handler tell tracing failures from Dubbo failures. */
    private static final class CallState {
        /** True once the downstream invoker returned normally. */
        boolean invoked;
        /** True when the downstream invoker itself threw. */
        boolean dubboFailed;
        /** Result returned by the downstream invoker; only meaningful when {@link #invoked} is true. */
        Result result;
    }

    /**
     * Invokes the next element of the filter chain, wrapped in trace propagation.
     *
     * @param invoker    the next invoker in the chain
     * @param invocation the call being made
     * @return the result produced by {@code invoker}
     * @throws RpcException (or any other unchecked failure) raised by {@code invoker}; rethrown as-is
     */
    @Override
    public Result invoke(Invoker<?> invoker, Invocation invocation) {
        CallState state = new CallState();
        try {
            RpcContext rpcContext = RpcContext.getContext();
            if (rpcContext.isConsumerSide()) {
                return invokeAsConsumer(invoker, invocation, state);
            }
            if (rpcContext.isProviderSide()) {
                Map<String, String> attachments = rpcContext.getAttachments();
                if (attachments.get("trace_id") != null && attachments.get("span_id") != null) {
                    return invokeAsProvider(invoker, invocation, attachments, state);
                }
            }
            Span ambient = Span.current();
            if (ambient.getSpanContext().isValid()) {
                return invokeAsInternal(invoker, invocation, ambient, state);
            }
        } catch (Throwable e) {
            if (state.dubboFailed) {
                // The business call failed: surface it to the caller untouched.
                log.error("TraceContextFilter error", e);
                throw e;
            }
            log.error("TraceContextFilter fail", e);
            if (state.invoked) {
                // The call already completed; never invoke a second time because tracing broke afterwards.
                return state.result;
            }
        }
        // Tracing does not apply or failed before the call was made: invoke untraced.
        return invoker.invoke(invocation);
    }

    /** Consumer side: publishes the span identifiers as attachments, then performs the call. */
    private Result invokeAsConsumer(Invoker<?> invoker, Invocation invocation, CallState state) {
        String interfaceName = invoker.getInterface().getName();
        Span span = Span.current();
        // Only a span started here may be ended here; an already-current span belongs to its creator.
        boolean ownsSpan = !span.getSpanContext().isValid();
        if (ownsSpan) {
            span = tracer.spanBuilder(String.format("%s.%s", interfaceName, invocation.getMethodName()))
                    .setSpanKind(SpanKind.CLIENT)
                    .startSpan();
        }
        try (Scope ignored = span.makeCurrent()) {
            SpanContext spanContext = span.getSpanContext();
            String traceFlags = spanContext.getTraceFlags().asHex();
            String traceState = JSON.toJSONString(spanContext.getTraceState().asMap());

            // Write every attachment before touching span attributes, so a later tracing failure
            // cannot leave a half-populated context for the provider.
            RpcContext rpcContext = RpcContext.getContext();
            rpcContext.setAttachment("trace_id", spanContext.getTraceId());
            rpcContext.setAttachment("span_id", spanContext.getSpanId());
            rpcContext.setAttachment("interface", interfaceName);
            rpcContext.setAttachment("trace_flags", traceFlags);
            rpcContext.setAttachment("trace_state", traceState);

            setCallAttributes(span, interfaceName, invocation);
            setRpcAttributes(span, interfaceName, invocation);
            log.debug("Trace ID: {}, Span ID: {}", spanContext.getTraceId(), spanContext.getSpanId());
            log.debug("getTraceFlags ID: {}, getTraceState ID: {}", traceFlags, traceState);

            return invokeAndRecord(invoker, invocation, ownsSpan ? span : null, state);
        } finally {
            if (ownsSpan) {
                span.end();
            }
        }
    }

    /** Provider side: rebuilds the remote parent from the attachments and runs the call in a SERVER span. */
    private Result invokeAsProvider(Invoker<?> invoker, Invocation invocation,
                                    Map<String, String> attachments, CallState state) {
        String interfaceName = invoker.getInterface().getName();
        SpanContext remoteParent = SpanContext.createFromRemoteParent(
                attachments.get("trace_id"),
                attachments.get("span_id"),
                parseTraceFlags(attachments.get("trace_flags")),
                parseTraceState(attachments.get("trace_state")));
        Span serverSpan = tracer.spanBuilder(String.format("%s.%s", interfaceName, invocation.getMethodName()))
                .setSpanKind(SpanKind.SERVER)
                .setParent(Context.current().with(Span.wrap(remoteParent)))
                .startSpan();
        try (Scope ignored = serverSpan.makeCurrent()) {
            // Prefer the interface name announced by the consumer; fall back to the local one.
            String announcedInterface = attachments.get("interface");
            setCallAttributes(serverSpan, announcedInterface != null ? announcedInterface : interfaceName, invocation);
            setRpcAttributes(serverSpan, interfaceName, invocation);
            return invokeAndRecord(invoker, invocation, serverSpan, state);
        } finally {
            serverSpan.end();
        }
    }

    /** Fallback: records the call as an INTERNAL child of the already-current span, which is left open. */
    private Result invokeAsInternal(Invoker<?> invoker, Invocation invocation, Span ambient, CallState state) {
        String interfaceName = invoker.getInterface().getName();
        Span dubboSpan = tracer.spanBuilder(String.format("%s#%s", interfaceName, invocation.getMethodName()))
                .setSpanKind(SpanKind.INTERNAL)
                .setParent(Context.current().with(ambient))
                .startSpan();
        try (Scope ignored = dubboSpan.makeCurrent()) {
            setCallAttributes(dubboSpan, interfaceName, invocation);
            log.debug("TraceContextFilter (Dubbo): Starting Span for method {}", invocation.getMethodName());
            return invokeAndRecord(invoker, invocation, dubboSpan, state);
        } finally {
            dubboSpan.end();
        }
    }

    /**
     * Performs the downstream call and records its outcome in {@code state}.
     *
     * @param ownedSpan span created by this filter on which failures are recorded, or {@code null}
     *                  when the filter is only decorating somebody else's span
     */
    private Result invokeAndRecord(Invoker<?> invoker, Invocation invocation, Span ownedSpan, CallState state) {
        Result result;
        try {
            result = invoker.invoke(invocation);
        } catch (Throwable e) {
            state.dubboFailed = true;
            recordFailure(ownedSpan, e);
            throw e;
        }
        state.invoked = true;
        state.result = result;
        if (result != null && result.hasException()) {
            // Business exceptions travel inside the Result instead of being thrown.
            recordFailure(ownedSpan, result.getException());
        }
        return result;
    }

    /** Marks {@code span} as failed; never throws, so it cannot mask the original failure. */
    private static void recordFailure(Span span, Throwable failure) {
        if (span == null || failure == null) {
            return;
        }
        try {
            span.recordException(failure);
            span.setStatus(StatusCode.ERROR);
        } catch (RuntimeException e) {
            log.debug("TraceContextFilter could not record failure on span", e);
        }
    }

    /** Sets the {@code interface}, {@code method} and {@code parameters} attributes. */
    private static void setCallAttributes(Span span, String interfaceLabel, Invocation invocation) {
        span.setAttribute("interface", interfaceLabel);
        span.setAttribute("method", invocation.getMethodName());
        String arguments = argumentsAsJson(invocation);
        if (arguments != null) {
            span.setAttribute("parameters", arguments);
        }
    }

    /** Sets the {@code rpc.system}, {@code rpc.service} and {@code rpc.method} attributes. */
    private static void setRpcAttributes(Span span, String interfaceName, Invocation invocation) {
        span.setAttribute("rpc.system", "dubbo");
        span.setAttribute("rpc.service", interfaceName);
        span.setAttribute("rpc.method", invocation.getMethodName());
    }

    /**
     * Serializes the call arguments to JSON, or returns {@code null} when they cannot be serialized.
     * NOTE(review): arguments may be large or contain sensitive data; confirm that exporting them
     * as a span attribute is acceptable for every service using this filter.
     */
    private static String argumentsAsJson(Invocation invocation) {
        try {
            return JSON.toJSONString(invocation.getArguments());
        } catch (RuntimeException e) {
            log.debug("TraceContextFilter could not serialize call arguments", e);
            return null;
        }
    }

    /** Parses the two-character hex flags attachment; a missing or too-short value yields the default flags. */
    private static TraceFlags parseTraceFlags(String hex) {
        if (hex == null || hex.length() < 2) {
            return TraceFlags.getDefault();
        }
        return TraceFlags.fromHex(hex, 0);
    }

    /**
     * Rebuilds the trace state from its JSON-object attachment. A missing, empty or malformed value
     * yields the default (empty) trace state so that the trace itself is still continued.
     */
    private static TraceState parseTraceState(String json) {
        if (json == null || json.isEmpty()) {
            return TraceState.getDefault();
        }
        try {
            // The payload comes from the remote side, so values are not assumed to be strings.
            Map<?, ?> entries = JSONObject.parseObject(json, Map.class);
            if (entries == null || entries.isEmpty()) {
                return TraceState.getDefault();
            }
            TraceStateBuilder builder = TraceState.builder();
            for (Map.Entry<?, ?> entry : entries.entrySet()) {
                if (entry.getKey() != null && entry.getValue() != null) {
                    builder.put(String.valueOf(entry.getKey()), String.valueOf(entry.getValue()));
                }
            }
            return builder.build();
        } catch (RuntimeException e) {
            log.debug("TraceContextFilter ignored malformed trace_state attachment", e);
            return TraceState.getDefault();
        }
    }
}
resource修改
resources/META-INF/dubbo 下增加 com.alibaba.dubbo.rpc.Filter文件
文件内容写上filter所在的位置
ini
# Dubbo SPI registration: extension name on the left, filter implementation class on the right.
traceContextFilter=com.haier.hsi.dubbo.TraceContextFilter
编辑x
应用集成
建议把上面的内容打成jar上传到maven,其他应用可以直接集成,以下是例子
xml
<dependency>
    <groupId>com.onlylowg.dubbo</groupId>
    <artifactId>dubbo-spi</artifactId>
    <version>xxxx</version>
    <!-- Exclude libraries the host application already provides, so its own versions win. -->
    <exclusions>
        <exclusion>
            <groupId>com.alibaba</groupId>
            <artifactId>dubbo</artifactId>
        </exclusion>
        <!-- slf4j-api needs to be excluded only once -->
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
        </exclusion>
        <exclusion>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
        </exclusion>
    </exclusions>
</dependency>
logstash接入otel
接入难点
因为otel 接入后,所有的日志都会在最前端加上trace_id=xxx span_id=xxx ,日志原来都是json的,导致logstash采集日志遇到问题
实践
需要在logstash/config 下面对配置文件做如下修改
例子(14~55行为新增内容,对格式进行处理,并且把otel 的trace信息放到json中)
ini
input {
  udp {
    port => 5016
  }
}
filter {
  # Parse the raw message as JSON.
  json {
    source => "message"
  }
  # Drop an optional leading "[...] --- " prefix and keep the remainder in "msg".
  # The brackets are escaped: unescaped, "[%{DATA}]" is a regex character class and never
  # matches a literal "[...]" prefix.
  grok {
    match => ["message", "(\[%{DATA}\] --- )?%{GREEDYDATA:msg}"]
  }
  if [msg] =~ /^trace_id=/ {
    # span_id is optional so that a prefix carrying only trace_id is still captured.
    grok {
      match => { "msg" => "^trace_id=(?<trace_id>[^ ]+)( span_id=(?<span_id>[^ ]+))?" }
    }
    # Strip a trailing comma (and anything after it) from the captured ids.
    mutate {
      gsub => [
        "trace_id", ",.*", "",
        "span_id", ",.*", ""
      ]
    }
    # rename skips fields that do not exist, so a failed capture can never produce a
    # literal "%{[trace_id]}" value the way add_field would.
    mutate {
      rename => {
        "trace_id" => "otel_trace_id"
        "span_id" => "otel_span_id"
      }
    }
    # Remove only the leading trace prefix; anchoring keeps identical text inside the
    # JSON payload untouched.
    mutate {
      gsub => [
        "msg", "^trace_id=[^ ]+ ", "",
        "msg", "^span_id=[^ ]+ ", ""
      ]
    }
  }
  # Parse the cleaned-up payload as JSON.
  json {
    source => "msg"
  }
  mutate {
    remove_field => ["@version","message","msg"]
  }
}