一个简单的分布式追踪系统

1. 准备工作

导入必要的库

python 复制代码
import contextvars
import time
from typing import Any, Optional, Dict, List, Union
from dataclasses import dataclass, field

2. 定义上下文变量

python 复制代码
# Two context variables that hold the currently active Span and Trace.
# Both default to None, so .get() returns None when nothing has been set
# in the current context.
_current_span: contextvars.ContextVar[Optional["Span"]] = contextvars.ContextVar(
    "current_span", default=None
)

_current_trace: contextvars.ContextVar[Optional["Trace"]] = contextvars.ContextVar(
    "current_trace", default=None
)

3. 数据模型定义

3.1 SpanContext 类

python 复制代码
@dataclass
class SpanContext:
    """Identifying information of a span, meant to be passed across processes.

    Attributes:
        trace_id: Identifier of the trace the span belongs to.
        span_id: Identifier of the span itself.
        is_remote: Defaults to False. Presumably marks a context that was
            received from another process -- nothing in this file sets it,
            so confirm against callers.
    """

    trace_id: str
    span_id: str
    is_remote: bool = False

3.2 Span 类

python 复制代码
@dataclass
class Span:
    """A timed record of one operation inside a trace.

    A Span can be used as a context manager: leaving the ``with`` block ends
    the span with status "ERROR" if an exception escaped the block and "OK"
    otherwise. The exception itself is not suppressed.

    Attributes:
        name: Human-readable name of the operation.
        context: Trace/span identifiers of this span.
        parent: The span that was current when this one was created, or None.
        start_time: Wall-clock start (``time.time()``), set at construction.
        end_time: Wall-clock end, or None while the span is still running.
        attributes: Free-form key/value data attached to the span.
        events: Timestamped events recorded through ``add_event``.
        status: "UNSET" until ``end`` is called, then the status passed to it.
    """
    name: str
    context: SpanContext
    parent: Optional["Span"] = None
    start_time: float = field(default_factory=time.time)
    end_time: Optional[float] = None
    attributes: Dict[str, Any] = field(default_factory=dict)
    events: List[Dict[str, Any]] = field(default_factory=list)
    status: str = "UNSET"

    def end(self, status: str = "OK") -> None:
        """Finish the span: record the end time and the final status.

        If this span is still the current span of the calling context, the
        context is switched back to the span's parent. Without this, a span
        finished through ``with`` or a direct ``end()`` call would remain
        "current", and spans started afterwards would be attached to an
        already finished parent.

        :param status: Final status to store on the span.
        """
        self.end_time = time.time()
        self.status = status
        # Only touch the context when this span is the active one, so ending
        # a span out of order does not clobber whichever span is active now.
        if _current_span.get() is self:
            _current_span.set(self.parent)

    def add_event(self, name: str, attributes: Optional[Dict[str, Any]] = None) -> None:
        """Append a timestamped event to the span.

        :param name: Event name.
        :param attributes: Optional event data; stored as ``{}`` when omitted.
        """
        self.events.append({
            "name": name,
            "timestamp": time.time(),
            "attributes": attributes or {}
        })

    def __enter__(self) -> "Span":
        """Return the span itself so it can be bound with ``with ... as``."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """End the span when the ``with`` block exits.

        Returns None, so an exception raised inside the block propagates.
        """
        self.end("ERROR" if exc_type else "OK")

3.3 Trace 类

python 复制代码
@dataclass
class Trace:
    """A complete trace: a root span plus the list of spans recorded for it.

    Attributes:
        root_span: The span the trace was created for.
        spans: Spans added through ``add_span``, in insertion order. Starts
            empty; the root span is only in this list if it is added too.
    """

    root_span: Span
    spans: List[Span] = field(default_factory=list)

    def add_span(self, span: Span) -> None:
        """Append *span* to this trace's span list."""
        self.spans.append(span)

4. 追踪 API 实现

4.1 辅助函数

python 复制代码
def generate_id() -> str:
    """Return a new identifier for a trace or a span.

    The identifier keeps the ``id-`` prefix and is followed by 32 random
    hexadecimal characters. A millisecond timestamp is not used because two
    calls within the same millisecond would return the same value, which
    makes a root span's trace ID equal to its span ID and lets sibling spans
    share an ID.
    """
    # Local import so this function does not depend on the file's import block.
    import uuid

    return f"id-{uuid.uuid4().hex}"

def get_current_span() -> Optional[Span]:
    """Return the span stored in the current context, or None if unset."""
    active_span = _current_span.get()
    return active_span

def get_current_trace() -> Optional[Trace]:
    """Return the trace stored in the current context, or None if unset."""
    active_trace = _current_trace.get()
    return active_trace

4.2 核心函数

python 复制代码
def start_span(name: str, attributes: Optional[Dict[str, Any]] = None) -> Span:
    """
    Create a new span and make it the current span of this context.

    The span becomes a child of the span that is current at call time. When
    there is no current span, the new span is a root: it gets a fresh trace
    ID and a new Trace is created and stored as the current trace.

    :param name: Span name
    :param attributes: Optional attributes copied onto the span
    :return: The newly created span
    """
    parent = get_current_span()
    is_root = parent is None

    # A child inherits its parent's trace ID; a root span starts a new one.
    trace_id = generate_id() if is_root else parent.context.trace_id
    span_id = generate_id()

    span = Span(
        name=name,
        context=SpanContext(trace_id=trace_id, span_id=span_id),
        parent=parent,
    )
    if attributes:
        span.attributes.update(attributes)

    # Activate the new span.
    _current_span.set(span)

    if is_root:
        trace = Trace(root_span=span)
        _current_trace.set(trace)
    else:
        trace = get_current_trace()

    # A child may find no trace in the context; then it is not recorded.
    if trace:
        trace.add_span(span)

    return span

def end_span(status: str = "OK") -> None:
    """End the current span and make its parent the current span again.

    Does nothing when no span is current.

    :param status: Final status recorded on the span being ended.
    """
    active = get_current_span()
    if not active:
        return

    active.end(status)
    _current_span.set(active.parent)

5. 数据导出器

python 复制代码
class ConsoleExporter:
    """Exporter that prints a summary of a trace to standard output."""

    @staticmethod
    def export(trace: Trace) -> None:
        """Print the trace ID, then one line per span with duration and status.

        A span that has not ended yet is reported with the time elapsed so far.
        """
        print("\n=== Exporting Trace ===")
        trace_id = trace.root_span.context.trace_id
        print(f"Trace ID: {trace_id}")
        for span in trace.spans:
            finished_at = span.end_time or time.time()
            elapsed = finished_at - span.start_time
            print(f"Span: {span.name} ({elapsed:.3f}s), Status: {span.status}")

6. 使用示例

6.1 同步代码示例

python 复制代码
# Example 1: synchronous code
with start_span("main_operation", {"type": "sync"}):
    # start_span() has made "main_operation" the current span
    with start_span("child_operation"):
        # start_span() has made "child_operation" the current span
        get_current_span().add_event("processing_start")
        time.sleep(0.1)
        get_current_span().add_event("processing_end")
    
    # Create a span manually (no with-statement) and end it explicitly
    span = start_span("manual_span")
    time.sleep(0.05)
    span.end()

# Export the trace data, if a trace exists in the current context
if trace := get_current_trace():
    ConsoleExporter.export(trace)

=== Exporting Trace ===

Trace ID: id-1751643441896

Span: main_operation (0.152s), Status: OK

Span: child_operation (0.101s), Status: OK

Span: manual_span (0.050s), Status: OK

6.2 异步代码示例(可选)

python 复制代码
import asyncio

async def async_task():
    """Run one traced unit of async work: open a span, print its name, sleep."""
    with start_span("async_operation"):
        active = get_current_span()
        print(f"Current span: {active.name}")
        await asyncio.sleep(0.1)

async def main():
    """Run three async_task() coroutines concurrently and wait for all of them."""
    pending = []
    for _ in range(3):
        pending.append(async_task())
    await asyncio.gather(*pending)

# Run the async example
asyncio.run(main())

Current span: async_operation

Current span: async_operation

Current span: async_operation

7. 可视化追踪数据(可选)

python 复制代码
import matplotlib.pyplot as plt

def visualize_trace(trace: Trace):
    """Show the spans of *trace* as a horizontal bar chart, one row per span.

    Each bar starts at the span's start time and is as wide as its duration;
    the duration is also written next to the bar. A span that has not ended
    yet is drawn up to the current time.

    Rows are positioned by the span's index in ``trace.spans`` rather than by
    its name. With name-based (categorical) positions, spans that share a
    name are drawn on the same row while their duration labels, placed by
    index, end up on different rows.

    :param trace: The trace whose spans are plotted.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    for i, span in enumerate(trace.spans):
        duration = (span.end_time or time.time()) - span.start_time
        # Same y coordinate for the bar and its label keeps them aligned.
        ax.barh(i, duration, left=span.start_time, alpha=0.6)
        ax.text(span.start_time, i, f"{duration:.3f}s", va='center')

    # Label each row with its span name (duplicate names are allowed here).
    ax.set_yticks(list(range(len(trace.spans))))
    ax.set_yticklabels([span.name for span in trace.spans])

    ax.set_xlabel('Time')
    ax.set_title('Trace Visualization')
    plt.show()

# Plot the trace stored in the current context, if there is one
if trace := get_current_trace():
    visualize_trace(trace)

代码:https://github.com/zhouruiliangxian/Awesome-demo/blob/main/Distributed-Tracing/简易分布式追踪系统.ipynb

相关推荐
小江的记录本2 小时前
【Kafka核心】架构模型:Producer、Broker、Consumer、Consumer Group、Topic、Partition、Replica
java·数据库·分布式·后端·搜索引擎·架构·kafka
身如柳絮随风扬10 小时前
多数据源切换实战:从业务场景到3种实现方案全解析
java·分布式·微服务
AIMath~11 小时前
雪花算法+ZooKeeper解决方案+RPC是什么
分布式·zookeeper·云原生
KmSH8umpK12 小时前
Redis分布式锁从原生手写到Redisson高阶落地,附线上死锁复盘优化方案进阶第六篇
数据库·redis·分布式
空中海13 小时前
Kafka :存储、复制与可靠性
分布式·kafka·linq
渣渣盟13 小时前
构建企业级实时数据管道:Kafka + Flink 最佳实践
分布式·flink·kafka
KmSH8umpK14 小时前
Redis分布式锁从原生手写到Redisson高阶落地,附线上死锁复盘优化方案进阶第四篇
数据库·redis·分布式
KmSH8umpK15 小时前
Redis分布式锁从原生手写到Redisson高阶落地,附线上死锁复盘优化方案进阶第五篇
数据库·redis·分布式
卧室小白16 小时前
ceph-分布式存储
分布式
aXin_ya16 小时前
微服务第九天 分布式缓存(Redis)
分布式·缓存·微服务