bpf监控某个应用里各线程锁的申请得到及释放时间

内核观测工具BPF

总体思路

bpf uprobe

bpf程序除了通过kprobe监控内核函数外,还能通过uprobe监控用户态函数,包括动态库的调用,如线程库libpthread.so里锁的调用。

  • 线程里锁的操作在动态库libpthread.so里(glibc 2.34及之后的版本已将这些函数合并进libc.so.6)。
  • 调用pthread_mutex_lock是申请锁
  • 从pthread_mutex_lock返回是得到锁
  • 调用pthread_mutex_unlock是释放锁

perf buffer

  • perf_buffer 是 libbpf 提供的一个高性能事件传输机制,用于将 eBPF 程序中产生的数据高效地传递到用户态程序(缓冲区写满时事件可能丢失,丢失数量会通过 lost_cb 回调通知用户态)。
    ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
    │ eBPF 内核态 │ │ Perf Buffer │ │ 用户态程序 │
    │ │ ────→ │ │ ────→ │ │
    │ 产生事件数据 │ │ 环形缓冲区 │ │ 接收并处理 │
    └─────────────┘ └─────────────┘ └─────────────┘

  • Perf Event Array Map

    struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); // map 类型:perf event 数组
    __uint(key_size, sizeof(u32)); // key 类型:CPU 编号
    __uint(value_size, sizeof(u32)); // value 类型:文件描述符
    } events SEC(".maps");

    • type BPF_MAP_TYPE_PERF_EVENT_ARRAY 专门用于perf_event 的 map 类型
    • key_size sizeof(u32) key 是 CPU 编号(0,1,2...)
    • value_size sizeof(u32) value 是 perf_event 的文件描述符
    • max_entries 未指定 默认为系统 CPU 数量
  • bpf输出perf事件

    复制代码
      struct lock_event event = {};
      event.timestamp = get_time_us();
      event.pid = pid;
      event.tid = tid;
      event.mutex = mutex;
      event.action = ACTION_REQUEST;
      bpf_get_current_comm(&event.comm, sizeof(event.comm));
    
      bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
  • 用户端程序申请perf buffer绑定到events

    复制代码
      struct perf_buffer_opts pb_opts = {
          .sample_cb = handle_event,
          .lost_cb = handle_lost_events,
      };
    
      pb = perf_buffer__new(bpf_map__fd(skel->maps.events), 4096, &pb_opts);
  • 用户端程序主循环中请求perf事件
    通过poll获取Buffer中的事件,获取后会自动调用注册的回调处理函数。

    复制代码
      // 事件循环
      while (!exiting) {
          err = perf_buffer__poll(pb, 100);
          if (err < 0 && err != -EINTR) {
              fprintf(stderr, "Error polling perf buffer: %d\n", err);
              break;
          }
      }

BPF内核态程序

复制代码
// mutex_trace.bpf.c
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

typedef unsigned int u32;
typedef unsigned long long u64;

char LICENSE[] SEC("license") = "GPL";

// Lock event record sent to user space through the `events` perf map.
// The user-space loader declares a struct with the same fields in the same
// order and reads the raw bytes, so the two layouts must be kept in sync.
struct lock_event {
    u64 timestamp;      // timestamp in microseconds
    u32 pid;            // process ID
    u32 tid;            // thread ID
    u64 mutex;          // mutex address
    u32 action;         // operation type (one of the ACTION_* values)
    int ret;            // return value
    char comm[16];      // process name
};

// Operation types stored in lock_event.action.
enum {
    ACTION_REQUEST    = 1,  // lock requested
    ACTION_GOT        = 2,  // lock call returned
    ACTION_RELEASE    = 3,  // lock released
    ACTION_TRY_LOCK   = 4,  // trylock called
    ACTION_TRY_RESULT = 5,  // trylock returned
};

// Set of IDs to monitor: a hash map with a u32 key and a u8 value, holding
// at most 10240 entries. is_target() only tests whether a key is present;
// the stored value is never read by this program.
struct {
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(max_entries, 10240);
    __type(key, u32);
    __type(value, u8);
} target_pids SEC(".maps");

// Perf event output map: the destination passed to bpf_perf_event_output()
// by every probe in this file. Key and value are both 4 bytes wide;
// max_entries is intentionally not set in this definition.
struct {
    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(u32));
} events SEC(".maps");

// Fetch the first argument of the probed function (x86_64 only: it is read
// from the `di` register slot of the saved registers).
static __always_inline u64 get_first_param(struct pt_regs *ctx)
{
    u64 first_arg = (u64)ctx->di;

    return first_arg;
}

// Current bpf_ktime_get_ns() reading converted from nanoseconds to
// microseconds (integer division, sub-microsecond part dropped).
static __always_inline u64 get_time_us(void)
{
    u64 now_ns = bpf_ktime_get_ns();

    return now_ns / 1000;
}

// Tell whether `id` (a process ID or a thread ID) is a key of target_pids.
// Returns 1 when the key exists, 0 otherwise.
static __always_inline int is_target(u32 id)
{
    if (bpf_map_lookup_elem(&target_pids, &id)) {
        return 1;
    }
    return 0;
}

// 申请锁
SEC("uprobe")
int trace_mutex_lock(struct pt_regs *ctx)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    u64 mutex = get_first_param(ctx);
    
    if (!is_target(pid) && !is_target(tid)) {
        return 0;
    }
    
    struct lock_event event = {};
    event.timestamp = get_time_us();
    event.pid = pid;
    event.tid = tid;
    event.mutex = mutex;
    event.action = ACTION_REQUEST;
    bpf_get_current_comm(&event.comm, sizeof(event.comm));
    
    bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
    return 0;
}

// 得到锁
SEC("uretprobe")
int trace_mutex_lock_ret(struct pt_regs *ctx)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    u64 mutex = get_first_param(ctx);
    int ret = (int)ctx->ax;
    
    if (!is_target(pid) && !is_target(tid)) {
        return 0;
    }
    
    struct lock_event event = {};
    event.timestamp = get_time_us();
    event.pid = pid;
    event.tid = tid;
    event.mutex = mutex;
    event.action = ACTION_GOT;
    event.ret = ret;
    bpf_get_current_comm(&event.comm, sizeof(event.comm));
    
    bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
    return 0;
}

// Lock released: runs on entry to the probed unlock function and emits an
// ACTION_RELEASE event for monitored processes/threads.
SEC("uprobe")
int trace_mutex_unlock(struct pt_regs *ctx)
{
    u64 ids = bpf_get_current_pid_tgid();
    u32 tgid = ids >> 32;       // upper half: process ID
    u32 thread = (u32)ids;      // lower half: thread ID
    u64 mutex_addr = get_first_param(ctx);

    // Skip callers that are neither a monitored process nor a monitored thread.
    if (!(is_target(tgid) || is_target(thread))) {
        return 0;
    }

    struct lock_event ev = {
        .timestamp = get_time_us(),
        .pid = tgid,
        .tid = thread,
        .mutex = mutex_addr,
        .action = ACTION_RELEASE,
    };
    bpf_get_current_comm(&ev.comm, sizeof(ev.comm));

    bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &ev, sizeof(ev));
    return 0;
}

// trylock
SEC("uprobe")
int trace_mutex_trylock(struct pt_regs *ctx)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    u64 mutex = get_first_param(ctx);
    
    if (!is_target(pid) && !is_target(tid)) {
        return 0;
    }
    
    struct lock_event event = {};
    event.timestamp = get_time_us();
    event.pid = pid;
    event.tid = tid;
    event.mutex = mutex;
    event.action = ACTION_TRY_LOCK;
    bpf_get_current_comm(&event.comm, sizeof(event.comm));
    
    bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
    return 0;
}

// trylock 结果
SEC("uretprobe")
int trace_mutex_trylock_ret(struct pt_regs *ctx)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    u64 mutex = get_first_param(ctx);
    int ret = (int)ctx->ax;
    
    if (!is_target(pid) && !is_target(tid)) {
        return 0;
    }
    
    struct lock_event event = {};
    event.timestamp = get_time_us();
    event.pid = pid;
    event.tid = tid;
    event.mutex = mutex;
    event.action = ACTION_TRY_RESULT;
    event.ret = ret;
    bpf_get_current_comm(&event.comm, sizeof(event.comm));
    
    bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event));
    return 0;
}

用户态程序

复制代码
// loader.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <dirent.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "mutex_trace_bpf.skel.h"

typedef unsigned int u32;
typedef unsigned long long u64;
typedef unsigned char u8;

// Operation types carried in lock_event.action (same values as the BPF side).
enum {
    ACTION_REQUEST    = 1,  // lock requested
    ACTION_GOT        = 2,  // lock call returned
    ACTION_RELEASE    = 3,  // lock released
    ACTION_TRY_LOCK   = 4,  // trylock called
    ACTION_TRY_RESULT = 5,  // trylock returned
};

// Lock event record as received from the perf buffer. It mirrors the struct
// of the same name in mutex_trace.bpf.c field for field; keep both in sync.
struct lock_event {
    u64 timestamp;      // timestamp in microseconds (see format_time())
    u32 pid;            // process ID
    u32 tid;            // thread ID
    u64 mutex;          // mutex address
    u32 action;         // one of the ACTION_* values
    int ret;            // return value of the traced call (0 is printed as SUCCESS)
    char comm[16];      // process name
};

// Exit request flag, set by sig_handler() and polled by the event loop in
// main(). `volatile sig_atomic_t` is the object type the C standard
// guarantees can be written from an asynchronous signal handler.
static volatile sig_atomic_t exiting = 0;
// PID given on the command line.
static int target_pid = -1;
// Wall-clock time of system boot in microseconds, set once in main().
static u64 boot_time_us = 0;

// SIGINT/SIGTERM handler: only raises the exit flag so that the event loop
// can stop and clean up normally.
static void sig_handler(int sig)
{
    (void)sig;  // the same action is taken for every handled signal
    exiting = 1;
}

static int bump_memlock_rlimit(void)
{
    struct rlimit rlim = {
        .rlim_cur = 128 * 1024 * 1024,
        .rlim_max = 128 * 1024 * 1024,
    };
    return setrlimit(RLIMIT_MEMLOCK, &rlim);
}

// Estimate the wall-clock time of system boot, in microseconds since the
// Unix epoch, as "current time (gettimeofday) minus uptime (/proc/uptime)".
//
// Returns 0 when /proc/uptime cannot be opened or parsed, when the current
// time cannot be read, or when the computed value would be negative.
//
// NOTE(review): /proc/uptime includes time spent in suspend, while the BPF
// side timestamps events with bpf_ktime_get_ns(), which is documented as
// not counting suspended time. On a machine that has been suspended the
// printed wall-clock times are therefore presumably shifted; deriving the
// offset from CLOCK_REALTIME - CLOCK_MONOTONIC would avoid that - confirm.
static u64 get_boot_time_us(void)
{
    double uptime = 0.0;
    struct timeval tv;

    FILE *fp = fopen("/proc/uptime", "r");
    if (!fp) {
        return 0;
    }

    // Only the first field (seconds since boot) is needed.
    int fields = fscanf(fp, "%lf", &uptime);
    fclose(fp);
    if (fields != 1 || uptime < 0.0) {
        return 0;
    }

    if (gettimeofday(&tv, NULL) != 0) {
        return 0;
    }

    // boot time = current time - uptime
    u64 now_us = (u64)tv.tv_sec * 1000000ULL + (u64)tv.tv_usec;
    u64 uptime_us = (u64)(uptime * 1000000.0);
    if (uptime_us > now_us) {
        return 0;  // would wrap around; treat as unknown
    }

    return now_us - uptime_us;
}

// Format an event timestamp as local time "HH:MM:SS.uuuuuu".
//
// timestamp_us: microseconds since system boot, as produced by the BPF side.
// buf/buf_size: destination buffer; the result is always NUL-terminated and
//               truncated if the buffer is too small. Nothing is written
//               when buf is NULL or buf_size is 0.
//
// The absolute time is boot_time_us + timestamp_us. If the local time cannot
// be computed, "--:--:--" is used for the HH:MM:SS part.
static void format_time(u64 timestamp_us, char *buf, size_t buf_size)
{
    if (!buf || buf_size == 0) {
        return;
    }

    u64 abs_time_us = boot_time_us + timestamp_us;
    time_t sec = (time_t)(abs_time_us / 1000000);
    long usec = (long)(abs_time_us % 1000000);

    char hms[16];
    struct tm *tm = localtime(&sec);
    if (!tm || strftime(hms, sizeof(hms), "%H:%M:%S", tm) == 0) {
        snprintf(hms, sizeof(hms), "--:--:--");
    }

    // snprintf always terminates the output, unlike strncpy.
    snprintf(buf, buf_size, "%s.%06ld", hms, usec);
}

// Map an ACTION_* value to its short display name.
// Any value without a table entry yields "UNKNOWN".
static const char* get_action_name(u32 action)
{
    static const char *const names[] = {
        [ACTION_REQUEST]    = "REQUEST",
        [ACTION_GOT]        = "GOT",
        [ACTION_RELEASE]    = "RELEASE",
        [ACTION_TRY_LOCK]   = "TRY",
        [ACTION_TRY_RESULT] = "TRY_RES",
    };
    const size_t count = sizeof(names) / sizeof(names[0]);

    // Slots that are not listed above (such as index 0) are NULL.
    if (action < count && names[action] != NULL) {
        return names[action];
    }
    return "UNKNOWN";
}

// Look up the value nm(1) reports for a dynamic symbol of a shared library.
//
// Runs `nm -D '<libpath>'` and scans the output for the first line of the
// form "<hex value> <type> <name>" whose name equals `symbol`. A version
// suffix on the name ("name@VERSION" or "name@@VERSION"), which nm may print
// for dynamic symbols, is ignored in the comparison. Lines without a value
// (undefined symbols) are skipped.
//
// libpath: path of the library; it is placed inside single quotes on the
//          shell command line, so it must not itself contain a single quote.
// symbol:  exact symbol name to look for.
//
// Returns the symbol value, or 0 when the arguments are invalid, the command
// cannot be built or started, or the symbol is not found.
static size_t get_symbol_offset(const char *libpath, const char *symbol)
{
    char cmd[512];
    char line[512];
    size_t offset = 0;

    if (!libpath || !symbol || symbol[0] == '\0') {
        return 0;
    }

    int len = snprintf(cmd, sizeof(cmd), "nm -D '%s' 2>/dev/null", libpath);
    if (len < 0 || (size_t)len >= sizeof(cmd)) {
        return 0;  // command would be truncated
    }

    FILE *fp = popen(cmd, "r");
    if (!fp) {
        return 0;
    }

    while (fgets(line, sizeof(line), fp)) {
        size_t value = 0;
        char type = 0;
        char name[256];

        if (sscanf(line, "%zx %c %255s", &value, &type, name) != 3) {
            continue;
        }

        // Drop "@VERSION" / "@@VERSION" so versioned names still match.
        char *version = strchr(name, '@');
        if (version) {
            *version = '\0';
        }

        if (strcmp(name, symbol) == 0) {
            offset = value;
            break;
        }
    }

    pclose(fp);
    return offset;
}

// Register every thread of process `pid` in the target_pids map.
// Walks /proc/<pid>/task and inserts each entry whose name starts with a
// digit as a key with value 1, printing one line per thread. Returns
// silently when the map fd is invalid or the directory cannot be opened.
static void add_threads(int pid, struct mutex_trace_bpf *skel)
{
    int map_fd = bpf_map__fd(skel->maps.target_pids);
    if (map_fd < 0) {
        return;
    }

    char task_dir[256];
    snprintf(task_dir, sizeof(task_dir), "/proc/%d/task", pid);

    DIR *tasks = opendir(task_dir);
    if (!tasks) {
        return;
    }

    for (struct dirent *ent = readdir(tasks); ent != NULL; ent = readdir(tasks)) {
        char first = ent->d_name[0];

        // Skip "." and ".." and anything else that is not numeric.
        if (first < '0' || first > '9') {
            continue;
        }

        int tid = atoi(ent->d_name);
        u8 present = 1;
        bpf_map_update_elem(map_fd, &tid, &present, BPF_ANY);
        printf("  Adding thread: %d\n", tid);
    }

    closedir(tasks);
}

// Register all descendant processes of `parent_pid`, and their threads, in
// the target_pids map.
//
// The kernel exposes the children list per thread, at
// /proc/<pid>/task/<tid>/children (there is no /proc/<pid>/children), so
// every thread directory of the parent is visited and its list is read.
// Each child found is added to the map, its threads are added through
// add_threads(), and the function recurses into the child.
// Returns silently when the map fd is invalid or /proc/<pid>/task cannot be
// opened; unreadable children files are skipped.
static void add_child_processes(int parent_pid, struct mutex_trace_bpf *skel)
{
    char path[512];
    int map_fd = bpf_map__fd(skel->maps.target_pids);

    if (map_fd < 0) return;

    snprintf(path, sizeof(path), "/proc/%d/task", parent_pid);
    DIR *dir = opendir(path);
    if (!dir) return;

    struct dirent *entry;
    while ((entry = readdir(dir)) != NULL) {
        // Only numeric entries are thread IDs.
        if (entry->d_name[0] < '0' || entry->d_name[0] > '9') {
            continue;
        }

        snprintf(path, sizeof(path), "/proc/%d/task/%s/children",
                 parent_pid, entry->d_name);
        FILE *fp = fopen(path, "r");
        if (!fp) {
            continue;
        }

        int child_pid;
        while (fscanf(fp, "%d", &child_pid) == 1) {
            u8 value = 1;
            bpf_map_update_elem(map_fd, &child_pid, &value, BPF_ANY);
            printf("  Adding child process: %d\n", child_pid);
            add_threads(child_pid, skel);
            add_child_processes(child_pid, skel);
        }

        fclose(fp);
    }

    closedir(dir);
}

// perf buffer sample callback: print one line per lock event.
//
// ctx, cpu: unused.
// data:     raw sample payload, expected to start with a struct lock_event.
// data_sz:  payload size in bytes; samples shorter than a struct lock_event
//           are reported on stderr and ignored.
//
// Events with an action other than the five ACTION_* values print nothing.
// stdout is flushed after every call that passes the size check.
static void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
{
    struct lock_event ev;
    char time_str[64];
    int has_result = 0;
    int known = 1;

    (void)ctx;
    (void)cpu;

    if (!data || data_sz < sizeof(ev)) {
        fprintf(stderr, "Ignoring truncated event (%u bytes)\n", data_sz);
        return;
    }

    // Copy instead of casting the pointer: the payload address is presumably
    // not guaranteed to be suitably aligned for the u64 members.
    memcpy(&ev, data, sizeof(ev));
    ev.comm[sizeof(ev.comm) - 1] = '\0';

    switch (ev.action) {
        case ACTION_REQUEST:
        case ACTION_RELEASE:
        case ACTION_TRY_LOCK:
            break;
        case ACTION_GOT:
        case ACTION_TRY_RESULT:
            has_result = 1;  // these events carry the call's return value
            break;
        default:
            known = 0;
            break;
    }

    if (known) {
        format_time(ev.timestamp, time_str, sizeof(time_str));
        printf("[%s] %-8s | PID=%u TID=%u COMM=%-15s MUTEX=%p",
               time_str, get_action_name(ev.action), ev.pid, ev.tid, ev.comm,
               (void*)ev.mutex);
        if (has_result) {
            printf(" RESULT=%s", ev.ret == 0 ? "SUCCESS" : "FAIL");
        }
        printf("\n");
    }
    fflush(stdout);
}

// perf buffer "lost samples" callback: report on stderr how many events
// were dropped and on which CPU. `ctx` is unused.
static void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
{
    FILE *out = stderr;

    fprintf(out, "Lost %llu events on CPU %d\n", lost_cnt, cpu);
}

int main(int argc, char **argv)
{
    struct mutex_trace_bpf *skel;
    const char *pthread_path = "/lib/x86_64-linux-gnu/libpthread.so.0";
    size_t lock_offset, unlock_offset, trylock_offset;
    struct perf_buffer *pb = NULL;
    int err;
    
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <PID>\n", argv[0]);
        fprintf(stderr, "Example: sudo %s 1234\n", argv[0]);
        return 1;
    }
    
    target_pid = atoi(argv[1]);
    
    signal(SIGINT, sig_handler);
    signal(SIGTERM, sig_handler);
    
    // 获取系统启动时间
    boot_time_us = get_boot_time_us();
    
    if (bump_memlock_rlimit()) {
        fprintf(stderr, "Failed to increase memlock limit\n");
        return 1;
    }
    
    // 检查进程
    char proc_path[256];
    snprintf(proc_path, sizeof(proc_path), "/proc/%d", target_pid);
    if (access(proc_path, F_OK) != 0) {
        fprintf(stderr, "Process %d does not exist\n", target_pid);
        return 1;
    }
    
    // 检查 libpthread
    if (access(pthread_path, F_OK) != 0) {
        pthread_path = "/usr/lib/x86_64-linux-gnu/libpthread.so.0";
        if (access(pthread_path, F_OK) != 0) {
            fprintf(stderr, "libpthread not found\n");
            return 1;
        }
    }
    
    // 获取偏移量
    lock_offset = get_symbol_offset(pthread_path, "pthread_mutex_lock");
    unlock_offset = get_symbol_offset(pthread_path, "pthread_mutex_unlock");
    trylock_offset = get_symbol_offset(pthread_path, "pthread_mutex_trylock");
    
    if (lock_offset == 0 || unlock_offset == 0) {
        fprintf(stderr, "Failed to find symbols in %s\n", pthread_path);
        fprintf(stderr, "Try: nm -D %s | grep pthread_mutex\n", pthread_path);
        return 1;
    }
    
    printf("========================================\n");
    printf("Mutex Trace Tool - Real-time Monitor\n");
    printf("========================================\n");
    printf("Target PID: %d\n", target_pid);
    printf("libpthread: %s\n", pthread_path);
    printf("lock offset: 0x%zx\n", lock_offset);
    printf("unlock offset: 0x%zx\n", unlock_offset);
    if (trylock_offset) {
        printf("trylock offset: 0x%zx\n", trylock_offset);
    }
    printf("========================================\n\n");
    
    // 打开 skeleton
    skel = mutex_trace_bpf__open();
    if (!skel) {
        fprintf(stderr, "Failed to open BPF skeleton\n");
        return 1;
    }
    
    // 设置程序类型
    bpf_program__set_type(skel->progs.trace_mutex_lock, BPF_PROG_TYPE_KPROBE);
    bpf_program__set_type(skel->progs.trace_mutex_lock_ret, BPF_PROG_TYPE_KPROBE);
    bpf_program__set_type(skel->progs.trace_mutex_unlock, BPF_PROG_TYPE_KPROBE);
    if (trylock_offset) {
        bpf_program__set_type(skel->progs.trace_mutex_trylock, BPF_PROG_TYPE_KPROBE);
        bpf_program__set_type(skel->progs.trace_mutex_trylock_ret, BPF_PROG_TYPE_KPROBE);
    }
    
    // 加载 BPF 程序
    err = mutex_trace_bpf__load(skel);
    if (err) {
        fprintf(stderr, "Failed to load BPF skeleton: %d\n", err);
        goto cleanup;
    }
    
    // 添加目标进程到监控列表
    printf("Adding target process: %d\n", target_pid);
    int map_fd = bpf_map__fd(skel->maps.target_pids);
    u8 value = 1;
    if (bpf_map_update_elem(map_fd, &target_pid, &value, BPF_ANY) != 0) {
        fprintf(stderr, "Failed to add target PID\n");
        goto cleanup;
    }
    
    // 添加线程和子进程
    printf("Scanning threads...\n");
    add_threads(target_pid, skel);
    
    printf("Scanning child processes...\n");
    add_child_processes(target_pid, skel);
    
    // 挂载 probes
    skel->links.trace_mutex_lock = bpf_program__attach_uprobe(
        skel->progs.trace_mutex_lock, false, -1, pthread_path, lock_offset);
    if (!skel->links.trace_mutex_lock) {
        fprintf(stderr, "Failed to attach lock uprobe\n");
        goto cleanup;
    }
    
    skel->links.trace_mutex_lock_ret = bpf_program__attach_uprobe(
        skel->progs.trace_mutex_lock_ret, true, -1, pthread_path, lock_offset);
    if (!skel->links.trace_mutex_lock_ret) {
        fprintf(stderr, "Failed to attach lock ret uprobe\n");
        goto cleanup;
    }
    
    skel->links.trace_mutex_unlock = bpf_program__attach_uprobe(
        skel->progs.trace_mutex_unlock, false, -1, pthread_path, unlock_offset);
    if (!skel->links.trace_mutex_unlock) {
        fprintf(stderr, "Failed to attach unlock uprobe\n");
        goto cleanup;
    }
    
    if (trylock_offset) {
        skel->links.trace_mutex_trylock = bpf_program__attach_uprobe(
            skel->progs.trace_mutex_trylock, false, -1, pthread_path, trylock_offset);
        skel->links.trace_mutex_trylock_ret = bpf_program__attach_uprobe(
            skel->progs.trace_mutex_trylock_ret, true, -1, pthread_path, trylock_offset);
    }
    
    // 创建 perf buffer
    struct perf_buffer_opts pb_opts = {
        .sample_cb = handle_event,
        .lost_cb = handle_lost_events,
    };

    pb = perf_buffer__new(bpf_map__fd(skel->maps.events), 4096, &pb_opts);
    if (!pb) {
        fprintf(stderr, "Failed to create perf buffer\n");
        goto cleanup;
    }
    
    printf("\n✅ Monitoring PID %d and its children/threads...\n", target_pid);
    printf("Press Ctrl-C to stop...\n\n");
    
    // 事件循环
    while (!exiting) {
        err = perf_buffer__poll(pb, 100);
        if (err < 0 && err != -EINTR) {
            fprintf(stderr, "Error polling perf buffer: %d\n", err);
            break;
        }
    }
    
cleanup:
    if (pb) perf_buffer__free(pb);
    mutex_trace_bpf__destroy(skel);
    printf("\n👋 Exiting...\n");
    return 0;
}

效果

测试程序代码

写一个测试程序,作为被测进程,用来跟踪其锁的操作。

复制代码
// test_lock.c - 测试锁监控
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>

// Mutex shared by all worker threads.
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

// Worker thread body. `arg` points to an int holding the thread's ID.
// After an initial 20 second sleep it performs five rounds of:
// lock, hold the lock for 5 seconds, unlock, pause 50 ms.
void* thread_func(void* arg) {
    const int id = *(int*)arg;
    int rounds_left = 5;

    // Initial delay before the first lock operation (presumably to leave
    // time for attaching the tracer - confirm).
    sleep(20);

    while (rounds_left-- > 0) {
        printf("Thread %d: trying to acquire lock\n", id);
        pthread_mutex_lock(&mutex);
        printf("Thread %d: acquired lock\n", id);

        sleep(5);  // hold the lock for 5 seconds

        pthread_mutex_unlock(&mutex);
        printf("Thread %d: released lock\n", id);

        usleep(50000);  // 50 ms pause before the next round
    }

    return NULL;
}

// Start four worker threads with IDs 1..4, wait for all of them in creation
// order, then report completion.
int main() {
    enum { WORKER_COUNT = 4 };
    pthread_t workers[WORKER_COUNT];
    int ids[WORKER_COUNT] = { 1, 2, 3, 4 };

    printf("Starting lock test...\n");

    // ids[] outlives the threads because main joins them before returning.
    for (int i = 0; i < WORKER_COUNT; i++) {
        pthread_create(&workers[i], NULL, thread_func, &ids[i]);
    }

    for (int i = 0; i < WORKER_COUNT; i++) {
        pthread_join(workers[i], NULL);
    }

    printf("Test complete\n");
    return 0;
}
相关推荐
IMPYLH2 小时前
Linux 的 expand 命令
linux·运维·服务器
小白学鸿蒙2 小时前
服务器可视化部署静态网站或者搭建博客论坛-小白版
运维·服务器
何中应2 小时前
Grafana展示服务器数据
运维·服务器·grafana
问道飞鱼2 小时前
【服务器知识】nginx安全架构巡检
服务器·nginx·安全架构
竹之却2 小时前
【Linux】Linux 中 .service 文件核心介绍
linux·运维·服务器·systemd·.service 文件
不知名。。。。。。。。3 小时前
网络层———IP
服务器·网络·tcp/ip
拾贰_C3 小时前
【Ubuntu】安装Nginx(nVidia驱动未安装成功阻止版)
linux·运维·服务器·ubuntu
克莱因3588 小时前
Linux CentOS7 进程基础知识
linux·运维·服务器
Skilce8 小时前
ZrLog 高可用部署
运维·服务器·数据库·阿里云·maven