以下是针对 RK3588 + Buildroot + Linux 7.0 环境的内核调试进阶课题,每个课题均包含完整代码实例、调试技巧和常见问题解决方案。
一、Kprobe 动态探针实战
1.1 基础 Kprobe(函数入口探测)
用于在 不重新编译内核 的情况下,追踪任意内核函数的参数和调用时机。
c
// kprobe_demo.c - 可加载内核模块
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/spi/spi.h>
/* Target: trace spi_sync(), commonly used by SPI NOR/NAND on RK3588.
 * Only the symbol name is filled in by this initializer. */
static struct kprobe kp_spi_sync = {
.symbol_name = "spi_sync",
};
/*
 * Pre-handler, invoked before the probed function body runs.
 *
 * Per the arm64 calling convention x0-x7 carry the arguments, so
 * regs->regs[0] is read as the struct spi_device pointer passed to
 * spi_sync(). Always returns 0.
 */
static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs)
{
	struct spi_device *dev = (struct spi_device *)regs->regs[0];

	if (!dev)
		return 0;

	pr_info("[KPROBE] spi_sync() called by '%s', device='%s', speed=%u\n",
		current->comm, dev_name(&dev->dev), dev->max_speed_hz);

	/* Print the call stack to locate the caller. */
	dump_stack();

	return 0;
}
/* Kretprobe descriptor meant for running code after spi_sync() returns.
 * NOTE(review): the handler is NULL and nothing in this listing registers
 * this probe, so as written it is only a placeholder. */
static struct kretprobe kr_spi_sync = {
.kp.symbol_name = "spi_sync",
.handler = NULL, /* a ret_handler can be plugged in here */
};
/*
 * Module entry point: attach handler_pre() to the spi_sync() probe and
 * register it.
 *
 * Returns 0 on success, or the negative errno from register_kprobe().
 */
static int __init kprobe_init(void)
{
	int ret;

	/* Without a pre_handler the probe is planted but never reports anything. */
	kp_spi_sync.pre_handler = handler_pre;

	ret = register_kprobe(&kp_spi_sync);
	if (ret < 0) {
		pr_err("register_kprobe failed: %d\n", ret);
		return ret;
	}

	/* %pS prints symbol+offset; a plain %p would only show a hashed value. */
	pr_info("Kprobe registered at %pS\n", kp_spi_sync.addr);
	return 0;
}
/* Module exit point: remove the spi_sync() probe and log the removal. */
static void __exit kprobe_exit(void)
{
	unregister_kprobe(&kp_spi_sync);

	pr_info("Kprobe unregistered\n");
}
module_init(kprobe_init);
module_exit(kprobe_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RK3588 SPI Sync Kprobe Demo");
编译与使用:
bash
# Makefile
# For a Buildroot cross build, override on the command line, e.g.:
#   make KDIR=<buildroot>/output/build/linux-custom ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu-
obj-m += kprobe_demo.o
KDIR ?= /lib/modules/$(shell uname -r)/build
# Recipe lines must start with a TAB; $(MAKE) propagates flags and the jobserver.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules
clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean
# 加载后查看
insmod kprobe_demo.ko
dmesg -w | grep KPROBE
1.2 Kretprobe(追踪函数返回值)
用于分析函数执行时间和错误码分布:
c
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/timekeeping.h>
/* Kretprobe descriptor; its fields are filled in by kretprobe_init(). */
static struct kretprobe kr_pci_probe;
/* NOTE(review): not referenced anywhere else in this listing. */
static DEFINE_SPINLOCK(kr_lock);
/* Per-invocation scratch data handed from the entry handler to the return handler. */
struct probe_data {
u64 start_ns; /* timestamp taken at function entry, in nanoseconds */
struct pci_dev *pdev; /* device pointer captured at function entry */
};
/*
 * Entry handler: record the start time and the device being probed in the
 * per-instance data area. Always returns 0.
 */
static int __kprobes entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct probe_data *data = (struct probe_data *)ri->data;
	/* pci_device_probe() takes a struct device * in x0, not a struct pci_dev *. */
	struct device *dev = (struct device *)regs->regs[0];

	data->start_ns = ktime_get_ns();
	/* to_pci_dev() is a container_of(); do not apply it to a NULL pointer. */
	data->pdev = dev ? to_pci_dev(dev) : NULL;
	return 0;
}
/*
 * Return handler: log the probed device, the function's return value and
 * the elapsed time in microseconds. Always returns 0.
 */
static int __kprobes ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct probe_data *data = (struct probe_data *)ri->data;
	u64 delta = ktime_get_ns() - data->start_ns;
	/*
	 * The probed function returns int; truncate the raw register to int so
	 * that a negative errno is sign-extended instead of printing as a huge
	 * positive number.
	 */
	long retval = (int)regs_return_value(regs);

	pr_info("[KRETPROBE] pci_probe() dev=%s ret=%ld time=%llu us\n",
		data->pdev ? pci_name(data->pdev) : "null",
		retval, delta / 1000);
	return 0;
}
static int __init kretprobe_init(void)
{
kr_pci_probe.kp.symbol_name = "pci_device_probe";
kr_pci_probe.handler = ret_handler;
kr_pci_probe.entry_handler = entry_handler;
kr_pci_probe.data_size = sizeof(struct probe_data);
kr_pci_probe.maxactive = 20; // 最大并发探测数
return register_kretprobe(&kr_pci_probe);
}
二、Ftrace 高级追踪
2.1 函数图追踪(分析调用延迟)
bash
# RK3588 上追踪 NPU 驱动加载过程
echo function_graph > /sys/kernel/debug/tracing/current_tracer
echo accel_rocket_probe > /sys/kernel/debug/tracing/set_graph_function
echo 1 > /sys/kernel/debug/tracing/tracing_on
# 触发 NPU 驱动加载(如 rmmod/insmod)
modprobe rocket
# 查看结果
cat /sys/kernel/debug/tracing/trace
# 典型输出:
# 0) 0.123 us | accel_rocket_probe();
# 0) | rockchip_iommu_attach_device() {
# 0) 1.456 us | dev_iommu_priv_set();
# 0) 3.789 us | }
2.2 自定义 Tracepoint(驱动内嵌)
c
// 在驱动代码中添加 tracepoint
#include <trace/events/power.h> // 复用现有 tracepoint
/*
 * Write a job's head and tail values to the core's registers, emitting
 * existing power tracepoints around the writes as ad-hoc markers.
 * Always returns 0.
 */
static int rk3588_npu_submit(struct rocket_job *job)
{
/* Mark the submission: clock_set_rate event carrying "npu_submit", the command count and the current CPU id */
trace_clock_set_rate("npu_submit", job->cmd_count, raw_smp_processor_id());
/* Actual submission: head is written before tail */
writel(job->head, job->core->reg_base + REG_JOB_HEAD);
writel(job->tail, job->core->reg_base + REG_JOB_TAIL);
/* NOTE(review): emitted right after the register writes; nothing here waits for the hardware, so this marks the end of submission rather than a hardware response */
trace_cpu_frequency(RK3588_NPU_FREQ, raw_smp_processor_id());
return 0;
}
2.3 使用 trace-cmd 离线分析(主机侧)
bash
# RK3588 目标板
trace-cmd record -e irq_handler_entry -e irq_handler_exit -e sched_switch \
-F /usr/bin/teflon-run --model test.onnx
# 拷贝到主机分析
scp trace.dat ubuntu-host:/tmp/
# 主机侧(Ubuntu 22.04)
sudo apt install kernelshark
trace-cmd report trace.dat | head -100
kernelshark trace.dat # GUI 可视化
三、KGDB 双机源码级调试
3.1 配置内核(已在前文基础上补充)
config
CONFIG_KGDB=y
CONFIG_KGDB_SERIAL_CONSOLE=y
CONFIG_KGDB_KDB=y
# 0x1 enables every kdb command; 0x0 would leave only the always-safe ones,
# which excludes the md/bp commands used in section 3.4.
CONFIG_KDB_DEFAULT_ENABLE=0x1
# Config fragments express "off" as "is not set"; "=n" with a trailing comment is not valid.
# CONFIG_DEBUG_RODATA_TEST is not set
# CONFIG_STRICT_KERNEL_RWX is always selected on arm64 and cannot be disabled here.
# If kernel text really must stay writable during bring-up, boot with rodata=off instead.
3.2 U-Boot 启动参数(双串口方案)
Orange Pi 5 Plus 有 UART2(调试) 和 UART4(扩展),建议 UART2 给 console,UART4 给 KGDB:
bash
setenv bootargs '... console=ttyS2,1500000n8 kgdboc=ttyS4,1500000n8 nokaslr'
若只有 UART2,使用代理模式:
bash
# 内核启动后,在 console 切换
echo ttyS2,1500000n8 > /sys/module/kgdboc/parameters/kgdboc
echo g > /proc/sysrq-trigger # 进入 KGDB 等待
3.3 GDB 调试会话实例
bash
# Host side (Ubuntu 22.04)
aarch64-linux-gnu-gdb ${BUILDROOT}/output/build/linux-custom/vmlinux
(gdb) set serial baud 1500000
(gdb) target remote /dev/ttyUSB1 # serial port dedicated to KGDB
# Breakpoint: NPU driver probe
(gdb) break accel_rocket_probe
(gdb) continue
# Hit when the driver is loaded
(gdb) list
(gdb) print *pdev
# 0xfdab0000 is a PHYSICAL address. Over KGDB, gdb can only read kernel virtual
# addresses and cannot safely call kernel functions such as readl(), so dump
# the driver's ioremap()ed base instead (substitute the pointer your driver stores).
(gdb) x/16wx <ioremapped_npu_base> # register dump
# Single-stepping
(gdb) next
(gdb) step
(gdb) finish # run until the current function returns
# Conditional breakpoint: trigger only for a specific device
(gdb) break rk3588_pcie_probe if pdev->id == 2
# Hardware breakpoint (slots are limited by the CPU's debug registers);
# a raw address must be prefixed with '*'
(gdb) hbreak *0xffffff8008123000
3.4 KDB 快速排查(无需 GDB)
bash
# Trigger from the console
echo g > /proc/sysrq-trigger
# KDB commands (kdb is not a shell: there are no pipes apart from "| grep")
kdb> ps A # all tasks; plain "ps" lists only the active ones
kdb> dmesg 50 # last 50 log lines
# md takes a kernel VIRTUAL address; 0xfdab0000 (NPU) and 0xfeb30000 (UART) are
# physical MMIO addresses, so pass the driver's ioremap()ed pointers instead.
kdb> md <ioremapped_npu_base> 4 # memory dump (NPU registers)
kdb> md <ioremapped_uart_base> 1 # UART registers
kdb> bp accel_rocket_irq # set a breakpoint
kdb> go # resume execution
四、KASAN + KCSAN 内存/并发错误检测
4.1 KASAN 检测内存越界
c
// buggy_driver.c - sample that deliberately contains memory bugs
/*
 * Init routine that commits three deliberate memory errors so the resulting
 * reports can be studied.
 *
 * Note: by default KASAN prints a report only for the first invalid access;
 * boot with kasan_multi_shot to see a report for each of them.
 *
 * Returns 0, or -ENOMEM if the demo buffer cannot be allocated.
 */
static int __init buggy_init(void)
{
	char big[8192];	/* Bug 3: huge frame on a kernel stack that is typically 16 KiB */
	char *buf;
	int i;

	buf = kmalloc(16, GFP_KERNEL);	/* 16-byte allocation */
	if (!buf)
		return -ENOMEM;	/* an allocation failure is not one of the intended bugs */

	/* Bug 1: out-of-bounds write, caught by KASAN once i reaches 16 */
	for (i = 0; i < 32; i++)
		buf[i] = i;

	/* Bug 2: use-after-free, caught by KASAN */
	kfree(buf);
	pr_info("%d\n", buf[0]);

	/* Bug 3: touch the oversized stack buffer */
	memset(big, 0, sizeof(big));
	return 0;
}
KASAN 报告解读:
[ 12.345] ==================================================================
[ 12.346] BUG: KASAN: slab-out-of-bounds in buggy_init+0x45/0x100 [buggy]
[ 12.347] Write of size 1 at addr ffff00000d234810 by task insmod/1234
[ 12.348]
[ 12.349] CPU: 0 PID: 1234 Comm: insmod Tainted: G B O
[ 12.350] Hardware name: Orange Pi 5 Plus (DT)
[ 12.351] Call trace:
[ 12.352] dump_backtrace+0x0/0x1c0
[ 12.353] show_stack+0x20/0x30
[ 12.354] kasan_report+0x140/0x1a0
[ 12.355] buggy_init+0x45/0x100 [buggy]
[ 12.356]
[ 12.357] Allocated by task 1234:
[ 12.358] kasan_save_stack+0x30/0x60
[ 12.359] __kasan_kmalloc+0x8c/0x90
[ 12.360] buggy_init+0x30/0x100 [buggy]
[ 12.361]
[ 12.362] The buggy address belongs to the object at ffff00000d234800
[ 12.363] which belongs to the cache kmalloc-16 of size 16
[ 12.364] ==================================================================
4.2 KCSAN 检测数据竞争
c
// race_demo.c - demonstrates a data race
/* Counter incremented by both functions below. */
static int shared_counter;
/* Spinlock taken by correct_increment() around the counter update. */
static DEFINE_SPINLOCK(race_lock);
/* Increment shared_counter while holding race_lock. */
static void correct_increment(void)
{
/* Correct: the update is protected by the lock */
spin_lock(&race_lock);
shared_counter++;
spin_unlock(&race_lock);
}
/* Increment shared_counter without any locking (deliberately racy). */
static void buggy_increment(void)
{
/* Wrong: unprotected read-modify-write; this is the access KCSAN is expected to report */
shared_counter++;
}
KCSAN 配置(注意:KCSAN 依赖 !KASAN,两者不能在同一个内核中同时启用,需要分别构建内核进行测试):
config
CONFIG_KCSAN=y
CONFIG_KCSAN_VERBOSE=y
五、eBPF/BCC 高级追踪
5.1 使用 bpftrace 追踪 RK3588 DDR 带宽
bash
#!/usr/bin/env bpftrace
// ddr_bandwidth.bt - trace DDR frequency changes and devfreq activity
// (the shebang must be the first line, and bpftrace comments use // rather than #)
kprobe:rockchip_ddr_set_rate
{
	// kprobes expose raw arguments as arg0..argN; args->field is only
	// available on tracepoint/fentry probes.
	// NOTE(review): assumes the 2nd parameter is the target rate in Hz -
	// confirm against the prototype in your kernel.
	printf("DDR freq change: -> %lu MHz (CPU: %d)\n",
		arg1 / 1000000,
		cpu);
}
kprobe:devfreq_monitor
{
	@ddr_requests[comm] = count();
}
interval:s:5
{
	printf("\n=== DDR 请求统计 (5s) ===\n");
	print(@ddr_requests);
	clear(@ddr_requests);
}
5.2 自定义 eBPF 程序追踪 NPU 任务延迟
c
// npu_latency.bpf.c
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 1024);
__type(key, u32); // PID
__type(value, u64); // start time
} start_ns SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HISTOGRAM);
__uint(max_entries, 64);
__type(key, u64); // latency bucket
__type(value, u64); // count
} latency_hist SEC(".maps");
SEC("kprobe/accel_rocket_job_submit")
int BPF_KPROBE(trace_submit, struct rocket_job *job)
{
u32 pid = bpf_get_current_pid_tgid() >> 32;
u64 ts = bpf_ktime_get_ns();
bpf_map_update_elem(&start_ns, &pid, &ts, BPF_ANY);
return 0;
}
SEC("kprobe/accel_rocket_job_complete")
int BPF_KPROBE(trace_complete, struct rocket_job *job)
{
u32 pid = bpf_get_current_pid_tgid() >> 32;
u64 *start = bpf_map_lookup_elem(&start_ns, &pid);
if (start) {
u64 delta = bpf_ktime_get_ns() - *start;
bpf_map_delete_elem(&start_ns, &pid);
// 记录到直方图(单位:微秒)
bpf_map_update_elem(&latency_hist, &delta, &delta, BPF_ANY);
}
return 0;
}
char LICENSE[] SEC("license") = "GPL";
编译与加载:
bash
# Generate vmlinux.h from the target kernel's BTF (run on the RK3588, then copy it next to the source)
bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h
# Cross-compile on the host. -O2 is required by the BPF backend; -g emits the
# BTF that SEC(".maps") definitions depend on; -I. finds vmlinux.h.
clang -g -O2 -target bpf -D__TARGET_ARCH_arm64 -I/usr/include/aarch64-linux-gnu -I. \
-c npu_latency.bpf.c -o npu_latency.bpf.o
# Copy to the RK3588 and load. The object holds two programs, so use
# "loadall" (pins every program under the given directory); "load" only takes the first one.
scp npu_latency.bpf.o root@rk3588:/tmp/
ssh root@rk3588 "bpftool prog loadall /tmp/npu_latency.bpf.o /sys/fs/bpf/npu_latency autoattach"
# Dump the histogram
bpftool map dump name latency_hist
六、Lockdep 死锁检测
6.1 典型死锁模式与检测
c
// deadlock_demo.c
/* The two mutexes that the thread functions below acquire in opposite orders. */
static DEFINE_MUTEX(lock_a);
static DEFINE_MUTEX(lock_b);
/* Thread 1: takes lock_a first, then lock_b. Always returns 0. */
static int thread1_fn(void *data)
{
mutex_lock(&lock_a);
msleep(100); // sleep while holding lock_a to make the race more likely
mutex_lock(&lock_b); // may deadlock if the other thread already holds lock_b
/* critical section */
mutex_unlock(&lock_b);
mutex_unlock(&lock_a);
return 0;
}
/* Thread 2: takes lock_b first, then lock_a (deliberately the wrong order). Always returns 0. */
static int thread2_fn(void *data)
{
mutex_lock(&lock_b); // wrong order: lock_b before lock_a
msleep(100);
mutex_lock(&lock_a); // deadlocks here if thread 1 holds lock_a and waits for lock_b
mutex_unlock(&lock_a);
mutex_unlock(&lock_b);
return 0;
}
Lockdep 报告:
[ 45.678] ======================================================
[ 45.679] WARNING: possible circular locking dependency detected
[ 45.680] ------------------------------------------------------
[ 45.681] thread_test/100 is trying to acquire lock:
[ 45.682] ffff00000d234800 (&lock_b){+.+.}, at: thread1_fn+0x40/0x80 [deadlock]
[ 45.683]
[ 45.684] but task is already holding lock:
[ 45.685] ffff00000d2347c0 (&lock_a){+.+.}, at: thread1_fn+0x20/0x80 [deadlock]
[ 45.686]
[ 45.687] which lock already depends on the new lock.
[ 45.688]
[ 45.689] the existing dependency chain (in reverse order) is:
[ 45.690] -> #1 (&lock_a){+.+.}:
[ 45.691] thread2_fn+0x40/0x80 [deadlock]
[ 45.692] -> #0 (&lock_b){+.+.}:
[ 45.693] thread1_fn+0x40/0x80 [deadlock]
[ 45.694] ======================================================
修复:统一加锁顺序
c
/* Acquire both mutexes in one fixed order (lock_a, then lock_b) and release them in reverse. */
static void safe_dual_lock(void)
{
/* Always lock_a before lock_b */
mutex_lock(&lock_a);
mutex_lock(&lock_b);
/* ... */
mutex_unlock(&lock_b);
mutex_unlock(&lock_a);
}
七、Kdump + Crash Utility 崩溃分析
7.1 配置 Kdump
config
# kexec is what loads and boots the capture kernel
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
CONFIG_PROC_VMCORE=y
CONFIG_RELOCATABLE=y
# CONFIG_PHYSICAL_START is an x86-only option and does not exist on arm64;
# the capture kernel's memory is reserved solely through crashkernel= on the command line.
U-Boot 启动参数:
bash
setenv bootargs '... crashkernel=256M@512M'
# 预留 256MB 内存从 512MB 处开始,用于捕获崩溃
7.2 手动触发崩溃测试
bash
echo c > /proc/sysrq-trigger # 触发 panic,进入 kdump
7.3 主机侧 Crash 分析
bash
# 安装 crash
sudo apt install crash
# 分析 vmcore
crash ${BUILDROOT}/output/build/linux-custom/vmlinux /var/crash/vmcore
crash> bt -a # 所有 CPU 调用栈
crash> ps -k # 内核线程状态
crash> kmem -i # 内存使用统计
crash> mount # 文件系统挂载信息
crash> net # 网络设备状态
crash> dev -p # PCI 设备
crash> runq # 运行队列
crash> timer # 定时器列表
八、IOMMU 与 DMA 调试
8.1 RK3588 IOMMU 状态检查
bash
#!/bin/sh
# iommu_debug.sh
echo "=== IOMMU 域 ==="
ls /sys/kernel/iommu_groups/ 2>/dev/null | while read grp; do
echo "Group $grp:"
ls /sys/kernel/iommu_groups/$grp/devices/
done
echo ""
echo "=== Rockchip IOMMU 状态 ==="
cat /sys/class/iommu/iommu*/version 2>/dev/null
cat /sys/kernel/debug/iommu/rockchip_iommu/status 2>/dev/null
echo ""
echo "=== NPU IOMMU 映射 ==="
cat /sys/kernel/debug/iommu/iommu_domain.*/pgtable 2>/dev/null | head -20
8.2 DMA 映射错误注入(调试驱动)
c
/* NOTE(review): <linux/dma-debug.h> is not a public header in current kernels;
 * drivers only need the regular DMA mapping API. */
#include <linux/dma-mapping.h>
#include <linux/slab.h>

/*
 * Deliberately misuse the streaming DMA API so that CONFIG_DMA_API_DEBUG has
 * something to report.
 *
 * The injected error is an unmap whose direction differs from the one used
 * when mapping; DMA-debug flags this as freeing DMA memory with a different
 * direction. (DMA-debug does not observe CPU stores into a mapped buffer, so
 * that kind of ownership violation cannot be demonstrated with it.)
 *
 * @dev: device the mapping is created for.
 * Returns 0 on success, -ENOMEM if allocation or mapping fails.
 */
static int test_dma_mapping(struct device *dev)
{
	size_t size = PAGE_SIZE;
	dma_addr_t dma_addr;
	void *cpu_addr;
	int ret = 0;

	/*
	 * Streaming mappings need ordinary kernel memory; a buffer obtained
	 * from dma_alloc_coherent() must not be passed to dma_map_single().
	 */
	cpu_addr = kmalloc(size, GFP_KERNEL);
	if (!cpu_addr)
		return -ENOMEM;

	/* Fill the buffer before ownership moves to the device. */
	memset(cpu_addr, 0xAA, size);

	dma_addr = dma_map_single(dev, cpu_addr, size, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma_addr)) {
		ret = -ENOMEM;
		goto out_free;
	}

	/* Injected error: mapped DMA_TO_DEVICE, unmapped DMA_FROM_DEVICE. */
	dma_unmap_single(dev, dma_addr, size, DMA_FROM_DEVICE);

out_free:
	kfree(cpu_addr);
	return ret;
}
九、Perf + PMU 硬件性能剖析
9.1 RK3588 ARMv8 PMU 事件
bash
# List the available hardware events
perf list | grep armv8
# On a device-tree booted RK3588 each cluster registers its own PMU:
#   armv8_cortex_a55 (CPU0-3) and armv8_cortex_a76 (CPU4-7).
# Use whichever PMU name "perf list" prints on your kernel.
# Typical events:
# armv8_cortex_a76/inst_retired/ - instructions retired
# armv8_cortex_a76/l1d_cache_refill/ - L1 D-cache miss
# armv8_cortex_a76/l2d_cache_refill/ - L2 cache miss
# armv8_cortex_a76/bus_access/ - bus accesses
# armv8_cortex_a76/bus_cycles/ - bus cycles
9.2 NPU 推理性能分析
bash
# Track cache behaviour during NPU inference.
# CPUs 4-7 are the Cortex-A76 cores, so the events must come from that cluster's
# PMU (armv8_cortex_a76 on a device-tree boot; check "perf list" for the exact name).
perf stat -e cycles,instructions,armv8_cortex_a76/l1d_cache_refill/,armv8_cortex_a76/l2d_cache_refill/ \
-C 4-7 -a sleep 10 # monitor the CPU cores used by the NPU workload
# Generate a flame graph
perf record -F 99 -g -C 4-7 -- sleep 30
perf script | stackcollapse-perf.pl | flamegraph.pl > npu_perf.svg
十、内核热补丁(Livepatch)
10.1 修复函数示例
c
// livepatch_fix.c
#include <linux/module.h>
#include <linux/livepatch.h>
/* 原始有问题的函数(假设在 vmlinux 中) */
extern int buggy_pcie_link_up(struct dw_pcie *pci);
/* 修复后的函数 */
static int fixed_pcie_link_up(struct dw_pcie *pci)
{
u32 val;
/* 增加额外的延迟等待链路稳定 */
msleep(50);
val = dw_pcie_readl_dbi(pci, PCIE_PORT_DEBUG1);
if ((val & PCIE_PORT_DEBUG1_LINK_UP) == 0)
return 0;
/* 额外检查 LTSSM 状态 */
val = dw_pcie_readl_dbi(pci, PCIE_PORT_DEBUG0);
if ((val & 0x1f) != 0x11) // L0 状态
return 0;
return 1;
}
/* Functions to replace; the empty entry terminates the array. */
static struct klp_func funcs[] = {
{
.old_name = "dw_pcie_link_up", // name of the function to replace
.new_func = fixed_pcie_link_up,
}, { }
};
/* Objects to patch; the single entry sets no .name (presumably meaning vmlinux - verify against the livepatch API). */
static struct klp_object objs[] = {
{
.funcs = funcs,
}, { }
};
/* Top-level patch descriptor tying the object list to this module. */
static struct klp_patch patch = {
.mod = THIS_MODULE,
.objs = objs,
};
/* Module entry point: enable the patch and hand its result back to the module loader. */
static int __init livepatch_init(void)
{
	int ret = klp_enable_patch(&patch);

	return ret;
}
/*
 * Module exit point: intentionally empty.
 *
 * Current kernels have no klp_disable_patch(); a patch is disabled from
 * user space with
 *   echo 0 > /sys/kernel/livepatch/<module>/enabled
 * and the module can only be removed after that transition has finished.
 */
static void __exit livepatch_exit(void)
{
}
module_init(livepatch_init);
module_exit(livepatch_exit);
MODULE_LICENSE("GPL");
MODULE_INFO(livepatch, "Y");
十一、综合调试检查清单
| 调试场景 | 首选工具 | 备用方案 |
|---|---|---|
| 驱动 probe 失败 | dynamic_debug + dmesg | Kprobe 追踪 |
| 随机崩溃/Oops | KASAN + KCSAN | Kdump + Crash |
| 性能瓶颈 | Perf + PMU | eBPF 直方图 |
| 死锁/竞态 | Lockdep | KCSAN |
| 内存泄漏 | kmemleak | KASAN |
| 源码级调试 | KGDB | KDB |
| 硬件寄存器确认 | devmem2 | /dev/mem + mmap |
| 实时追踪 | ftrace | trace-cmd + KernelShark |
| 网络启动失败 | U-Boot dhcp + ping | Wireshark 抓包 |
如需针对 特定驱动子系统(PCIe、DRM、V4L2、SPI、I2C) 或 特定硬件模块(AX210、NPU、MIPI-CSI) 的更深入调试实例,可以进一步展开。