文章目录
- [1. 前言](#1. 前言)
- [2. 内存 domain](#2. 内存 domain)
  - [2.1 domain 硬件支持](#2.1 domain 硬件支持)
  - [2.2 Linux 的 domain 管理](#2.2 Linux 的 domain 管理)
    - [2.2.1 定义](#2.2.1 定义)
    - [2.2.2 初始化](#2.2.2 初始化)
  - [2.3 Domain fault](#2.3 Domain fault)
- [3. 参考资料](#3. 参考资料)
1. 前言
限于作者能力水平,本文可能存在谬误,因此而给读者带来的损失,作者不做任何承诺。
2. 内存 domain
本文以 ARMv7 架构 + Linux 4.14.x 为例,说明 Linux 内存管理的 domain。
2.1 domain 硬件支持
ARMv7 支持最多 16 个内存 domain:

32-bit 的 DACR 寄存器为 16 个内存 domain 各分配了 2 bit(domain n 对应 DACR 的 bit[2n+1:2n]),每个内存 domain 可以配置为以下 4 种类型之一:
- 0b00:No access
  任何对标记为 No access 类型内存 domain 的访问,都会产生 Domain fault。
- 0b01:Client
  对 Client 类型内存 domain 的访问,会按页表项中的访问权限位做权限检查,不满足权限的访问会触发 Permission fault。
- 0b10:Reserved
  保留类型。
- 0b11:Manager
  对 Manager 类型内存 domain 的访问,不做权限检查。
2.2 Linux 的 domain 管理
2.2.1 定义
首先看 Linux 下对内存 domain 编号定义:
c
/* arch/arm/include/asm/domain.h */
/*
* Domain numbers
*
* DOMAIN_IO - domain 2 includes all IO only
* DOMAIN_USER - domain 1 includes all user memory only
* DOMAIN_KERNEL - domain 0 includes all kernel memory only
*
* (The numbers listed above are the ones used when CONFIG_IO_36 is not
* defined; with CONFIG_IO_36 the kernel and IO numbers are swapped, see
* the #else branch below.)
*
* The domain numbering depends on whether we support 36 physical
* address for I/O or not. Addresses above the 32 bit boundary can
* only be mapped using supersections and supersections can only
* be set for domain 0. We could just default to DOMAIN_IO as zero,
* but there may be systems with supersection support and no 36-bit
* addressing. In such cases, we want to map system memory with
* supersections to reduce TLB misses and footprint.
*
* 36-bit addressing and supersections are only available on
* CPUs based on ARMv6+ or the Intel XSC3 core.
*/
#ifndef CONFIG_IO_36
/* No 36-bit I/O: kernel memory takes domain 0, I/O takes domain 2. */
#define DOMAIN_KERNEL 0
#define DOMAIN_USER 1
#define DOMAIN_IO 2
#else
/* 36-bit I/O: I/O takes domain 0, kernel memory takes domain 2. */
#define DOMAIN_KERNEL 2
#define DOMAIN_USER 1
#define DOMAIN_IO 0
#endif
/* Domain 3 in both configurations; a fourth domain next to kernel/user/IO. */
#define DOMAIN_VECTORS 3
从上可见,Linux 使用了 DOMAIN_KERNEL,DOMAIN_USER,DOMAIN_IO,DOMAIN_VECTORS 这 4 个内存 domain。
再来看下 Linux 对 domain 类型的定义:
c
/* arch/arm/include/asm/domain.h */
/*
* Domain types
*
* These are the 2-bit values written into a domain's DACR field:
* 0 (0b00) = No access, 1 (0b01) = Client, 3 (0b11) = Manager.
*/
#define DOMAIN_NOACCESS 0
#define DOMAIN_CLIENT 1
#ifdef CONFIG_CPU_USE_DOMAINS
#define DOMAIN_MANAGER 3
#else
/*
* Without CONFIG_CPU_USE_DOMAINS, DOMAIN_MANAGER has the same value as
* DOMAIN_CLIENT, so a "manager" domain still gets client-type handling.
*/
#define DOMAIN_MANAGER 1
#endif
2.2.2 初始化
c
/* arch/arm/kernel/head.S */
__enable_mmu:
...
#ifdef CONFIG_ARM_LPAE
...
#else
/*
* Program the memory domain access control.
* DDI0406C_d_armv7ar_arm.pdf, P554
* B4.1.43 DACR, Domain Access Control Register, VMSA
*/
mov r5, #DACR_INIT
mcr p15, 0, r5, c3, c0, 0 @ load domain access register (DACR = DACR_INIT)
#endif
DACR_INIT 的定义如下:
c
/* arch/arm/include/asm/domain.h */
/* Mask covering the 2-bit field of domain number 'dom' (bits [2*dom+1 : 2*dom]). */
#define domain_mask(dom) ((3) << (2 * (dom)))
/* Domain type 'type' shifted into the 2-bit field of domain number 'dom'. */
#define domain_val(dom,type) ((type) << (2 * (dom)))
/*
* DACR_INIT: initial domain access value built from the four domains in use.
* The two variants differ only in the type given to DOMAIN_USER.
*/
#ifdef CONFIG_CPU_SW_DOMAIN_PAN
/* With CONFIG_CPU_SW_DOMAIN_PAN: DOMAIN_USER starts out as No access. */
#define DACR_INIT \
(domain_val(DOMAIN_USER, DOMAIN_NOACCESS) | \
domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \
domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT))
#else
/* Without CONFIG_CPU_SW_DOMAIN_PAN: DOMAIN_USER starts out as Client. */
#define DACR_INIT \
(domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \
domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \
domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT))
#endif
所以,__enable_mmu 启用 MMU 时,初始化 DOMAIN_KERNEL,DOMAIN_USER,DOMAIN_IO,DOMAIN_VECTORS 这 4 个内存 domain 的类型到 DACR 寄存器。在 CONFIG_CPU_SW_DOMAIN_PAN 开启或关闭的两种情形下,这 4 个内存 domain 的类型各有不同,具体见上面 DACR_INIT 的定义。
这里只是初始设定 Linux DOMAIN_KERNEL,DOMAIN_USER,DOMAIN_IO,DOMAIN_VECTORS 这 4 个内存 domain 的类型,那么,怎么决定哪块内存属于哪个 domain?答案是页表(这里讨论的是 Short-descriptor 格式页表项):


mem_types[] 定义了各种类型内存所属的 domain 编号(即 domain number,如 DOMAIN_KERNEL,而不是 No access/Client/Manager 这样的 domain 类型),而后创建页表时,按 mem_types[] 设置各类型内存的页表项的 domain 域:
c
/* arch/arm/mm/mmu.c */
/*
* Table of mapping attributes, one entry per memory type (indexed by the
* MT_* constants). Besides the protection bits for PTEs (.prot_pte),
* first-level table entries (.prot_l1) and section entries (.prot_sect),
* each entry records in .domain the domain NUMBER the memory type belongs
* to: DOMAIN_IO for the device/uncached types, DOMAIN_VECTORS for the two
* vector types, and DOMAIN_KERNEL for everything else listed here.
*/
static struct mem_type mem_types[] __ro_after_init = {
[MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
L_PTE_SHARED,
.prot_pte_s2 = s2_policy(PROT_PTE_S2_DEVICE) |
s2_policy(L_PTE_S2_MT_DEV_SHARED) |
L_PTE_SHARED,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE | PMD_SECT_S,
.domain = DOMAIN_IO,
},
[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE,
.domain = DOMAIN_IO,
},
[MT_DEVICE_CACHED] = { /* ioremap_cached */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB,
.domain = DOMAIN_IO,
},
[MT_DEVICE_WC] = { /* ioremap_wc */
.prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PROT_SECT_DEVICE,
.domain = DOMAIN_IO,
},
[MT_UNCACHED] = {
.prot_pte = PROT_PTE_DEVICE,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
.domain = DOMAIN_IO,
},
[MT_CACHECLEAN] = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
.domain = DOMAIN_KERNEL,
},
#ifndef CONFIG_ARM_LPAE
[MT_MINICLEAN] = {
.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
.domain = DOMAIN_KERNEL,
},
#endif
[MT_LOW_VECTORS] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_RDONLY,
.prot_l1 = PMD_TYPE_TABLE,
.domain = DOMAIN_VECTORS,
},
[MT_HIGH_VECTORS] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_USER | L_PTE_RDONLY,
.prot_l1 = PMD_TYPE_TABLE,
.domain = DOMAIN_VECTORS,
},
[MT_MEMORY_RWX] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
.domain = DOMAIN_KERNEL,
},
[MT_MEMORY_RW] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_XN,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
.domain = DOMAIN_KERNEL,
},
[MT_ROM] = {
.prot_sect = PMD_TYPE_SECT,
.domain = DOMAIN_KERNEL,
},
[MT_MEMORY_RWX_NONCACHED] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_MT_BUFFERABLE,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
.domain = DOMAIN_KERNEL,
},
[MT_MEMORY_RW_DTCM] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_XN,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
.domain = DOMAIN_KERNEL,
},
[MT_MEMORY_RWX_ITCM] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
.prot_l1 = PMD_TYPE_TABLE,
.domain = DOMAIN_KERNEL,
},
[MT_MEMORY_RW_SO] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_MT_UNCACHED | L_PTE_XN,
.prot_l1 = PMD_TYPE_TABLE,
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
PMD_SECT_UNCACHED | PMD_SECT_XN,
.domain = DOMAIN_KERNEL,
},
[MT_MEMORY_DMA_READY] = {
.prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
L_PTE_XN,
.prot_l1 = PMD_TYPE_TABLE,
.domain = DOMAIN_KERNEL,
},
};
build_mem_type_table() 初始化各类型内存的 domain number(DOMAIN_KERNEL, ...):
c
start_kernel()
setup_arch()
early_mm_init()
build_mem_type_table()
/*
* Excerpt of build_mem_type_table(): only the final loop that folds each
* memory type's domain number into its page-table attribute templates is
* shown; the rest of the function is elided ("...").
*/
static void __init build_mem_type_table(void)
{
...
/*
* Merge the domain number (DOMAIN_KERNEL, ...) into the initial
* first-level attribute values; it reaches the real page-table entries
* later, when the mappings are created. Only non-zero prot_l1 and
* prot_sect templates are touched; prot_pte is not modified here.
*/
for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
struct mem_type *t = &mem_types[i];
if (t->prot_l1)
t->prot_l1 |= PMD_DOMAIN(t->domain);
if (t->prot_sect)
t->prot_sect |= PMD_DOMAIN(t->domain);
}
}
创建页表时将预设的 domain number(DOMAIN_KERNEL, ...) 填充到页目录表项,如 BOOT 阶段创建 lowmem 页表的 create_mapping() 调用:
c
create_mapping()
__create_mapping(&init_mm, md, early_alloc, false);
/*
* Excerpt of __create_mapping(): looks up the mem_type for the requested
* mapping and hands it down to alloc_init_pud() for every pgd step.
* Elided parts are marked "...".
*/
static void __init __create_mapping(struct mm_struct *mm, struct map_desc *md,
void *(*alloc)(unsigned long sz),
bool ng)
{
...
const struct mem_type *type;
...
/*
* Attribute template for memory of type md->type; its domain number
* has already been merged in by build_mem_type_table().
*/
type = &mem_types[md->type];
do {
...
alloc_init_pud(pgd, addr, next, phys, type, alloc, ng);
...
} while (pgd++, addr != end);
}
alloc_init_pud()
alloc_init_pmd()
/*
 * Populate the PMD entries covering [addr, end) for one PUD.
 *
 * @pud:   PUD entry whose PMD range is being filled
 * @addr:  first virtual address to map
 * @end:   virtual address one past the end of the range
 * @phys:  physical address that @addr maps to
 * @type:  attribute template for the memory type being mapped
 * @alloc: allocator passed through to alloc_init_pte() for page tables
 * @ng:    passed through to the section/page mapping helpers
 *
 * Each PMD-sized step is mapped with sections when the memory type has a
 * section template and virtual start, virtual end and physical address are
 * all section aligned; otherwise it is mapped with pages.
 */
static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
				  unsigned long end, phys_addr_t phys,
				  const struct mem_type *type,
				  void *(*alloc)(unsigned long sz), bool ng)
{
	pmd_t *pmd = pmd_offset(pud, addr);
	unsigned long next;

	do {
		next = pmd_addr_end(addr, end);

		if (type->prot_sect &&
		    ((addr | next | phys) & ~SECTION_MASK) == 0) {
			/* Section mapping */
			__map_init_section(pmd, addr, next, phys, type, ng);
		} else {
			/* Page mapping */
			alloc_init_pte(pmd, addr, next,
				       __phys_to_pfn(phys), type, alloc, ng);
		}

		/*
		 * Advance the physical address by the amount of virtual
		 * space just mapped; without this every step would map
		 * the same physical address.
		 */
		phys += next - addr;

	} while (pmd++, addr = next, addr != end);
}
/*
* Write section entries for [addr, end) starting at physical address phys,
* then flush the first PMD entry that was passed in.
*/
static void __init __map_init_section(pmd_t *pmd, unsigned long addr,
unsigned long end, phys_addr_t phys,
const struct mem_type *type, bool ng)
{
pmd_t *p = pmd;
#ifndef CONFIG_ARM_LPAE
/*
* In classic MMU format, puds and pmds are folded in to
* the pgds. pmd_offset gives the PGD entry. PGDs refer to a
* group of L1 entries making up one logical pointer to
* an L2 table (2MB), where as PMDs refer to the individual
* L1 entries (1MB). Hence increment to get the correct
* offset for odd 1MB sections.
* (See arch/arm/include/asm/pgtable-2level.h)
*/
if (addr & SECTION_SIZE)
pmd++;
#endif
do {
/*
* type->prot_sect already contains the domain number merged in by
* build_mem_type_table(), so writing it here fixes the domain number
* of the mapped section. The access TYPE of that domain is not stored
* in the entry: it is the 2-bit DACR field selected by
* domain_mask(domain number), i.e.
* (DACR & domain_mask(dom)) >> (2 * dom),
* where DACR was loaded with DACR_INIT in __enable_mmu.
*/
*pmd = __pmd(phys | type->prot_sect | (ng ? PMD_SECT_nG : 0));
phys += SECTION_SIZE;
} while (pmd++, addr += SECTION_SIZE, addr != end);
flush_pmd_entry(p);
}
/*
* Map [addr, end) with page-sized entries, starting at page frame pfn.
*/
static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long pfn,
const struct mem_type *type,
void *(*alloc)(unsigned long sz),
bool ng)
{
/*
* For page mappings the domain number travels in type->prot_l1 (merged
* in by build_mem_type_table()), which is handed to arm_pte_alloc()
* here. NOTE(review): arm_pte_alloc() is not shown in this excerpt;
* presumably it installs prot_l1 in the first-level entry that points
* to the PTE table -- confirm against arch/arm/mm/mmu.c.
*/
pte_t *pte = arm_pte_alloc(pmd, addr, type->prot_l1, alloc);
do {
/*
* type->prot_pte carries only the page attributes;
* build_mem_type_table() does not merge a domain number into it.
* The domain's access TYPE is the 2-bit DACR field selected by
* domain_mask(domain number), i.e.
* (DACR & domain_mask(dom)) >> (2 * dom).
*/
set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)),
ng ? PTE_EXT_NG : 0);
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
}
上面的代码分析的是 BOOT 阶段内存映射时 domain number 设置过程,那内核空间其它动态内存映射,以及用户空间内存映射,又是如何配置内存的 domain number 呢?
先看下内核空间其它动态内存映射 domain number 的设置:
c
/* arch/arm/include/asm/pgalloc.h */
/* [Kernel space] attribute bits for a PMD table entry; domain is DOMAIN_KERNEL */
#define _PAGE_KERNEL_TABLE (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_KERNEL))
/*
* Populate the pmdp entry with a pointer to the pte. This pmd is part
* of the mm address space.
*
* Ensure that we always set both PMD entries.
*
* The entry is written with _PAGE_KERNEL_TABLE, so it always carries the
* DOMAIN_KERNEL domain number.
*/
static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
{
/*
* The pmd must be loaded with the physical address of the PTE table
*/
__pmd_populate(pmdp, __pa(ptep), _PAGE_KERNEL_TABLE);
}
可以看到,内核空间动态内存映射,将内存的 domain number 设置为 DOMAIN_KERNEL。
再来看看用户空间内存映射 domain number 的设置:
c
/* arch/arm/include/asm/pgalloc.h */
/* [User space] attribute bits for a PMD table entry; domain is DOMAIN_USER */
#define _PAGE_USER_TABLE (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_USER))
c
/* arch/arm/mm/mmu.c */
/* Run-time copy of the user PMD table attribute bits, initialised to _PAGE_USER_TABLE. */
pmdval_t user_pmd_table = _PAGE_USER_TABLE;
c
/* arch/arm/include/asm/pgalloc.h */
/*
* Point the pmdp entry at the PTE table page ptep, using the user-space
* table attribute bits.
*/
static inline void
pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t ptep)
{
extern pmdval_t user_pmd_table;
pmdval_t prot;
/*
* ARMv6 and later without LPAE take the bits from the variable
* user_pmd_table; every other configuration uses the constant
* _PAGE_USER_TABLE directly.
*/
if (__LINUX_ARM_ARCH__ >= 6 && !IS_ENABLED(CONFIG_ARM_LPAE))
prot = user_pmd_table;
else
prot = _PAGE_USER_TABLE;
__pmd_populate(pmdp, page_to_phys(ptep), prot);
}
可见,用户空间内存映射,将内存的 domain number 设置为 DOMAIN_USER。
2.3 Domain fault
看一个 domain 访问触发的 Domain fault 例子:
c
Unhandled fault: page domain fault (0x01b) at 0x00000000
pgd = 9ea5c000
[00000000] *pgd=7eedc831, *pte=00000000, *ppte=00000000
Internal error: : 1b [#1] SMP ARM
Modules linked in:
CPU: 1 PID: 982 Comm: ping Not tainted 4.14.111 #21
Hardware name: ARM-Versatile Express
task: 9ee44800 task.stack: 9ea36000
PC is at ___bpf_prog_run+0x1440/0x19f8
LR is at irq_work_queue+0x14/0x78
pc : [<801eec58>] lr : [<801eae7c>] psr: 800e0013
sp : 9ea37978 ip : 00000000 fp : 9ea379dc
r10: 801eb234 r9 : 8080c668 r8 : 9ea37a00
r7 : 00000000 r6 : 9ef94c00 r5 : 801fa818 r4 : a12be094
r3 : 00000001 r2 : 9ea37a08 r1 : 00000000 r0 : 00000000
Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none
Control: 10c5387d Table: 7ea5c06a DAC: 00000051
Process ping (pid: 982, stack limit = 0x9ea36210)
Stack: (0x9ea37978 to 0x9ea38000)
7960: 0000000c 00000000
7980: 9ef94c00 00000000 9ef94c00 00000011 600e0013 00000000 00000000 00000000
79a0: 808ce7e4 00000000 9ea379f4 9ea379b8 80169f5c a12be000 9ef94c00 00000011
79c0: 8092230c 9f411000 00000062 00000062 9ea37a64 9ea379e0 801ef648 801ed824
79e0: 00000062 00000062 9ea37a0c 9ea379f8 8016a258 80169e7c 80926de4 00000001
7a00: 9f7b7098 00000000 00000000 00000000 9ea379fc 00000000 00000001 00000000
7a20: 9ea37a4c 9f60c5c0 9ef94c00 00000011 9ef94c00 00000000 8016af94 8016b5e0
7a40: 9ea37a6c 9ea37a5c 9ea37a9c 9ea37a68 9ea37a00 00000000 9ea37a9c 9ea37a68
7a60: 806c0c64 801ef618 00000000 00000000 00000000 00000000 00000000 00000000
7a80: 9ef94c00 9ee1dc00 9f411000 9efe6202 9ea37ad4 9ea37aa0 806c0d98 806c0bf4
7aa0: c3db3c00 1889692d 9ef949c0 806c0d10 9ef94c00 9ee1df80 9ef949c0 9f411054
7ac0: 9f411000 80921294 9ea37b14 9ea37ad8 8061dd40 806c0d1c 80b04e7c 9f411000
7ae0: 01080020 00000000 401d7c69 9ef949c0 80a86204 00000000 9f411000 9f5fec00
7b00: 9f411000 00000000 9ea37b6c 9ea37b18 806237ec 8061dadc 9ef949c0 80a86204
7b20: 9ea37b88 80b02d00 80b04e7c 80b03c6c 9ea36000 80b65244 80b65230 9f5fec00
7b40: 80710b34 9ef949c0 80a86204 00000001 00000000 9f5fec00 9f411000 8064c65c
7b60: 9ea37bcc 9ea37b70 806240f0 8062374c 80632de4 9ef94900 9f6cdec8 9f411000
7b80: 9ea37bac 00000200 fffffff4 ffffe000 80634030 9ef94914 00000008 8064c688
7ba0: 00000800 9ef949c0 9ef94900 9f411000 00000000 00000000 9ef9495c 8064c65c
7bc0: 9ea37bdc 9ea37bd0 80624194 80623aa8 9ea37c14 9ea37be0 80633fcc 80624184
7be0: 00000000 00000054 9ea37c14 9f411000 9ef949c0 9ef94900 9f5d9980 80b54740
7c00: 0000ffff 00000000 9ea37c44 9ea37c18 8065e668 80633ec8 00000008 0100007f
7c20: 9ef949c0 9eeeec00 9ef949c0 9f411000 80b54740 0000ffff 9ea37c7c 9ea37c48
7c40: 80660524 8065e464 9ea37d14 00000000 9ea37c7c 9ef949c0 80b54740 9eeeec00
7c60: 9f411000 9ea37d14 00000000 00000000 9ea37c9c 9ea37c80 8066089c 80660348
7c80: 9ef949c0 80b54740 9eeeec00 00000000 9ea37cbc 9ea37ca0 8065e1e4 806607f8
7ca0: 00000040 80b54740 9eeeec00 00000000 9ea37cd4 9ea37cc0 8066116c 8065e184
7cc0: 00000040 9ea37f48 9ea37ce4 9ea37cd8 80661220 80661154 9ea37dec 9ea37ce8
7ce0: 8068a2e0 806611f8 00000040 00000000 9ea37d2c 9ea37d14 00000000 80b03cac
7d00: 9ea37d7c 9ea37d10 801562cc 80151a94 80a868c0 00000000 9fbc38c0 9ea37f48
7d20: 80700008 ffffe000 00000002 9ea37d38 80140000 0100007f 00000000 00000000
7d40: ffff0000 00000200 00000001 00000001 00000000 00010000 00000000 9ea37e48
7d60: 00000000 00000000 00000000 802767f4 0100007f 0100007f 00000008 9ea37e24
7d80: 00000001 00000001 9ea36000 00000128 9ea37dd4 9ea37da0 8042a2d4 8027676c
7da0: 9ea37e48 9ea37dac 00000010 9ea37e48 00000010 9ea37f50 00000000 9ea37e24
7dc0: 00000051 9eeeec00 00000000 00000000 9f2d6480 801080a4 9ea37e28 00000000
7de0: 9ea37e0c 9ea37df0 8069842c 80689e5c 00000001 0001814c 9ea37f48 00000000
7e00: 9ea37e1c 9ea37e10 8060397c 806983e0 9ea37f34 9ea37e20 8060444c 80603964
7e20: 9efae120 00000000 9ea37ea4 9ea37e38 8024ec74 80217d8c 9f2d6320 fffff000
7e40: 9efae120 00000054 00019314 00000040 76f08000 9ea5ddb8 9ea5ddb8 00000000
7e60: 00000000 00000000 00000000 00000000 9ffdad5c 00000000 9ea37ea4 9ea37fb0
7e80: 9ee44800 80000017 00000002 0100007f 00000000 00000000 9ea37efc 9ea37ea8
7ea0: 80711608 80161aa4 00000000 80609330 00001180 00000001 00000000 00000000
7ec0: 00000000 00000000 00000100 00000054 9f2d6480 80b0885c 00000017 807113ac
7ee0: 76f08314 9ea37fb0 00000000 802891f4 7edad114 9f2fb428 9ef57600 80295de4
7f00: 9ea37f1c 9ea37f10 80295de4 00018164 00000000 9f2d6480 00000128 801080a4
7f20: 9ea36000 00000128 9ea37f94 9ea37f38 806052e4 8060426c 00000000 00000000
7f40: 00000000 fffffff7 9ea37e88 00000010 00000001 00000000 00000000 9ea37e50
7f60: 00000000 0000004e 0001814c 00000000 00000000 00000000 7edac150 00019304
7f80: 00000000 00000040 9ea37fa4 9ea37f98 80605328 806052a0 00000000 9ea37fa8
7fa0: 80107ec0 8060531c 00019304 00000000 00000003 00018164 00000000 0001813c
7fc0: 00019304 00000000 00000040 00000128 0002b350 00018230 000192f0 10624dd3
7fe0: 00000000 7edac14c 0000af4b 76f36ad8 400e0030 00000003 00000000 00000000
[<801eec58>] (___bpf_prog_run) from [<801ef648>] (__bpf_prog_run32+0x3c/0x44)
[<801ef648>] (__bpf_prog_run32) from [<806c0c64>] (run_filter+0x7c/0x128)
[<806c0c64>] (run_filter) from [<806c0d98>] (packet_rcv+0x88/0x38c)
[<806c0d98>] (packet_rcv) from [<8061dd40>] (dev_queue_xmit_nit+0x270/0x298)
[<8061dd40>] (dev_queue_xmit_nit) from [<806237ec>] (dev_hard_start_xmit+0xac/0x258)
[<806237ec>] (dev_hard_start_xmit) from [<806240f0>] (__dev_queue_xmit+0x654/0x6dc)
[<806240f0>] (__dev_queue_xmit) from [<80624194>] (dev_queue_xmit+0x1c/0x20)
[<80624194>] (dev_queue_xmit) from [<80633fcc>] (neigh_resolve_output+0x110/0x198)
[<80633fcc>] (neigh_resolve_output) from [<8065e668>] (ip_finish_output2+0x210/0x44c)
[<8065e668>] (ip_finish_output2) from [<80660524>] (ip_finish_output+0x1e8/0x260)
[<80660524>] (ip_finish_output) from [<8066089c>] (ip_output+0xb0/0xbc)
[<8066089c>] (ip_output) from [<8065e1e4>] (ip_local_out+0x6c/0x78)
[<8065e1e4>] (ip_local_out) from [<8066116c>] (ip_send_skb+0x24/0xa4)
[<8066116c>] (ip_send_skb) from [<80661220>] (ip_push_pending_frames+0x34/0x40)
[<80661220>] (ip_push_pending_frames) from [<8068a2e0>] (raw_sendmsg+0x490/0x83c)
[<8068a2e0>] (raw_sendmsg) from [<8069842c>] (inet_sendmsg+0x58/0xf4)
[<8069842c>] (inet_sendmsg) from [<8060397c>] (sock_sendmsg+0x24/0x34)
[<8060397c>] (sock_sendmsg) from [<8060444c>] (___sys_sendmsg+0x1ec/0x214)
[<8060444c>] (___sys_sendmsg) from [<806052e4>] (__sys_sendmsg+0x50/0x7c)
[<806052e4>] (__sys_sendmsg) from [<80605328>] (SyS_sendmsg+0x18/0x1c)
[<80605328>] (SyS_sendmsg) from [<80107ec0>] (ret_fast_syscall+0x0/0x28)
Code: e2844008 e798c182 e3a01000 e0882183 (e79c0000)
---[ end trace b3bef05bd8168206 ]---
Kernel panic - not syncing: Fatal exception in interrupt
CPU2: stopping
CPU: 2 PID: 0 Comm: swapper/2 Tainted: G D 4.14.111 #21
Hardware name: ARM-Versatile Express
[<80110660>] (unwind_backtrace) from [<8010c3e0>] (show_stack+0x20/0x24)
[<8010c3e0>] (show_stack) from [<806f4e18>] (dump_stack+0xac/0xd8)
[<806f4e18>] (dump_stack) from [<8010ea44>] (handle_IPI+0x2d0/0x34c)
[<8010ea44>] (handle_IPI) from [<80101570>] (gic_handle_irq+0x9c/0xa0)
[<80101570>] (gic_handle_irq) from [<80710b30>] (__irq_svc+0x70/0x98)
Exception stack(0x9f509f28 to 0x9f509f70)
9f20: 00000001 00000000 00000000 80b0402c 9f508000 00000000
9f40: 00000000 80b03cb8 80b03c6c 80a85c08 9f509f98 9f509f84 9f509f78 9f509f78
9f60: 801088c8 801088cc 60070013 ffffffff
[<80710b30>] (__irq_svc) from [<801088cc>] (arch_cpu_idle+0x34/0x4c)
[<801088cc>] (arch_cpu_idle) from [<807102b4>] (default_idle_call+0x34/0x48)
[<807102b4>] (default_idle_call) from [<8015e9b4>] (do_idle+0x16c/0x218)
[<8015e9b4>] (do_idle) from [<8015ed0c>] (cpu_startup_entry+0x28/0x2c)
[<8015ed0c>] (cpu_startup_entry) from [<8010e504>] (secondary_start_kernel+0x168/0x170)
[<8010e504>] (secondary_start_kernel) from [<60101a0c>] (0x60101a0c)
CPU3: stopping
CPU: 3 PID: 0 Comm: swapper/3 Tainted: G D 4.14.111 #21
Hardware name: ARM-Versatile Express
[<80110660>] (unwind_backtrace) from [<8010c3e0>] (show_stack+0x20/0x24)
[<8010c3e0>] (show_stack) from [<806f4e18>] (dump_stack+0xac/0xd8)
[<806f4e18>] (dump_stack) from [<8010ea44>] (handle_IPI+0x2d0/0x34c)
[<8010ea44>] (handle_IPI) from [<80101570>] (gic_handle_irq+0x9c/0xa0)
[<80101570>] (gic_handle_irq) from [<80710b30>] (__irq_svc+0x70/0x98)
Exception stack(0x9f50bf28 to 0x9f50bf70)
bf20: 00000001 00000000 00000000 80b0402c 9f50a000 00000000
bf40: 00000000 80b03cb8 80b03c6c 80a85c08 9f50bf98 9f50bf84 9f50bf78 9f50bf78
bf60: 801088c8 801088cc 600e0013 ffffffff
[<80710b30>] (__irq_svc) from [<801088cc>] (arch_cpu_idle+0x34/0x4c)
[<801088cc>] (arch_cpu_idle) from [<807102b4>] (default_idle_call+0x34/0x48)
[<807102b4>] (default_idle_call) from [<8015e9b4>] (do_idle+0x16c/0x218)
[<8015e9b4>] (do_idle) from [<8015ed0c>] (cpu_startup_entry+0x28/0x2c)
[<8015ed0c>] (cpu_startup_entry) from [<8010e504>] (secondary_start_kernel+0x168/0x170)
[<8010e504>] (secondary_start_kernel) from [<60101a0c>] (0x60101a0c)
CPU0: stopping
CPU: 0 PID: 0 Comm: swapper/0 Tainted: G D 4.14.111 #21
Hardware name: ARM-Versatile Express
[<80110660>] (unwind_backtrace) from [<8010c3e0>] (show_stack+0x20/0x24)
[<8010c3e0>] (show_stack) from [<806f4e18>] (dump_stack+0xac/0xd8)
[<806f4e18>] (dump_stack) from [<8010ea44>] (handle_IPI+0x2d0/0x34c)
[<8010ea44>] (handle_IPI) from [<80101570>] (gic_handle_irq+0x9c/0xa0)
[<80101570>] (gic_handle_irq) from [<80710b30>] (__irq_svc+0x70/0x98)
Exception stack(0x80b01ee0 to 0x80b01f28)
1ee0: 00000001 00000000 00000000 80b0402c 80b00000 00000000 00000000 80b03cb8
1f00: 80b03c6c 80a85c08 80b01f50 80b01f3c 80b01f30 80b01f30 801088c8 801088cc
1f20: 60070013 ffffffff
[<80710b30>] (__irq_svc) from [<801088cc>] (arch_cpu_idle+0x34/0x4c)
[<801088cc>] (arch_cpu_idle) from [<807102b4>] (default_idle_call+0x34/0x48)
[<807102b4>] (default_idle_call) from [<8015e9b4>] (do_idle+0x16c/0x218)
[<8015e9b4>] (do_idle) from [<8015ed0c>] (cpu_startup_entry+0x28/0x2c)
[<8015ed0c>] (cpu_startup_entry) from [<80709ce0>] (rest_init+0xbc/0xc0)
[<80709ce0>] (rest_init) from [<80a00dec>] (start_kernel+0x3a8/0x3b4)
---[ end Kernel panic - not syncing: Fatal exception in interrupt
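这里是一个 eBPF 测试程序导致的 Domain fault 异常。日志中的 DAC: 00000051 表明出错时 DACR 里 domain 1(DOMAIN_USER)的类型为 0b00(No access),与 CONFIG_CPU_SW_DOMAIN_PAN 开启时 DACR_INIT 的取值一致,所以内核态访问用户空间地址 0x00000000 时触发了 Domain fault。简略分析下日志: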
c
Unhandled fault: page domain fault (0x01b) at 0x00000000
日志显示,当前 DFSR = 0x01b:

由 DFSR 的 Domain 域(bit[7:4])得知 domain number 为 0x1(即 DOMAIN_USER);fault 类型 FS[4:0] 由 DFSR 的 bit[10] 和 bit[3:0] 拼接而成,这里为 0b01011(bit[3:0] = 0xb),即下图中的 Second level 页表访问引发的 Domain fault:

page domain fault 信息来自 arch/arm/mm/fsr-2level.c 的 fsr_info[] 表(上下文使用 2 级分页):
c
/*
* Excerpt of the fault status table; only the "page domain fault" entry is
* shown. Its handler is do_bad, and it carries SIGSEGV / SEGV_ACCERR.
*/
static struct fsr_info fsr_info[] = {
...
{ do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" },
...
};
Unhandled fault: page domain fault (0x01b) at 0x00000000 这条日志,由 data abort 异常的 C 入口函数 do_DataAbort() 打印:
c
/* arch/arm/mm/fault.c */
/*
* Fault handler for fault types that are never fixed up: it does nothing
* and always returns 1. do_DataAbort() treats a non-zero return from the
* handler as "unhandled" and goes on to report the fault.
*/
static int
do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
return 1;
}
/*
* C entry point for data aborts: dispatch to the handler that fsr_info[]
* lists for the fault status, and report the fault if the handler does not
* resolve it.
*
* addr: faulting address
* fsr:  fault status value
* regs: register state at the time of the abort
*/
asmlinkage void __exception
do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
/* Select the table entry for this fault status. */
const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
struct siginfo info;
/* A handler returning 0 has dealt with the fault. */
if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) /* a Domain fault ends up in do_bad() */
return;
/* Unhandled: print the fault name, status and address, then the page-table entries. */
pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
inf->name, fsr, addr);
show_pte(current->mm, addr);
/* Fill in the signal described by the table entry and hand it to arm_notify_die(). */
info.si_signo = inf->sig;
info.si_errno = 0;
info.si_code = inf->code;
info.si_addr = (void __user *)addr;
arm_notify_die("", regs, &info, fsr, 0);
}
另外,从页表项的打印日志
c
pgd = 9ea5c000
[00000000] *pgd=7eedc831, *pte=00000000, *ppte=00000000
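的 *pgd=7eedc831,即第一级页表项的值 0x7eedc831,也可以推导出是对 DOMAIN_USER 的访问出错。按 Short-descriptor 格式第一级页表项(Page table 类型)拆分:0x7eedc831 = 0b0111111011101101110010 0 0001 1 0 0 01,从高到低依次为 bit[31:10] 二级页表基址、bit[9]、bit[8:5] Domain、bit[4]、bit[3] NS、bit[2] PXN、bit[1:0] 类型(0b01 表示 Page table),得知 domain number = 0b0001,即 domain = DOMAIN_USER。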
3. 参考资料
[1] DDI0406C_d_armv7ar_arm.pdf