1、简介
以 rk3568 的 pcie 代码为例,简要介绍 pcie 初始化时设备枚举的过程。其中比较重要的函数是 `pci_scan_child_bus_extend` 和 `pci_scan_bridge_extend`,这两个函数是递归的核心。简要函数调用流程如下:
2、pci_scan_child_bus_extend
- 该函数入参 bus 为 pci_bus 结构,表示一个总线资源。该函数首次被调用时,传递下来的是 root bus 结构
- 该函数作用为扫描并创建当前 bus (入参)上的所有 pci 设备(可能是 endpoint 设备,也可能是桥设备)
- 当前 bus 扫描结束后,调用 `pci_scan_bridge_extend` 尝试遍历下一级 bus
- 返回值为新的 subordinate number(通过不断递归得到的、当前 bus 下最深一级的 bus 号)
c
/**
* pci_scan_child_bus_extend() - Scan devices below a bus
* @bus: Bus to scan for devices
* @available_buses: Total number of buses available (%0 does not try to
* extend beyond the minimal)
*
* Scans devices below @bus including subordinate buses. Returns new
* subordinate number including all the found devices. Passing
* @available_buses causes the remaining bus space to be distributed
* equally between hotplug-capable bridges to allow future extension of the
* hierarchy.
*
* Excerpt: lines consisting of "......" mark code elided from the original
* function (local declarations, bridge counting, return path).
*/
static unsigned int pci_scan_child_bus_extend(struct pci_bus *bus,
unsigned int available_buses)
{
......
/* Go find them, Rover! */
/*
* Walk every devfn on this bus in steps of 8 and scan each slot.
* Devices found are expected to get a pci_dev attached to this bus
* (done inside pci_scan_slot(), which is not shown in this excerpt).
*/
for (devfn = 0; devfn < 256; devfn += 8) {
nr_devs = pci_scan_slot(bus, devfn);
/*
* The Jailhouse hypervisor may pass individual functions of a
* multi-function device to a guest without passing function 0.
* Look for them as well.
*/
if (jailhouse_paravirt() && nr_devs == 0) {
for (fn = 1; fn < 8; fn++) {
/*
* Probe functions 1..7 of this slot individually.
* pci_scan_single_device() creates and registers the pci_dev
* for an endpoint or a bridge alike.
*/
dev = pci_scan_single_device(bus, devfn + fn);
if (dev)
dev->multifunction = 1;
}
}
}
/* Reserve buses for SR-IOV capability */
/* See the earlier SR-IOV chapter of this series for how the reserved range is derived. */
used_buses = pci_iov_bus_range(bus);
max += used_buses;
......
/*
* Scan bridges that are already configured. We don't touch them
* unless they are misconfigured (which will be done in the second
* scan below).
*/
/* First pass: visits only the bridge devices on this bus, not endpoints. */
for_each_pci_bridge(dev, bus) {
cmax = max;
/*
* pass == 0: only handles bridges whose bus numbers were already set up
* by firmware (BIOS/bootloader); kept for cross-architecture
* compatibility.
*/
max = pci_scan_bridge_extend(bus, dev, max, 0, 0);
/*
* Reserve one bus for each bridge now to avoid extending
* hotplug bridges too much during the second scan below.
*/
used_buses++;
/*
* NOTE(review): cmax is the value max had before the call, and the
* second loop below tests "max - cmax". "cmax - max" here looks
* inverted -- verify against current upstream before relying on it.
*/
if (cmax - max > 1)
used_buses += cmax - max - 1;
}
/* Scan bridges that need to be reconfigured */
/* Second pass: again visits only the bridge devices on this bus. */
for_each_pci_bridge(dev, bus) {
unsigned int buses = 0;
if (!hotplug_bridges && normal_bridges == 1) {
/*
* There is only one bridge on the bus (upstream
* port) so it gets all available buses which it
* can then distribute to the possible hotplug
* bridges below.
*/
buses = available_buses;
} else if (dev->is_hotplug_bridge) {
/*
* Distribute the extra buses between hotplug
* bridges if any.
*/
buses = available_buses / hotplug_bridges;
buses = min(buses, available_buses - used_buses + 1);
}
cmax = max;
/*
* pass == 1: this is the call that actually descends into the next bus
* level. "buses" is the number of bus numbers this bridge may hand out
* below itself, not a bus number.
*/
max = pci_scan_bridge_extend(bus, dev, cmax, buses, 1);
/* One bus is already accounted so don't add it again */
if (max - cmax > 1)
used_buses += max - cmax - 1;
}
......
}
3、pci_scan_single_device
`pci_scan_single_device` 函数是初始化 pcie 设备的重中之重。本篇其余部分讲的都是 pcie 设备枚举的过程,只有这个函数是配置函数。
c
/*
 * pci_scan_single_device() - look up or create the device at @devfn on @bus
 * @bus: bus the device sits on
 * @devfn: encoded device/function number to probe
 *
 * Return: the pci_dev already known for this slot, a newly scanned and
 * added pci_dev, or NULL when pci_scan_device() finds nothing.
 */
struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn)
{
	struct pci_dev *found = pci_get_slot(bus, devfn);

	/*
	 * Already known: hand back the existing device. The put presumably
	 * balances the reference pci_get_slot() took -- confirm in its docs.
	 */
	if (found) {
		pci_dev_put(found);
		return found;
	}

	/* Not known yet: probe the slot and build a pci_dev for it. */
	found = pci_scan_device(bus, devfn);

	/* Register the new device on the bus; this also sets up its capabilities. */
	if (found)
		pci_device_add(found, bus);

	return found;
}
bash
pci_device_add()
+-> pci_init_capabilities()
+-> pci_ea_init
+-> pci_configure_ari
+-> pci_iov_init
+-> ......
4、pci_scan_bridge_extend
- 该函数入参 bus 为 pci_bus 结构,表示一个总线资源。该函数首次被调用时,传递下来的是 root bus 结构
- 该函数作用为扫描并创建当前 bus (入参)的下级 bus
- 在 `pci_add_new_bus` 函数中会去创建新的 bus 结构
- dev 结构,是 pci 设备的 pci_dev 结构。注意,这里的 pci 设备只会是 pci 桥设备,不会是 endpoint 设备
- 返回值为新的 subordinate number(通过不断递归得到的、当前 bus 下最深一级的 bus 号)
c
/*
* pci_scan_bridge_extend() - Scan buses behind a bridge
* @bus: Parent bus the bridge is on
* @dev: Bridge itself
* @max: Starting subordinate number of buses behind this bridge
* @available_buses: Total number of buses available for this bridge and
* the devices below. After the minimal bus space has
* been allocated the remaining buses will be
* distributed equally between hotplug-capable bridges.
* @pass: Either %0 (scan already configured bridges) or %1 (scan bridges
* that need to be reconfigured).
*
* If it's a bridge, configure it and scan the bus behind it.
* For CardBus bridges, we don't scan behind as the devices will
* be handled by the bridge driver itself.
*
* We need to process bridges in two passes -- first we scan those
* already configured by the BIOS and after we are done with all of
* them, we proceed to assigning numbers to the remaining buses in
* order to avoid overlaps between old and new bus numbers.
*
* Excerpt: lines consisting of "......" mark code elided from the original
* function (local declarations, sanity checks, the "out" label and return).
*
* Return: New subordinate number covering all buses behind this bridge.
*/
static int pci_scan_bridge_extend(struct pci_bus *bus, struct pci_dev *dev,
int max, unsigned int available_buses,
int pass)
{
......
/* One dword read yields primary (bits 0-7), secondary (8-15), subordinate (16-23). */
pci_read_config_dword(dev, PCI_PRIMARY_BUS, &buses);
primary = buses & 0xFF;
secondary = (buses >> 8) & 0xFF;
subordinate = (buses >> 16) & 0xFF;
......
/*
* This branch is taken when the bridge's bus-number registers read back
* non-zero (secondary or subordinate set), i.e. firmware already
* configured the bridge, and nothing forces a reassignment
* (pcibios_assign_all_busses(), CardBus, or a broken setup).
*/
if ((secondary || subordinate) && !pcibios_assign_all_busses() &&
!is_cardbus && !broken) {
unsigned int cmax;
/*
* Bus already configured by firmware, process it in the
* first pass and just note the configuration.
*/
if (pass)
goto out;
/*
* The bus might already exist for two reasons: Either we
* are rescanning the bus or the bus is reachable through
* more than one bridge. The second case can happen with
* the i450NX chipset.
*/
/* Find or create the pci_bus for the next level; its number is "secondary". */
child = pci_find_bus(pci_domain_nr(bus), secondary);
if (!child) {
child = pci_add_new_bus(bus, dev, secondary);
if (!child)
goto out;
child->primary = primary;
pci_bus_insert_busn_res(child, secondary, subordinate);
child->bridge_ctl = bctl;
}
/* Recursion entry point: "child" is now the next-level bus. */
cmax = pci_scan_child_bus(child);
if (cmax > subordinate)
pci_warn(dev, "bridge has subordinate %02x but max busn %02x\n",
subordinate, cmax);
/* Subordinate should equal child->busn_res.end */
if (subordinate > max)
max = subordinate;
} else {
/*
* This branch handles bridges that firmware left unconfigured
* (secondary and subordinate both read 0), or whose bus numbers must be
* reassigned (pcibios_assign_all_busses(), CardBus, or a broken setup).
*/
/*
* We need to assign a number to this bus which we always
* do in the second pass.
*/
if (!pass) {
if (pcibios_assign_all_busses() || broken || is_cardbus)
/*
* Temporarily disable forwarding of the
* configuration cycles on all bridges in
* this bus segment to avoid possible
* conflicts in the second pass between two
* bridges programmed with overlapping bus
* ranges.
*/
/*
* Bus-number conflict avoidance: firmware may have programmed values
* that clash with what the OS assigns in the second pass, so the
* primary/secondary/subordinate fields (low 24 bits) are zeroed first.
*/
pci_write_config_dword(dev, PCI_PRIMARY_BUS,
buses & ~0xffffff);
goto out;
}
/* Clear errors */
pci_write_config_word(dev, PCI_STATUS, 0xffff);
/* Read bus numbers from EA Capability (if present) */
fixed_buses = pci_ea_fixed_busnrs(dev, &fixed_sec, &fixed_sub);
if (fixed_buses)
next_busnr = fixed_sec;
else
next_busnr = max + 1; /* bus number for the next-level bus */
/*
* Prevent assigning a bus number that already exists.
* This can happen when a bridge is hot-plugged, so in this
* case we only re-scan this bus.
*/
/* Find or create the pci_bus for the next level; its number is next_busnr. */
child = pci_find_bus(pci_domain_nr(bus), next_busnr);
if (!child) {
child = pci_add_new_bus(bus, dev, next_busnr);
if (!child)
goto out;
pci_bus_insert_busn_res(child, next_busnr,
bus->busn_res.end);
}
max++;
if (available_buses)
available_buses--;
/*
* Compose the bridge's primary/secondary/subordinate fields from the
* child bus. For a newly created child the subordinate value is still
* provisional here: its busn_res.end was just set to the parent's
* bus->busn_res.end, and is corrected after the scan below.
*/
buses = (buses & 0xff000000)
| ((unsigned int)(child->primary) << 0)
| ((unsigned int)(child->busn_res.start) << 8)
| ((unsigned int)(child->busn_res.end) << 16);
......
/* We need to blast all three values with a single write */
pci_write_config_dword(dev, PCI_PRIMARY_BUS, buses);
if (!is_cardbus) {
child->bridge_ctl = bctl;
/* Recursion entry point: "child" is now the next-level bus. */
max = pci_scan_child_bus_extend(child, available_buses);
} else {
......
}
/*
* Set subordinate bus number to its real value.
* If fixed subordinate bus number exists from EA
* capability then use it.
*/
if (fixed_buses)
max = fixed_sub;
pci_bus_update_busn_res_end(child, max);
/* Recursion has returned: store the final max in the bridge's subordinate bus register. */
pci_write_config_byte(dev, PCI_SUBORDINATE_BUS, max);
}
......
}
5、举例

以上面这张图举例,Linux 下的 PCIe 设备枚举顺序为:
- 调用 `pci_scan_child_bus_extend` 创建 bus 2 上的设备,创建出 root bus 设备(2,0,0)
- 调用 `pci_scan_bridge_extend` 创建下一级 bus 3
- 再次调用 `pci_scan_child_bus_extend`,创建 bus 3 上的设备,创建出 upstream port(3,0,0)[这里开始第一次递归]
- 再调用 `pci_scan_bridge_extend` 创建下一级 bus 4
- 再次调用 `pci_scan_child_bus_extend`,创建 bus 4 上的设备,创建出 downstream port(4,1,0)、(4,2,0)...(4,18,0)[这里开始第二次递归]
  要注意,这里会一次性创建出当前 bus 上所有扫描到的设备
- 再调用 `pci_scan_bridge_extend` 创建下一级 bus 5
- 再次调用 `pci_scan_child_bus_extend`,创建 bus 5 上的设备,创建出 endpoint 设备(5,0,0)[这里开始第一次递归返回,返回的位置就是 for_each_pci_bridge 这个循环]
  如果 (5,0,0) 是一个 switch 的话,当尝试创建 bus 6 时,实际上就等同于创建 upstream port (3,0,0) 的过程,这时会接着往下深度遍历(不会往右遍历)。遍历结束后,根据遍历的返回值(也就是递归的返回值),原图中的 bus 6 这时就有可能变成 bus 8、bus 9...
- 再次调用 `pci_scan_child_bus_extend`,创建 bus 6 上的设备,创建出 endpoint 设备(6,0,0)
- ...