QEMU源码全解析 —— virtio(19)

接前一篇文章:

上回书继续讲解virtio_pci_driver的probe回调函数virtio_pci_probe(),在讲到第5段代码的时候,

cpp 复制代码
    if (force_legacy) {
		rc = virtio_pci_legacy_probe(vp_dev);
		/* Also try modern mode if we can't map BAR0 (no IO space). */
		if (rc == -ENODEV || rc == -ENOMEM)
			rc = virtio_pci_modern_probe(vp_dev);
		if (rc)
			goto err_probe;
	} else {
		rc = virtio_pci_modern_probe(vp_dev);
		if (rc == -ENODEV)
			rc = virtio_pci_legacy_probe(vp_dev);
		if (rc)
			goto err_probe;
	}

引出来两个函数:virtio_pci_legacy_probe和virtio_pci_modern_probe。本回就来对它们进行解析。当然,由于legacy已成过去,因此重点围绕virtio_pci_modern_probe函数进行解析,捎带手地也讲一下virtio_pci_legacy_probe()。为了便于理解,再次贴出两个函数的源码:

  • virtio_pci_legacy_probe

virtio_pci_legacy_probe函数在Linux内核源码/drivers/virtio/virtio_pci_legacy.c中,代码如下:

cpp 复制代码
/* the PCI probing function */
int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
{
	struct virtio_pci_legacy_device *ldev = &vp_dev->ldev;
	struct pci_dev *pci_dev = vp_dev->pci_dev;
	int rc;
 
	ldev->pci_dev = pci_dev;
 
	rc = vp_legacy_probe(ldev);
	if (rc)
		return rc;
 
	vp_dev->isr = ldev->isr;
	vp_dev->vdev.id = ldev->id;
 
	vp_dev->vdev.config = &virtio_pci_config_ops;
 
	vp_dev->config_vector = vp_config_vector;
	vp_dev->setup_vq = setup_vq;
	vp_dev->del_vq = del_vq;
 
	return 0;
}
  • virtio_pci_modern_probe

virtio_pci_modern_probe函数在Linux内核源码/drivers/virtio/virtio_pci_modern.c中,代码如下:

cpp 复制代码
/* the PCI probing function */
int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
{
	struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
	struct pci_dev *pci_dev = vp_dev->pci_dev;
	int err;
 
	mdev->pci_dev = pci_dev;
 
	err = vp_modern_probe(mdev);
	if (err)
		return err;
 
	if (mdev->device)
		vp_dev->vdev.config = &virtio_pci_config_ops;
	else
		vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
 
	vp_dev->config_vector = vp_config_vector;
	vp_dev->setup_vq = setup_vq;
	vp_dev->del_vq = del_vq;
	vp_dev->isr = mdev->isr;
	vp_dev->vdev.id = mdev->id;
 
	return 0;
}

virtio_pci_modern_probe函数中最主要的是调用了vp_modern_probe函数,其在Linux内核源码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:

cpp 复制代码
/*
 * vp_modern_probe: probe the modern virtio pci device, note that the
 * caller is required to enable PCI device before calling this function.
 * @mdev: the modern virtio-pci device
 *
 * Return 0 on succeed otherwise fail
 */
int vp_modern_probe(struct virtio_pci_modern_device *mdev)
{
	struct pci_dev *pci_dev = mdev->pci_dev;
	int err, common, isr, notify, device;
	u32 notify_length;
	u32 notify_offset;

	check_offsets();

	/* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
	if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
		return -ENODEV;

	if (pci_dev->device < 0x1040) {
		/* Transitional devices: use the PCI subsystem device id as
		 * virtio device id, same as legacy driver always did.
		 */
		mdev->id.device = pci_dev->subsystem_device;
	} else {
		/* Modern devices: simply use PCI device id, but start from 0x1040. */
		mdev->id.device = pci_dev->device - 0x1040;
	}
	mdev->id.vendor = pci_dev->subsystem_vendor;

	/* check for a common config: if not, use legacy mode (bar 0). */
	common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM,
					    &mdev->modern_bars);
	if (!common) {
		dev_info(&pci_dev->dev,
			 "virtio_pci: leaving for legacy driver\n");
		return -ENODEV;
	}

	/* If common is there, these should be too... */
	isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
					 IORESOURCE_IO | IORESOURCE_MEM,
					 &mdev->modern_bars);
	notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM,
					    &mdev->modern_bars);
	if (!isr || !notify) {
		dev_err(&pci_dev->dev,
			"virtio_pci: missing capabilities %i/%i/%i\n",
			common, isr, notify);
		return -EINVAL;
	}

	err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
	if (err)
		err = dma_set_mask_and_coherent(&pci_dev->dev,
						DMA_BIT_MASK(32));
	if (err)
		dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.  Trying to continue, but this might not work.\n");

	/* Device capability is only mandatory for devices that have
	 * device-specific configuration.
	 */
	device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM,
					    &mdev->modern_bars);

	err = pci_request_selected_regions(pci_dev, mdev->modern_bars,
					   "virtio-pci-modern");
	if (err)
		return err;

	err = -EINVAL;
	mdev->common = vp_modern_map_capability(mdev, common,
				      sizeof(struct virtio_pci_common_cfg), 4,
				      0, sizeof(struct virtio_pci_common_cfg),
				      NULL, NULL);
	if (!mdev->common)
		goto err_map_common;
	mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
					     0, 1,
					     NULL, NULL);
	if (!mdev->isr)
		goto err_map_isr;

	/* Read notify_off_multiplier from config space. */
	pci_read_config_dword(pci_dev,
			      notify + offsetof(struct virtio_pci_notify_cap,
						notify_off_multiplier),
			      &mdev->notify_offset_multiplier);
	/* Read notify length and offset from config space. */
	pci_read_config_dword(pci_dev,
			      notify + offsetof(struct virtio_pci_notify_cap,
						cap.length),
			      &notify_length);

	pci_read_config_dword(pci_dev,
			      notify + offsetof(struct virtio_pci_notify_cap,
						cap.offset),
			      &notify_offset);

	/* We don't know how many VQs we'll map, ahead of the time.
	 * If notify length is small, map it all now.
	 * Otherwise, map each VQ individually later.
	 */
	if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
		mdev->notify_base = vp_modern_map_capability(mdev, notify,
							     2, 2,
							     0, notify_length,
							     &mdev->notify_len,
							     &mdev->notify_pa);
		if (!mdev->notify_base)
			goto err_map_notify;
	} else {
		mdev->notify_map_cap = notify;
	}

	/* Again, we don't know how much we should map, but PAGE_SIZE
	 * is more than enough for all existing devices.
	 */
	if (device) {
		mdev->device = vp_modern_map_capability(mdev, device, 0, 4,
							0, PAGE_SIZE,
							&mdev->device_len,
							NULL);
		if (!mdev->device)
			goto err_map_device;
	}

	return 0;

err_map_device:
	if (mdev->notify_base)
		pci_iounmap(pci_dev, mdev->notify_base);
err_map_notify:
	pci_iounmap(pci_dev, mdev->isr);
err_map_isr:
	pci_iounmap(pci_dev, mdev->common);
err_map_common:
	pci_release_selected_regions(pci_dev, mdev->modern_bars);
	return err;
}
EXPORT_SYMBOL_GPL(vp_modern_probe);

实际上在老版本KVM即Linux内核代码中,vp_modern_probe函数中的内容绝大多数是直接放在virtio_pci_modern_probe函数中的,后来才单独封了这样一个函数。

(1)vp_modern_probe首先设置了virtio设备的verdor ID和device ID。代码片段如下:

cpp 复制代码
    /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
	if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
		return -ENODEV;

	if (pci_dev->device < 0x1040) {
		/* Transitional devices: use the PCI subsystem device id as
		 * virtio device id, same as legacy driver always did.
		 */
		mdev->id.device = pci_dev->subsystem_device;
	} else {
		/* Modern devices: simply use PCI device id, but start from 0x1040. */
		mdev->id.device = pci_dev->device - 0x1040;
	}
	mdev->id.vendor = pci_dev->subsystem_vendor;

值得注意的是,virtio PCI代理设备的device iD就是前文书(参见QEMU源码全解析 ------ virtio(14))在讲virtio_pci_device_plugged函数(QEMU源码中)时设置的PCI_DEVICE_ID_VIRTIO_10_BASE+VIRTIO_ID_BALLOON,即0x1040+5。

所以,这里virtio设备的device ID(mdev->id.device)就是0x1040+5-0x1040=5,也就代表了VIRTIO_ID_BALLOON。

(2)接下来,调用多次virtio_pci_find_capability函数来发现virtio PCI代理设备的pci capability。代码片段如下:

cpp 复制代码
/* check for a common config: if not, use legacy mode (bar 0). */
	common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM,
					    &mdev->modern_bars);
	if (!common) {
		dev_info(&pci_dev->dev,
			 "virtio_pci: leaving for legacy driver\n");
		return -ENODEV;
	}

	/* If common is there, these should be too... */
	isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
					 IORESOURCE_IO | IORESOURCE_MEM,
					 &mdev->modern_bars);
	notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM,
					    &mdev->modern_bars);
	if (!isr || !notify) {
		dev_err(&pci_dev->dev,
			"virtio_pci: missing capabilities %i/%i/%i\n",
			common, isr, notify);
		return -EINVAL;
	}

	err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
	if (err)
		err = dma_set_mask_and_coherent(&pci_dev->dev,
						DMA_BIT_MASK(32));
	if (err)
		dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.  Trying to continue, but this might not work.\n");

	/* Device capability is only mandatory for devices that have
	 * device-specific configuration.
	 */
	device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM,
					    &mdev->modern_bars);

这也是在(QEMU源码)virtio_pci_device_plugged函数中写入到virtio PCI代理设备的配置空间中的,参见QEMU源码全解析 ------ virtio(14)QEMU源码全解析 ------ virtio(15)

(3)virtio_pci_find_capability函数找到所属的PCI BAR,然后写入到virt_pci_device的modern_bars成员中。代码片段如下:

cpp 复制代码
    /* Device capability is only mandatory for devices that have
	 * device-specific configuration.
	 */
	device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM,
					    &mdev->modern_bars);

从(QEMU源码)virtio_pci_realize函数中可以知道这个modern_bars是1<<4,如下图所示:

(4)接着,pci_request_selected_regions函数就将virtio PCI代理设备的BAR地址空间保留出来了。代码片段如下:

cpp 复制代码
    err = pci_request_selected_regions(pci_dev, mdev->modern_bars,
					   "virtio-pci-modern");
	if (err)
		return err;

(5)调用vp_modern_map_capability函数将对应的capability在PCI代理设备中的BAR空间映射到内核地址空间。代码片段如下:

cpp 复制代码
    err = -EINVAL;
	mdev->common = vp_modern_map_capability(mdev, common,
				      sizeof(struct virtio_pci_common_cfg), 4,
				      0, sizeof(struct virtio_pci_common_cfg),
				      NULL, NULL);
	if (!mdev->common)
		goto err_map_common;
	mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
					     0, 1,
					     NULL, NULL);
	if (!mdev->isr)
		goto err_map_isr;

	/* Read notify_off_multiplier from config space. */
	pci_read_config_dword(pci_dev,
			      notify + offsetof(struct virtio_pci_notify_cap,
						notify_off_multiplier),
			      &mdev->notify_offset_multiplier);
	/* Read notify length and offset from config space. */
	pci_read_config_dword(pci_dev,
			      notify + offsetof(struct virtio_pci_notify_cap,
						cap.length),
			      &notify_length);

	pci_read_config_dword(pci_dev,
			      notify + offsetof(struct virtio_pci_notify_cap,
						cap.offset),
			      &notify_offset);

	/* We don't know how many VQs we'll map, ahead of the time.
	 * If notify length is small, map it all now.
	 * Otherwise, map each VQ individually later.
	 */
	if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
		mdev->notify_base = vp_modern_map_capability(mdev, notify,
							     2, 2,
							     0, notify_length,
							     &mdev->notify_len,
							     &mdev->notify_pa);
		if (!mdev->notify_base)
			goto err_map_notify;
	} else {
		mdev->notify_map_cap = notify;
	}

	/* Again, we don't know how much we should map, but PAGE_SIZE
	 * is more than enough for all existing devices.
	 */
	if (device) {
		mdev->device = vp_modern_map_capability(mdev, device, 0, 4,
							0, PAGE_SIZE,
							&mdev->device_len,
							NULL);
		if (!mdev->device)
			goto err_map_device;
	}

如mp_dev(struct virtio_pci_modern_device *mdev = &vp_dev->mdev;)的common成员映射了virtio_pci_common_cfg的数据到内核中。这样,后续就可以直接通过这个内存地址空间来访问common这一capability了,其它的capability(isr、notify、device)也与此类似。

vp_modern_map_capability函数在Linux内核源码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:

cpp 复制代码
/*
 * vp_modern_map_capability - map a part of virtio pci capability
 * @mdev: the modern virtio-pci device
 * @off: offset of the capability
 * @minlen: minimal length of the capability
 * @align: align requirement
 * @start: start from the capability
 * @size: map size
 * @len: the length that is actually mapped
 * @pa: physical address of the capability
 *
 * Returns the io address of for the part of the capability
 */
static void __iomem *
vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
			 size_t minlen, u32 align, u32 start, u32 size,
			 size_t *len, resource_size_t *pa)
{
	struct pci_dev *dev = mdev->pci_dev;
	u8 bar;
	u32 offset, length;
	void __iomem *p;

	pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
						 bar),
			     &bar);
	pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
			     &offset);
	pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
			      &length);

	/* Check if the BAR may have changed since we requested the region. */
	if (bar >= PCI_STD_NUM_BARS || !(mdev->modern_bars & (1 << bar))) {
		dev_err(&dev->dev,
			"virtio_pci: bar unexpectedly changed to %u\n", bar);
		return NULL;
	}

	if (length <= start) {
		dev_err(&dev->dev,
			"virtio_pci: bad capability len %u (>%u expected)\n",
			length, start);
		return NULL;
	}

	if (length - start < minlen) {
		dev_err(&dev->dev,
			"virtio_pci: bad capability len %u (>=%zu expected)\n",
			length, minlen);
		return NULL;
	}

	length -= start;

	if (start + offset < offset) {
		dev_err(&dev->dev,
			"virtio_pci: map wrap-around %u+%u\n",
			start, offset);
		return NULL;
	}

	offset += start;

	if (offset & (align - 1)) {
		dev_err(&dev->dev,
			"virtio_pci: offset %u not aligned to %u\n",
			offset, align);
		return NULL;
	}

	if (length > size)
		length = size;

	if (len)
		*len = length;

	if (minlen + offset < minlen ||
	    minlen + offset > pci_resource_len(dev, bar)) {
		dev_err(&dev->dev,
			"virtio_pci: map virtio %zu@%u "
			"out of range on bar %i length %lu\n",
			minlen, offset,
			bar, (unsigned long)pci_resource_len(dev, bar));
		return NULL;
	}

	p = pci_iomap_range(dev, bar, offset, length);
	if (!p)
		dev_err(&dev->dev,
			"virtio_pci: unable to map virtio %u@%u on bar %i\n",
			length, offset, bar);
	else if (pa)
		*pa = pci_resource_start(dev, bar) + offset;

	return p;
}

这样实际上就将virtio PCI代理设备的BAR映射到虚拟机内核地址空间了,后续直接访问这些地址即可实现对virtio PCI代理设备的配置和控制。

回到virtio_pci_modern_probe函数。

cpp 复制代码
/* the PCI probing function */
int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
{
	struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
	struct pci_dev *pci_dev = vp_dev->pci_dev;
	int err;
 
	mdev->pci_dev = pci_dev;
 
	err = vp_modern_probe(mdev);
	if (err)
		return err;
 
	if (mdev->device)
		vp_dev->vdev.config = &virtio_pci_config_ops;
	else
		vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
 
	vp_dev->config_vector = vp_config_vector;
	vp_dev->setup_vq = setup_vq;
	vp_dev->del_vq = del_vq;
	vp_dev->isr = mdev->isr;
	vp_dev->vdev.id = mdev->id;
 
	return 0;
}

在调用完vp_modern_probe函数之后,virtio_pci_modern_probe函数接着设置virtio_pci_device中virtio_device的成员vdev的config成员。如果有device这一capability,则设置为virtio_pci_config_ops,否则设置为virtio_pci_config_nodev_ops。

之后设置vpdev即struct virtio_pci_device的几个回调函数:config_vector与MSI中断有关,设置为vp_config_vector;setup_vq用来配置virtio设备virt queue,设置为setup_vq;del_vq用来删除virt queue,设置为del_vq。

至此,virtio_pci_modern_probe函数就解析完了。

欲知后事如何,且看下回分解。

相关推荐
Wanliang Li4 天前
Linux电源管理——Device Power Management Interface
linux·嵌入式·virtio·电源管理·suspend
Hi202402177 天前
KVM创建ubuntu20.04虚机,部署K8S,再克隆出二份,做为Worker节点加入集群,通过Helm创建2个Pod,让它们之间通过域名互访
云原生·容器·kubernetes·helm·kvm
码上飞扬17 天前
深入探讨服务器虚拟化:架构、技术与应用
服务器·docker·k8s·虚拟化·kvm·xen
Hi202402171 个月前
ubuntu22.04上安装win10虚拟机,并采用noVNC+frp,让远程通过web访问桌面
运维·kvm·云桌面
京雨1 个月前
Qemu 加载你指定的 initrd、dtb 到哪里?
qemu·riscv64·fdt·initrd
专注VB编程开发20年2 个月前
虚拟机添加硬盘驱动,Windows 系统添加 VirtIO 驱动(Windows ISO 安装镜像添加驱动)
服务器·windows·虚拟机·virtio
公西雒2 个月前
关于在GitLab的CI/CD中用docker buildx本地化多架构打包dotnet应用的问题
ci/cd·docker·gitlab·qemu·dotnet
团儿.3 个月前
KVM磁盘配置:构建高效虚拟环境的基石
linux·运维·centos·kvm·kvm磁盘
ywang_wnlo3 个月前
【Kenel】基于 QEMU 的 Linux 内核编译和安装
linux·qemu·kernel
ywang_wnlo3 个月前
【Kernel】基于 QEMU 的 Linux 内核编译和安装
linux·qemu·kernel