接前一篇文章:

上一回讲到了vp_find_vqs_msix函数的第5步:进入循环,设置每个使能的vq并申请中断。

再次贴出代码循环的代码片段:

    for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		}
 
		if (!callbacks[i])
			msix_vec = VIRTIO_MSI_NO_VECTOR;
		else if (vp_dev->per_vq_vectors)
			msix_vec = allocated_vectors++;
		else
			msix_vec = VP_MSIX_VQ_VECTOR;
		vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
				     ctx ? ctx[i] : false,
				     msix_vec);
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
			goto error_find;
		}
 
		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
			continue;
 
		/* allocate per-vq irq if available and necessary */
		snprintf(vp_dev->msix_names[msix_vec],
			 sizeof *vp_dev->msix_names,
			 "%s-%s",
			 dev_name(&vp_dev->vdev.dev), names[i]);
		err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
				  vring_interrupt, 0,
				  vp_dev->msix_names[msix_vec],
				  vqs[i]);
		if (err)
			goto error_find;
	}

上一回也讲解了循环中的前两个步骤,来到了第一个关键函数:vp_setup_vq。本回对该函数进行解析。

vp_setup_vq函数也在Linux内核源码/drivers/virtio/virtio_pci_common.c中,代码如下:

static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index,
				     void (*callback)(struct virtqueue *vq),
				     const char *name,
				     bool ctx,
				     u16 msix_vec)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
	struct virtqueue *vq;
	unsigned long flags;

	/* fill out our structure that represents an active queue */
	if (!info)
		return ERR_PTR(-ENOMEM);

	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
			      msix_vec);
	if (IS_ERR(vq))
		goto out_info;

	info->vq = vq;
	if (callback) {
		spin_lock_irqsave(&vp_dev->lock, flags);
		list_add(&info->node, &vp_dev->virtqueues);
		spin_unlock_irqrestore(&vp_dev->lock, flags);
	} else {
		INIT_LIST_HEAD(&info->node);
	}

	vp_dev->vqs[index] = info;
	return vq;

out_info:
	kfree(info);
	return vq;
}

vp_setup_vq函数初始化virtqueue。在该函数中会分配一个具体的virtio_pci_vq_info结构体对象,来表示一个virtqueue信息,代码片段如下:

    struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);

并且会以该对象会为参数,调用virtio_pci_device的setup_vq回调函数,代码片段如下:

    vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
			      msix_vec);
	if (IS_ERR(vq))
		goto out_info;

这个回调函数同样是在virtio_pci_modern_probe函数中设置的,参考前文所讲的virtio_pci_modern_probe函数代码(Linux内核源码/drivers/virtio/virtio_pci_modern.c中):

/* the PCI probing function */
int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
{
	struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
	struct pci_dev *pci_dev = vp_dev->pci_dev;
	int err;

	mdev->pci_dev = pci_dev;

	err = vp_modern_probe(mdev);
	if (err)
		return err;

	if (mdev->device)
		vp_dev->vdev.config = &virtio_pci_config_ops;
	else
		vp_dev->vdev.config = &virtio_pci_config_nodev_ops;

	vp_dev->config_vector = vp_config_vector;
	vp_dev->setup_vq = setup_vq;
	vp_dev->del_vq = del_vq;
	vp_dev->isr = mdev->isr;
	vp_dev->vdev.id = mdev->id;

	return 0;
}

可见,回调函数指向的是同文件(Linux内核源码/drivers/virtio/virtio_pci_modern.c)中的setup_vq函数。该函数代码如下:

static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
				  struct virtio_pci_vq_info *info,
				  unsigned int index,
				  void (*callback)(struct virtqueue *vq),
				  const char *name,
				  bool ctx,
				  u16 msix_vec)
{

	struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
	bool (*notify)(struct virtqueue *vq);
	struct virtqueue *vq;
	u16 num;
	int err;

	if (__virtio_test_bit(&vp_dev->vdev, VIRTIO_F_NOTIFICATION_DATA))
		notify = vp_notify_with_data;
	else
		notify = vp_notify;

	if (index >= vp_modern_get_num_queues(mdev))
		return ERR_PTR(-EINVAL);

	/* Check if queue is either not available or already active. */
	num = vp_modern_get_queue_size(mdev, index);
	if (!num || vp_modern_get_queue_enable(mdev, index))
		return ERR_PTR(-ENOENT);

	info->msix_vector = msix_vec;

	/* create the vring */
	vq = vring_create_virtqueue(index, num,
				    SMP_CACHE_BYTES, &vp_dev->vdev,
				    true, true, ctx,
				    notify, callback, name);
	if (!vq)
		return ERR_PTR(-ENOMEM);

	vq->num_max = num;

	err = vp_active_vq(vq, msix_vec);
	if (err)
		goto err;

	vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
	if (!vq->priv) {
		err = -ENOMEM;
		goto err;
	}

	return vq;

err:
	vring_del_virtqueue(vq);
	return ERR_PTR(err);
}

(1)setup_vq函数首先检测virtio PCI设备是否具有VIRTIO_F_NOTIFICATION_DATA特性。代码片段如下:

	if (__virtio_test_bit(&vp_dev->vdev, VIRTIO_F_NOTIFICATION_DATA))
		notify = vp_notify_with_data;
	else
		notify = vp_notify;

VIRTIO_F_NOTIFICATION_DATA宏在Linux内核源码/include/uapi/linux/virtio_config.h中定义,如下:

/*
 * This feature indicates that the driver passes extra data (besides
 * identifying the virtqueue) in its device notifications.
 */
#define VIRTIO_F_NOTIFICATION_DATA	38

如果设备支持 VIRTIO_F_NOTIFICATION_DATA,则说明notify时携带(额外)数据,就将notify函数指针设置为vp_notify_with_data,即指向vp_notify_with_data函数;否则指向notify函数。

vp_notify_with_data函数也在Linux内核源码/drivers/virtio/virtio_pci_modern.c中(就在上边),代码如下:

static bool vp_notify_with_data(struct virtqueue *vq)
{
	u32 data = vring_notification_data(vq);

	iowrite32(data, (void __iomem *)vq->priv);

	return true;
}

而vp_notify函数则是在Linux内核源码/drivers/virtio/virtio_pci_common.c中,代码如下:

/* the notify function used when creating a virt queue */
bool vp_notify(struct virtqueue *vq)
{
	/* we write the queue's selector into the notification register to
	 * signal the other end */
	iowrite16(vq->index, (void __iomem *)vq->priv);
	return true;
}

(2)接下来,调用vp_modern_get_num_queues函数获取virtqueues的长度(个数)。代码片段如下:

	if (index >= vp_modern_get_num_queues(mdev))
		return ERR_PTR(-EINVAL);

vp_modern_get_num_queues函数在Linux内核源代码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:

/*
 * vp_modern_get_num_queues - get the number of virtqueues
 * @mdev: the modern virtio-pci device
 *
 * Returns the number of virtqueues
 */
u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev)
{
	return vp_ioread16(&mdev->common->num_queues);
}
EXPORT_SYMBOL_GPL(vp_modern_get_num_queues);

(3)接下来,调用vp_modern_get_queue_size函数获得一个virtqueue的大小。代码片段如下:

	/* Check if queue is either not available or already active. */
	num = vp_modern_get_queue_size(mdev, index);
	if (!num || vp_modern_get_queue_enable(mdev, index))
		return ERR_PTR(-ENOENT);

vp_modern_get_queue_size函数在Linux内核源码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:

/*
 * vp_modern_get_queue_size - get size for a virtqueue
 * @mdev: the modern virtio-pci device
 * @index: the queue index
 *
 * Returns the size of the virtqueue
 */
u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev,
			     u16 index)
{
	vp_iowrite16(index, &mdev->common->queue_select);

	return vp_ioread16(&mdev->common->queue_size);

}
EXPORT_SYMBOL_GPL(vp_modern_get_queue_size);

vp_modern_get_queue_size函数先选择某个virtqueue,然后得到其大小。

vp_modern_get_queue_enable函数也在Linux内核源码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:

/*
 * vp_modern_get_queue_enable - enable a virtqueue
 * @mdev: the modern virtio-pci device
 * @index: the queue index
 *
 * Returns whether a virtqueue is enabled or not
 */
bool vp_modern_get_queue_enable(struct virtio_pci_modern_device *mdev,
				u16 index)
{
	vp_iowrite16(index, &mdev->common->queue_select);

	return vp_ioread16(&mdev->common->queue_enable);
}
EXPORT_SYMBOL_GPL(vp_modern_get_queue_enable);

vp_modern_get_queue_enable函数获得所选中的virtqueue是否使能。

这里要特别说明一下,在老版代码的setup_vq函数中,一上来首先得到virtio_pci_device的common成员,代码片段如下:

    struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;

在新版本中,虽然是直接使用具体的common->x,并未单独使用一个中间变量专门保存virtio_pci_device的common成员,但意思一样。

struct virtio_pci_common_cfg的定义在Linux内核源码/include/uapi/linux/virtio_pci.h中,代码如下:

/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
struct virtio_pci_common_cfg {
	/* About the whole device. */
	__le32 device_feature_select;	/* read-write */
	__le32 device_feature;		/* read-only */
	__le32 guest_feature_select;	/* read-write */
	__le32 guest_feature;		/* read-write */
	__le16 msix_config;		/* read-write */
	__le16 num_queues;		/* read-only */
	__u8 device_status;		/* read-write */
	__u8 config_generation;		/* read-only */

	/* About a specific virtqueue. */
	__le16 queue_select;		/* read-write */
	__le16 queue_size;		/* read-write, power of 2. */
	__le16 queue_msix_vector;	/* read-write */
	__le16 queue_enable;		/* read-write */
	__le16 queue_notify_off;	/* read-only */
	__le32 queue_desc_lo;		/* read-write */
	__le32 queue_desc_hi;		/* read-write */
	__le32 queue_avail_lo;		/* read-write */
	__le32 queue_avail_hi;		/* read-write */
	__le32 queue_used_lo;		/* read-write */
	__le32 queue_used_hi;		/* read-write */
};

这是virtio PCI代理设备中用来配置的一段MMIO,如下图中间部分所示:

QEMU源码全解析 —— virtio(26)-LMLPHP

直接读写这些地址会陷入到QEMU的virtio_pci_common_read/write函数。这里将common的各个偏移和对应的寄存器名列出来以方便对照,在Linux内核源码/include/uapi/linux/virtio_pci.h中,如下:

/* Macro versions of offsets for the Old Timers! */
#define VIRTIO_PCI_CAP_VNDR		0
#define VIRTIO_PCI_CAP_NEXT		1
#define VIRTIO_PCI_CAP_LEN		2
#define VIRTIO_PCI_CAP_CFG_TYPE		3
#define VIRTIO_PCI_CAP_BAR		4
#define VIRTIO_PCI_CAP_OFFSET		8
#define VIRTIO_PCI_CAP_LENGTH		12

#define VIRTIO_PCI_NOTIFY_CAP_MULT	16

#define VIRTIO_PCI_COMMON_DFSELECT	0
#define VIRTIO_PCI_COMMON_DF		4
#define VIRTIO_PCI_COMMON_GFSELECT	8
#define VIRTIO_PCI_COMMON_GF		12
#define VIRTIO_PCI_COMMON_MSIX		16
#define VIRTIO_PCI_COMMON_NUMQ		18
#define VIRTIO_PCI_COMMON_STATUS	20
#define VIRTIO_PCI_COMMON_CFGGENERATION	21
#define VIRTIO_PCI_COMMON_Q_SELECT	22
#define VIRTIO_PCI_COMMON_Q_SIZE	24
#define VIRTIO_PCI_COMMON_Q_MSIX	26
#define VIRTIO_PCI_COMMON_Q_ENABLE	28
#define VIRTIO_PCI_COMMON_Q_NOFF	30
#define VIRTIO_PCI_COMMON_Q_DESCLO	32
#define VIRTIO_PCI_COMMON_Q_DESCHI	36
#define VIRTIO_PCI_COMMON_Q_AVAILLO	40
#define VIRTIO_PCI_COMMON_Q_AVAILHI	44
#define VIRTIO_PCI_COMMON_Q_USEDLO	48
#define VIRTIO_PCI_COMMON_Q_USEDHI	52
#define VIRTIO_PCI_COMMON_Q_NDATA	56
#define VIRTIO_PCI_COMMON_Q_RESET	58

对照上边的struct virtio_pci_common_cfg的定义,一目了然。

/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
struct virtio_pci_common_cfg {
	/* About the whole device. */
	__le32 device_feature_select;	/* read-write */
	__le32 device_feature;		/* read-only */
	__le32 guest_feature_select;	/* read-write */
	__le32 guest_feature;		/* read-write */
	__le16 msix_config;		/* read-write */
	__le16 num_queues;		/* read-only */
	__u8 device_status;		/* read-write */
	__u8 config_generation;		/* read-only */

	/* About a specific virtqueue. */
	__le16 queue_select;		/* read-write */
	__le16 queue_size;		/* read-write, power of 2. */
	__le16 queue_msix_vector;	/* read-write */
	__le16 queue_enable;		/* read-write */
	__le16 queue_notify_off;	/* read-only */
	__le32 queue_desc_lo;		/* read-write */
	__le32 queue_desc_hi;		/* read-write */
	__le32 queue_avail_lo;		/* read-write */
	__le32 queue_avail_hi;		/* read-write */
	__le32 queue_used_lo;		/* read-write */
	__le32 queue_used_hi;		/* read-write */
};

这里特别说明以上内容,是因为这是前不久(2024年春节后)笔者参加地平线面试时被问到的一个问题:

回到setup_vq函数来,setup_vq函数的余下部分,将在下一回中进行解析。

02-28 08:55