接前一篇文章:
上一回讲到了vp_find_vqs_msix函数的第5步:进入循环,设置每个使能的vq并申请中断。
再次贴出该循环的代码片段:
for (i = 0; i < nvqs; ++i) {
if (!names[i]) {
vqs[i] = NULL;
continue;
}
if (!callbacks[i])
msix_vec = VIRTIO_MSI_NO_VECTOR;
else if (vp_dev->per_vq_vectors)
msix_vec = allocated_vectors++;
else
msix_vec = VP_MSIX_VQ_VECTOR;
vqs[i] = vp_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
ctx ? ctx[i] : false,
msix_vec);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
goto error_find;
}
if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
continue;
/* allocate per-vq irq if available and necessary */
snprintf(vp_dev->msix_names[msix_vec],
sizeof *vp_dev->msix_names,
"%s-%s",
dev_name(&vp_dev->vdev.dev), names[i]);
err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec),
vring_interrupt, 0,
vp_dev->msix_names[msix_vec],
vqs[i]);
if (err)
goto error_find;
}
上一回也讲解了循环中的前两个步骤,来到了第一个关键函数:vp_setup_vq。本回对该函数进行解析。
vp_setup_vq函数也在Linux内核源码/drivers/virtio/virtio_pci_common.c中,代码如下:
/*
 * vp_setup_vq - create one virtqueue and record its bookkeeping info
 * @vdev: the virtio device; converted to its virtio_pci_device via to_vp_device()
 * @index: queue index, forwarded to the setup_vq hook and used as the slot in vp_dev->vqs[]
 * @callback: forwarded to the setup_vq hook; also decides whether the queue
 *            is linked into vp_dev->virtqueues
 * @name: forwarded to the setup_vq hook
 * @ctx: forwarded to the setup_vq hook
 * @msix_vec: forwarded to the setup_vq hook
 *
 * Returns the virtqueue produced by vp_dev->setup_vq() on success.
 * On failure returns ERR_PTR(-ENOMEM) if the info allocation fails, or the
 * error pointer produced by the hook (info is freed first).
 */
static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index,
void (*callback)(struct virtqueue *vq),
const char *name,
bool ctx,
u16 msix_vec)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
struct virtqueue *vq;
unsigned long flags;
/* fill out our structure that represents an active queue */
if (!info)
return ERR_PTR(-ENOMEM);
/* Delegate the actual queue creation to the hook installed on vp_dev. */
vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
msix_vec);
if (IS_ERR(vq))
goto out_info;
info->vq = vq;
/*
 * Only queues that have a callback are added to vp_dev->virtqueues
 * (under vp_dev->lock); for the others the list node is merely
 * initialised.
 */
if (callback) {
spin_lock_irqsave(&vp_dev->lock, flags);
list_add(&info->node, &vp_dev->virtqueues);
spin_unlock_irqrestore(&vp_dev->lock, flags);
} else {
INIT_LIST_HEAD(&info->node);
}
/* Remember the info by queue index. */
vp_dev->vqs[index] = info;
return vq;
/* Hook failed: release info and hand the hook's error pointer to the caller. */
out_info:
kfree(info);
return vq;
}
vp_setup_vq函数初始化virtqueue。在该函数中会分配一个具体的virtio_pci_vq_info结构体对象,来表示一个virtqueue信息,代码片段如下:
struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
并且会以该对象为参数,调用virtio_pci_device的setup_vq回调函数,代码片段如下:
vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx,
msix_vec);
if (IS_ERR(vq))
goto out_info;
这个回调函数同样是在virtio_pci_modern_probe函数中设置的,参考前文所讲的virtio_pci_modern_probe函数代码(Linux内核源码/drivers/virtio/virtio_pci_modern.c中):
/*
 * the PCI probing function
 *
 * Binds vp_dev->mdev to the PCI device, runs vp_modern_probe() on it and,
 * if that succeeds, installs the modern-transport hooks and copies the
 * isr pointer and device id from mdev into vp_dev.
 *
 * Returns 0 on success, or the non-zero value returned by vp_modern_probe().
 */
int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
{
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct pci_dev *pci_dev = vp_dev->pci_dev;
int err;
mdev->pci_dev = pci_dev;
err = vp_modern_probe(mdev);
if (err)
return err;
/* Choose the config ops table depending on whether mdev->device is set. */
if (mdev->device)
vp_dev->vdev.config = &virtio_pci_config_ops;
else
vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
/* Hooks later invoked through vp_dev (e.g. vp_dev->setup_vq in vp_setup_vq). */
vp_dev->config_vector = vp_config_vector;
vp_dev->setup_vq = setup_vq;
vp_dev->del_vq = del_vq;
vp_dev->isr = mdev->isr;
vp_dev->vdev.id = mdev->id;
return 0;
}
可见,回调函数指向的是同文件(Linux内核源码/drivers/virtio/virtio_pci_modern.c)中的setup_vq函数。该函数代码如下:
/*
 * setup_vq - modern-transport hook that creates and activates one virtqueue
 * @vp_dev: the virtio PCI device
 * @info: per-queue info supplied by the caller; only msix_vector is filled in here
 * @index: queue index
 * @callback: forwarded to vring_create_virtqueue()
 * @name: forwarded to vring_create_virtqueue()
 * @ctx: forwarded to vring_create_virtqueue()
 * @msix_vec: MSI-X vector recorded in @info and passed to vp_active_vq()
 *
 * Returns the new virtqueue, or an ERR_PTR():
 *   -EINVAL  @index is not below the device's queue count
 *   -ENOENT  the queue size reads as 0, or the queue is already enabled
 *   -ENOMEM  vring creation or the notify mapping failed
 *   or the error returned by vp_active_vq().
 */
static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
struct virtio_pci_vq_info *info,
unsigned int index,
void (*callback)(struct virtqueue *vq),
const char *name,
bool ctx,
u16 msix_vec)
{
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
bool (*notify)(struct virtqueue *vq);
struct virtqueue *vq;
u16 num;
int err;
/* Pick the notify routine according to the VIRTIO_F_NOTIFICATION_DATA feature bit. */
if (__virtio_test_bit(&vp_dev->vdev, VIRTIO_F_NOTIFICATION_DATA))
notify = vp_notify_with_data;
else
notify = vp_notify;
/* Reject indices beyond the number of queues reported by the device. */
if (index >= vp_modern_get_num_queues(mdev))
return ERR_PTR(-EINVAL);
/* Check if queue is either not available or already active. */
num = vp_modern_get_queue_size(mdev, index);
if (!num || vp_modern_get_queue_enable(mdev, index))
return ERR_PTR(-ENOENT);
info->msix_vector = msix_vec;
/* create the vring */
/*
 * NOTE(review): the two literal `true` arguments are presumably
 * weak_barriers and may_reduce_num - confirm against the
 * vring_create_virtqueue() prototype.
 */
vq = vring_create_virtqueue(index, num,
SMP_CACHE_BYTES, &vp_dev->vdev,
true, true, ctx,
notify, callback, name);
if (!vq)
return ERR_PTR(-ENOMEM);
vq->num_max = num;
err = vp_active_vq(vq, msix_vec);
if (err)
goto err;
/* vq->priv holds the mapped notify address that vp_notify*() write to. */
vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
if (!vq->priv) {
err = -ENOMEM;
goto err;
}
return vq;
/* Failure after the vring exists: delete it and return the saved error code. */
err:
vring_del_virtqueue(vq);
return ERR_PTR(err);
}
(1)setup_vq函数首先检测virtio PCI设备是否具有VIRTIO_F_NOTIFICATION_DATA特性。代码片段如下:
if (__virtio_test_bit(&vp_dev->vdev, VIRTIO_F_NOTIFICATION_DATA))
notify = vp_notify_with_data;
else
notify = vp_notify;
VIRTIO_F_NOTIFICATION_DATA宏在Linux内核源码/include/uapi/linux/virtio_config.h中定义,如下:
/*
* This feature indicates that the driver passes extra data (besides
* identifying the virtqueue) in its device notifications.
*/
#define VIRTIO_F_NOTIFICATION_DATA 38
如果设备支持VIRTIO_F_NOTIFICATION_DATA,则说明notify时携带(额外)数据,就将notify函数指针设置为vp_notify_with_data,即指向vp_notify_with_data函数;否则指向vp_notify函数。
vp_notify_with_data函数也在Linux内核源码/drivers/virtio/virtio_pci_modern.c中(就在上边),代码如下:
/*
 * Notify routine selected by setup_vq() when VIRTIO_F_NOTIFICATION_DATA is set:
 * writes the 32-bit value from vring_notification_data() to the address
 * stored in vq->priv. Always returns true.
 */
static bool vp_notify_with_data(struct virtqueue *vq)
{
u32 data = vring_notification_data(vq);
iowrite32(data, (void __iomem *)vq->priv);
return true;
}
而vp_notify函数则是在Linux内核源码/drivers/virtio/virtio_pci_common.c中,代码如下:
/*
 * the notify function used when creating a virt queue
 *
 * Writes the 16-bit queue index to the address stored in vq->priv.
 * Always returns true.
 */
bool vp_notify(struct virtqueue *vq)
{
/* we write the queue's selector into the notification register to
 * signal the other end */
iowrite16(vq->index, (void __iomem *)vq->priv);
return true;
}
(2)接下来,调用vp_modern_get_num_queues函数获取设备的virtqueue个数,并检查index是否越界;若index大于等于该个数,则返回-EINVAL。代码片段如下:
if (index >= vp_modern_get_num_queues(mdev))
return ERR_PTR(-EINVAL);
vp_modern_get_num_queues函数在Linux内核源代码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:
/*
 * vp_modern_get_num_queues - get the number of virtqueues
 * @mdev: the modern virtio-pci device
 *
 * Performs a single 16-bit read of the num_queues field of mdev->common.
 *
 * Returns the number of virtqueues
 */
u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev)
{
return vp_ioread16(&mdev->common->num_queues);
}
EXPORT_SYMBOL_GPL(vp_modern_get_num_queues);
(3)接下来,调用vp_modern_get_queue_size函数获得该virtqueue的大小;如果大小为0(队列不可用),或者该队列已经使能,则返回-ENOENT。代码片段如下:
/* Check if queue is either not available or already active. */
num = vp_modern_get_queue_size(mdev, index);
if (!num || vp_modern_get_queue_enable(mdev, index))
return ERR_PTR(-ENOENT);
vp_modern_get_queue_size函数在Linux内核源码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:
/*
 * vp_modern_get_queue_size - get size for a virtqueue
 * @mdev: the modern virtio-pci device
 * @index: the queue index
 *
 * Writes @index to queue_select and then reads queue_size, so the value
 * read belongs to the queue just selected.
 * NOTE(review): no locking is visible here; callers presumably serialise
 * accesses to queue_select - confirm.
 *
 * Returns the size of the virtqueue
 */
u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev,
u16 index)
{
vp_iowrite16(index, &mdev->common->queue_select);
return vp_ioread16(&mdev->common->queue_size);
}
EXPORT_SYMBOL_GPL(vp_modern_get_queue_size);
vp_modern_get_queue_size函数先选择某个virtqueue,然后得到其大小。
vp_modern_get_queue_enable函数也在Linux内核源码/drivers/virtio/virtio_pci_modern_dev.c中,代码如下:
/*
 * vp_modern_get_queue_enable - get the enable state of a virtqueue
 * @mdev: the modern virtio-pci device
 * @index: the queue index
 *
 * Writes @index to queue_select and then reads queue_enable; this function
 * only reads the state, it does not enable the queue. The 16-bit value is
 * converted to bool, so any non-zero read yields true.
 *
 * Returns whether a virtqueue is enabled or not
 */
bool vp_modern_get_queue_enable(struct virtio_pci_modern_device *mdev,
u16 index)
{
vp_iowrite16(index, &mdev->common->queue_select);
return vp_ioread16(&mdev->common->queue_enable);
}
EXPORT_SYMBOL_GPL(vp_modern_get_queue_enable);
vp_modern_get_queue_enable函数同样先选中某个virtqueue,然后返回该virtqueue是否已经使能。
这里要特别说明一下,在老版代码的setup_vq函数中,一上来首先得到virtio_pci_device的common成员,代码片段如下:
struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
在新版本中,虽然是直接使用具体的common->x,并未单独使用一个中间变量专门保存virtio_pci_device的common成员,但意思一样。
struct virtio_pci_common_cfg的定义在Linux内核源码/include/uapi/linux/virtio_pci.h中,代码如下:
/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
/*
 * Layout of the common configuration region. The first group of fields
 * describes the whole device; the second group describes one virtqueue
 * at a time, chosen by first writing its index to queue_select.
 * Multi-byte fields are declared with little-endian types (__le16/__le32).
 */
struct virtio_pci_common_cfg {
/* About the whole device. */
__le32 device_feature_select; /* read-write */
__le32 device_feature; /* read-only */
__le32 guest_feature_select; /* read-write */
__le32 guest_feature; /* read-write */
__le16 msix_config; /* read-write */
__le16 num_queues; /* read-only */
__u8 device_status; /* read-write */
__u8 config_generation; /* read-only */
/* About a specific virtqueue. */
__le16 queue_select; /* read-write */
__le16 queue_size; /* read-write, power of 2. */
__le16 queue_msix_vector; /* read-write */
__le16 queue_enable; /* read-write */
__le16 queue_notify_off; /* read-only */
__le32 queue_desc_lo; /* read-write */
__le32 queue_desc_hi; /* read-write */
__le32 queue_avail_lo; /* read-write */
__le32 queue_avail_hi; /* read-write */
__le32 queue_used_lo; /* read-write */
__le32 queue_used_hi; /* read-write */
};
这是virtio PCI代理设备中用来配置的一段MMIO,如下图中间部分所示:
直接读写这些地址会陷入到QEMU的virtio_pci_common_read/write函数。这里将common的各个偏移和对应的寄存器名列出来以方便对照,在Linux内核源码/include/uapi/linux/virtio_pci.h中,如下:
/* Macro versions of offsets for the Old Timers! */
/*
 * VIRTIO_PCI_CAP_* / VIRTIO_PCI_NOTIFY_CAP_MULT: presumably byte offsets
 * within the virtio PCI capability structures, which are not shown here -
 * confirm against their definitions.
 */
#define VIRTIO_PCI_CAP_VNDR 0
#define VIRTIO_PCI_CAP_NEXT 1
#define VIRTIO_PCI_CAP_LEN 2
#define VIRTIO_PCI_CAP_CFG_TYPE 3
#define VIRTIO_PCI_CAP_BAR 4
#define VIRTIO_PCI_CAP_OFFSET 8
#define VIRTIO_PCI_CAP_LENGTH 12
#define VIRTIO_PCI_NOTIFY_CAP_MULT 16
/*
 * VIRTIO_PCI_COMMON_*: byte offsets matching, in order, the fields of
 * struct virtio_pci_common_cfg (device_feature_select at 0 through
 * queue_used_hi at 52).
 */
#define VIRTIO_PCI_COMMON_DFSELECT 0
#define VIRTIO_PCI_COMMON_DF 4
#define VIRTIO_PCI_COMMON_GFSELECT 8
#define VIRTIO_PCI_COMMON_GF 12
#define VIRTIO_PCI_COMMON_MSIX 16
#define VIRTIO_PCI_COMMON_NUMQ 18
#define VIRTIO_PCI_COMMON_STATUS 20
#define VIRTIO_PCI_COMMON_CFGGENERATION 21
#define VIRTIO_PCI_COMMON_Q_SELECT 22
#define VIRTIO_PCI_COMMON_Q_SIZE 24
#define VIRTIO_PCI_COMMON_Q_MSIX 26
#define VIRTIO_PCI_COMMON_Q_ENABLE 28
#define VIRTIO_PCI_COMMON_Q_NOFF 30
#define VIRTIO_PCI_COMMON_Q_DESCLO 32
#define VIRTIO_PCI_COMMON_Q_DESCHI 36
#define VIRTIO_PCI_COMMON_Q_AVAILLO 40
#define VIRTIO_PCI_COMMON_Q_AVAILHI 44
#define VIRTIO_PCI_COMMON_Q_USEDLO 48
#define VIRTIO_PCI_COMMON_Q_USEDHI 52
/*
 * NOTE(review): offsets 56 and 58 lie past the last field of
 * struct virtio_pci_common_cfg; presumably they belong to an extended
 * layout (notification data / queue reset) - confirm.
 */
#define VIRTIO_PCI_COMMON_Q_NDATA 56
#define VIRTIO_PCI_COMMON_Q_RESET 58
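对照上边的struct virtio_pci_common_cfg的定义,一目了然。(需要注意,最后两个宏VIRTIO_PCI_COMMON_Q_NDATA和VIRTIO_PCI_COMMON_Q_RESET在该结构体中没有对应的字段,它们对应的queue_notify_data和queue_reset字段定义在其扩展结构struct virtio_pci_modern_common_cfg中。)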
/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
/*
 * Layout of the common configuration region. The first group of fields
 * describes the whole device; the second group describes one virtqueue
 * at a time, chosen by first writing its index to queue_select.
 * Multi-byte fields are declared with little-endian types (__le16/__le32).
 */
struct virtio_pci_common_cfg {
/* About the whole device. */
__le32 device_feature_select; /* read-write */
__le32 device_feature; /* read-only */
__le32 guest_feature_select; /* read-write */
__le32 guest_feature; /* read-write */
__le16 msix_config; /* read-write */
__le16 num_queues; /* read-only */
__u8 device_status; /* read-write */
__u8 config_generation; /* read-only */
/* About a specific virtqueue. */
__le16 queue_select; /* read-write */
__le16 queue_size; /* read-write, power of 2. */
__le16 queue_msix_vector; /* read-write */
__le16 queue_enable; /* read-write */
__le16 queue_notify_off; /* read-only */
__le32 queue_desc_lo; /* read-write */
__le32 queue_desc_hi; /* read-write */
__le32 queue_avail_lo; /* read-write */
__le32 queue_avail_hi; /* read-write */
__le32 queue_used_lo; /* read-write */
__le32 queue_used_hi; /* read-write */
};
这里特别说明以上内容,是因为这是前不久(2024年春节后)笔者参加地平线面试时被问到的一个问题:
回到setup_vq函数来,setup_vq函数的余下部分,将在下一回中进行解析。