Commit 529395d2 authored by Peter-Jan Gootzen, committed by Miklos Szeredi

virtio-fs: add multi-queue support

This commit creates a multi-queue mapping at device bring-up.
The driver first attempts to use the existing MSI-X interrupt
affinities (previously disabled), and if not present, will distribute
the request queues evenly over the CPUs.
If the latter fails as well, all CPUs are mapped to request queue zero.

When a request is handed from FUSE to the virtio-fs device driver, the
driver will use the current CPU to index into the multi-queue mapping
and determine the optimal request queue to use.

We measured the performance of this patch with the fio benchmarking
tool; increasing the number of queues results in a significant speedup
for both read and write operations, demonstrating the effectiveness
of multi-queue support.

Host:
  - Dell PowerEdge R760
  - CPU: Intel(R) Xeon(R) Gold 6438M, 128 cores
  - VM: KVM with 32 cores
Virtio-fs device:
  - BlueField-3 DPU
  - CPU: ARM Cortex-A78AE, 16 cores
  - One thread per queue, each busy polling on one request queue
  - Each queue is 1024 descriptors deep
Workload:
  - fio, sequential read or write, ioengine=libaio, numjobs=32,
    4GiB file per job, iodepth=8, bs=256KiB, runtime=30s
Performance Results:
+===========================+==========+===========+
|     Number of queues      | Fio read | Fio write |
+===========================+==========+===========+
| 1 request queue (GiB/s)   | 6.1      | 4.6       |
+---------------------------+----------+-----------+
| 8 request queues (GiB/s)  | 25.8     | 10.3      |
+---------------------------+----------+-----------+
| 16 request queues (GiB/s) | 30.9     | 19.5      |
+---------------------------+----------+-----------+
| 32 request queues (GiB/s) | 33.2     | 22.6      |
+---------------------------+----------+-----------+
| Speedup                   | 5.5x     | 5x        |
+---------------------------+----------+-----------+
Signed-off-by: Peter-Jan Gootzen <pgootzen@nvidia.com>
Signed-off-by: Yoray Zack <yorayz@nvidia.com>
Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
parent 103c2de1
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/dax.h> #include <linux/dax.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/group_cpus.h>
#include <linux/pfn_t.h> #include <linux/pfn_t.h>
#include <linux/memremap.h> #include <linux/memremap.h>
#include <linux/module.h> #include <linux/module.h>
...@@ -67,6 +69,8 @@ struct virtio_fs { ...@@ -67,6 +69,8 @@ struct virtio_fs {
unsigned int num_request_queues; /* number of request queues */ unsigned int num_request_queues; /* number of request queues */
struct dax_device *dax_dev; struct dax_device *dax_dev;
unsigned int *mq_map; /* index = cpu id, value = request vq id */
/* DAX memory window where file contents are mapped */ /* DAX memory window where file contents are mapped */
void *window_kaddr; void *window_kaddr;
phys_addr_t window_phys_addr; phys_addr_t window_phys_addr;
...@@ -185,6 +189,7 @@ static void virtio_fs_ktype_release(struct kobject *kobj) ...@@ -185,6 +189,7 @@ static void virtio_fs_ktype_release(struct kobject *kobj)
{ {
struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj); struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj);
kfree(vfs->mq_map);
kfree(vfs->vqs); kfree(vfs->vqs);
kfree(vfs); kfree(vfs);
} }
...@@ -706,6 +711,44 @@ static void virtio_fs_requests_done_work(struct work_struct *work) ...@@ -706,6 +711,44 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
} }
} }
static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs)
{
const struct cpumask *mask, *masks;
unsigned int q, cpu;
/* First attempt to map using existing transport layer affinities
* e.g. PCIe MSI-X
*/
if (!vdev->config->get_vq_affinity)
goto fallback;
for (q = 0; q < fs->num_request_queues; q++) {
mask = vdev->config->get_vq_affinity(vdev, VQ_REQUEST + q);
if (!mask)
goto fallback;
for_each_cpu(cpu, mask)
fs->mq_map[cpu] = q;
}
return;
fallback:
/* Attempt to map evenly in groups over the CPUs */
masks = group_cpus_evenly(fs->num_request_queues);
/* If even this fails we default to all CPUs use queue zero */
if (!masks) {
for_each_possible_cpu(cpu)
fs->mq_map[cpu] = 0;
return;
}
for (q = 0; q < fs->num_request_queues; q++) {
for_each_cpu(cpu, &masks[q])
fs->mq_map[cpu] = q;
}
kfree(masks);
}
/* Virtqueue interrupt handler */ /* Virtqueue interrupt handler */
static void virtio_fs_vq_done(struct virtqueue *vq) static void virtio_fs_vq_done(struct virtqueue *vq)
{ {
...@@ -742,6 +785,11 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, ...@@ -742,6 +785,11 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
{ {
struct virtqueue **vqs; struct virtqueue **vqs;
vq_callback_t **callbacks; vq_callback_t **callbacks;
/* Specify pre_vectors to ensure that the queues before the
* request queues (e.g. hiprio) don't claim any of the CPUs in
* the multi-queue mapping and interrupt affinities
*/
struct irq_affinity desc = { .pre_vectors = VQ_REQUEST };
const char **names; const char **names;
unsigned int i; unsigned int i;
int ret = 0; int ret = 0;
...@@ -763,7 +811,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, ...@@ -763,7 +811,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]), callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
GFP_KERNEL); GFP_KERNEL);
names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL); names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
if (!vqs || !callbacks || !names) { fs->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*fs->mq_map), GFP_KERNEL,
dev_to_node(&vdev->dev));
if (!vqs || !callbacks || !names || !fs->mq_map) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
...@@ -783,7 +833,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, ...@@ -783,7 +833,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
names[i] = fs->vqs[i].name; names[i] = fs->vqs[i].name;
} }
ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL); ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, &desc);
if (ret < 0) if (ret < 0)
goto out; goto out;
...@@ -795,8 +845,10 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev, ...@@ -795,8 +845,10 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
kfree(names); kfree(names);
kfree(callbacks); kfree(callbacks);
kfree(vqs); kfree(vqs);
if (ret) if (ret) {
kfree(fs->vqs); kfree(fs->vqs);
kfree(fs->mq_map);
}
return ret; return ret;
} }
...@@ -942,7 +994,7 @@ static int virtio_fs_probe(struct virtio_device *vdev) ...@@ -942,7 +994,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
if (ret < 0) if (ret < 0)
goto out; goto out;
/* TODO vq affinity */ virtio_fs_map_queues(vdev, fs);
ret = virtio_fs_setup_dax(vdev, fs); ret = virtio_fs_setup_dax(vdev, fs);
if (ret < 0) if (ret < 0)
...@@ -1291,7 +1343,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, ...@@ -1291,7 +1343,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock) __releases(fiq->lock)
{ {
unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ unsigned int queue_id;
struct virtio_fs *fs; struct virtio_fs *fs;
struct fuse_req *req; struct fuse_req *req;
struct virtio_fs_vq *fsvq; struct virtio_fs_vq *fsvq;
...@@ -1305,11 +1357,13 @@ __releases(fiq->lock) ...@@ -1305,11 +1357,13 @@ __releases(fiq->lock)
spin_unlock(&fiq->lock); spin_unlock(&fiq->lock);
fs = fiq->priv; fs = fiq->priv;
queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()];
pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u queue_id %u\n",
__func__, req->in.h.opcode, req->in.h.unique, __func__, req->in.h.opcode, req->in.h.unique,
req->in.h.nodeid, req->in.h.len, req->in.h.nodeid, req->in.h.len,
fuse_len_args(req->args->out_numargs, req->args->out_args)); fuse_len_args(req->args->out_numargs, req->args->out_args),
queue_id);
fsvq = &fs->vqs[queue_id]; fsvq = &fs->vqs[queue_id];
ret = virtio_fs_enqueue_req(fsvq, req, false); ret = virtio_fs_enqueue_req(fsvq, req, false);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment