vhost: Release worker mutex during flushes

In the next patches where the worker can be killed while in use, we need to be able to take the worker mutex and kill queued works for new IO and flushes, and set some new flags to prevent new __vhost_vq_attach_worker calls from swapping in/out killed workers. If we are holding the worker mutex during a flush and the flush's work is still in the queue, the worker code that will handle the SIGKILL cleanup won't be able to take the mutex and perform its cleanup. So this patch has us drop the worker mutex while waiting for the flush to complete. Signed-off-by: Mike Christie <michael.christie@oracle.com> Message-Id: <20240316004707.45557-8-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

vhost: Release worker mutex during flushes
In the next patches where the worker can be killed while in use, we need to be able to take the worker mutex and kill queued works for new IO and flushes, and set some new flags to prevent new __vhost_vq_attach_worker calls from swapping in/out killed workers. If we are holding the worker mutex during a flush and the flush's work is still in the queue, the worker code that will handle the SIGKILL cleanup won't be able to take the mutex and perform its cleanup. So this patch has us drop the worker mutex while waiting for the flush to complete. Signed-off-by: Mike Christie <michael.christie@oracle.com> Message-Id: <20240316004707.45557-8-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
ba704ff4 · Mike Christie · Michael S. Tsirkin · 34cf9ba5 · ba704ff4
Commit ba704ff4 authored Mar 15, 2024 by Mike Christie Committed by Michael S. Tsirkin May 22, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 30 additions and 14 deletions

drivers/vhost/vhost.c drivers/vhost/vhost.c +30 -14

No files found.
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -264,21 +264,36 @@ bool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work)
 EXPORT_SYMBOL_GPL(vhost_vq_work_queue);

 /**
- * vhost_worker_flush - flush a worker
+ * __vhost_worker_flush - flush a worker
 * @worker: worker to flush
 *
- * This does not use RCU to protect the worker, so the device or worker
- * mutex must be held.
+ * The worker's mutex must be held.
 */
-static void vhost_worker_flush(struct vhost_worker *worker)
+static void __vhost_worker_flush(struct vhost_worker *worker)
 {
 	struct vhost_flush_struct flush;

+	if (!worker->attachment_cnt)
+		return;
+
 	init_completion(&flush.wait_event);
 	vhost_work_init(&flush.work, vhost_flush_work);

 	vhost_worker_queue(worker, &flush.work);
+	/*
+	 * Drop mutex in case our worker is killed and it needs to take the
+	 * mutex to force cleanup.
+	 */
+	mutex_unlock(&worker->mutex);
 	wait_for_completion(&flush.wait_event);
+	mutex_lock(&worker->mutex);
+}
+
+static void vhost_worker_flush(struct vhost_worker *worker)
+{
+	mutex_lock(&worker->mutex);
+	__vhost_worker_flush(worker);
+	mutex_unlock(&worker->mutex);
 }

 void vhost_dev_flush(struct vhost_dev *dev)
@@ -286,15 +301,8 @@ void vhost_dev_flush(struct vhost_dev *dev)
 	struct vhost_worker *worker;
 	unsigned long i;

-	xa_for_each(&dev->worker_xa, i, worker) {
-		mutex_lock(&worker->mutex);
-		if (!worker->attachment_cnt) {
-			mutex_unlock(&worker->mutex);
-			continue;
-		}
+	xa_for_each(&dev->worker_xa, i, worker)
 		vhost_worker_flush(worker);
-		mutex_unlock(&worker->mutex);
-	}
 }
 EXPORT_SYMBOL_GPL(vhost_dev_flush);

@@ -673,7 +681,6 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
 	 * device wide flushes which doesn't use RCU for execution.
 	 */
 	mutex_lock(&old_worker->mutex);
-	old_worker->attachment_cnt--;
 	/*
 	 * We don't want to call synchronize_rcu for every vq during setup
 	 * because it will slow down VM startup. If we haven't done
@@ -684,6 +691,8 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
 	mutex_lock(&vq->mutex);
 	if (!vhost_vq_get_backend(vq) && !vq->kick) {
 		mutex_unlock(&vq->mutex);
+
+		old_worker->attachment_cnt--;
 		mutex_unlock(&old_worker->mutex);
 		/*
 		 * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID.
@@ -699,7 +708,8 @@ static void __vhost_vq_attach_worker(struct vhost_virtqueue *vq,
 	/* Make sure new vq queue/flush/poll calls see the new worker */
 	synchronize_rcu();
 	/* Make sure whatever was queued gets run */
-	vhost_worker_flush(old_worker);
+	__vhost_worker_flush(old_worker);
+	old_worker->attachment_cnt--;
 	mutex_unlock(&old_worker->mutex);
 }

@@ -752,6 +762,12 @@ static int vhost_free_worker(struct vhost_dev *dev,
 		mutex_unlock(&worker->mutex);
 		return -EBUSY;
 	}
+	/*
+	 * A flush might have raced and snuck in before attachment_cnt was set
+	 * to zero. Make sure flushes are flushed from the queue before
+	 * freeing.
+	 */
+	__vhost_worker_flush(worker);
 	mutex_unlock(&worker->mutex);

 	vhost_worker_destroy(dev, worker);