xen/pvcallsback: use lateeoi irq binding

In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving pvcallsfront use the lateeoi irq binding for pvcallsback and unmask the event channel only after handling all write requests, which are the ones coming in via an irq. This requires modifying the logic a little bit to not require an event for each write request, but to keep the ioworker running until no further data is found on the ring page to be processed. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall <julien@xen.org> Signed-off-by: Juergen Gross <jgross@suse.com> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> Reviewed-by: Wei Liu <wl@xen.org>

xen/pvcallsback: use lateeoi irq binding
In order to reduce the chance for the system becoming unresponsive due to event storms triggered by a misbehaving pvcallsfront use the lateeoi irq binding for pvcallsback and unmask the event channel only after handling all write requests, which are the ones coming in via an irq. This requires modifying the logic a little bit to not require an event for each write request, but to keep the ioworker running until no further data is found on the ring page to be processed. This is part of XSA-332. Cc: stable@vger.kernel.org Reported-by: Julien Grall <julien@xen.org> Signed-off-by: Juergen Gross <jgross@suse.com> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> Reviewed-by: Wei Liu <wl@xen.org>
c8d647a3 · Juergen Gross · 86991b6e · c8d647a3
Commit c8d647a3 authored Sep 07, 2020 by Juergen Gross
Show whitespace changes
Inline Side-by-side

Showing with 46 additions and 30 deletions

drivers/xen/pvcalls-back.c drivers/xen/pvcalls-back.c +46 -30

No files found.
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -66,6 +66,7 @@ struct sock_mapping {
 	atomic_t write;
 	atomic_t io;
 	atomic_t release;
+	atomic_t eoi;
 	void (*saved_data_ready)(struct sock *sk);
 	struct pvcalls_ioworker ioworker;
 };
@@ -87,7 +88,7 @@ static int pvcalls_back_release_active(struct xenbus_device *dev,
 				       struct pvcalls_fedata *fedata,
 				       struct sock_mapping *map);

-static void pvcalls_conn_back_read(void *opaque)
+static bool pvcalls_conn_back_read(void *opaque)
 {
 	struct sock_mapping *map = (struct sock_mapping *)opaque;
 	struct msghdr msg;
@@ -107,17 +108,17 @@ static void pvcalls_conn_back_read(void *opaque)
 	virt_mb();

 	if (error)
-		return;
+		return false;

 	size = pvcalls_queued(prod, cons, array_size);
 	if (size >= array_size)
-		return;
+		return false;
 	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
 	if (skb_queue_empty(&map->sock->sk->sk_receive_queue)) {
 		atomic_set(&map->read, 0);
 		spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock,
 				flags);
-		return;
+		return true;
 	}
 	spin_unlock_irqrestore(&map->sock->sk->sk_receive_queue.lock, flags);
 	wanted = array_size - size;
@@ -141,7 +142,7 @@ static void pvcalls_conn_back_read(void *opaque)
 	ret = inet_recvmsg(map->sock, &msg, wanted, MSG_DONTWAIT);
 	WARN_ON(ret > wanted);
 	if (ret == -EAGAIN) /* shouldn't happen */
-		return;
+		return true;
 	if (!ret)
 		ret = -ENOTCONN;
 	spin_lock_irqsave(&map->sock->sk->sk_receive_queue.lock, flags);
@@ -160,10 +161,10 @@ static void pvcalls_conn_back_read(void *opaque)
 	virt_wmb();
 	notify_remote_via_irq(map->irq);

-	return;
+	return true;
 }

-static void pvcalls_conn_back_write(struct sock_mapping *map)
+static bool pvcalls_conn_back_write(struct sock_mapping *map)
 {
 	struct pvcalls_data_intf *intf = map->ring;
 	struct pvcalls_data *data = &map->data;
@@ -180,7 +181,7 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
 	array_size = XEN_FLEX_RING_SIZE(map->ring_order);
 	size = pvcalls_queued(prod, cons, array_size);
 	if (size == 0)
-		return;
+		return false;

 	memset(&msg, 0, sizeof(msg));
 	msg.msg_flags |= MSG_DONTWAIT;
@@ -198,12 +199,11 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)

 	atomic_set(&map->write, 0);
 	ret = inet_sendmsg(map->sock, &msg, size);
-	if (ret == -EAGAIN || (ret >= 0 && ret < size)) {
+	if (ret == -EAGAIN) {
 		atomic_inc(&map->write);
 		atomic_inc(&map->io);
+		return true;
 	}
-	if (ret == -EAGAIN)
-		return;

 	/* write the data, then update the indexes */
 	virt_wmb();
@@ -216,9 +216,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
 	}
 	/* update the indexes, then notify the other end */
 	virt_wmb();
-	if (prod != cons + ret)
+	if (prod != cons + ret) {
 		atomic_inc(&map->write);
+		atomic_inc(&map->io);
+	}
 	notify_remote_via_irq(map->irq);
+
+	return true;
 }

 static void pvcalls_back_ioworker(struct work_struct *work)
@@ -227,6 +231,7 @@ static void pvcalls_back_ioworker(struct work_struct *work)
 		struct pvcalls_ioworker, register_work);
 	struct sock_mapping *map = container_of(ioworker, struct sock_mapping,
 		ioworker);
+	unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;

 	while (atomic_read(&map->io) > 0) {
 		if (atomic_read(&map->release) > 0) {
@@ -234,10 +239,18 @@ static void pvcalls_back_ioworker(struct work_struct *work)
 			return;
 		}

-		if (atomic_read(&map->read) > 0)
-			pvcalls_conn_back_read(map);
-		if (atomic_read(&map->write) > 0)
-			pvcalls_conn_back_write(map);
+		if (atomic_read(&map->read) > 0 &&
+		    pvcalls_conn_back_read(map))
+			eoi_flags = 0;
+		if (atomic_read(&map->write) > 0 &&
+		    pvcalls_conn_back_write(map))
+			eoi_flags = 0;
+
+		if (atomic_read(&map->eoi) > 0 && !atomic_read(&map->write)) {
+			atomic_set(&map->eoi, 0);
+			xen_irq_lateeoi(map->irq, eoi_flags);
+			eoi_flags = XEN_EOI_FLAG_SPURIOUS;
+		}

 		atomic_dec(&map->io);
 	}
@@ -334,12 +347,9 @@ static struct sock_mapping *pvcalls_new_active_socket(
 		goto out;
 	map->bytes = page;

-	ret = bind_interdomain_evtchn_to_irqhandler(fedata->dev->otherend_id,
-						    evtchn,
-						    pvcalls_back_conn_event,
-						    0,
-						    "pvcalls-backend",
-						    map);
+	ret = bind_interdomain_evtchn_to_irqhandler_lateeoi(
+			fedata->dev->otherend_id, evtchn,
+			pvcalls_back_conn_event, 0, "pvcalls-backend", map);
 	if (ret < 0)
 		goto out;
 	map->irq = ret;
@@ -873,15 +883,18 @@ static irqreturn_t pvcalls_back_event(int irq, void *dev_id)
 {
 	struct xenbus_device *dev = dev_id;
 	struct pvcalls_fedata *fedata = NULL;
+	unsigned int eoi_flags = XEN_EOI_FLAG_SPURIOUS;

-	if (dev == NULL)
-		return IRQ_HANDLED;
-
+	if (dev) {
 		fedata = dev_get_drvdata(&dev->dev);
-	if (fedata == NULL)
-		return IRQ_HANDLED;
-
+		if (fedata) {
 			pvcalls_back_work(fedata);
+			eoi_flags = 0;
+		}
+	}
+
+	xen_irq_lateeoi(irq, eoi_flags);
+
 	return IRQ_HANDLED;
 }

@@ -891,12 +904,15 @@ static irqreturn_t pvcalls_back_conn_event(int irq, void *sock_map)
 	struct pvcalls_ioworker *iow;

 	if (map == NULL || map->sock == NULL || map->sock->sk == NULL ||
-		map->sock->sk->sk_user_data != map)
+		map->sock->sk->sk_user_data != map) {
+		xen_irq_lateeoi(irq, 0);
 		return IRQ_HANDLED;
+	}

 	iow = &map->ioworker;

 	atomic_inc(&map->write);
+	atomic_inc(&map->eoi);
 	atomic_inc(&map->io);
 	queue_work(iow->wq, &iow->register_work);

@@ -932,7 +948,7 @@ static int backend_connect(struct xenbus_device *dev)
 		goto error;
 	}

-	err = bind_interdomain_evtchn_to_irq(dev->otherend_id, evtchn);
+	err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn);
 	if (err < 0)
 		goto error;
 	fedata->irq = err;