Commit c2711441 authored by Juergen Gross

xen/pciback: use lateeoi irq binding

In order to reduce the chance of the system becoming unresponsive due
to event storms triggered by a misbehaving pcifront, use the lateeoi irq
binding for pciback and unmask the event channel only just before
leaving the event handling function.

Restructure the handling to support that scheme. Basically an event can
come in for two reasons: either a normal request for a pciback action,
which is handled in a worker, or the guest signalling that it has
finished an AER request which was issued by pciback.

When an AER request is issued to the guest while a normal pciback action
is currently active, issue an EOI early in order to be able to receive
another event when the AER request has been finished by the guest.

Let the worker processing the normal requests run until no further
request is pending, instead of starting a new worker in that case.
Issue the EOI only just before leaving the worker.

This scheme allows dropping the calls to the generic function
xen_pcibk_test_and_schedule_op() after processing of any request, as
the handling of both request types is now separated more cleanly.

This is part of XSA-332.

Cc: stable@vger.kernel.org
Reported-by: Julien Grall <julien@xen.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Wei Liu <wl@xen.org>
parent c8d647a3
...@@ -734,10 +734,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, ...@@ -734,10 +734,17 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
wmb(); wmb();
notify_remote_via_irq(pdev->evtchn_irq); notify_remote_via_irq(pdev->evtchn_irq);
/* Enable IRQ to signal "request done". */
xen_pcibk_lateeoi(pdev, 0);
ret = wait_event_timeout(xen_pcibk_aer_wait_queue, ret = wait_event_timeout(xen_pcibk_aer_wait_queue,
!(test_bit(_XEN_PCIB_active, (unsigned long *) !(test_bit(_XEN_PCIB_active, (unsigned long *)
&sh_info->flags)), 300*HZ); &sh_info->flags)), 300*HZ);
/* Enable IRQ for pcifront request if not already active. */
if (!test_bit(_PDEVF_op_active, &pdev->flags))
xen_pcibk_lateeoi(pdev, 0);
if (!ret) { if (!ret) {
if (test_bit(_XEN_PCIB_active, if (test_bit(_XEN_PCIB_active,
(unsigned long *)&sh_info->flags)) { (unsigned long *)&sh_info->flags)) {
...@@ -751,12 +758,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, ...@@ -751,12 +758,6 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev,
} }
clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags); clear_bit(_PCIB_op_pending, (unsigned long *)&pdev->flags);
if (test_bit(_XEN_PCIF_active,
(unsigned long *)&sh_info->flags)) {
dev_dbg(&psdev->dev->dev, "schedule pci_conf service\n");
xen_pcibk_test_and_schedule_op(psdev->pdev);
}
res = (pci_ers_result_t)aer_op->err; res = (pci_ers_result_t)aer_op->err;
return res; return res;
} }
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <xen/events.h>
#include <xen/interface/io/pciif.h> #include <xen/interface/io/pciif.h>
#define DRV_NAME "xen-pciback" #define DRV_NAME "xen-pciback"
...@@ -27,6 +28,8 @@ struct pci_dev_entry { ...@@ -27,6 +28,8 @@ struct pci_dev_entry {
#define PDEVF_op_active (1<<(_PDEVF_op_active)) #define PDEVF_op_active (1<<(_PDEVF_op_active))
#define _PCIB_op_pending (1) #define _PCIB_op_pending (1)
#define PCIB_op_pending (1<<(_PCIB_op_pending)) #define PCIB_op_pending (1<<(_PCIB_op_pending))
#define _EOI_pending (2)
#define EOI_pending (1<<(_EOI_pending))
struct xen_pcibk_device { struct xen_pcibk_device {
void *pci_dev_data; void *pci_dev_data;
...@@ -183,10 +186,15 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) ...@@ -183,10 +186,15 @@ static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
void xen_pcibk_do_op(struct work_struct *data); void xen_pcibk_do_op(struct work_struct *data);
/*
 * Send the deferred EOI for pdev's event channel IRQ, if one is owed.
 *
 * @pdev:     device whose flags word and evtchn_irq are used.
 * @eoi_flag: value passed through unchanged to xen_irq_lateeoi().
 *
 * The _EOI_pending bit in pdev->flags is tested and cleared in one step;
 * xen_irq_lateeoi() is called only when the bit was set, so calling this
 * helper again without the bit having been set in between does nothing.
 */
static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev,
				     unsigned int eoi_flag)
{
	/* No EOI outstanding: nothing to signal. */
	if (!test_and_clear_bit(_EOI_pending, &pdev->flags))
		return;

	xen_irq_lateeoi(pdev->evtchn_irq, eoi_flag);
}
int xen_pcibk_xenbus_register(void); int xen_pcibk_xenbus_register(void);
void xen_pcibk_xenbus_unregister(void); void xen_pcibk_xenbus_unregister(void);
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
#endif #endif
/* Handles shared IRQs that can to device domain and control domain. */ /* Handles shared IRQs that can to device domain and control domain. */
......
...@@ -276,26 +276,41 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, ...@@ -276,26 +276,41 @@ int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
return 0; return 0;
} }
#endif #endif
static inline bool xen_pcibk_test_op_pending(struct xen_pcibk_device *pdev)
{
return test_bit(_XEN_PCIF_active,
(unsigned long *)&pdev->sh_info->flags) &&
!test_and_set_bit(_PDEVF_op_active, &pdev->flags);
}
/* /*
* Now the same evtchn is used for both pcifront conf_read_write request * Now the same evtchn is used for both pcifront conf_read_write request
* as well as pcie aer front end ack. We use a new work_queue to schedule * as well as pcie aer front end ack. We use a new work_queue to schedule
* xen_pcibk conf_read_write service for avoiding confict with aer_core * xen_pcibk conf_read_write service for avoiding confict with aer_core
* do_recovery job which also use the system default work_queue * do_recovery job which also use the system default work_queue
*/ */
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) static void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{ {
bool eoi = true;
/* Check that frontend is requesting an operation and that we are not /* Check that frontend is requesting an operation and that we are not
* already processing a request */ * already processing a request */
if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) if (xen_pcibk_test_op_pending(pdev)) {
&& !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
schedule_work(&pdev->op_work); schedule_work(&pdev->op_work);
eoi = false;
} }
/*_XEN_PCIB_active should have been cleared by pcifront. And also make /*_XEN_PCIB_active should have been cleared by pcifront. And also make
sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
&& test_bit(_PCIB_op_pending, &pdev->flags)) { && test_bit(_PCIB_op_pending, &pdev->flags)) {
wake_up(&xen_pcibk_aer_wait_queue); wake_up(&xen_pcibk_aer_wait_queue);
eoi = false;
} }
/* EOI if there was nothing to do. */
if (eoi)
xen_pcibk_lateeoi(pdev, XEN_EOI_FLAG_SPURIOUS);
} }
/* Performing the configuration space reads/writes must not be done in atomic /* Performing the configuration space reads/writes must not be done in atomic
...@@ -303,10 +318,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) ...@@ -303,10 +318,8 @@ void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
* use of semaphores). This function is intended to be called from a work * use of semaphores). This function is intended to be called from a work
* queue in process context taking a struct xen_pcibk_device as a parameter */ * queue in process context taking a struct xen_pcibk_device as a parameter */
void xen_pcibk_do_op(struct work_struct *data) static void xen_pcibk_do_one_op(struct xen_pcibk_device *pdev)
{ {
struct xen_pcibk_device *pdev =
container_of(data, struct xen_pcibk_device, op_work);
struct pci_dev *dev; struct pci_dev *dev;
struct xen_pcibk_dev_data *dev_data = NULL; struct xen_pcibk_dev_data *dev_data = NULL;
struct xen_pci_op *op = &pdev->op; struct xen_pci_op *op = &pdev->op;
...@@ -379,16 +392,31 @@ void xen_pcibk_do_op(struct work_struct *data) ...@@ -379,16 +392,31 @@ void xen_pcibk_do_op(struct work_struct *data)
smp_mb__before_atomic(); /* /after/ clearing PCIF_active */ smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
clear_bit(_PDEVF_op_active, &pdev->flags); clear_bit(_PDEVF_op_active, &pdev->flags);
smp_mb__after_atomic(); /* /before/ final check for work */ smp_mb__after_atomic(); /* /before/ final check for work */
}
/* Check to see if the driver domain tried to start another request in void xen_pcibk_do_op(struct work_struct *data)
* between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. {
*/ struct xen_pcibk_device *pdev =
xen_pcibk_test_and_schedule_op(pdev); container_of(data, struct xen_pcibk_device, op_work);
do {
xen_pcibk_do_one_op(pdev);
} while (xen_pcibk_test_op_pending(pdev));
xen_pcibk_lateeoi(pdev, 0);
} }
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
{ {
struct xen_pcibk_device *pdev = dev_id; struct xen_pcibk_device *pdev = dev_id;
bool eoi;
/* IRQs might come in before pdev->evtchn_irq is written. */
if (unlikely(pdev->evtchn_irq != irq))
pdev->evtchn_irq = irq;
eoi = test_and_set_bit(_EOI_pending, &pdev->flags);
WARN(eoi, "IRQ while EOI pending\n");
xen_pcibk_test_and_schedule_op(pdev); xen_pcibk_test_and_schedule_op(pdev);
......
...@@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, ...@@ -123,7 +123,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
pdev->sh_info = vaddr; pdev->sh_info = vaddr;
err = bind_interdomain_evtchn_to_irqhandler( err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
0, DRV_NAME, pdev); 0, DRV_NAME, pdev);
if (err < 0) { if (err < 0) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment