Commit f4f30031 authored by Dean Luick's avatar Dean Luick Committed by Greg Kroah-Hartman

staging/rdma/hfi1: Thread the receive interrupt.

When under heavy load, the receive interrupt handler can run too long with IRQs
disabled.  Add a mixed-mode threading scheme.  Initially process packets in the
handler for quick responses (latency).  If there are too many packets to
process move to a thread to continue (bandwidth).
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent b77d713a
...@@ -4424,7 +4424,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) ...@@ -4424,7 +4424,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
rcd = dd->rcd[source]; rcd = dd->rcd[source];
if (rcd) { if (rcd) {
if (source < dd->first_user_ctxt) if (source < dd->first_user_ctxt)
rcd->do_interrupt(rcd); rcd->do_interrupt(rcd, 0);
else else
handle_user_interrupt(rcd); handle_user_interrupt(rcd);
return; /* OK */ return; /* OK */
...@@ -4590,23 +4590,106 @@ static irqreturn_t sdma_interrupt(int irq, void *data) ...@@ -4590,23 +4590,106 @@ static irqreturn_t sdma_interrupt(int irq, void *data)
} }
/*
 * Clear the receive interrupt, forcing the write and making sure
 * we have data from the chip, pushing everything in front of it
 * back to the host.
 */
static inline void clear_recv_intr(struct hfi1_ctxtdata *rcd)
{
	struct hfi1_devdata *dd = rcd->dd;
	u32 addr = CCE_INT_CLEAR + (8 * rcd->ireg);
	mmiowb(); /* make sure everything before is written */
	write_csr(dd, addr, rcd->imask);
	/* force the above write on the chip and get a value back */
	(void)read_csr(dd, addr);
}
/* force the receive interrupt */
static inline void force_recv_intr(struct hfi1_ctxtdata *rcd)
{
write_csr(rcd->dd, CCE_INT_FORCE + (8 * rcd->ireg), rcd->imask);
}
/* return non-zero if a packet is present */
static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
{
	if (HFI1_CAP_IS_KSET(DMA_RTAIL))
		/* DMA rtail: a packet is present when head has not
		 * caught up to the DMA'ed tail */
		return rcd->head != get_rcvhdrtail(rcd);

	/* no DMA rtail: a packet is present when the expected
	 * sequence number appears in the current RHF */
	return rcd->seq_cnt == rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)));
}
/*
* Receive packet IRQ handler. This routine expects to be on its own IRQ.
* This routine will try to handle packets immediately (latency), but if
* it finds too many, it will invoke the thread handler (bandwitdh). The
* chip receive interupt is *not* cleared down until this or the thread (if
* invoked) is finished. The intent is to avoid extra interrupts while we
* are processing packets anyway.
*/ */
static irqreturn_t receive_context_interrupt(int irq, void *data) static irqreturn_t receive_context_interrupt(int irq, void *data)
{ {
struct hfi1_ctxtdata *rcd = data; struct hfi1_ctxtdata *rcd = data;
struct hfi1_devdata *dd = rcd->dd; struct hfi1_devdata *dd = rcd->dd;
int disposition;
int present;
trace_hfi1_receive_interrupt(dd, rcd->ctxt); trace_hfi1_receive_interrupt(dd, rcd->ctxt);
this_cpu_inc(*dd->int_counter); this_cpu_inc(*dd->int_counter);
/* clear the interrupt */ /* receive interrupt remains blocked while processing packets */
write_csr(rcd->dd, CCE_INT_CLEAR + (8*rcd->ireg), rcd->imask); disposition = rcd->do_interrupt(rcd, 0);
/* handle the interrupt */ /*
rcd->do_interrupt(rcd); * Too many packets were seen while processing packets in this
* IRQ handler. Invoke the handler thread. The receive interrupt
* remains blocked.
*/
if (disposition == RCV_PKT_LIMIT)
return IRQ_WAKE_THREAD;
/*
* The packet processor detected no more packets. Clear the receive
* interrupt and recheck for a packet packet that may have arrived
* after the previous check and interrupt clear. If a packet arrived,
* force another interrupt.
*/
clear_recv_intr(rcd);
present = check_packet_present(rcd);
if (present)
force_recv_intr(rcd);
return IRQ_HANDLED;
}
/*
 * Receive packet thread handler.  This expects to be invoked with the
 * receive interrupt still blocked.
 */
static irqreturn_t receive_context_thread(int irq, void *data)
{
	struct hfi1_ctxtdata *rcd = data;
	int present;

	/* receive interrupt is still blocked from the IRQ handler */
	(void)rcd->do_interrupt(rcd, 1);

	/*
	 * The packet processor will only return if it detected no more
	 * packets.  Hold IRQs here so we can safely clear the interrupt and
	 * recheck for a packet that may have arrived after the previous
	 * check and the interrupt clear.  If a packet arrived, force another
	 * interrupt.
	 */
	local_irq_disable();
	clear_recv_intr(rcd);
	present = check_packet_present(rcd);
	if (present)
		force_recv_intr(rcd);
	local_irq_enable();

	return IRQ_HANDLED;
}
...@@ -8858,6 +8941,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) ...@@ -8858,6 +8941,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
struct hfi1_msix_entry *me = &dd->msix_entries[i]; struct hfi1_msix_entry *me = &dd->msix_entries[i];
const char *err_info; const char *err_info;
irq_handler_t handler; irq_handler_t handler;
irq_handler_t thread = NULL;
void *arg; void *arg;
int idx; int idx;
struct hfi1_ctxtdata *rcd = NULL; struct hfi1_ctxtdata *rcd = NULL;
...@@ -8894,6 +8978,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) ...@@ -8894,6 +8978,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
rcd->imask = ((u64)1) << rcd->imask = ((u64)1) <<
((IS_RCVAVAIL_START+idx) % 64); ((IS_RCVAVAIL_START+idx) % 64);
handler = receive_context_interrupt; handler = receive_context_interrupt;
thread = receive_context_thread;
arg = rcd; arg = rcd;
snprintf(me->name, sizeof(me->name), snprintf(me->name, sizeof(me->name),
DRIVER_NAME"_%d kctxt%d", dd->unit, idx); DRIVER_NAME"_%d kctxt%d", dd->unit, idx);
...@@ -8912,7 +8997,8 @@ static int request_msix_irqs(struct hfi1_devdata *dd) ...@@ -8912,7 +8997,8 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
/* make sure the name is terminated */ /* make sure the name is terminated */
me->name[sizeof(me->name)-1] = 0; me->name[sizeof(me->name)-1] = 0;
ret = request_irq(me->msix.vector, handler, 0, me->name, arg); ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
me->name, arg);
if (ret) { if (ret) {
dd_dev_err(dd, dd_dev_err(dd,
"unable to allocate %s interrupt, vector %d, index %d, err %d\n", "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
......
...@@ -427,8 +427,7 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd, ...@@ -427,8 +427,7 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd,
packet->rcd = rcd; packet->rcd = rcd;
packet->updegr = 0; packet->updegr = 0;
packet->etail = -1; packet->etail = -1;
packet->rhf_addr = (__le32 *) rcd->rcvhdrq + rcd->head + packet->rhf_addr = get_rhf_addr(rcd);
rcd->dd->rhf_offset;
packet->rhf = rhf_to_cpu(packet->rhf_addr); packet->rhf = rhf_to_cpu(packet->rhf_addr);
packet->rhqoff = rcd->head; packet->rhqoff = rcd->head;
packet->numpkt = 0; packet->numpkt = 0;
...@@ -619,10 +618,7 @@ static void prescan_rxq(struct hfi1_packet *packet) ...@@ -619,10 +618,7 @@ static void prescan_rxq(struct hfi1_packet *packet)
} }
#endif /* CONFIG_PRESCAN_RXQ */ #endif /* CONFIG_PRESCAN_RXQ */
#define RCV_PKT_OK 0x0 static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
#define RCV_PKT_MAX 0x1
static inline int process_rcv_packet(struct hfi1_packet *packet)
{ {
int ret = RCV_PKT_OK; int ret = RCV_PKT_OK;
...@@ -664,9 +660,13 @@ static inline int process_rcv_packet(struct hfi1_packet *packet) ...@@ -664,9 +660,13 @@ static inline int process_rcv_packet(struct hfi1_packet *packet)
if (packet->rhqoff >= packet->maxcnt) if (packet->rhqoff >= packet->maxcnt)
packet->rhqoff = 0; packet->rhqoff = 0;
if (packet->numpkt == MAX_PKT_RECV) { if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
ret = RCV_PKT_MAX; if (thread) {
this_cpu_inc(*packet->rcd->dd->rcv_limit); cond_resched();
} else {
ret = RCV_PKT_LIMIT;
this_cpu_inc(*packet->rcd->dd->rcv_limit);
}
} }
packet->rhf_addr = (__le32 *) packet->rcd->rcvhdrq + packet->rhqoff + packet->rhf_addr = (__le32 *) packet->rcd->rcvhdrq + packet->rhqoff +
...@@ -743,57 +743,63 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet) ...@@ -743,57 +743,63 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet)
/* /*
* Handle receive interrupts when using the no dma rtail option. * Handle receive interrupts when using the no dma rtail option.
*/ */
void handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd) int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{ {
u32 seq; u32 seq;
int last = 0; int last = RCV_PKT_OK;
struct hfi1_packet packet; struct hfi1_packet packet;
init_packet(rcd, &packet); init_packet(rcd, &packet);
seq = rhf_rcv_seq(packet.rhf); seq = rhf_rcv_seq(packet.rhf);
if (seq != rcd->seq_cnt) if (seq != rcd->seq_cnt) {
last = RCV_PKT_DONE;
goto bail; goto bail;
}
prescan_rxq(&packet); prescan_rxq(&packet);
while (!last) { while (last == RCV_PKT_OK) {
last = process_rcv_packet(&packet); last = process_rcv_packet(&packet, thread);
seq = rhf_rcv_seq(packet.rhf); seq = rhf_rcv_seq(packet.rhf);
if (++rcd->seq_cnt > 13) if (++rcd->seq_cnt > 13)
rcd->seq_cnt = 1; rcd->seq_cnt = 1;
if (seq != rcd->seq_cnt) if (seq != rcd->seq_cnt)
last = 1; last = RCV_PKT_DONE;
process_rcv_update(last, &packet); process_rcv_update(last, &packet);
} }
process_rcv_qp_work(&packet); process_rcv_qp_work(&packet);
bail: bail:
finish_packet(&packet); finish_packet(&packet);
return last;
} }
void handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd) int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
{ {
u32 hdrqtail; u32 hdrqtail;
int last = 0; int last = RCV_PKT_OK;
struct hfi1_packet packet; struct hfi1_packet packet;
init_packet(rcd, &packet); init_packet(rcd, &packet);
hdrqtail = get_rcvhdrtail(rcd); hdrqtail = get_rcvhdrtail(rcd);
if (packet.rhqoff == hdrqtail) if (packet.rhqoff == hdrqtail) {
last = RCV_PKT_DONE;
goto bail; goto bail;
}
smp_rmb(); /* prevent speculative reads of dma'ed hdrq */ smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
prescan_rxq(&packet); prescan_rxq(&packet);
while (!last) { while (last == RCV_PKT_OK) {
last = process_rcv_packet(&packet); last = process_rcv_packet(&packet, thread);
hdrqtail = get_rcvhdrtail(rcd);
if (packet.rhqoff == hdrqtail) if (packet.rhqoff == hdrqtail)
last = 1; last = RCV_PKT_DONE;
process_rcv_update(last, &packet); process_rcv_update(last, &packet);
} }
process_rcv_qp_work(&packet); process_rcv_qp_work(&packet);
bail: bail:
finish_packet(&packet); finish_packet(&packet);
return last;
} }
static inline void set_all_nodma_rtail(struct hfi1_devdata *dd) static inline void set_all_nodma_rtail(struct hfi1_devdata *dd)
...@@ -821,12 +827,11 @@ static inline void set_all_dma_rtail(struct hfi1_devdata *dd) ...@@ -821,12 +827,11 @@ static inline void set_all_dma_rtail(struct hfi1_devdata *dd)
* Called from interrupt handler for errors or receive interrupt. * Called from interrupt handler for errors or receive interrupt.
* This is the slow path interrupt handler. * This is the slow path interrupt handler.
*/ */
void handle_receive_interrupt(struct hfi1_ctxtdata *rcd) int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
{ {
struct hfi1_devdata *dd = rcd->dd; struct hfi1_devdata *dd = rcd->dd;
u32 hdrqtail; u32 hdrqtail;
int last = 0, needset = 1; int last = RCV_PKT_OK, needset = 1;
struct hfi1_packet packet; struct hfi1_packet packet;
init_packet(rcd, &packet); init_packet(rcd, &packet);
...@@ -834,19 +839,23 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd) ...@@ -834,19 +839,23 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd)
if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) { if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) {
u32 seq = rhf_rcv_seq(packet.rhf); u32 seq = rhf_rcv_seq(packet.rhf);
if (seq != rcd->seq_cnt) if (seq != rcd->seq_cnt) {
last = RCV_PKT_DONE;
goto bail; goto bail;
}
hdrqtail = 0; hdrqtail = 0;
} else { } else {
hdrqtail = get_rcvhdrtail(rcd); hdrqtail = get_rcvhdrtail(rcd);
if (packet.rhqoff == hdrqtail) if (packet.rhqoff == hdrqtail) {
last = RCV_PKT_DONE;
goto bail; goto bail;
}
smp_rmb(); /* prevent speculative reads of dma'ed hdrq */ smp_rmb(); /* prevent speculative reads of dma'ed hdrq */
} }
prescan_rxq(&packet); prescan_rxq(&packet);
while (!last) { while (last == RCV_PKT_OK) {
if (unlikely(dd->do_drop && atomic_xchg(&dd->drop_packet, if (unlikely(dd->do_drop && atomic_xchg(&dd->drop_packet,
DROP_PACKET_OFF) == DROP_PACKET_ON)) { DROP_PACKET_OFF) == DROP_PACKET_ON)) {
...@@ -860,7 +869,7 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd) ...@@ -860,7 +869,7 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd)
packet.rhf = rhf_to_cpu(packet.rhf_addr); packet.rhf = rhf_to_cpu(packet.rhf_addr);
} else { } else {
last = process_rcv_packet(&packet); last = process_rcv_packet(&packet, thread);
} }
if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) { if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) {
...@@ -869,7 +878,7 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd) ...@@ -869,7 +878,7 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd)
if (++rcd->seq_cnt > 13) if (++rcd->seq_cnt > 13)
rcd->seq_cnt = 1; rcd->seq_cnt = 1;
if (seq != rcd->seq_cnt) if (seq != rcd->seq_cnt)
last = 1; last = RCV_PKT_DONE;
if (needset) { if (needset) {
dd_dev_info(dd, dd_dev_info(dd,
"Switching to NO_DMA_RTAIL\n"); "Switching to NO_DMA_RTAIL\n");
...@@ -878,7 +887,7 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd) ...@@ -878,7 +887,7 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd)
} }
} else { } else {
if (packet.rhqoff == hdrqtail) if (packet.rhqoff == hdrqtail)
last = 1; last = RCV_PKT_DONE;
if (needset) { if (needset) {
dd_dev_info(dd, dd_dev_info(dd,
"Switching to DMA_RTAIL\n"); "Switching to DMA_RTAIL\n");
...@@ -898,6 +907,7 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd) ...@@ -898,6 +907,7 @@ void handle_receive_interrupt(struct hfi1_ctxtdata *rcd)
* if no packets were processed. * if no packets were processed.
*/ */
finish_packet(&packet); finish_packet(&packet);
return last;
} }
/* /*
......
...@@ -313,7 +313,7 @@ struct hfi1_ctxtdata { ...@@ -313,7 +313,7 @@ struct hfi1_ctxtdata {
* be valid. Worst case is we process an extra interrupt and up to 64 * be valid. Worst case is we process an extra interrupt and up to 64
* packets with the wrong interrupt handler. * packets with the wrong interrupt handler.
*/ */
void (*do_interrupt)(struct hfi1_ctxtdata *rcd); int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
}; };
/* /*
...@@ -1130,9 +1130,21 @@ void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *, ...@@ -1130,9 +1130,21 @@ void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *,
struct hfi1_devdata *, u8, u8); struct hfi1_devdata *, u8, u8);
void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *); void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *);
void handle_receive_interrupt(struct hfi1_ctxtdata *); int handle_receive_interrupt(struct hfi1_ctxtdata *, int);
void handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int);
void handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd); int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int);
/* receive packet handler dispositions */
#define RCV_PKT_OK 0x0 /* keep going */
#define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */
#define RCV_PKT_DONE 0x2 /* stop, no more packets detected */
/* calculate the current RHF address */
static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
{
return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset;
}
int hfi1_reset_device(int); int hfi1_reset_device(int);
/* return the driver's idea of the logical OPA port state */ /* return the driver's idea of the logical OPA port state */
......
...@@ -2096,9 +2096,9 @@ int sdma_send_txreq(struct sdma_engine *sde, ...@@ -2096,9 +2096,9 @@ int sdma_send_txreq(struct sdma_engine *sde,
tx->sn = sde->tail_sn++; tx->sn = sde->tail_sn++;
trace_hfi1_sdma_in_sn(sde, tx->sn); trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif #endif
spin_lock_irqsave(&sde->flushlist_lock, flags); spin_lock(&sde->flushlist_lock);
list_add_tail(&tx->list, &sde->flushlist); list_add_tail(&tx->list, &sde->flushlist);
spin_unlock_irqrestore(&sde->flushlist_lock, flags); spin_unlock(&sde->flushlist_lock);
if (wait) { if (wait) {
wait->tx_count++; wait->tx_count++;
wait->count += tx->num_desc; wait->count += tx->num_desc;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment