Commit 1781dd37 authored by Michael S. Tsirkin, committed by Linus Torvalds

[PATCH] IB/mthca: QP locking optimization

1. Split the QP spinlock into separate send and receive locks.

   The only place where we have to take both locks is modify_qp, and
   that is not on the data path.

2. Avoid taking any QP locks when polling the CQ.

   This is achieved by getting rid of the cur field in mthca_wq and
   calculating the number of outstanding WQEs by comparing the head
   and tail fields: head is updated only by post, tail only by poll.

   In the rare case where a possible overrun is detected, the CQ is
   locked and the overrun condition is re-tested, to avoid acting on
   a stale tail value.
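   To illustrate the arithmetic (a minimal standalone sketch with
   made-up values, not part of the patch): head and tail are
   free-running unsigned counters, so head - tail gives the number of
   outstanding WQEs even after either counter wraps, which is exactly
   the property the new mthca_wq_overflow() helper below relies on.

      #include <stdio.h>

      /* head is advanced only by post, tail only by poll; both are
       * free-running unsigned counters.  Unsigned subtraction gives
       * the number of outstanding WQEs even after head wraps past
       * 2^32, because the difference is taken modulo 2^32. */
      static unsigned outstanding(unsigned head, unsigned tail)
      {
              return head - tail;
      }

      int main(void)
      {
              unsigned max  = 64;          /* illustrative queue capacity */
              unsigned tail = 0xfffffffbu; /* 5 below the wrap point */
              unsigned head = tail + 10;   /* wraps around to 5 */

              /* prints "outstanding = 10, full = 1" */
              printf("outstanding = %u, full = %d\n",
                     outstanding(head, tail),
                     outstanding(head, tail) + 60 >= max);
              return 0;
      }

   The driver's check mirrors this: a lockless fast path, with a
   re-read of tail under the CQ lock only when overflow looks possible.
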
Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <roland@topspin.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent c9171ea9
@@ -423,15 +423,6 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
         is_send = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80;
 
         if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) {
-                if (*cur_qp) {
-                        if (*freed) {
-                                wmb();
-                                update_cons_index(dev, cq, *freed);
-                                *freed = 0;
-                        }
-                        spin_unlock(&(*cur_qp)->lock);
-                }
-
                 /*
                  * We do not have to take the QP table lock here,
                  * because CQs will be locked while QPs are removed
@@ -446,8 +437,6 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
                         err = -EINVAL;
                         goto out;
                 }
-
-                spin_lock(&(*cur_qp)->lock);
         }
 
         entry->qp_num = (*cur_qp)->qpn;
@@ -465,9 +454,9 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
         }
 
         if (wq->last_comp < wqe_index)
-                wq->cur -= wqe_index - wq->last_comp;
+                wq->tail += wqe_index - wq->last_comp;
         else
-                wq->cur -= wq->max - wq->last_comp + wqe_index;
+                wq->tail += wqe_index + wq->max - wq->last_comp;
 
         wq->last_comp = wqe_index;
 
@@ -551,9 +540,6 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
                 update_cons_index(dev, cq, freed);
         }
 
-        if (qp)
-                spin_unlock(&qp->lock);
-
         spin_unlock_irqrestore(&cq->lock, flags);
 
         return err == 0 || err == -EAGAIN ? npolled : err;
...
@@ -166,10 +166,12 @@ struct mthca_cq {
 };
 
 struct mthca_wq {
+        spinlock_t lock;
         int max;
-        int cur;
-        int next;
-        int last_comp;
+        unsigned next_ind;
+        unsigned last_comp;
+        unsigned head;
+        unsigned tail;
         void *last;
         int max_gs;
         int wqe_shift;
@@ -180,7 +182,6 @@ struct mthca_wq {
 
 struct mthca_qp {
         struct ib_qp ibqp;
-        spinlock_t lock;
         atomic_t refcount;
         u32 qpn;
         int is_direct;
...
@@ -577,9 +577,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
                 else
                         cur_state = attr->cur_qp_state;
         } else {
-                spin_lock_irq(&qp->lock);
+                spin_lock_irq(&qp->sq.lock);
+                spin_lock(&qp->rq.lock);
                 cur_state = qp->state;
-                spin_unlock_irq(&qp->lock);
+                spin_unlock(&qp->rq.lock);
+                spin_unlock_irq(&qp->sq.lock);
         }
 
         if (attr_mask & IB_QP_STATE) {
@@ -1076,6 +1078,16 @@ static void mthca_free_memfree(struct mthca_dev *dev,
         }
 }
 
+static void mthca_wq_init(struct mthca_wq *wq)
+{
+        spin_lock_init(&wq->lock);
+        wq->next_ind  = 0;
+        wq->last_comp = wq->max - 1;
+        wq->head      = 0;
+        wq->tail      = 0;
+        wq->last      = NULL;
+}
+
 static int mthca_alloc_qp_common(struct mthca_dev *dev,
                                  struct mthca_pd *pd,
                                  struct mthca_cq *send_cq,
@@ -1087,20 +1099,13 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
         int ret;
         int i;
 
-        spin_lock_init(&qp->lock);
         atomic_set(&qp->refcount, 1);
         qp->state        = IB_QPS_RESET;
         qp->atomic_rd_en = 0;
         qp->resp_depth   = 0;
         qp->sq_policy    = send_policy;
-        qp->rq.cur       = 0;
-        qp->sq.cur       = 0;
-        qp->rq.next      = 0;
-        qp->sq.next      = 0;
-        qp->rq.last_comp = qp->rq.max - 1;
-        qp->sq.last_comp = qp->sq.max - 1;
-        qp->rq.last      = NULL;
-        qp->sq.last      = NULL;
+        mthca_wq_init(&qp->sq);
+        mthca_wq_init(&qp->rq);
 
         ret = mthca_alloc_memfree(dev, qp);
         if (ret)
@@ -1394,6 +1399,24 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
         return 0;
 }
 
+static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
+                                    struct ib_cq *ib_cq)
+{
+        unsigned cur;
+        struct mthca_cq *cq;
+
+        cur = wq->head - wq->tail;
+        if (likely(cur + nreq < wq->max))
+                return 0;
+
+        cq = to_mcq(ib_cq);
+        spin_lock(&cq->lock);
+        cur = wq->head - wq->tail;
+        spin_unlock(&cq->lock);
+
+        return cur + nreq >= wq->max;
+}
+
 int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                           struct ib_send_wr **bad_wr)
 {
@@ -1411,16 +1434,18 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
         int ind;
         u8 op0 = 0;
 
-        spin_lock_irqsave(&qp->lock, flags);
+        spin_lock_irqsave(&qp->sq.lock, flags);
 
         /* XXX check that state is OK to post send */
 
-        ind = qp->sq.next;
+        ind = qp->sq.next_ind;
 
         for (nreq = 0; wr; ++nreq, wr = wr->next) {
-                if (qp->sq.cur + nreq >= qp->sq.max) {
-                        mthca_err(dev, "SQ full (%d posted, %d max, %d nreq)\n",
-                                  qp->sq.cur, qp->sq.max, nreq);
+                if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+                        mthca_err(dev, "SQ %06x full (%u head, %u tail,"
+                                  " %d max, %d nreq)\n", qp->qpn,
+                                  qp->sq.head, qp->sq.tail,
+                                  qp->sq.max, nreq);
                         err = -ENOMEM;
                         *bad_wr = wr;
                         goto out;
@@ -1580,7 +1605,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
         if (likely(nreq)) {
                 u32 doorbell[2];
 
-                doorbell[0] = cpu_to_be32(((qp->sq.next << qp->sq.wqe_shift) +
+                doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
                                            qp->send_wqe_offset) | f0 | op0);
                 doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
 
@@ -1591,10 +1616,10 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
         }
 
-        qp->sq.cur += nreq;
-        qp->sq.next = ind;
+        qp->sq.next_ind = ind;
+        qp->sq.head    += nreq;
 
-        spin_unlock_irqrestore(&qp->lock, flags);
+        spin_unlock_irqrestore(&qp->sq.lock, flags);
         return err;
 }
 
@@ -1613,15 +1638,18 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
         void *wqe;
         void *prev_wqe;
 
-        spin_lock_irqsave(&qp->lock, flags);
+        spin_lock_irqsave(&qp->rq.lock, flags);
 
         /* XXX check that state is OK to post receive */
 
-        ind = qp->rq.next;
+        ind = qp->rq.next_ind;
 
         for (nreq = 0; wr; ++nreq, wr = wr->next) {
-                if (unlikely(qp->rq.cur + nreq >= qp->rq.max)) {
-                        mthca_err(dev, "RQ %06x full\n", qp->qpn);
+                if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+                        mthca_err(dev, "RQ %06x full (%u head, %u tail,"
+                                  " %d max, %d nreq)\n", qp->qpn,
+                                  qp->rq.head, qp->rq.tail,
+                                  qp->rq.max, nreq);
                         err = -ENOMEM;
                         *bad_wr = wr;
                         goto out;
@@ -1678,7 +1706,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
         if (likely(nreq)) {
                 u32 doorbell[2];
 
-                doorbell[0] = cpu_to_be32((qp->rq.next << qp->rq.wqe_shift) | size0);
+                doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
                 doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
 
                 wmb();
@@ -1688,10 +1716,10 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
         }
 
-        qp->rq.cur += nreq;
-        qp->rq.next = ind;
+        qp->rq.next_ind = ind;
+        qp->rq.head    += nreq;
 
-        spin_unlock_irqrestore(&qp->lock, flags);
+        spin_unlock_irqrestore(&qp->rq.lock, flags);
         return err;
 }
 
@@ -1712,16 +1740,18 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
         int ind;
         u8 op0 = 0;
 
-        spin_lock_irqsave(&qp->lock, flags);
+        spin_lock_irqsave(&qp->sq.lock, flags);
 
         /* XXX check that state is OK to post send */
 
-        ind = qp->sq.next & (qp->sq.max - 1);
+        ind = qp->sq.head & (qp->sq.max - 1);
 
         for (nreq = 0; wr; ++nreq, wr = wr->next) {
-                if (qp->sq.cur + nreq >= qp->sq.max) {
-                        mthca_err(dev, "SQ full (%d posted, %d max, %d nreq)\n",
-                                  qp->sq.cur, qp->sq.max, nreq);
+                if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+                        mthca_err(dev, "SQ %06x full (%u head, %u tail,"
+                                  " %d max, %d nreq)\n", qp->qpn,
+                                  qp->sq.head, qp->sq.tail,
+                                  qp->sq.max, nreq);
                         err = -ENOMEM;
                         *bad_wr = wr;
                         goto out;
@@ -1831,19 +1861,18 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                 u32 doorbell[2];
 
                 doorbell[0] = cpu_to_be32((nreq << 24) |
-                                          ((qp->sq.next & 0xffff) << 8) |
+                                          ((qp->sq.head & 0xffff) << 8) |
                                           f0 | op0);
                 doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
 
-                qp->sq.cur += nreq;
-                qp->sq.next += nreq;
+                qp->sq.head += nreq;
 
                 /*
                  * Make sure that descriptors are written before
                  * doorbell record.
                  */
                 wmb();
-                *qp->sq.db = cpu_to_be32(qp->sq.next & 0xffff);
+                *qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
 
                 /*
                  * Make sure doorbell record is written before we
@@ -1855,7 +1884,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                               MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
         }
 
-        spin_unlock_irqrestore(&qp->lock, flags);
+        spin_unlock_irqrestore(&qp->sq.lock, flags);
         return err;
 }
 
@@ -1871,15 +1900,18 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
         int i;
         void *wqe;
 
-        spin_lock_irqsave(&qp->lock, flags);
+        spin_lock_irqsave(&qp->rq.lock, flags);
 
         /* XXX check that state is OK to post receive */
 
-        ind = qp->rq.next & (qp->rq.max - 1);
+        ind = qp->rq.head & (qp->rq.max - 1);
 
         for (nreq = 0; wr; ++nreq, wr = wr->next) {
-                if (unlikely(qp->rq.cur + nreq >= qp->rq.max)) {
-                        mthca_err(dev, "RQ %06x full\n", qp->qpn);
+                if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+                        mthca_err(dev, "RQ %06x full (%u head, %u tail,"
+                                  " %d max, %d nreq)\n", qp->qpn,
+                                  qp->rq.head, qp->rq.tail,
+                                  qp->rq.max, nreq);
                         err = -ENOMEM;
                         *bad_wr = wr;
                         goto out;
@@ -1921,18 +1953,17 @@ int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
         }
 
 out:
         if (likely(nreq)) {
-                qp->rq.cur += nreq;
-                qp->rq.next += nreq;
+                qp->rq.head += nreq;
 
                 /*
                  * Make sure that descriptors are written before
                  * doorbell record.
                  */
                 wmb();
-                *qp->rq.db = cpu_to_be32(qp->rq.next & 0xffff);
+                *qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff);
         }
 
-        spin_unlock_irqrestore(&qp->lock, flags);
+        spin_unlock_irqrestore(&qp->rq.lock, flags);
         return err;
 }
...