Commit a0d40693 authored by Mitko Haralanov's avatar Mitko Haralanov Committed by Greg Kroah-Hartman

staging/rdma/hfi1: Add page lock limit check for SDMA requests

The driver pins pages on behalf of user processes in two
separate instances - when the process has submitted a
SDMA transfer and when the process programs an expected
receive buffer.

When pinning pages, the driver is required to observe the
locked page limit set by the system administrator and refuse
to lock more pages than allowed. Such a check was done for
expected receives but was missing from the SDMA transfer
code path.

This commit adds the missing check for SDMA transfers. As of
this commit, user SDMA or expected receive requests will be
rejected if the number of pages required to be pinned will
exceed the set limit.

Due to the fact that the driver needs to take the MM semaphore
in order to update the locked page count (which can sleep), this
cannot be done by the callback function as it [the callback] is
executed in interrupt context. Therefore, it is necessary to put
all the completed SDMA tx requests onto a separate list (txcmp) and
offload the actual clean-up and unpinning work to a workqueue.
Reviewed-by: default avatarDennis Dalessandro <dennis.dalessandro@intel.com>
Reviewed-by: default avatarIra Weiny <ira.weiny@intel.com>
Signed-off-by: default avatarMitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent def82284
...@@ -213,12 +213,6 @@ struct user_sdma_request { ...@@ -213,12 +213,6 @@ struct user_sdma_request {
* to 0. * to 0.
*/ */
u8 omfactor; u8 omfactor;
/*
* pointer to the user's task_struct. We are going to
* get a reference to it so we can process io vectors
* at a later time.
*/
struct task_struct *user_proc;
/* /*
* pointer to the user's mm_struct. We are going to * pointer to the user's mm_struct. We are going to
* get a reference to it so it doesn't get freed * get a reference to it so it doesn't get freed
...@@ -245,9 +239,13 @@ struct user_sdma_request { ...@@ -245,9 +239,13 @@ struct user_sdma_request {
u16 tididx; u16 tididx;
u32 sent; u32 sent;
u64 seqnum; u64 seqnum;
spinlock_t list_lock;
struct list_head txps; struct list_head txps;
spinlock_t txcmp_lock; /* protect txcmp list */
struct list_head txcmp;
unsigned long flags; unsigned long flags;
/* status of the last txreq completed */
int status;
struct work_struct worker;
}; };
/* /*
...@@ -260,6 +258,7 @@ struct user_sdma_txreq { ...@@ -260,6 +258,7 @@ struct user_sdma_txreq {
/* Packet header for the txreq */ /* Packet header for the txreq */
struct hfi1_pkt_header hdr; struct hfi1_pkt_header hdr;
struct sdma_txreq txreq; struct sdma_txreq txreq;
struct list_head list;
struct user_sdma_request *req; struct user_sdma_request *req;
struct { struct {
struct user_sdma_iovec *vec; struct user_sdma_iovec *vec;
...@@ -282,10 +281,12 @@ struct user_sdma_txreq { ...@@ -282,10 +281,12 @@ struct user_sdma_txreq {
static int user_sdma_send_pkts(struct user_sdma_request *, unsigned); static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
static int num_user_pages(const struct iovec *); static int num_user_pages(const struct iovec *);
static void user_sdma_txreq_cb(struct sdma_txreq *, int, int); static void user_sdma_txreq_cb(struct sdma_txreq *, int, int);
static void user_sdma_delayed_completion(struct work_struct *);
static void user_sdma_free_request(struct user_sdma_request *); static void user_sdma_free_request(struct user_sdma_request *);
static int pin_vector_pages(struct user_sdma_request *, static int pin_vector_pages(struct user_sdma_request *,
struct user_sdma_iovec *); struct user_sdma_iovec *);
static void unpin_vector_pages(struct user_sdma_iovec *); static void unpin_vector_pages(struct user_sdma_request *,
struct user_sdma_iovec *);
static int check_header_template(struct user_sdma_request *, static int check_header_template(struct user_sdma_request *,
struct hfi1_pkt_header *, u32, u32); struct hfi1_pkt_header *, u32, u32);
static int set_txreq_header(struct user_sdma_request *, static int set_txreq_header(struct user_sdma_request *,
...@@ -391,6 +392,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp) ...@@ -391,6 +392,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
pq->n_max_reqs = hfi1_sdma_comp_ring_size; pq->n_max_reqs = hfi1_sdma_comp_ring_size;
pq->state = SDMA_PKT_Q_INACTIVE; pq->state = SDMA_PKT_Q_INACTIVE;
atomic_set(&pq->n_reqs, 0); atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
iowait_init(&pq->busy, 0, NULL, defer_packet_queue, iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
activate_packet_queue); activate_packet_queue);
...@@ -451,26 +453,16 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd) ...@@ -451,26 +453,16 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
uctxt->ctxt, fd->subctxt); uctxt->ctxt, fd->subctxt);
pq = fd->pq; pq = fd->pq;
if (pq) { if (pq) {
u16 i, j;
spin_lock_irqsave(&uctxt->sdma_qlock, flags); spin_lock_irqsave(&uctxt->sdma_qlock, flags);
if (!list_empty(&pq->list)) if (!list_empty(&pq->list))
list_del_init(&pq->list); list_del_init(&pq->list);
spin_unlock_irqrestore(&uctxt->sdma_qlock, flags); spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
iowait_sdma_drain(&pq->busy); iowait_sdma_drain(&pq->busy);
if (pq->reqs) { /* Wait until all requests have been freed. */
for (i = 0, j = 0; i < atomic_read(&pq->n_reqs) && wait_event_interruptible(
j < pq->n_max_reqs; j++) { pq->wait,
struct user_sdma_request *req = &pq->reqs[j]; (ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
if (test_bit(SDMA_REQ_IN_USE, &req->flags)) {
set_comp_state(req, ERROR, -ECOMM);
user_sdma_free_request(req);
i++;
}
}
kfree(pq->reqs); kfree(pq->reqs);
}
kmem_cache_destroy(pq->txreq_cache); kmem_cache_destroy(pq->txreq_cache);
kfree(pq); kfree(pq);
fd->pq = NULL; fd->pq = NULL;
...@@ -544,8 +536,12 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, ...@@ -544,8 +536,12 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
req->data_iovs = req_iovcnt(info.ctrl) - 1; req->data_iovs = req_iovcnt(info.ctrl) - 1;
req->pq = pq; req->pq = pq;
req->cq = cq; req->cq = cq;
req->status = -1;
INIT_LIST_HEAD(&req->txps); INIT_LIST_HEAD(&req->txps);
spin_lock_init(&req->list_lock); INIT_LIST_HEAD(&req->txcmp);
INIT_WORK(&req->worker, user_sdma_delayed_completion);
spin_lock_init(&req->txcmp_lock);
memcpy(&req->info, &info, sizeof(info)); memcpy(&req->info, &info, sizeof(info));
if (req_opcode(info.ctrl) == EXPECTED) if (req_opcode(info.ctrl) == EXPECTED)
...@@ -685,18 +681,16 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, ...@@ -685,18 +681,16 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
sent = user_sdma_send_pkts(req, pcount); sent = user_sdma_send_pkts(req, pcount);
if (unlikely(sent < 0)) { if (unlikely(sent < 0)) {
if (sent != -EBUSY) { if (sent != -EBUSY) {
ret = sent; req->status = sent;
goto send_err; set_comp_state(req, ERROR, req->status);
return sent;
} else } else
sent = 0; sent = 0;
} }
atomic_inc(&pq->n_reqs); atomic_inc(&pq->n_reqs);
xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
if (sent < req->info.npkts) { if (sent < req->info.npkts) {
/* Take the references to the user's task and mm_struct */
get_task_struct(current);
req->user_proc = current;
/* /*
* This is a somewhat blocking send implementation. * This is a somewhat blocking send implementation.
* The driver will block the caller until all packets of the * The driver will block the caller until all packets of the
...@@ -706,8 +700,10 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, ...@@ -706,8 +700,10 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) { while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
ret = user_sdma_send_pkts(req, pcount); ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) { if (ret < 0) {
if (ret != -EBUSY) if (ret != -EBUSY) {
goto send_err; req->status = ret;
return ret;
}
wait_event_interruptible_timeout( wait_event_interruptible_timeout(
pq->busy.wait_dma, pq->busy.wait_dma,
(pq->state == SDMA_PKT_Q_ACTIVE), (pq->state == SDMA_PKT_Q_ACTIVE),
...@@ -717,14 +713,10 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, ...@@ -717,14 +713,10 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
} }
} }
ret = 0;
*count += idx; *count += idx;
goto done; return 0;
send_err:
set_comp_state(req, ERROR, ret);
free_req: free_req:
user_sdma_free_request(req); user_sdma_free_request(req);
done:
return ret; return ret;
} }
...@@ -825,6 +817,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) ...@@ -825,6 +817,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
tx->req = req; tx->req = req;
tx->busycount = 0; tx->busycount = 0;
tx->idx = -1; tx->idx = -1;
INIT_LIST_HEAD(&tx->list);
memset(tx->iovecs, 0, sizeof(tx->iovecs)); memset(tx->iovecs, 0, sizeof(tx->iovecs));
if (req->seqnum == req->info.npkts - 1) if (req->seqnum == req->info.npkts - 1)
...@@ -949,8 +942,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) ...@@ -949,8 +942,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
if (ret) { if (ret) {
int i; int i;
dd_dev_err(pq->dd, SDMA_DBG(req, "SDMA txreq add page failed %d\n",
"SDMA txreq add page failed %d\n",
ret); ret);
/* Mark all assigned vectors as complete so they /* Mark all assigned vectors as complete so they
* are unpinned in the callback. */ * are unpinned in the callback. */
...@@ -1045,52 +1037,58 @@ static inline int num_user_pages(const struct iovec *iov) ...@@ -1045,52 +1037,58 @@ static inline int num_user_pages(const struct iovec *iov)
static int pin_vector_pages(struct user_sdma_request *req, static int pin_vector_pages(struct user_sdma_request *req,
struct user_sdma_iovec *iovec) { struct user_sdma_iovec *iovec) {
int ret = 0; int pinned, npages;
unsigned pinned;
iovec->npages = num_user_pages(&iovec->iov); npages = num_user_pages(&iovec->iov);
iovec->pages = kcalloc(iovec->npages, sizeof(*iovec->pages), iovec->pages = kcalloc(npages, sizeof(*iovec->pages), GFP_KERNEL);
GFP_KERNEL);
if (!iovec->pages) { if (!iovec->pages) {
SDMA_DBG(req, "Failed page array alloc"); SDMA_DBG(req, "Failed page array alloc");
ret = -ENOMEM; return -ENOMEM;
goto done;
} }
/* If called by the kernel thread, use the user's mm */
if (current->flags & PF_KTHREAD) /*
use_mm(req->user_proc->mm); * Get a reference to the process's mm so we can use it when
pinned = get_user_pages_fast( * unpinning the io vectors.
(unsigned long)iovec->iov.iov_base, */
iovec->npages, 0, iovec->pages); req->pq->user_mm = get_task_mm(current);
/* If called by the kernel thread, unuse the user's mm */
if (current->flags & PF_KTHREAD) pinned = hfi1_acquire_user_pages((unsigned long)iovec->iov.iov_base,
unuse_mm(req->user_proc->mm); npages, 0, iovec->pages);
if (pinned != iovec->npages) {
SDMA_DBG(req, "Failed to pin pages (%u/%u)", pinned, if (pinned < 0)
iovec->npages); return pinned;
ret = -EFAULT;
goto pfree; iovec->npages = pinned;
if (pinned != npages) {
SDMA_DBG(req, "Failed to pin pages (%d/%u)", pinned, npages);
unpin_vector_pages(req, iovec);
return -EFAULT;
} }
goto done; return 0;
pfree:
unpin_vector_pages(iovec);
done:
return ret;
} }
static void unpin_vector_pages(struct user_sdma_iovec *iovec) static void unpin_vector_pages(struct user_sdma_request *req,
struct user_sdma_iovec *iovec)
{ {
unsigned i; /*
* Unpinning is done through the workqueue so use the
* process's mm if we have a reference to it.
*/
if ((current->flags & PF_KTHREAD) && req->pq->user_mm)
use_mm(req->pq->user_mm);
if (ACCESS_ONCE(iovec->offset) != iovec->iov.iov_len) { hfi1_release_user_pages(iovec->pages, iovec->npages, 0);
hfi1_cdbg(SDMA,
"the complete vector has not been sent yet %llu %zu", /*
iovec->offset, iovec->iov.iov_len); * Unuse the user's mm (see above) and release the
return; * reference to it.
*/
if (req->pq->user_mm) {
if (current->flags & PF_KTHREAD)
unuse_mm(req->pq->user_mm);
mmput(req->pq->user_mm);
} }
for (i = 0; i < iovec->npages; i++)
if (iovec->pages[i])
put_page(iovec->pages[i]);
kfree(iovec->pages); kfree(iovec->pages);
iovec->pages = NULL; iovec->pages = NULL;
iovec->npages = 0; iovec->npages = 0;
...@@ -1358,54 +1356,116 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, ...@@ -1358,54 +1356,116 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
return diff; return diff;
} }
/*
* SDMA tx request completion callback. Called when the SDMA progress
* state machine gets notification that the SDMA descriptors for this
* tx request have been processed by the DMA engine. Called in
* interrupt context.
*/
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status, static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status,
int drain) int drain)
{ {
struct user_sdma_txreq *tx = struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq); container_of(txreq, struct user_sdma_txreq, txreq);
struct user_sdma_request *req = tx->req; struct user_sdma_request *req;
struct hfi1_user_sdma_pkt_q *pq = req ? req->pq : NULL; bool defer;
u64 tx_seqnum; int i;
if (unlikely(!req || !pq)) if (!tx->req)
return; return;
/* If we have any io vectors associated with this txreq, req = tx->req;
* check whether they need to be 'freed'. */ /*
if (tx->idx != -1) { * If this is the callback for the last packet of the request,
int i; * queue up the request for clean up.
*/
defer = (tx->seqnum == req->info.npkts - 1);
/*
* If we have any io vectors associated with this txreq,
* check whether they need to be 'freed'. We can't free them
* here because the unpin function needs to be able to sleep.
*/
for (i = tx->idx; i >= 0; i--) { for (i = tx->idx; i >= 0; i--) {
if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT) {
unpin_vector_pages(tx->iovecs[i].vec); defer = true;
break;
} }
} }
tx_seqnum = tx->seqnum; req->status = status;
kmem_cache_free(pq->txreq_cache, tx);
if (status != SDMA_TXREQ_S_OK) { if (status != SDMA_TXREQ_S_OK) {
dd_dev_err(pq->dd, "SDMA completion with error %d", status); SDMA_DBG(req, "SDMA completion with error %d",
set_comp_state(req, ERROR, status); status);
set_bit(SDMA_REQ_HAS_ERROR, &req->flags); set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
/* Do not free the request until the sender loop has ack'ed defer = true;
* the error and we've seen all txreqs. */ }
if (tx_seqnum == ACCESS_ONCE(req->seqnum) &&
/*
* Defer the clean up of the iovectors and the request until later
* so it can be done outside of interrupt context.
*/
if (defer) {
spin_lock(&req->txcmp_lock);
list_add_tail(&tx->list, &req->txcmp);
spin_unlock(&req->txcmp_lock);
schedule_work(&req->worker);
} else {
kmem_cache_free(req->pq->txreq_cache, tx);
}
}
static void user_sdma_delayed_completion(struct work_struct *work)
{
struct user_sdma_request *req =
container_of(work, struct user_sdma_request, worker);
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct user_sdma_txreq *tx = NULL;
unsigned long flags;
u64 seqnum;
int i;
while (1) {
spin_lock_irqsave(&req->txcmp_lock, flags);
if (!list_empty(&req->txcmp)) {
tx = list_first_entry(&req->txcmp,
struct user_sdma_txreq, list);
list_del(&tx->list);
}
spin_unlock_irqrestore(&req->txcmp_lock, flags);
if (!tx)
break;
for (i = tx->idx; i >= 0; i--)
if (tx->iovecs[i].flags & TXREQ_FLAGS_IOVEC_LAST_PKT)
unpin_vector_pages(req, tx->iovecs[i].vec);
seqnum = tx->seqnum;
kmem_cache_free(pq->txreq_cache, tx);
tx = NULL;
if (req->status != SDMA_TXREQ_S_OK) {
if (seqnum == ACCESS_ONCE(req->seqnum) &&
test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) { test_bit(SDMA_REQ_DONE_ERROR, &req->flags)) {
atomic_dec(&pq->n_reqs); atomic_dec(&pq->n_reqs);
set_comp_state(req, ERROR, req->status);
user_sdma_free_request(req); user_sdma_free_request(req);
break;
} }
} else { } else {
if (tx_seqnum == req->info.npkts - 1) { if (seqnum == req->info.npkts - 1) {
/* We've sent and completed all packets in this
* request. Signal completion to the user */
atomic_dec(&pq->n_reqs); atomic_dec(&pq->n_reqs);
set_comp_state(req, COMPLETE, 0); set_comp_state(req, COMPLETE, 0);
user_sdma_free_request(req); user_sdma_free_request(req);
break;
}
} }
} }
if (!atomic_read(&pq->n_reqs))
if (!atomic_read(&pq->n_reqs)) {
xchg(&pq->state, SDMA_PKT_Q_INACTIVE); xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
wake_up(&pq->wait);
}
} }
static void user_sdma_free_request(struct user_sdma_request *req) static void user_sdma_free_request(struct user_sdma_request *req)
...@@ -1426,10 +1486,8 @@ static void user_sdma_free_request(struct user_sdma_request *req) ...@@ -1426,10 +1486,8 @@ static void user_sdma_free_request(struct user_sdma_request *req)
for (i = 0; i < req->data_iovs; i++) for (i = 0; i < req->data_iovs; i++)
if (req->iovs[i].npages && req->iovs[i].pages) if (req->iovs[i].npages && req->iovs[i].pages)
unpin_vector_pages(&req->iovs[i]); unpin_vector_pages(req, &req->iovs[i]);
} }
if (req->user_proc)
put_task_struct(req->user_proc);
kfree(req->tids); kfree(req->tids);
clear_bit(SDMA_REQ_IN_USE, &req->flags); clear_bit(SDMA_REQ_IN_USE, &req->flags);
} }
......
...@@ -68,6 +68,8 @@ struct hfi1_user_sdma_pkt_q { ...@@ -68,6 +68,8 @@ struct hfi1_user_sdma_pkt_q {
struct user_sdma_request *reqs; struct user_sdma_request *reqs;
struct iowait busy; struct iowait busy;
unsigned state; unsigned state;
wait_queue_head_t wait;
struct mm_struct *user_mm;
}; };
struct hfi1_user_sdma_comp_q { struct hfi1_user_sdma_comp_q {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment