Commit da573eaa authored by Logan Gunthorpe's avatar Logan Gunthorpe Committed by Jon Mason

ntb_perf: Improve thread handling to increase robustness

This commit accomplishes a few things:

1) Properly prevent multiple sets of threads from running at once using
a mutex. Lots of race issues existed with the thread_cleanup.

2) The mutex allows us to ensure that threads are finished before
tearing down the device or module.

3) Don't use kthread_stop when the threads can exit by themselves, as
this is counter-indicated by the kthread_create documentation. Threads
now wait for kthread_stop to occur.

4) Writing to the run file now blocks until the threads are complete.
The test can then be safely interrupted by a SIGINT.

Also, while I was at it:

5) debugfs_run_write shouldn't return 0 in the early check cases as this
could cause debugfs_run_write to loop undesirably.
Signed-off-by: default avatarLogan Gunthorpe <logang@deltatee.com>
Acked-by: default avatarDave Jiang <dave.jiang@intel.com>
Signed-off-by: default avatarJon Mason <jdmason@kudzu.us>
parent fd2ecd88
...@@ -58,6 +58,7 @@ ...@@ -58,6 +58,7 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/sizes.h> #include <linux/sizes.h>
#include <linux/ntb.h> #include <linux/ntb.h>
#include <linux/mutex.h>
#define DRIVER_NAME "ntb_perf" #define DRIVER_NAME "ntb_perf"
#define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool" #define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool"
...@@ -121,6 +122,7 @@ struct pthr_ctx { ...@@ -121,6 +122,7 @@ struct pthr_ctx {
int dma_prep_err; int dma_prep_err;
int src_idx; int src_idx;
void *srcs[MAX_SRCS]; void *srcs[MAX_SRCS];
wait_queue_head_t *wq;
}; };
struct perf_ctx { struct perf_ctx {
...@@ -134,9 +136,11 @@ struct perf_ctx { ...@@ -134,9 +136,11 @@ struct perf_ctx {
struct dentry *debugfs_run; struct dentry *debugfs_run;
struct dentry *debugfs_threads; struct dentry *debugfs_threads;
u8 perf_threads; u8 perf_threads;
bool run; /* mutex ensures only one set of threads run at once */
struct mutex run_mutex;
struct pthr_ctx pthr_ctx[MAX_THREADS]; struct pthr_ctx pthr_ctx[MAX_THREADS];
atomic_t tsync; atomic_t tsync;
atomic_t tdone;
}; };
enum { enum {
...@@ -295,12 +299,18 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, ...@@ -295,12 +299,18 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1); schedule_timeout(1);
} }
if (unlikely(kthread_should_stop()))
break;
} }
if (use_dma) { if (use_dma) {
pr_info("%s: All DMA descriptors submitted\n", current->comm); pr_info("%s: All DMA descriptors submitted\n", current->comm);
while (atomic_read(&pctx->dma_sync) != 0) while (atomic_read(&pctx->dma_sync) != 0) {
if (kthread_should_stop())
break;
msleep(20); msleep(20);
}
} }
kstop = ktime_get(); kstop = ktime_get();
...@@ -393,7 +403,10 @@ static int ntb_perf_thread(void *data) ...@@ -393,7 +403,10 @@ static int ntb_perf_thread(void *data)
pctx->srcs[i] = NULL; pctx->srcs[i] = NULL;
} }
return 0; atomic_inc(&perf->tdone);
wake_up(pctx->wq);
rc = 0;
goto done;
err: err:
for (i = 0; i < MAX_SRCS; i++) { for (i = 0; i < MAX_SRCS; i++) {
...@@ -406,6 +419,16 @@ static int ntb_perf_thread(void *data) ...@@ -406,6 +419,16 @@ static int ntb_perf_thread(void *data)
pctx->dma_chan = NULL; pctx->dma_chan = NULL;
} }
done:
/* Wait until we are told to stop */
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop())
break;
schedule();
}
__set_current_state(TASK_RUNNING);
return rc; return rc;
} }
...@@ -553,6 +576,7 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, ...@@ -553,6 +576,7 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
struct perf_ctx *perf = filp->private_data; struct perf_ctx *perf = filp->private_data;
char *buf; char *buf;
ssize_t ret, out_offset; ssize_t ret, out_offset;
int running;
if (!perf) if (!perf)
return 0; return 0;
...@@ -560,7 +584,9 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, ...@@ -560,7 +584,9 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
buf = kmalloc(64, GFP_KERNEL); buf = kmalloc(64, GFP_KERNEL);
if (!buf) if (!buf)
return -ENOMEM; return -ENOMEM;
out_offset = snprintf(buf, 64, "%d\n", perf->run);
running = mutex_is_locked(&perf->run_mutex);
out_offset = snprintf(buf, 64, "%d\n", running);
ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
kfree(buf); kfree(buf);
...@@ -572,7 +598,6 @@ static void threads_cleanup(struct perf_ctx *perf) ...@@ -572,7 +598,6 @@ static void threads_cleanup(struct perf_ctx *perf)
struct pthr_ctx *pctx; struct pthr_ctx *pctx;
int i; int i;
perf->run = false;
for (i = 0; i < MAX_THREADS; i++) { for (i = 0; i < MAX_THREADS; i++) {
pctx = &perf->pthr_ctx[i]; pctx = &perf->pthr_ctx[i];
if (pctx->thread) { if (pctx->thread) {
...@@ -587,65 +612,66 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, ...@@ -587,65 +612,66 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
{ {
struct perf_ctx *perf = filp->private_data; struct perf_ctx *perf = filp->private_data;
int node, i; int node, i;
DECLARE_WAIT_QUEUE_HEAD(wq);
if (!perf->link_is_up) if (!perf->link_is_up)
return 0; return -ENOLINK;
if (perf->perf_threads == 0) if (perf->perf_threads == 0)
return 0; return -EINVAL;
if (atomic_read(&perf->tsync) == 0) if (!mutex_trylock(&perf->run_mutex))
perf->run = false; return -EBUSY;
if (perf->run) if (perf->perf_threads > MAX_THREADS) {
threads_cleanup(perf); perf->perf_threads = MAX_THREADS;
else { pr_info("Reset total threads to: %u\n", MAX_THREADS);
perf->run = true; }
if (perf->perf_threads > MAX_THREADS) { /* no greater than 1M */
perf->perf_threads = MAX_THREADS; if (seg_order > MAX_SEG_ORDER) {
pr_info("Reset total threads to: %u\n", MAX_THREADS); seg_order = MAX_SEG_ORDER;
} pr_info("Fix seg_order to %u\n", seg_order);
}
/* no greater than 1M */ if (run_order < seg_order) {
if (seg_order > MAX_SEG_ORDER) { run_order = seg_order;
seg_order = MAX_SEG_ORDER; pr_info("Fix run_order to %u\n", run_order);
pr_info("Fix seg_order to %u\n", seg_order); }
}
if (run_order < seg_order) { node = dev_to_node(&perf->ntb->pdev->dev);
run_order = seg_order; atomic_set(&perf->tdone, 0);
pr_info("Fix run_order to %u\n", run_order);
}
node = dev_to_node(&perf->ntb->pdev->dev); /* launch kernel thread */
/* launch kernel thread */ for (i = 0; i < perf->perf_threads; i++) {
for (i = 0; i < perf->perf_threads; i++) { struct pthr_ctx *pctx;
struct pthr_ctx *pctx;
pctx = &perf->pthr_ctx[i];
atomic_set(&pctx->dma_sync, 0);
pctx->perf = perf;
pctx->thread =
kthread_create_on_node(ntb_perf_thread,
(void *)pctx,
node, "ntb_perf %d", i);
if (IS_ERR(pctx->thread)) {
pctx->thread = NULL;
goto err;
} else
wake_up_process(pctx->thread);
if (perf->run == false)
return -ENXIO;
}
pctx = &perf->pthr_ctx[i];
atomic_set(&pctx->dma_sync, 0);
pctx->perf = perf;
pctx->wq = &wq;
pctx->thread =
kthread_create_on_node(ntb_perf_thread,
(void *)pctx,
node, "ntb_perf %d", i);
if (IS_ERR(pctx->thread)) {
pctx->thread = NULL;
goto err;
} else {
wake_up_process(pctx->thread);
}
} }
wait_event_interruptible(wq,
atomic_read(&perf->tdone) == perf->perf_threads);
threads_cleanup(perf);
mutex_unlock(&perf->run_mutex);
return count; return count;
err: err:
threads_cleanup(perf); threads_cleanup(perf);
mutex_unlock(&perf->run_mutex);
return -ENXIO; return -ENXIO;
} }
...@@ -713,7 +739,7 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) ...@@ -713,7 +739,7 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
perf->ntb = ntb; perf->ntb = ntb;
perf->perf_threads = 1; perf->perf_threads = 1;
atomic_set(&perf->tsync, 0); atomic_set(&perf->tsync, 0);
perf->run = false; mutex_init(&perf->run_mutex);
spin_lock_init(&perf->db_lock); spin_lock_init(&perf->db_lock);
perf_setup_mw(ntb, perf); perf_setup_mw(ntb, perf);
INIT_DELAYED_WORK(&perf->link_work, perf_link_work); INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
...@@ -748,6 +774,8 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) ...@@ -748,6 +774,8 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
dev_dbg(&perf->ntb->dev, "%s called\n", __func__); dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
mutex_lock(&perf->run_mutex);
cancel_delayed_work_sync(&perf->link_work); cancel_delayed_work_sync(&perf->link_work);
cancel_work_sync(&perf->link_cleanup); cancel_work_sync(&perf->link_cleanup);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment