Commit 8b16cef3 authored by Hoang-Nam Nguyen, committed by Roland Dreier

IB/ehca: Fix race condition/locking issues in scaling code

Fix a race condition in find_next_online_cpu() and some other locking
issues in the ehca scaling code.
Signed-off-by: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
parent 78d8d5f9
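
Editor's note: the race in find_next_online_cpu() is a classic unlocked re-read. The old code advanced the shared pool->last_cpu cursor under pool->last_cpu_lock but returned pool->last_cpu *after* dropping the lock, so a concurrent caller could move the cursor in between and the function could hand back a value it never computed. The fix computes the candidate into a local variable, publishes it to pool->last_cpu while still holding the lock, and returns the local copy. The sketch below is a minimal userspace analogue of the before/after patterns, not driver code: pthread mutexes stand in for kernel spinlocks, and next_cpu_racy()/next_cpu_fixed() with a modulo round-robin are invented for illustration.

#include <pthread.h>
#include <stdio.h>

#define NR_FAKE_CPUS 4

static pthread_mutex_t last_cpu_lock = PTHREAD_MUTEX_INITIALIZER;
static int last_cpu;	/* shared round-robin cursor, like pool->last_cpu */

/* Before: the shared cursor is re-read after the unlock, so another
 * thread may have advanced it in between; two callers can be handed
 * the same value, or a value neither of them computed. */
static int next_cpu_racy(void)
{
	pthread_mutex_lock(&last_cpu_lock);
	last_cpu = (last_cpu + 1) % NR_FAKE_CPUS;
	pthread_mutex_unlock(&last_cpu_lock);
	return last_cpu;	/* unlocked re-read of shared state */
}

/* After: compute into a local, publish it once under the lock, and
 * return the private snapshot. */
static int next_cpu_fixed(void)
{
	int cpu;

	pthread_mutex_lock(&last_cpu_lock);
	cpu = (last_cpu + 1) % NR_FAKE_CPUS;
	last_cpu = cpu;
	pthread_mutex_unlock(&last_cpu_lock);
	return cpu;		/* immune to concurrent updates */
}

int main(void)
{
	int a = next_cpu_racy();
	int b = next_cpu_fixed();

	printf("racy pick: %d, fixed pick: %d\n", a, b);
	return 0;
}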
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -544,28 +544,30 @@ void ehca_tasklet_eq(unsigned long data)
 
 static inline int find_next_online_cpu(struct ehca_comp_pool* pool)
 {
-	unsigned long flags_last_cpu;
+	int cpu;
+	unsigned long flags;
 
+	WARN_ON_ONCE(!in_interrupt());
 	if (ehca_debug_level)
 		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");
 
-	spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
-	pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
-	if (pool->last_cpu == NR_CPUS)
-		pool->last_cpu = first_cpu(cpu_online_map);
-	spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);
+	spin_lock_irqsave(&pool->last_cpu_lock, flags);
+	cpu = next_cpu(pool->last_cpu, cpu_online_map);
+	if (cpu == NR_CPUS)
+		cpu = first_cpu(cpu_online_map);
+	pool->last_cpu = cpu;
+	spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
 
-	return pool->last_cpu;
+	return cpu;
 }
 
 static void __queue_comp_task(struct ehca_cq *__cq,
 			      struct ehca_cpu_comp_task *cct)
 {
-	unsigned long flags_cct;
-	unsigned long flags_cq;
+	unsigned long flags;
 
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
-	spin_lock_irqsave(&__cq->task_lock, flags_cq);
+	spin_lock_irqsave(&cct->task_lock, flags);
+	spin_lock(&__cq->task_lock);
 
 	if (__cq->nr_callbacks == 0) {
 		__cq->nr_callbacks++;
@@ -576,8 +578,8 @@ static void __queue_comp_task(struct ehca_cq *__cq,
 	else
 		__cq->nr_callbacks++;
 
-	spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+	spin_unlock(&__cq->task_lock);
+	spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static void queue_comp_task(struct ehca_cq *__cq)
@@ -588,69 +590,69 @@ static void queue_comp_task(struct ehca_cq *__cq)
 
 	cpu = get_cpu();
 	cpu_id = find_next_online_cpu(pool);
-
 	BUG_ON(!cpu_online(cpu_id));
 
 	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+	BUG_ON(!cct);
 
 	if (cct->cq_jobs > 0) {
 		cpu_id = find_next_online_cpu(pool);
 		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
+		BUG_ON(!cct);
 	}
 
 	__queue_comp_task(__cq, cct);
-
-	put_cpu();
-
-	return;
+	put_cpu();
 }
 
 static void run_comp_task(struct ehca_cpu_comp_task* cct)
 {
 	struct ehca_cq *cq;
-	unsigned long flags_cct;
-	unsigned long flags_cq;
+	unsigned long flags;
 
-	spin_lock_irqsave(&cct->task_lock, flags_cct);
+	spin_lock_irqsave(&cct->task_lock, flags);
 
 	while (!list_empty(&cct->cq_list)) {
 		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
-		spin_unlock_irqrestore(&cct->task_lock, flags_cct);
+		spin_unlock_irqrestore(&cct->task_lock, flags);
 		comp_event_callback(cq);
-		spin_lock_irqsave(&cct->task_lock, flags_cct);
+		spin_lock_irqsave(&cct->task_lock, flags);
 
-		spin_lock_irqsave(&cq->task_lock, flags_cq);
+		spin_lock(&cq->task_lock);
 		cq->nr_callbacks--;
 		if (cq->nr_callbacks == 0) {
 			list_del_init(cct->cq_list.next);
 			cct->cq_jobs--;
 		}
-		spin_unlock_irqrestore(&cq->task_lock, flags_cq);
+		spin_unlock(&cq->task_lock);
 	}
 
-	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
-
-	return;
+	spin_unlock_irqrestore(&cct->task_lock, flags);
 }
 
 static int comp_task(void *__cct)
 {
 	struct ehca_cpu_comp_task* cct = __cct;
+	int cql_empty;
 	DECLARE_WAITQUEUE(wait, current);
 
 	set_current_state(TASK_INTERRUPTIBLE);
 	while(!kthread_should_stop()) {
 		add_wait_queue(&cct->wait_queue, &wait);
 
-		if (list_empty(&cct->cq_list))
+		spin_lock_irq(&cct->task_lock);
+		cql_empty = list_empty(&cct->cq_list);
+		spin_unlock_irq(&cct->task_lock);
+		if (cql_empty)
 			schedule();
 		else
 			__set_current_state(TASK_RUNNING);
 
 		remove_wait_queue(&cct->wait_queue, &wait);
 
-		if (!list_empty(&cct->cq_list))
+		spin_lock_irq(&cct->task_lock);
+		cql_empty = list_empty(&cct->cq_list);
+		spin_unlock_irq(&cct->task_lock);
+		if (!cql_empty)
 			run_comp_task(__cct);
 
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -693,8 +695,6 @@ static void destroy_comp_task(struct ehca_comp_pool *pool,
 
 	if (task)
 		kthread_stop(task);
-
-	return;
 }
 
 static void take_over_work(struct ehca_comp_pool *pool,
@@ -815,6 +815,4 @@ void ehca_destroy_comp_pool(void)
 	free_percpu(pool->cpu_comp_tasks);
 	kfree(pool);
 #endif
-
-	return;
 }
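
Editor's note: two of the smaller changes above are standard kernel locking idioms rather than outright bug fixes. When two spinlocks are strictly nested, only the outermost acquisition needs the irqsave/irqrestore variant; interrupts are already off when the inner lock is taken, so plain spin_lock()/spin_unlock() suffices for cq->task_lock, and the old double irqsave was merely redundant. The real bug was in comp_task(): its unlocked list_empty() test raced with __queue_comp_task() adding entries from interrupt context, so the kthread now samples the result into cql_empty under cct->task_lock, using spin_lock_irq() since it runs in process context with interrupts enabled. A condensed sketch of both idioms follows, with declarations elided and field names following the driver; it is illustrative, not the driver source.

	/* Nested locks: only the outer one saves/restores IRQ state. */
	spin_lock_irqsave(&cct->task_lock, flags);	/* IRQs off from here on */
	spin_lock(&__cq->task_lock);			/* already atomic, no irqsave needed */
	__cq->nr_callbacks++;
	spin_unlock(&__cq->task_lock);
	spin_unlock_irqrestore(&cct->task_lock, flags);

	/* Kthread side: snapshot the list state under the lock, then act
	 * on the snapshot instead of re-testing the list unlocked. */
	spin_lock_irq(&cct->task_lock);
	cql_empty = list_empty(&cct->cq_list);
	spin_unlock_irq(&cct->task_lock);
	if (!cql_empty)
		run_comp_task(cct);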