idr: Fix idr_get_next race with idr_remove

If the entry is deleted from the IDR between the call to
radix_tree_iter_find() and rcu_dereference_raw(), idr_get_next()
will return NULL, which will end the iteration prematurely.  We should
instead continue to the next entry in the IDR.  This only happens if the
iteration is protected by the RCU lock.  Most IDR users use a spinlock
or semaphore to exclude simultaneous modifications.  It was noticed once
the PID allocator was converted to use the IDR, as it uses the RCU lock,
but there may be other users elsewhere in the kernel.

We can't use the normal pattern of calling radix_tree_deref_retry()
(which catches both a retry entry in a leaf node and a node entry in
the root) as the IDR supports storing entries which are unaligned,
which will trigger an infinite loop if they are encountered.  Instead,
we have to explicitly check whether the entry is a retry entry.

Fixes: 0a835c4f ("Reimplement IDR and IDA using the radix tree")
Reported-by: Brendan Gregg <bgregg@netflix.com>
Tested-by: Brendan Gregg <bgregg@netflix.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
parent 7b785645
...@@ -228,11 +228,21 @@ void *idr_get_next(struct idr *idr, int *nextid) ...@@ -228,11 +228,21 @@ void *idr_get_next(struct idr *idr, int *nextid)
{ {
struct radix_tree_iter iter; struct radix_tree_iter iter;
void __rcu **slot; void __rcu **slot;
void *entry = NULL;
unsigned long base = idr->idr_base; unsigned long base = idr->idr_base;
unsigned long id = *nextid; unsigned long id = *nextid;
id = (id < base) ? 0 : id - base; id = (id < base) ? 0 : id - base;
slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, id) {
entry = rcu_dereference_raw(*slot);
if (!entry)
continue;
if (!xa_is_internal(entry))
break;
if (slot != &idr->idr_rt.xa_head && !xa_is_retry(entry))
break;
slot = radix_tree_iter_retry(&iter);
}
if (!slot) if (!slot)
return NULL; return NULL;
id = iter.index + base; id = iter.index + base;
...@@ -241,7 +251,7 @@ void *idr_get_next(struct idr *idr, int *nextid) ...@@ -241,7 +251,7 @@ void *idr_get_next(struct idr *idr, int *nextid)
return NULL; return NULL;
*nextid = id; *nextid = id;
return rcu_dereference_raw(*slot); return entry;
} }
EXPORT_SYMBOL(idr_get_next); EXPORT_SYMBOL(idr_get_next);
......
...@@ -287,6 +287,51 @@ static void idr_align_test(struct idr *idr) ...@@ -287,6 +287,51 @@ static void idr_align_test(struct idr *idr)
} }
} }
/* IDR shared by idr_find_test_1() and its idr_throbber() thread. */
DEFINE_IDR(find_idr);

/*
 * Thread body: keep inserting and removing one entry in find_idr.
 *
 * @arg points to an int holding the ID to allocate and remove.  The entry
 * stored at that ID is xa_mk_value(ID).  The loop always runs at least once
 * and stops once ten seconds (as measured by time()) have elapsed.
 *
 * Always returns NULL.
 */
static void *idr_throbber(void *arg)
{
	const int target = *(int *)arg;
	const time_t deadline = time(NULL) + 10;

	rcu_register_thread();
	for (;;) {
		/* Result deliberately ignored, exactly as before. */
		idr_alloc(&find_idr, xa_mk_value(target), target, target + 1,
			  GFP_KERNEL);
		idr_remove(&find_idr, target);

		if (time(NULL) >= deadline)
			break;
	}
	rcu_unregister_thread();

	return NULL;
}
/*
 * Check that idr_get_next() keeps returning a valid entry while another
 * thread is repeatedly adding and removing an entry in the same IDR.
 *
 * @anchor_id:   ID that stays allocated in find_idr for the whole test.
 * @throbber_id: ID that the idr_throbber() thread allocates and removes.
 *
 * Every entry stored in find_idr is xa_mk_value(id), so the entry returned
 * by idr_get_next() must equal xa_mk_value() of the ID it reports; anything
 * else trips BUG_ON().  The lookup loop runs for eleven seconds so that it
 * outlasts the ten-second throbber.
 */
void idr_find_test_1(int anchor_id, int throbber_id)
{
	pthread_t throbber;
	time_t start = time(NULL);
	int err;

	/*
	 * Insert the anchor before the throbber thread exists.  Nothing here
	 * serialises writers, so allocating the anchor while the throbber is
	 * already modifying find_idr would be two unsynchronised updates of
	 * the same IDR.
	 */
	err = idr_alloc(&find_idr, xa_mk_value(anchor_id), anchor_id,
			anchor_id + 1, GFP_KERNEL);
	BUG_ON(err != anchor_id);

	/* pthread_join() below is only valid if the thread was created. */
	err = pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
	BUG_ON(err != 0);

	/*
	 * NOTE(review): this walk runs concurrently with the throbber and is
	 * not wrapped in rcu_read_lock()/rcu_read_unlock() -- confirm whether
	 * the test harness requires the read lock here.
	 */
	do {
		int id = 0;
		void *entry = idr_get_next(&find_idr, &id);

		BUG_ON(entry != xa_mk_value(id));
	} while (time(NULL) < start + 11);

	/* From here on this thread is the only one touching find_idr. */
	pthread_join(throbber, NULL);

	idr_remove(&find_idr, anchor_id);
	BUG_ON(!idr_is_empty(&find_idr));
}
/*
 * Run the concurrent lookup test twice: once with the anchor ID above the
 * throbber ID, and once with the two IDs swapped.
 */
void idr_find_test(void)
{
	const int low_id = 0;
	const int high_id = 100000;

	idr_find_test_1(high_id, low_id);
	idr_find_test_1(low_id, high_id);
}
void idr_checks(void) void idr_checks(void)
{ {
unsigned long i; unsigned long i;
...@@ -368,6 +413,7 @@ void idr_checks(void) ...@@ -368,6 +413,7 @@ void idr_checks(void)
idr_u32_test(1); idr_u32_test(1);
idr_u32_test(0); idr_u32_test(0);
idr_align_test(&idr); idr_align_test(&idr);
idr_find_test();
} }
#define module_init(x) #define module_init(x)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment