Commit d4fce2e2 authored by Linus Torvalds

Merge branch 'work.epoll' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull epoll fixes from Al Viro:
 "Several race fixes in epoll"

* 'work.epoll' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  ep_create_wakeup_source(): dentry name can change under you...
  epoll: EPOLL_CTL_ADD: close the race in decision to take fast path
  epoll: replace ->visited/visited_list with generation count
  epoll: do not insert into poll queues until all sanity checks are done
parents db23baa2 3701cb59
fs/eventpoll.c

@@ -218,8 +218,7 @@ struct eventpoll {
 	struct file *file;
 
 	/* used to optimize loop detection check */
-	struct list_head visited_list_link;
-	int visited;
+	u64 gen;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	/* used to track busy poll napi_id */
@@ -274,6 +273,8 @@ static long max_user_watches __read_mostly;
  */
 static DEFINE_MUTEX(epmutex);
 
+static u64 loop_check_gen = 0;
+
 /* Used to check for epoll file descriptor inclusion loops */
 static struct nested_calls poll_loop_ncalls;
@@ -283,9 +284,6 @@ static struct kmem_cache *epi_cache __read_mostly;
 /* Slab cache used to allocate "struct eppoll_entry" */
 static struct kmem_cache *pwq_cache __read_mostly;
 
-/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
-static LIST_HEAD(visited_list);
-
 /*
  * List of files with newly added links, where we may need to limit the number
  * of emanating paths. Protected by the epmutex.
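Taken together, the three hunks above swap the visited flag and global visited_list for a generation count: an eventpoll counts as visited when its gen matches the current loop_check_gen, and every mark goes stale at once when the counter is bumped, so no cleanup walk is needed. A minimal userspace sketch of the pattern (the node/graph types here are hypothetical, not kernel code):

#include <stdint.h>
#include <stdio.h>

struct node {
	uint64_t gen;		/* generation this node was last visited in */
	struct node *out;	/* single outgoing edge, may be NULL */
};

static uint64_t current_gen;

/* Return 1 if 'target' is reachable from 'n'. */
static int reaches(struct node *n, struct node *target)
{
	if (!n)
		return 0;
	if (n == target)
		return 1;
	if (n->gen == current_gen)	/* already seen in this pass */
		return 0;
	n->gen = current_gen;		/* mark visited; no list to unwind */
	return reaches(n->out, target);
}

int main(void)
{
	struct node a = { 0 }, b = { 0 };

	a.out = &b;
	current_gen++;			/* "clears" every mark in O(1) */
	printf("loop: %d\n", reaches(&b, &a));	/* 0: b has no edge back */

	b.out = &a;
	current_gen++;
	printf("loop: %d\n", reaches(&b, &a));	/* 1: a <-> b cycle */
	return 0;
}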
@@ -1450,7 +1448,7 @@ static int reverse_path_check(void)
 
 static int ep_create_wakeup_source(struct epitem *epi)
 {
-	const char *name;
+	struct name_snapshot n;
 	struct wakeup_source *ws;
 
 	if (!epi->ep->ws) {
@@ -1459,8 +1457,9 @@ static int ep_create_wakeup_source(struct epitem *epi)
 			return -ENOMEM;
 	}
 
-	name = epi->ffd.file->f_path.dentry->d_name.name;
-	ws = wakeup_source_register(NULL, name);
+	take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry);
+	ws = wakeup_source_register(NULL, n.name.name);
+	release_dentry_name_snapshot(&n);
 
 	if (!ws)
 		return -ENOMEM;
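The ep_create_wakeup_source() fix is about pointer stability rather than loops: d_name.name can be repointed by a concurrent rename (and the old buffer freed), so caching the raw pointer across the wakeup_source_register() call is unsafe. take_dentry_name_snapshot() pins a stable copy and release_dentry_name_snapshot() drops it once the name has been consumed. A rough userspace analogue of the copy-before-use pattern (snapshot_name(), name_lock and object_name are made up for illustration):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t name_lock = PTHREAD_MUTEX_INITIALIZER;
static char *object_name;	/* a concurrent "rename" may swap this */

/* Copy under the lock instead of keeping the raw pointer around. */
static char *snapshot_name(void)
{
	pthread_mutex_lock(&name_lock);
	char *copy = strdup(object_name);
	pthread_mutex_unlock(&name_lock);
	return copy;
}

static void register_by_name(void)
{
	char *n = snapshot_name();	/* stable even if a rename races in */

	printf("registering %s\n", n);	/* stand-in for the registration call */
	free(n);			/* analogue of release_dentry_name_snapshot() */
}

int main(void)
{
	object_name = strdup("timerfd");
	register_by_name();
	free(object_name);
	return 0;
}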
@@ -1522,6 +1521,22 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 		RCU_INIT_POINTER(epi->ws, NULL);
 	}
 
+	/* Add the current item to the list of active epoll hook for this file */
+	spin_lock(&tfile->f_lock);
+	list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
+	spin_unlock(&tfile->f_lock);
+
+	/*
+	 * Add the current item to the RB tree. All RB tree operations are
+	 * protected by "mtx", and ep_insert() is called with "mtx" held.
+	 */
+	ep_rbtree_insert(ep, epi);
+
+	/* now check if we've created too many backpaths */
+	error = -EINVAL;
+	if (full_check && reverse_path_check())
+		goto error_remove_epi;
+
 	/* Initialize the poll table using the queue callback */
 	epq.epi = epi;
 	init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
@@ -1544,22 +1559,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 	if (epi->nwait < 0)
 		goto error_unregister;
 
-	/* Add the current item to the list of active epoll hook for this file */
-	spin_lock(&tfile->f_lock);
-	list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
-	spin_unlock(&tfile->f_lock);
-
-	/*
-	 * Add the current item to the RB tree. All RB tree operations are
-	 * protected by "mtx", and ep_insert() is called with "mtx" held.
-	 */
-	ep_rbtree_insert(ep, epi);
-
-	/* now check if we've created too many backpaths */
-	error = -EINVAL;
-	if (full_check && reverse_path_check())
-		goto error_remove_epi;
-
 	/* We have to drop the new item inside our item list to keep track of it */
 	write_lock_irq(&ep->lock);
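These two hunks are a block move: linking the epitem into the target file's f_ep_links, inserting it into the RB tree, and running reverse_path_check() now all happen before the item is wired into any waitqueue via ep_ptable_queue_proc, so a wakeup callback can never fire on an item that might still fail its sanity checks (the point of the "do not insert into poll queues until all sanity checks are done" commit). The underlying discipline is to finish construction and validation before the object becomes reachable by asynchronous observers; a generic sketch under that assumption (registry, validate() and the item layout are hypothetical):

#include <stdatomic.h>
#include <stdlib.h>

struct item {
	void (*callback)(struct item *);
};

static _Atomic(struct item *) registry;	/* polled by other threads */

static int validate(struct item *it)
{
	return it->callback != NULL;	/* stand-in for reverse_path_check() */
}

static int item_insert(void (*cb)(struct item *))
{
	struct item *it = calloc(1, sizeof(*it));

	if (!it)
		return -1;
	it->callback = cb;
	if (!validate(it)) {		/* check while the item is still private */
		free(it);
		return -1;
	}
	/* publish last; release ordering makes prior writes visible first */
	atomic_store_explicit(&registry, it, memory_order_release);
	return 0;
}

int main(void)
{
	/* a NULL callback must be rejected before publication */
	return item_insert(NULL) == -1 ? 0 : 1;
}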
@@ -1588,6 +1587,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 
 	return 0;
 
+error_unregister:
+	ep_unregister_pollwait(ep, epi);
 error_remove_epi:
 	spin_lock(&tfile->f_lock);
 	list_del_rcu(&epi->fllink);
@@ -1595,9 +1596,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 
 	rb_erase_cached(&epi->rbn, &ep->rbr);
 
-error_unregister:
-	ep_unregister_pollwait(ep, epi);
-
 	/*
 	 * We need to do this because an event could have been arrived on some
 	 * allocated wait queue. Note that we don't care about the ep->ovflist
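Because poll-queue registration is now the last setup step, the unwind order flips to match: ep_unregister_pollwait() moves above the file-list and RB-tree teardown, keeping the error labels a mirror image of the setup sequence so the newest state is torn down first. The idiom in isolation, with hypothetical step/undo helpers:

#include <stdio.h>

static int step_a(void) { return 0; }
static int step_b(void) { return 0; }
static int step_c(void) { return -1; }	/* fails, to exercise the unwind */
static void undo_b(void) { puts("undo b"); }
static void undo_a(void) { puts("undo a"); }

static int setup(void)
{
	if (step_a() < 0)
		goto out;
	if (step_b() < 0)
		goto err_a;
	if (step_c() < 0)
		goto err_b;	/* newest state is unwound first */
	return 0;

err_b:
	undo_b();
err_a:
	undo_a();
out:
	return -1;
}

int main(void)
{
	return setup() == -1 ? 0 : 1;
}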
@@ -1972,13 +1970,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
 	struct epitem *epi;
 
 	mutex_lock_nested(&ep->mtx, call_nests + 1);
-	ep->visited = 1;
-	list_add(&ep->visited_list_link, &visited_list);
+	ep->gen = loop_check_gen;
 	for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
 			ep_tovisit = epi->ffd.file->private_data;
-			if (ep_tovisit->visited)
+			if (ep_tovisit->gen == loop_check_gen)
 				continue;
 			error = ep_call_nested(&poll_loop_ncalls,
 					ep_loop_check_proc, epi->ffd.file,
@@ -2019,18 +2016,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
  */
 static int ep_loop_check(struct eventpoll *ep, struct file *file)
 {
-	int ret;
-	struct eventpoll *ep_cur, *ep_next;
-
-	ret = ep_call_nested(&poll_loop_ncalls,
-			      ep_loop_check_proc, file, ep, current);
-	/* clear visited list */
-	list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
-							visited_list_link) {
-		ep_cur->visited = 0;
-		list_del(&ep_cur->visited_list_link);
-	}
-	return ret;
+	return ep_call_nested(&poll_loop_ncalls,
+			      ep_loop_check_proc, file, ep, current);
 }
 
 static void clear_tfile_check_list(void)
@@ -2195,11 +2182,13 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
 			goto error_tgt_fput;
 		if (op == EPOLL_CTL_ADD) {
 			if (!list_empty(&f.file->f_ep_links) ||
+					ep->gen == loop_check_gen ||
 						is_file_epoll(tf.file)) {
 				mutex_unlock(&ep->mtx);
 				error = epoll_mutex_lock(&epmutex, 0, nonblock);
 				if (error)
 					goto error_tgt_fput;
+				loop_check_gen++;
 				full_check = 1;
 				if (is_file_epoll(tf.file)) {
 					error = -ELOOP;
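The added ep->gen == loop_check_gen test is the "close the race in decision to take fast path" fix: an empty f.file->f_ep_links alone cannot justify skipping the full check, because a concurrent EPOLL_CTL_ADD may already have walked this epoll while collecting files for its loop check without having created the corresponding epitem yet. Any such walk stamps ep->gen with the current generation, so comparing it here (under ep->mtx) detects the in-flight insertion and forces the fully serialized slow path. A closely related, widely used shape is rechecking the deciding condition once the lock is held; a minimal sketch (names hypothetical):

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int state_dirty;

static void full_recheck(void)
{
	atomic_store(&state_dirty, 0);	/* stand-in for the slow, full pass */
}

void maybe_fast_path(void)
{
	if (!atomic_load(&state_dirty))
		return;			/* fast path: nothing to re-validate */

	pthread_mutex_lock(&big_lock);
	if (atomic_load(&state_dirty))	/* recheck once serialized */
		full_recheck();
	pthread_mutex_unlock(&big_lock);
}

int main(void)
{
	atomic_store(&state_dirty, 1);
	maybe_fast_path();		/* takes the locked slow path */
	maybe_fast_path();		/* now fast */
	return 0;
}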
@@ -2263,6 +2252,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
 error_tgt_fput:
 	if (full_check) {
 		clear_tfile_check_list();
+		loop_check_gen++;
 		mutex_unlock(&epmutex);
 	}
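Note the two bumps of loop_check_gen: one when the slow path is entered (after epmutex is taken above), so marks left by earlier passes can never read as current, and one here on the way out, so the marks made by this pass go stale the moment the check ends. That is what lets ep_loop_check() shed its explicit visited-list cleanup: "clearing" every mark is a single increment.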