Commit f292e873 authored by Linus Torvalds

Merge tag 'io_uring-5.12-2021-03-05' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "A bit of a mix between fallout from the worker change, cleanups and
  reductions now possible from that change, and fixes in general. In
  detail:

   - Fully serialize manager and worker creation, fixing the races that
     stemmed from it.

   - Clean up some naming that had gone stale.

   - SQPOLL fixes.

   - Fix a race condition around the task_work rework that went into
     this merge window.

   - Implement unshare support: when the original task does unshare(2)
     or setuid/seteuid and friends, drop the original workers and fork
     new ones.

   - Drop the only remaining piece of state shuffling we had left, the
     creds. Move the assignment into issue instead, and we can drop all
     of that code too.

   - Kill f_op->flush() usage. That was a nasty hack we had out of
     necessity; we no longer need it.

   - Following from ->flush() removal, we can also drop various bits of
     ctx state related to SQPOLL and cancelations.

   - Fix an issue with IOPOLL retry. It was originally fallout from a
     filemap change (removing iov_iter_revert()), but it uncovered the
     iovec being re-imported too late.

   - Fix an issue with system suspend.

   - Use xchg() for fallback work, instead of cmpxchg(); see the sketch
     after this message.

   - Properly destroy io-wq on exec.

   - Add create_io_thread() core helper, and use that in io-wq and
     io_uring. This allows us to remove various silly completion events
     related to thread setup.

   - A few error handling fixes.

  This should be the bulk of the fixes necessary for the new workers;
  next week should be quieter. We've got a pending series from Pavel on
  cancelations and on how tasks and rings are indexed. Outside of that,
  it should just be minor fixes. Even with these fixes, we're still
  killing a net ~80 lines"

* tag 'io_uring-5.12-2021-03-05' of git://git.kernel.dk/linux-block: (41 commits)
  io_uring: don't restrict issue_flags for io_openat
  io_uring: make SQPOLL thread parking saner
  io-wq: kill hashed waitqueue before manager exits
  io_uring: clear IOCB_WAITQ for non -EIOCBQUEUED return
  io_uring: don't keep looping for more events if we can't flush overflow
  io_uring: move to using create_io_thread()
  kernel: provide create_io_thread() helper
  io_uring: reliably cancel linked timeouts
  io_uring: cancel-match based on flags
  io-wq: ensure all pending work is canceled on exit
  io_uring: ensure that threads freeze on suspend
  io_uring: remove extra in_idle wake up
  io_uring: inline __io_queue_async_work()
  io_uring: inline io_req_clean_work()
  io_uring: choose right tctx->io_wq for try cancel
  io_uring: fix -EAGAIN retry with IOPOLL
  io-wq: fix error path leak of buffered write hash map
  io_uring: remove sqo_task
  io_uring: kill sqo_dead and sqo submission halting
  io_uring: ignore double poll add on the same waitqueue head
  ...
parents 6d47254c e45cff58
[collapsed diff not shown]
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -79,8 +79,8 @@ static inline void wq_list_del(struct io_wq_work_list *list,
 
 struct io_wq_work {
 	struct io_wq_work_node list;
+	const struct cred *creds;
 	unsigned flags;
-	unsigned short personality;
 };
 
 static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
@@ -114,12 +114,11 @@ struct io_wq_data {
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
 void io_wq_put(struct io_wq *wq);
+void io_wq_put_and_exit(struct io_wq *wq);
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
 
-pid_t io_wq_fork_thread(int (*fn)(void *), void *arg);
-
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
 	return work->flags & IO_WQ_WORK_HASHED;
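
With the creds pointer now carried in io_wq_work, the override happens at
issue time rather than through io-wq state switching. A hedged sketch of
what that flow looks like, using the mainline override_creds() and
revert_creds() APIs; the io_issue() and io_dispatch() helper names are
illustrative, not the exact mainline functions:

	static int io_issue(struct io_kiocb *req, unsigned int issue_flags)
	{
		const struct cred *old_creds = NULL;
		int ret;

		/* run under the request's personality creds, if they differ */
		if (req->work.creds && req->work.creds != current_cred())
			old_creds = override_creds(req->work.creds);

		ret = io_dispatch(req, issue_flags);	/* per-opcode handler */

		if (old_creds)
			revert_creds(old_creds);
		return ret;
	}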
[collapsed diff not shown]
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -38,7 +38,7 @@ void __io_uring_free(struct task_struct *tsk);
 static inline void io_uring_task_cancel(void)
 {
-	if (current->io_uring && !xa_empty(&current->io_uring->xa))
+	if (current->io_uring)
 		__io_uring_task_cancel();
 }
 
 static inline void io_uring_files_cancel(struct files_struct *files)
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -31,6 +31,7 @@ struct kernel_clone_args {
 	/* Number of elements in *set_tid */
 	size_t set_tid_size;
 	int cgroup;
+	int io_thread;
 	struct cgroup *cgrp;
 	struct css_set *cset;
 };
@@ -82,6 +83,7 @@ extern void exit_files(struct task_struct *);
 extern void exit_itimers(struct signal_struct *);
 
 extern pid_t kernel_clone(struct kernel_clone_args *kargs);
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
 struct task_struct *fork_idle(int);
 struct mm_struct *copy_init_mm(void);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1940,6 +1940,8 @@ static __latent_entropy struct task_struct *copy_process(
 	p = dup_task_struct(current, node);
 	if (!p)
 		goto fork_out;
+	if (args->io_thread)
+		p->flags |= PF_IO_WORKER;
 
 	/*
 	 * This _must_ happen before we call free_task(), i.e. before we jump
@@ -2410,6 +2412,34 @@ struct mm_struct *copy_init_mm(void)
 	return dup_mm(NULL, &init_mm);
 }
 
+/*
+ * This is like kernel_clone(), but shaved down and tailored to just
+ * creating io_uring workers. It returns a created task, or an error pointer.
+ * The returned task is inactive, and the caller must fire it up through
+ * wake_up_new_task(p). All signals are blocked in the created task.
+ */
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
+{
+	unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
+				CLONE_IO;
+	struct kernel_clone_args args = {
+		.flags		= ((lower_32_bits(flags) | CLONE_VM |
+				    CLONE_UNTRACED) & ~CSIGNAL),
+		.exit_signal	= (lower_32_bits(flags) & CSIGNAL),
+		.stack		= (unsigned long)fn,
+		.stack_size	= (unsigned long)arg,
+		.io_thread	= 1,
+	};
+	struct task_struct *tsk;
+
+	tsk = copy_process(NULL, 0, node, &args);
+	if (!IS_ERR(tsk)) {
+		sigfillset(&tsk->blocked);
+		sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+	}
+	return tsk;
+}
+
 /*
  * Ok, this is the main fork-routine.
  *
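
The contract of the new helper is spelled out in its own comment: the task
comes back inactive with all signals except SIGKILL blocked, and the caller
is responsible for starting it. A hedged sketch of a caller, where
io_worker_fn() and the wq argument are invented for illustration:

	static pid_t start_io_worker(struct io_wq *wq)
	{
		struct task_struct *tsk;

		/* returns an inactive task, or an ERR_PTR() on failure */
		tsk = create_io_thread(io_worker_fn, wq, NUMA_NO_NODE);
		if (IS_ERR(tsk))
			return PTR_ERR(tsk);

		/* nothing runs until the caller fires the task up */
		wake_up_new_task(tsk);
		return task_pid_vnr(tsk);
	}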