Commit 7a702b4e authored by Linus Torvalds

Merge tag 'for-linus-20190627' of gitolite.kernel.org:pub/scm/linux/kernel/git/brauner/linux

Pull pidfd fixes from Christian Brauner:
 "Userspace tools and libraries such as strace or glibc need a cheap and
  reliable way to tell whether CLONE_PIDFD is supported. The easiest way
  is to pass an invalid fd value in the return argument, perform the
  syscall and verify the value in the return argument has been changed
  to a valid fd.

  However, if CLONE_PIDFD is specified we currently check if pidfd == 0
  and return EINVAL if not.

  The check for pidfd == 0 was originally added to enable us to abuse
  the return argument for passing additional flags along with
  CLONE_PIDFD in the future.

  However, extending legacy clone this way would be a terrible idea and
  with clone3 on the horizon and the ability to reuse CLONE_DETACHED
  with CLONE_PIDFD there's no real need for this crutch. So remove the
  pidfd == 0 check and help userspace out.

  Also, according to Al, anon_inode_getfd() should only be used past the
  point of no failure and ksys_close() should not be used at all since
  it is far too easy to get wrong. Al's motto being "basically, once
  it's in descriptor table, it's out of your control". So Al's patch
  switches back to what we already had in v1 of the original patchset
  and uses an anon_inode_getfile() + put_user() + fd_install() sequence
  in the success path and a fput() + put_unused_fd() in the failure
  path.

  The other two changes should be trivial"

* tag 'for-linus-20190627' of gitolite.kernel.org:pub/scm/linux/kernel/git/brauner/linux:
  proc: remove useless d_is_dir() check
  copy_process(): don't use ksys_close() on cleanups
  samples: make pidfd-metadata fail gracefully on older kernels
  fork: don't check parent_tidptr with CLONE_PIDFD
parents 763cf1f2 30d158b1
...@@ -3077,8 +3077,7 @@ static const struct file_operations proc_tgid_base_operations = { ...@@ -3077,8 +3077,7 @@ static const struct file_operations proc_tgid_base_operations = {
struct pid *tgid_pidfd_to_pid(const struct file *file) struct pid *tgid_pidfd_to_pid(const struct file *file)
{ {
if (!d_is_dir(file->f_path.dentry) || if (file->f_op != &proc_tgid_base_operations)
(file->f_op != &proc_tgid_base_operations))
return ERR_PTR(-EBADF); return ERR_PTR(-EBADF);
return proc_pid(file_inode(file)); return proc_pid(file_inode(file));
......
...@@ -1712,31 +1712,6 @@ const struct file_operations pidfd_fops = { ...@@ -1712,31 +1712,6 @@ const struct file_operations pidfd_fops = {
#endif #endif
}; };
/**
* pidfd_create() - Create a new pid file descriptor.
*
* @pid: struct pid that the pidfd will reference
*
* This creates a new pid file descriptor with the O_CLOEXEC flag set.
*
* Note, that this function can only be called after the fd table has
* been unshared to avoid leaking the pidfd to the new process.
*
* Return: On success, a cloexec pidfd is returned.
* On error, a negative errno number will be returned.
*/
static int pidfd_create(struct pid *pid)
{
int fd;
fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
O_RDWR | O_CLOEXEC);
if (fd < 0)
put_pid(pid);
return fd;
}
static void __delayed_free_task(struct rcu_head *rhp) static void __delayed_free_task(struct rcu_head *rhp)
{ {
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
...@@ -1774,6 +1749,7 @@ static __latent_entropy struct task_struct *copy_process( ...@@ -1774,6 +1749,7 @@ static __latent_entropy struct task_struct *copy_process(
int pidfd = -1, retval; int pidfd = -1, retval;
struct task_struct *p; struct task_struct *p;
struct multiprocess_signals delayed; struct multiprocess_signals delayed;
struct file *pidfile = NULL;
/* /*
* Don't allow sharing the root directory with processes in a different * Don't allow sharing the root directory with processes in a different
...@@ -1822,8 +1798,6 @@ static __latent_entropy struct task_struct *copy_process( ...@@ -1822,8 +1798,6 @@ static __latent_entropy struct task_struct *copy_process(
} }
if (clone_flags & CLONE_PIDFD) { if (clone_flags & CLONE_PIDFD) {
int reserved;
/* /*
* - CLONE_PARENT_SETTID is useless for pidfds and also * - CLONE_PARENT_SETTID is useless for pidfds and also
* parent_tidptr is used to return pidfds. * parent_tidptr is used to return pidfds.
...@@ -1834,16 +1808,6 @@ static __latent_entropy struct task_struct *copy_process( ...@@ -1834,16 +1808,6 @@ static __latent_entropy struct task_struct *copy_process(
if (clone_flags & if (clone_flags &
(CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD)) (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
/*
* Verify that parent_tidptr is sane so we can potentially
* reuse it later.
*/
if (get_user(reserved, parent_tidptr))
return ERR_PTR(-EFAULT);
if (reserved != 0)
return ERR_PTR(-EINVAL);
} }
/* /*
...@@ -2058,11 +2022,20 @@ static __latent_entropy struct task_struct *copy_process( ...@@ -2058,11 +2022,20 @@ static __latent_entropy struct task_struct *copy_process(
* if the fd table isn't shared). * if the fd table isn't shared).
*/ */
if (clone_flags & CLONE_PIDFD) { if (clone_flags & CLONE_PIDFD) {
retval = pidfd_create(pid); retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
if (retval < 0) if (retval < 0)
goto bad_fork_free_pid; goto bad_fork_free_pid;
pidfd = retval; pidfd = retval;
pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
O_RDWR | O_CLOEXEC);
if (IS_ERR(pidfile)) {
put_unused_fd(pidfd);
goto bad_fork_free_pid;
}
get_pid(pid); /* held by pidfile now */
retval = put_user(pidfd, parent_tidptr); retval = put_user(pidfd, parent_tidptr);
if (retval) if (retval)
goto bad_fork_put_pidfd; goto bad_fork_put_pidfd;
...@@ -2180,6 +2153,9 @@ static __latent_entropy struct task_struct *copy_process( ...@@ -2180,6 +2153,9 @@ static __latent_entropy struct task_struct *copy_process(
goto bad_fork_cancel_cgroup; goto bad_fork_cancel_cgroup;
} }
/* past the last point of failure */
if (pidfile)
fd_install(pidfd, pidfile);
init_task_pid_links(p); init_task_pid_links(p);
if (likely(p->pid)) { if (likely(p->pid)) {
...@@ -2246,8 +2222,10 @@ static __latent_entropy struct task_struct *copy_process( ...@@ -2246,8 +2222,10 @@ static __latent_entropy struct task_struct *copy_process(
bad_fork_cgroup_threadgroup_change_end: bad_fork_cgroup_threadgroup_change_end:
cgroup_threadgroup_change_end(current); cgroup_threadgroup_change_end(current);
bad_fork_put_pidfd: bad_fork_put_pidfd:
if (clone_flags & CLONE_PIDFD) if (clone_flags & CLONE_PIDFD) {
ksys_close(pidfd); fput(pidfile);
put_unused_fd(pidfd);
}
bad_fork_free_pid: bad_fork_free_pid:
if (pid != &init_struct_pid) if (pid != &init_struct_pid)
free_pid(pid); free_pid(pid);
......
...@@ -83,7 +83,7 @@ static int pidfd_metadata_fd(pid_t pid, int pidfd) ...@@ -83,7 +83,7 @@ static int pidfd_metadata_fd(pid_t pid, int pidfd)
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
int pidfd = 0, ret = EXIT_FAILURE; int pidfd = -1, ret = EXIT_FAILURE;
char buf[4096] = { 0 }; char buf[4096] = { 0 };
pid_t pid; pid_t pid;
int procfd, statusfd; int procfd, statusfd;
...@@ -91,7 +91,11 @@ int main(int argc, char *argv[]) ...@@ -91,7 +91,11 @@ int main(int argc, char *argv[])
pid = pidfd_clone(CLONE_PIDFD, &pidfd); pid = pidfd_clone(CLONE_PIDFD, &pidfd);
if (pid < 0) if (pid < 0)
exit(ret); err(ret, "CLONE_PIDFD");
if (pidfd == -1) {
warnx("CLONE_PIDFD is not supported by the kernel");
goto out;
}
procfd = pidfd_metadata_fd(pid, pidfd); procfd = pidfd_metadata_fd(pid, pidfd);
close(pidfd); close(pidfd);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment