Commit 77e40aae authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull namespace updates from Eric Biederman:
 "This is a bunch of small changes built against 3.16-rc6.  The most
  significant change for users is the first patch which makes setns
  dramatically faster by removing unneeded rcu handling.

  The next chunk of changes are so that "mount -o remount,.." will not
  allow the user namespace root to drop flags on a mount set by the
  system wide root.  Aka this forces read-only mounts to stay read-only,
  no-dev mounts to stay no-dev, no-suid mounts to stay no-suid, no-exec
  mounts to stay no-exec and it prevents unprivileged users from messing
  with a mount's atime settings.  I have included my test case as the
  last patch in this series so people performing backports can verify
  this change works correctly.

  The next change fixes a bug in NFS that was discovered while auditing
  nsproxy users for the first optimization.  Today you can oops the
  kernel by reading /proc/fs/nfsfs/{servers,volumes} if you are clever
  with pid namespaces.  I rebased and fixed the build of the
  !CONFIG_NFS_FS case yesterday when a build bot caught my typo.  Given
  that no one to my knowledge bases anything on my tree fixing the typo
  in place seems more responsible than requiring a typo-fix to be
  backported as well.

  The last change is a small semantic cleanup introducing
  /proc/thread-self and pointing /proc/mounts and /proc/net at it.  This
  prevents several kinds of problematic corner cases.  It is a
  user-visible change so it has a minute chance of causing regressions
  so the change to /proc/mounts and /proc/net are individual one line
  commits that can be trivially reverted.  Unfortunately I lost and
  could not find the email of the original reporter so he is not
  credited.  From at least one perspective this change to /proc/net is a
  regression fix to allow pthread /proc/net uses that were broken by
  the introduction of the network namespace"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  proc: Point /proc/mounts at /proc/thread-self/mounts instead of /proc/self/mounts
  proc: Point /proc/net at /proc/thread-self/net instead of /proc/self/net
  proc: Implement /proc/thread-self to point at the directory of the current thread
  proc: Have net show up under /proc/<tgid>/task/<tid>
  NFS: Fix /proc/fs/nfsfs/servers and /proc/fs/nfsfs/volumes
  mnt: Add tests for unprivileged remount cases that have found to be faulty
  mnt: Change the default remount atime from relatime to the existing value
  mnt: Correct permission checks in do_remount
  mnt: Move the test for MNT_LOCK_READONLY from change_mount_flags into do_remount
  mnt: Only change user settable mount flags in remount
  namespaces: Use task_lock and not rcu to protect nsproxy
parents 96784de5 344470ca
......@@ -890,8 +890,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
/* Don't allow unprivileged users to change mount flags */
if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
if (flag & CL_UNPRIVILEGED) {
mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
if (mnt->mnt.mnt_flags & MNT_READONLY)
mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
if (mnt->mnt.mnt_flags & MNT_NODEV)
mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
if (mnt->mnt.mnt_flags & MNT_NOSUID)
mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
if (mnt->mnt.mnt_flags & MNT_NOEXEC)
mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
}
/* Don't allow unprivileged users to reveal what is under a mount */
if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
......@@ -1896,9 +1909,6 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
if (readonly_request == __mnt_is_readonly(mnt))
return 0;
if (mnt->mnt_flags & MNT_LOCK_READONLY)
return -EPERM;
if (readonly_request)
error = mnt_make_readonly(real_mount(mnt));
else
......@@ -1924,6 +1934,33 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;
/* Don't allow changing of locked mnt flags.
*
* No locks need to be held here while testing the various
* MNT_LOCK flags because those flags can never be cleared
* once they are set.
*/
if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
!(mnt_flags & MNT_READONLY)) {
return -EPERM;
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
!(mnt_flags & MNT_NODEV)) {
return -EPERM;
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
!(mnt_flags & MNT_NOSUID)) {
return -EPERM;
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
!(mnt_flags & MNT_NOEXEC)) {
return -EPERM;
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
return -EPERM;
}
err = security_sb_remount(sb, data);
if (err)
return err;
......@@ -1937,7 +1974,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
err = do_remount_sb(sb, flags, data, 0);
if (!err) {
lock_mount_hash();
mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
mnt->mnt.mnt_flags = mnt_flags;
touch_mnt_namespace(mnt->mnt_ns);
unlock_mount_hash();
......@@ -2122,7 +2159,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
*/
if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
flags |= MS_NODEV;
mnt_flags |= MNT_NODEV;
mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
}
}
......@@ -2436,6 +2473,14 @@ long do_mount(const char *dev_name, const char *dir_name,
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
/* The default atime for remount is preservation */
if ((flags & MS_REMOUNT) &&
((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
MS_STRICTATIME)) == 0)) {
mnt_flags &= ~MNT_ATIME_MASK;
mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
}
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
MS_STRICTATIME);
......@@ -2972,13 +3017,13 @@ static void *mntns_get(struct task_struct *task)
struct mnt_namespace *ns = NULL;
struct nsproxy *nsproxy;
rcu_read_lock();
nsproxy = task_nsproxy(task);
task_lock(task);
nsproxy = task->nsproxy;
if (nsproxy) {
ns = nsproxy->mnt_ns;
get_mnt_ns(ns);
}
rcu_read_unlock();
task_unlock(task);
return ns;
}
......
......@@ -1205,7 +1205,7 @@ static const struct file_operations nfs_server_list_fops = {
.open = nfs_server_list_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
.release = seq_release_net,
.owner = THIS_MODULE,
};
......@@ -1226,7 +1226,7 @@ static const struct file_operations nfs_volume_list_fops = {
.open = nfs_volume_list_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
.release = seq_release_net,
.owner = THIS_MODULE,
};
......@@ -1236,19 +1236,8 @@ static const struct file_operations nfs_volume_list_fops = {
*/
static int nfs_server_list_open(struct inode *inode, struct file *file)
{
struct seq_file *m;
int ret;
struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
ret = seq_open(file, &nfs_server_list_ops);
if (ret < 0)
return ret;
m = file->private_data;
m->private = net;
return 0;
return seq_open_net(inode, file, &nfs_server_list_ops,
sizeof(struct seq_net_private));
}
/*
......@@ -1256,7 +1245,7 @@ static int nfs_server_list_open(struct inode *inode, struct file *file)
*/
static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
{
struct nfs_net *nn = net_generic(m->private, nfs_net_id);
struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
/* lock the list against modification */
spin_lock(&nn->nfs_client_lock);
......@@ -1268,7 +1257,7 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos)
*/
static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
{
struct nfs_net *nn = net_generic(p->private, nfs_net_id);
struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
return seq_list_next(v, &nn->nfs_client_list, pos);
}
......@@ -1278,7 +1267,7 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos)
*/
static void nfs_server_list_stop(struct seq_file *p, void *v)
{
struct nfs_net *nn = net_generic(p->private, nfs_net_id);
struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
spin_unlock(&nn->nfs_client_lock);
}
......@@ -1289,7 +1278,7 @@ static void nfs_server_list_stop(struct seq_file *p, void *v)
static int nfs_server_list_show(struct seq_file *m, void *v)
{
struct nfs_client *clp;
struct nfs_net *nn = net_generic(m->private, nfs_net_id);
struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
/* display header on line 1 */
if (v == &nn->nfs_client_list) {
......@@ -1321,19 +1310,8 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
*/
static int nfs_volume_list_open(struct inode *inode, struct file *file)
{
struct seq_file *m;
int ret;
struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info;
struct net *net = pid_ns->child_reaper->nsproxy->net_ns;
ret = seq_open(file, &nfs_volume_list_ops);
if (ret < 0)
return ret;
m = file->private_data;
m->private = net;
return 0;
/*
 * The volumes file must be opened with the volume iterator
 * (nfs_volume_list_ops), exactly as the seq_open() call it replaces
 * did; passing the server iterator here would make "volumes" list
 * servers instead.
 */
return seq_open_net(inode, file, &nfs_volume_list_ops,
sizeof(struct seq_net_private));
}
/*
......@@ -1341,7 +1319,7 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file)
*/
static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
{
struct nfs_net *nn = net_generic(m->private, nfs_net_id);
struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
/* lock the list against modification */
spin_lock(&nn->nfs_client_lock);
......@@ -1353,7 +1331,7 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos)
*/
static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
{
struct nfs_net *nn = net_generic(p->private, nfs_net_id);
struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
return seq_list_next(v, &nn->nfs_volume_list, pos);
}
......@@ -1363,7 +1341,7 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos)
*/
static void nfs_volume_list_stop(struct seq_file *p, void *v)
{
struct nfs_net *nn = net_generic(p->private, nfs_net_id);
struct nfs_net *nn = net_generic(seq_file_net(p), nfs_net_id);
spin_unlock(&nn->nfs_client_lock);
}
......@@ -1376,7 +1354,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
struct nfs_server *server;
struct nfs_client *clp;
char dev[8], fsid[17];
struct nfs_net *nn = net_generic(m->private, nfs_net_id);
struct nfs_net *nn = net_generic(seq_file_net(m), nfs_net_id);
/* display header on line 1 */
if (v == &nn->nfs_volume_list) {
......@@ -1407,6 +1385,45 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
return 0;
}
/*
 * Create the per-network-namespace "nfsfs" proc directory together with
 * its "servers" and "volumes" files.
 *
 * The directory handle is stored in the namespace's nfs_net so that it can
 * be used as the parent when the files are removed again.
 *
 * Returns 0 on success, or -ENOMEM if any of the entries could not be
 * created; everything created so far is removed again on failure.
 */
int nfs_fs_proc_net_init(struct net *net)
{
	struct nfs_net *nn = net_generic(net, nfs_net_id);
	struct proc_dir_entry *p;

	nn->proc_nfsfs = proc_net_mkdir(net, "nfsfs", net->proc_net);
	if (!nn->proc_nfsfs)
		goto error_0;

	/* a file of servers with which we're dealing */
	p = proc_create("servers", S_IFREG|S_IRUGO,
			nn->proc_nfsfs, &nfs_server_list_fops);
	if (!p)
		goto error_1;

	/* a file of volumes that we have mounted */
	p = proc_create("volumes", S_IFREG|S_IRUGO,
			nn->proc_nfsfs, &nfs_volume_list_fops);
	if (!p)
		goto error_2;
	return 0;

error_2:
	remove_proc_entry("servers", nn->proc_nfsfs);
error_1:
	/*
	 * Undo the proc_net_mkdir() above: the directory is named "nfsfs"
	 * and lives under net->proc_net, not under the proc root as
	 * "fs/nfsfs".
	 */
	remove_proc_entry("nfsfs", net->proc_net);
error_0:
	return -ENOMEM;
}
void nfs_fs_proc_net_exit(struct net *net)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
remove_proc_entry("volumes", nn->proc_nfsfs);
remove_proc_entry("servers", nn->proc_nfsfs);
remove_proc_entry("fs/nfsfs", NULL);
}
/*
* initialise the /proc/fs/nfsfs/ directory
*/
......@@ -1419,14 +1436,12 @@ int __init nfs_fs_proc_init(void)
goto error_0;
/* a file of servers with which we're dealing */
p = proc_create("servers", S_IFREG|S_IRUGO,
proc_fs_nfs, &nfs_server_list_fops);
p = proc_symlink("servers", proc_fs_nfs, "../../net/nfsfs/servers");
if (!p)
goto error_1;
/* a file of volumes that we have mounted */
p = proc_create("volumes", S_IFREG|S_IRUGO,
proc_fs_nfs, &nfs_volume_list_fops);
p = proc_symlink("volumes", proc_fs_nfs, "../../net/nfsfs/volumes");
if (!p)
goto error_2;
return 0;
......
......@@ -1840,11 +1840,12 @@ EXPORT_SYMBOL_GPL(nfs_net_id);
static int nfs_net_init(struct net *net)
{
nfs_clients_init(net);
return 0;
return nfs_fs_proc_net_init(net);
}
static void nfs_net_exit(struct net *net)
{
nfs_fs_proc_net_exit(net);
nfs_cleanup_cb_ident_idr(net);
}
......
......@@ -195,7 +195,16 @@ extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *,
#ifdef CONFIG_PROC_FS
extern int __init nfs_fs_proc_init(void);
extern void nfs_fs_proc_exit(void);
extern int nfs_fs_proc_net_init(struct net *net);
extern void nfs_fs_proc_net_exit(struct net *net);
#else
static inline int nfs_fs_proc_net_init(struct net *net)
{
return 0;
}
static inline void nfs_fs_proc_net_exit(struct net *net)
{
}
static inline int nfs_fs_proc_init(void)
{
return 0;
......
......@@ -29,6 +29,9 @@ struct nfs_net {
#endif
spinlock_t nfs_client_lock;
struct timespec boot_time;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_nfsfs;
#endif
};
extern int nfs_net_id;
......
......@@ -23,6 +23,7 @@ proc-y += version.o
proc-y += softirqs.o
proc-y += namespaces.o
proc-y += self.o
proc-y += thread_self.o
proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
......
......@@ -2814,7 +2814,7 @@ static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter ite
return iter;
}
#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1)
#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
/* for the /proc/ directory itself, after non-process stuff has been done */
int proc_pid_readdir(struct file *file, struct dir_context *ctx)
......@@ -2826,14 +2826,19 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
return 0;
if (pos == TGID_OFFSET - 1) {
if (pos == TGID_OFFSET - 2) {
struct inode *inode = ns->proc_self->d_inode;
if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
return 0;
iter.tgid = 0;
} else {
iter.tgid = pos - TGID_OFFSET;
ctx->pos = pos = pos + 1;
}
if (pos == TGID_OFFSET - 1) {
struct inode *inode = ns->proc_thread_self->d_inode;
if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
return 0;
ctx->pos = pos = pos + 1;
}
iter.tgid = pos - TGID_OFFSET;
iter.task = NULL;
for (iter = next_tgid(ns, iter);
iter.task;
......@@ -2862,6 +2867,9 @@ static const struct pid_entry tid_base_stuff[] = {
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
REG("environ", S_IRUSR, proc_environ_operations),
ONE("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
......
......@@ -442,6 +442,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
int proc_fill_super(struct super_block *s)
{
struct inode *root_inode;
int ret;
s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
s->s_blocksize = 1024;
......@@ -463,5 +464,9 @@ int proc_fill_super(struct super_block *s)
return -ENOMEM;
}
return proc_setup_self(s);
ret = proc_setup_self(s);
if (ret) {
return ret;
}
return proc_setup_thread_self(s);
}
......@@ -230,6 +230,12 @@ static inline int proc_net_init(void) { return 0; }
*/
extern int proc_setup_self(struct super_block *);
/*
* thread_self.c
*/
extern int proc_setup_thread_self(struct super_block *);
extern void proc_thread_self_init(void);
/*
* proc_sysctl.c
*/
......
......@@ -113,9 +113,11 @@ static struct net *get_proc_task_net(struct inode *dir)
rcu_read_lock();
task = pid_task(proc_pid(dir), PIDTYPE_PID);
if (task != NULL) {
ns = task_nsproxy(task);
task_lock(task);
ns = task->nsproxy;
if (ns != NULL)
net = get_net(ns->net_ns);
task_unlock(task);
}
rcu_read_unlock();
......@@ -224,7 +226,7 @@ static struct pernet_operations __net_initdata proc_net_ns_ops = {
int __init proc_net_init(void)
{
proc_symlink("net", NULL, "self/net");
proc_symlink("net", NULL, "thread-self/net");
return register_pernet_subsys(&proc_net_ns_ops);
}
......@@ -149,6 +149,8 @@ static void proc_kill_sb(struct super_block *sb)
ns = (struct pid_namespace *)sb->s_fs_info;
if (ns->proc_self)
dput(ns->proc_self);
if (ns->proc_thread_self)
dput(ns->proc_thread_self);
kill_anon_super(sb);
put_pid_ns(ns);
}
......@@ -170,7 +172,8 @@ void __init proc_root_init(void)
return;
proc_self_init();
proc_symlink("mounts", NULL, "self/mounts");
proc_thread_self_init();
proc_symlink("mounts", NULL, "thread-self/mounts");
proc_net_init();
......
#include <linux/sched.h>
#include <linux/namei.h>
#include <linux/slab.h>
#include <linux/pid_namespace.h>
#include "internal.h"
/*
* /proc/thread-self:
*/
/*
 * readlink() handler for the thread-self symlink.
 *
 * Builds "<tgid>/task/<pid>" for the calling task, with both ids taken
 * relative to the pid namespace stored in the superblock, and copies the
 * text to the user buffer.  Returns -ENOENT when the caller's pid in that
 * namespace is 0, otherwise whatever readlink_copy() returns.
 */
static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
				     int buflen)
{
	/* two numeric fields plus the six characters of "/task/" */
	char target[PROC_NUMBUF + 6 + PROC_NUMBUF];
	struct pid_namespace *pid_ns = dentry->d_sb->s_fs_info;
	pid_t group_id = task_tgid_nr_ns(current, pid_ns);
	pid_t thread_id = task_pid_nr_ns(current, pid_ns);

	if (thread_id == 0)
		return -ENOENT;

	sprintf(target, "%d/task/%d", group_id, thread_id);
	return readlink_copy(buffer, buflen, target);
}
/*
 * follow_link() handler for the thread-self symlink.
 *
 * Hands the path walker either a freshly kmalloc'ed "<tgid>/task/<pid>"
 * string for the calling task, or an error pointer: -ENOENT when the
 * caller's pid in the superblock's pid namespace is 0, -ENOMEM when the
 * allocation fails.  Always returns NULL; the outcome is reported through
 * nd_set_link().
 */
static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	struct pid_namespace *pid_ns = dentry->d_sb->s_fs_info;
	pid_t group_id = task_tgid_nr_ns(current, pid_ns);
	pid_t thread_id = task_pid_nr_ns(current, pid_ns);
	char *link;

	if (!thread_id) {
		link = ERR_PTR(-ENOENT);
	} else {
		link = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);
		if (link)
			sprintf(link, "%d/task/%d", group_id, thread_id);
		else
			link = ERR_PTR(-ENOMEM);
	}

	nd_set_link(nd, link);
	return NULL;
}
/*
 * Inode operations installed on the "thread-self" symlink inode by
 * proc_setup_thread_self().
 */
static const struct inode_operations proc_thread_self_inode_operations = {
.readlink = proc_thread_self_readlink,
.follow_link = proc_thread_self_follow_link,
/* presumably frees the name buffer kmalloc'ed by follow_link -- confirm */
.put_link = kfree_put_link,
};
/*
 * Inode number given to every thread-self inode; filled in by
 * proc_thread_self_init() and read by proc_setup_thread_self().
 */
static unsigned thread_self_inum;
/*
 * Create the "thread-self" symlink dentry in the root of the proc
 * superblock @s and remember it in the superblock's pid namespace.
 *
 * The dentry and its inode are set up with the root inode's i_mutex held.
 * Returns 0 on success, or -ENOMEM (after logging an error) when either
 * the dentry or the inode cannot be allocated.
 */
int proc_setup_thread_self(struct super_block *s)
{
	struct inode *root_inode = s->s_root->d_inode;
	struct pid_namespace *ns = s->s_fs_info;
	struct dentry *thread_self;
	struct inode *inode;
	int err = -ENOMEM;

	mutex_lock(&root_inode->i_mutex);

	thread_self = d_alloc_name(s->s_root, "thread-self");
	if (!thread_self)
		goto out_unlock;

	inode = new_inode_pseudo(s);
	if (!inode) {
		/* drop the dentry allocated above; nothing else to undo */
		dput(thread_self);
		goto out_unlock;
	}

	inode->i_ino = thread_self_inum;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	inode->i_mode = S_IFLNK | S_IRWXUGO;
	inode->i_uid = GLOBAL_ROOT_UID;
	inode->i_gid = GLOBAL_ROOT_GID;
	inode->i_op = &proc_thread_self_inode_operations;
	d_add(thread_self, inode);
	err = 0;

out_unlock:
	mutex_unlock(&root_inode->i_mutex);

	if (err) {
		pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
		return err;
	}

	ns->proc_thread_self = thread_self;
	return 0;
}
/*
 * Init-time setup for thread-self: asks proc_alloc_inum() to fill in
 * thread_self_inum, the inode number proc_setup_thread_self() assigns to
 * each thread-self inode.  The return value of proc_alloc_inum() is not
 * checked here.
 */
void __init proc_thread_self_init(void)
{
proc_alloc_inum(&thread_self_inum);
}
......@@ -232,17 +232,15 @@ static int mounts_open_common(struct inode *inode, struct file *file,
if (!task)
goto err;
rcu_read_lock();
nsp = task_nsproxy(task);
task_lock(task);
nsp = task->nsproxy;
if (!nsp || !nsp->mnt_ns) {
rcu_read_unlock();
task_unlock(task);
put_task_struct(task);
goto err;
}
ns = nsp->mnt_ns;
get_mnt_ns(ns);
rcu_read_unlock();
task_lock(task);
if (!task->fs) {
task_unlock(task);
put_task_struct(task);
......
......@@ -42,13 +42,20 @@ struct mnt_namespace;
* flag, consider how it interacts with shared mounts.
*/
#define MNT_SHARED_MASK (MNT_UNBINDABLE)
#define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE)
#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \
| MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \
| MNT_READONLY)
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
#define MNT_INTERNAL 0x4000
#define MNT_LOCK_ATIME 0x040000
#define MNT_LOCK_NOEXEC 0x080000
#define MNT_LOCK_NOSUID 0x100000
#define MNT_LOCK_NODEV 0x200000
#define MNT_LOCK_READONLY 0x400000
#define MNT_LOCKED 0x800000
#define MNT_DOOMED 0x1000000
......
......@@ -40,32 +40,28 @@ extern struct nsproxy init_nsproxy;
* the namespaces access rules are:
*
* 1. only current task is allowed to change tsk->nsproxy pointer or
* any pointer on the nsproxy itself
* any pointer on the nsproxy itself. Current must hold the task_lock
* when changing tsk->nsproxy.
*
* 2. when accessing (i.e. reading) current task's namespaces - no
* precautions should be taken - just dereference the pointers
*
* 3. the access to other task namespaces is performed like this
* rcu_read_lock();
* nsproxy = task_nsproxy(tsk);
* task_lock(task);
* nsproxy = task->nsproxy;
* if (nsproxy != NULL) {
* / *
* * work with the namespaces here
* * e.g. get the reference on one of them
* * /
* } / *
* * NULL task_nsproxy() means that this task is
* * NULL task->nsproxy means that this task is
* * almost dead (zombie)
* * /
* rcu_read_unlock();
* task_unlock(task);
*
*/
static inline struct nsproxy *task_nsproxy(struct task_struct *tsk)
{
return rcu_dereference(tsk->nsproxy);
}
int copy_namespaces(unsigned long flags, struct task_struct *tsk);
void exit_task_namespaces(struct task_struct *tsk);
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
......
......@@ -33,6 +33,7 @@ struct pid_namespace {
#ifdef CONFIG_PROC_FS
struct vfsmount *proc_mnt;
struct dentry *proc_self;
struct dentry *proc_thread_self;
#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
struct bsd_acct_struct *bacct;
......
......@@ -154,11 +154,11 @@ static void *ipcns_get(struct task_struct *task)
struct ipc_namespace *ns = NULL;
struct nsproxy *nsproxy;
rcu_read_lock();
nsproxy = task_nsproxy(task);
task_lock(task);
nsproxy = task->nsproxy;
if (nsproxy)
ns = get_ipc_ns(nsproxy->ipc_ns);
rcu_read_unlock();
task_unlock(task);
return ns;
}
......
......@@ -204,20 +204,13 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
might_sleep();
task_lock(p);
ns = p->nsproxy;
p->nsproxy = new;
task_unlock(p);
rcu_assign_pointer(p->nsproxy, new);
if (ns && atomic_dec_and_test(&ns->count)) {
/*
* wait for others to get what they want from this nsproxy.
*
* cannot release this nsproxy via the call_rcu() since
* put_mnt_ns() will want to sleep
*/
synchronize_rcu();
if (ns && atomic_dec_and_test(&ns->count))
free_nsproxy(ns);
}
}
void exit_task_namespaces(struct task_struct *p)
......
......@@ -93,13 +93,13 @@ static void *utsns_get(struct task_struct *task)
struct uts_namespace *ns = NULL;
struct nsproxy *nsproxy;
rcu_read_lock();
nsproxy = task_nsproxy(task);
task_lock(task);
nsproxy = task->nsproxy;
if (nsproxy) {
ns = nsproxy->uts_ns;
get_uts_ns(ns);
}
rcu_read_unlock();
task_unlock(task);
return ns;
}
......
......@@ -373,9 +373,11 @@ struct net *get_net_ns_by_pid(pid_t pid)
tsk = find_task_by_vpid(pid);
if (tsk) {
struct nsproxy *nsproxy;
nsproxy = task_nsproxy(tsk);
task_lock(tsk);
nsproxy = tsk->nsproxy;
if (nsproxy)
net = get_net(nsproxy->net_ns);
task_unlock(tsk);
}
rcu_read_unlock();
return net;
......@@ -632,11 +634,11 @@ static void *netns_get(struct task_struct *task)
struct net *net = NULL;
struct nsproxy *nsproxy;
rcu_read_lock();
nsproxy = task_nsproxy(task);
task_lock(task);
nsproxy = task->nsproxy;
if (nsproxy)
net = get_net(nsproxy->net_ns);
rcu_read_unlock();
task_unlock(task);
return net;
}
......
......@@ -5,6 +5,7 @@ TARGETS += kcmp
TARGETS += memfd
TARGETS += memory-hotplug
TARGETS += mqueue
TARGETS += mount
TARGETS += net
TARGETS += ptrace
TARGETS += timers
......
# Makefile for mount selftests.

# Build with the cross toolchain when CROSS_COMPILE is set; with it unset
# this is plain gcc, as before.
CC = $(CROSS_COMPILE)gcc

all: unprivileged-remount-test

unprivileged-remount-test: unprivileged-remount-test.c
	$(CC) -Wall -O2 $< -o $@

# Allow specific tests to be selected.
# The test is only run when /proc/self/uid_map exists.
test_unprivileged_remount: unprivileged-remount-test
	@if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi

run_tests: all test_unprivileged_remount

clean:
	rm -f unprivileged-remount-test

# None of these targets creates a file of the same name, so mark them all
# phony; otherwise a stray file called "clean" or "run_tests" would stop
# the corresponding target from running.
.PHONY: all test_unprivileged_remount run_tests clean
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <grp.h>
#include <stdbool.h>
#include <stdarg.h>
#ifndef CLONE_NEWNS
# define CLONE_NEWNS 0x00020000
#endif
#ifndef CLONE_NEWUTS
# define CLONE_NEWUTS 0x04000000
#endif
#ifndef CLONE_NEWIPC
# define CLONE_NEWIPC 0x08000000
#endif
#ifndef CLONE_NEWNET
# define CLONE_NEWNET 0x40000000
#endif
#ifndef CLONE_NEWUSER
# define CLONE_NEWUSER 0x10000000
#endif
#ifndef CLONE_NEWPID
# define CLONE_NEWPID 0x20000000
#endif
#ifndef MS_RELATIME
#define MS_RELATIME (1 << 21)
#endif
#ifndef MS_STRICTATIME
#define MS_STRICTATIME (1 << 24)
#endif
/*
 * Print a printf-style message to stderr and terminate the process with
 * EXIT_FAILURE.  Never returns.
 */
static void die(char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(EXIT_FAILURE);
}
/*
 * Format a printf-style message and write it to the existing file
 * @filename with a single write() call.
 *
 * Any failure -- formatting error, output longer than the 4096-byte
 * buffer, open/write/close error or a short write -- ends the process
 * through die().
 */
static void write_file(char *filename, char *fmt, ...)
{
	char text[4096];
	va_list args;
	ssize_t nwritten;
	int text_len;
	int fd;

	va_start(args, fmt);
	text_len = vsnprintf(text, sizeof(text), fmt, args);
	va_end(args);

	if (text_len < 0) {
		die("vsnprintf failed: %s\n",
		    strerror(errno));
	}
	if (text_len >= sizeof(text)) {
		die("vsnprintf output truncated\n");
	}

	fd = open(filename, O_WRONLY);
	if (fd < 0) {
		die("open of %s failed: %s\n",
		    filename, strerror(errno));
	}

	nwritten = write(fd, text, text_len);
	if (nwritten < 0) {
		die("write to %s failed: %s\n",
		    filename, strerror(errno));
	}
	if (nwritten != text_len) {
		die("short write to %s\n", filename);
	}

	if (close(fd) != 0) {
		die("close of %s failed: %s\n",
		    filename, strerror(errno));
	}
}
/*
 * Move the process into a new user namespace in which the caller's
 * current uid and gid are mapped to 0, then become uid 0 / gid 0 inside it
 * with no supplementary groups.
 *
 * Every step that fails terminates the process through die().
 */
static void create_and_enter_userns(void)
{
	uid_t uid;
	gid_t gid;

	/* sample the ids before unsharing; they are the outer side of the map */
	uid = getuid();
	gid = getgid();

	if (unshare(CLONE_NEWUSER) != 0) {
		die("unshare(CLONE_NEWUSER) failed: %s\n",
		    strerror(errno));
	}

	/*
	 * uid_t and gid_t are unsigned: print them with %u so that ids
	 * above INT_MAX are not written out as negative numbers.
	 */
	write_file("/proc/self/uid_map", "0 %u 1", (unsigned int)uid);
	write_file("/proc/self/gid_map", "0 %u 1", (unsigned int)gid);

	if (setgroups(0, NULL) != 0) {
		die("setgroups failed: %s\n",
		    strerror(errno));
	}
	if (setgid(0) != 0) {
		die("setgid(0) failed %s\n",
		    strerror(errno));
	}
	if (setuid(0) != 0) {
		die("setuid(0) failed %s\n",
		    strerror(errno));
	}
}
static
bool test_unpriv_remount(int mount_flags, int remount_flags, int invalid_flags)
{
	/*
	 * Run one remount scenario in a forked child and report whether the
	 * child exited with EXIT_SUCCESS.
	 *
	 * The child enters a user + mount namespace, mounts a ramfs on /tmp
	 * with @mount_flags, enters a second user + mount namespace, and
	 * then expects a bind-remount with @remount_flags to succeed and a
	 * bind-remount with @invalid_flags to fail.
	 */
	pid_t child = fork();

	if (child == -1) {
		die("fork failed: %s\n",
		    strerror(errno));
	}

	if (child != 0) { /* parent */
		int status;
		pid_t reaped = waitpid(child, &status, 0);

		if (reaped == -1) {
			die("waitpid failed: %s\n",
			    strerror(errno));
		}
		if (reaped != child) {
			die("waited for %d got %d\n",
			    child, reaped);
		}
		if (!WIFEXITED(status)) {
			die("child did not terminate cleanly\n");
		}
		return WEXITSTATUS(status) == EXIT_SUCCESS;
	}

	/* child: first namespace level, where the initial mount is made */
	create_and_enter_userns();
	if (unshare(CLONE_NEWNS) != 0) {
		die("unshare(CLONE_NEWNS) failed: %s\n",
		    strerror(errno));
	}

	if (mount("testing", "/tmp", "ramfs", mount_flags, NULL) != 0) {
		die("mount of /tmp failed: %s\n",
		    strerror(errno));
	}

	/* child: second namespace level, where the remounts are attempted */
	create_and_enter_userns();
	if (unshare(CLONE_NEWNS) != 0) {
		die("unshare(CLONE_NEWNS) failed: %s\n",
		    strerror(errno));
	}

	/* this remount is expected to be accepted */
	if (mount("/tmp", "/tmp", "none",
		  MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) {
		/* system("cat /proc/self/mounts"); */
		die("remount of /tmp failed: %s\n",
		    strerror(errno));
	}

	/* this remount is expected to be refused */
	if (mount("/tmp", "/tmp", "none",
		  MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) {
		/* system("cat /proc/self/mounts"); */
		die("remount of /tmp with invalid flags "
		    "succeeded unexpectedly\n");
	}

	exit(EXIT_SUCCESS);
}
/*
 * Scenario with a single flag set: remounting with the same @mount_flags
 * must succeed, and remounting with no flags at all must be refused.
 */
static bool test_unpriv_remount_simple(int mount_flags)
{
	const int no_flags = 0;

	return test_unpriv_remount(mount_flags, mount_flags, no_flags);
}
/*
 * atime scenario: remounting with the same @mount_flags must succeed, and
 * remounting with @invalid_flags must be refused.
 */
static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
{
	int remount_flags = mount_flags;

	return test_unpriv_remount(mount_flags, remount_flags, invalid_flags);
}
/*
 * Run every unprivileged-remount scenario in turn.
 *
 * Each scenario runs in its own child process; the first one that fails
 * ends the program through die() with a message naming the flag
 * combination under test.  Returns EXIT_SUCCESS when all scenarios pass.
 */
int main(int argc, char **argv)
{
	/* the command line is not used */
	(void)argc;
	(void)argv;

	if (!test_unpriv_remount_simple(MS_RDONLY|MS_NODEV)) {
		die("MS_RDONLY malfunctions\n");
	}
	if (!test_unpriv_remount_simple(MS_NODEV)) {
		die("MS_NODEV malfunctions\n");
	}
	if (!test_unpriv_remount_simple(MS_NOSUID|MS_NODEV)) {
		die("MS_NOSUID malfunctions\n");
	}
	if (!test_unpriv_remount_simple(MS_NOEXEC|MS_NODEV)) {
		die("MS_NOEXEC malfunctions\n");
	}
	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODEV,
				       MS_NOATIME|MS_NODEV))
	{
		die("MS_RELATIME malfunctions\n");
	}
	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODEV,
				       MS_NOATIME|MS_NODEV))
	{
		die("MS_STRICTATIME malfunctions\n");
	}
	/*
	 * The diagnostics below name the flag combination actually being
	 * tested, so a failure points at the right scenario.
	 */
	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODEV,
				       MS_STRICTATIME|MS_NODEV))
	{
		die("MS_NOATIME malfunctions\n");
	}
	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME|MS_NODEV,
				       MS_NOATIME|MS_NODEV))
	{
		die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
	}
	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME|MS_NODEV,
				       MS_NOATIME|MS_NODEV))
	{
		die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
	}
	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME|MS_NODEV,
				       MS_STRICTATIME|MS_NODEV))
	{
		die("MS_NOATIME|MS_NODIRATIME malfunctions\n");
	}
	/* remounting without any atime flag must keep the existing setting */
	if (!test_unpriv_remount(MS_STRICTATIME|MS_NODEV, MS_NODEV,
				 MS_NOATIME|MS_NODEV))
	{
		die("Default atime malfunctions\n");
	}
	return EXIT_SUCCESS;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment