Commit 92dbc9de authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ovl-update-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs

Pull overlayfs updates from Miklos Szeredi:

 - Allow unprivileged mounting in a user namespace.

   For quite some time the security model of overlayfs has been that
   operations on underlying layers shall be performed with the
   privileges of the mounting task.

   This way an unprvileged user cannot gain privileges by the act of
   mounting an overlayfs instance. A full audit of all function calls
   made by the overlayfs code has been performed to see whether they
   conform to this model, and this branch contains some fixes in this
   regard.

 - Support running on copied filesystem images by optionally disabling
   UUID verification.

 - Bug fixes as well as documentation updates.

* tag 'ovl-update-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs:
  ovl: unprivieged mounts
  ovl: do not get metacopy for userxattr
  ovl: do not fail because of O_NOATIME
  ovl: do not fail when setting origin xattr
  ovl: user xattr
  ovl: simplify file splice
  ovl: make ioctl() safe
  ovl: check privs before decoding file handle
  vfs: verify source area in vfs_dedupe_file_range_one()
  vfs: move cap_convert_nscap() call into vfs_setxattr()
  ovl: fix incorrect extent info in metacopy case
  ovl: expand warning in ovl_d_real()
  ovl: document lower modification caveats
  ovl: warn about orphan metacopy
  ovl: doc clarification
  ovl: introduce new "uuid=off" option for inodes index feature
  ovl: propagate ovl_fs to ovl_decode_real_fh and ovl_encode_real_fh
parents 65de0b89 459c7c56
......@@ -97,11 +97,13 @@ directory trees to be in the same filesystem and there is no
requirement that the root of a filesystem be given for either upper or
lower.
The lower filesystem can be any filesystem supported by Linux and does
not need to be writable. The lower filesystem can even be another
overlayfs. The upper filesystem will normally be writable and if it
is it must support the creation of trusted.* extended attributes, and
must provide valid d_type in readdir responses, so NFS is not suitable.
A wide range of filesystems supported by Linux can be the lower filesystem,
but not all filesystems that are mountable by Linux have the features
needed for OverlayFS to work. The lower filesystem does not need to be
writable. The lower filesystem can even be another overlayfs. The upper
filesystem will normally be writable and if it is it must support the
creation of trusted.* and/or user.* extended attributes, and must provide
valid d_type in readdir responses, so NFS is not suitable.
A read-only overlay of two read-only filesystems may use any
filesystem type.
......@@ -467,14 +469,18 @@ summarized in the `Inode properties`_ table above.
Changes to underlying filesystems
---------------------------------
Offline changes, when the overlay is not mounted, are allowed to either
the upper or the lower trees.
Changes to the underlying filesystems while part of a mounted overlay
filesystem are not allowed. If the underlying filesystem is changed,
the behavior of the overlay is undefined, though it will not result in
a crash or deadlock.
Offline changes, when the overlay is not mounted, are allowed to the
upper tree. Offline changes to the lower tree are only allowed if the
"metadata only copy up", "inode index", and "redirect_dir" features
have not been used. If the lower tree is modified and any of these
features has been used, the behavior of the overlay is undefined,
though it will not result in a crash or deadlock.
When the overlay NFS export feature is enabled, overlay filesystems
behavior on offline changes of the underlying lower layer is different
than the behavior when NFS export is disabled.
......@@ -563,6 +569,11 @@ This verification may cause significant overhead in some cases.
Note: the mount options index=off,nfs_export=on are conflicting for a
read-write mount and will result in an error.
Note: the mount option uuid=off can be used to replace UUID of the underlying
filesystem in file handles with null, and effectively disable UUID checks. This
can be useful in case the underlying disk is copied and the UUID of this copy
is changed. This is only applicable if all lower/upper/work directories are on
the same filesystem, otherwise it will fallback to normal behaviour.
Volatile mount
--------------
......@@ -583,6 +594,15 @@ fresh one. In very limited cases where the user knows that the system has
not crashed and contents of upperdir are intact, The "volatile" directory
can be removed.
User xattr
----------
The the "-o userxattr" mount option forces overlayfs to use the
"user.overlay." xattr namespace instead of "trusted.overlay.". This is
useful for unprivileged mounting of overlayfs.
Testsuite
---------
......
......@@ -275,7 +275,8 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
return err;
}
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
bool is_upper)
{
struct ovl_fh *fh;
int fh_type, dwords;
......@@ -319,7 +320,8 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
if (is_upper)
fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
fh->fb.len = sizeof(fh->fb) + buflen;
fh->fb.uuid = *uuid;
if (ofs->config.uuid)
fh->fb.uuid = *uuid;
return fh;
......@@ -328,8 +330,8 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
return ERR_PTR(err);
}
int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
struct dentry *upper)
int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry,
struct dentry *lower, struct dentry *upper)
{
const struct ovl_fh *fh = NULL;
int err;
......@@ -340,7 +342,7 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
* up and a pure upper inode.
*/
if (ovl_can_decode_fh(lower->d_sb)) {
fh = ovl_encode_real_fh(lower, false);
fh = ovl_encode_real_fh(ofs, lower, false);
if (IS_ERR(fh))
return PTR_ERR(fh);
}
......@@ -352,7 +354,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
fh ? fh->fb.len : 0, 0);
kfree(fh);
return err;
/* Ignore -EPERM from setting "user.*" on symlink/special */
return err == -EPERM ? 0 : err;
}
/* Store file handle of @upper dir in @index dir entry */
......@@ -362,7 +365,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
const struct ovl_fh *fh;
int err;
fh = ovl_encode_real_fh(upper, true);
fh = ovl_encode_real_fh(ofs, upper, true);
if (IS_ERR(fh))
return PTR_ERR(fh);
......@@ -380,6 +383,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
struct dentry *upper)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
struct inode *dir = d_inode(indexdir);
struct dentry *index = NULL;
......@@ -402,7 +406,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
return -EIO;
err = ovl_get_index_name(origin, &name);
err = ovl_get_index_name(ofs, origin, &name);
if (err)
return err;
......@@ -411,7 +415,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
if (IS_ERR(temp))
goto free_name;
err = ovl_set_upper_fh(OVL_FS(dentry->d_sb), upper, temp);
err = ovl_set_upper_fh(ofs, upper, temp);
if (err)
goto out;
......@@ -521,7 +525,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
* hard link.
*/
if (c->origin) {
err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
err = ovl_set_origin(ofs, c->dentry, c->lowerpath.dentry, temp);
if (err)
return err;
}
......@@ -700,7 +704,7 @@ static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
{
int err;
struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
bool to_index = false;
/*
......@@ -722,7 +726,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
if (to_index) {
c->destdir = ovl_indexdir(c->dentry->d_sb);
err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname);
if (err)
return err;
} else if (WARN_ON(!c->parent)) {
......
......@@ -211,7 +211,8 @@ static int ovl_check_encode_origin(struct dentry *dentry)
return 1;
}
static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry,
u32 *fid, int buflen)
{
struct ovl_fh *fh = NULL;
int err, enc_lower;
......@@ -226,7 +227,7 @@ static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
goto fail;
/* Encode an upper or lower file handle */
fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) :
fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_dentry_lower(dentry) :
ovl_dentry_upper(dentry), !enc_lower);
if (IS_ERR(fh))
return PTR_ERR(fh);
......@@ -249,6 +250,7 @@ static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
struct inode *parent)
{
struct ovl_fs *ofs = OVL_FS(inode->i_sb);
struct dentry *dentry;
int bytes, buflen = *max_len << 2;
......@@ -260,7 +262,7 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
if (WARN_ON(!dentry))
return FILEID_INVALID;
bytes = ovl_dentry_to_fid(dentry, fid, buflen);
bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen);
dput(dentry);
if (bytes <= 0)
return FILEID_INVALID;
......@@ -680,7 +682,7 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
if (!ovl_upper_mnt(ofs))
return ERR_PTR(-EACCES);
upper = ovl_decode_real_fh(fh, ovl_upper_mnt(ofs), true);
upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true);
if (IS_ERR_OR_NULL(upper))
return upper;
......
......@@ -53,9 +53,10 @@ static struct file *ovl_open_realfile(const struct file *file,
err = inode_permission(realinode, MAY_OPEN | acc_mode);
if (err) {
realfile = ERR_PTR(err);
} else if (!inode_owner_or_capable(realinode)) {
realfile = ERR_PTR(-EPERM);
} else {
if (!inode_owner_or_capable(realinode))
flags &= ~O_NOATIME;
realfile = open_with_fake_path(&file->f_path, flags, realinode,
current_cred());
}
......@@ -75,12 +76,6 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
struct inode *inode = file_inode(file);
int err;
flags |= OVL_OPEN_FLAGS;
/* If some flag changed that cannot be changed then something's amiss */
if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
return -EIO;
flags &= OVL_SETFL_MASK;
if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
......@@ -397,48 +392,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
return ret;
}
static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
ssize_t ret;
struct fd real;
const struct cred *old_cred;
ret = ovl_real_fdget(in, &real);
if (ret)
return ret;
old_cred = ovl_override_creds(file_inode(in)->i_sb);
ret = generic_file_splice_read(real.file, ppos, pipe, len, flags);
revert_creds(old_cred);
ovl_file_accessed(in);
fdput(real);
return ret;
}
static ssize_t
ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags)
{
struct fd real;
const struct cred *old_cred;
ssize_t ret;
ret = ovl_real_fdget(out, &real);
if (ret)
return ret;
old_cred = ovl_override_creds(file_inode(out)->i_sb);
ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
revert_creds(old_cred);
ovl_file_accessed(out);
fdput(real);
return ret;
}
static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct fd real;
......@@ -541,46 +494,31 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fd real;
const struct cred *old_cred;
long ret;
ret = ovl_real_fdget(file, &real);
if (ret)
return ret;
old_cred = ovl_override_creds(file_inode(file)->i_sb);
ret = security_file_ioctl(real.file, cmd, arg);
if (!ret)
if (!ret) {
/*
* Don't override creds, since we currently can't safely check
* permissions before doing so.
*/
ret = vfs_ioctl(real.file, cmd, arg);
revert_creds(old_cred);
}
fdput(real);
return ret;
}
static unsigned int ovl_iflags_to_fsflags(unsigned int iflags)
{
unsigned int flags = 0;
if (iflags & S_SYNC)
flags |= FS_SYNC_FL;
if (iflags & S_APPEND)
flags |= FS_APPEND_FL;
if (iflags & S_IMMUTABLE)
flags |= FS_IMMUTABLE_FL;
if (iflags & S_NOATIME)
flags |= FS_NOATIME_FL;
return flags;
}
static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
unsigned long arg, unsigned int flags)
unsigned long arg)
{
long ret;
struct inode *inode = file_inode(file);
unsigned int oldflags;
if (!inode_owner_or_capable(inode))
return -EACCES;
......@@ -591,10 +529,13 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
inode_lock(inode);
/* Check the capability before cred override */
oldflags = ovl_iflags_to_fsflags(READ_ONCE(inode->i_flags));
ret = vfs_ioc_setflags_prepare(inode, oldflags, flags);
if (ret)
/*
* Prevent copy up if immutable and has no CAP_LINUX_IMMUTABLE
* capability.
*/
ret = -EPERM;
if (!ovl_has_upperdata(inode) && IS_IMMUTABLE(inode) &&
!capable(CAP_LINUX_IMMUTABLE))
goto unlock;
ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
......@@ -613,46 +554,6 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
}
static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
unsigned long arg)
{
unsigned int flags;
if (get_user(flags, (int __user *) arg))
return -EFAULT;
return ovl_ioctl_set_flags(file, cmd, arg, flags);
}
static unsigned int ovl_fsxflags_to_fsflags(unsigned int xflags)
{
unsigned int flags = 0;
if (xflags & FS_XFLAG_SYNC)
flags |= FS_SYNC_FL;
if (xflags & FS_XFLAG_APPEND)
flags |= FS_APPEND_FL;
if (xflags & FS_XFLAG_IMMUTABLE)
flags |= FS_IMMUTABLE_FL;
if (xflags & FS_XFLAG_NOATIME)
flags |= FS_NOATIME_FL;
return flags;
}
static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fsxattr fa;
memset(&fa, 0, sizeof(fa));
if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
return -EFAULT;
return ovl_ioctl_set_flags(file, cmd, arg,
ovl_fsxflags_to_fsflags(fa.fsx_xflags));
}
long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long ret;
......@@ -663,12 +564,9 @@ long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
ret = ovl_real_ioctl(file, cmd, arg);
break;
case FS_IOC_SETFLAGS:
ret = ovl_ioctl_set_fsflags(file, cmd, arg);
break;
case FS_IOC_FSSETXATTR:
ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
case FS_IOC_SETFLAGS:
ret = ovl_ioctl_set_flags(file, cmd, arg);
break;
default:
......@@ -801,8 +699,8 @@ const struct file_operations ovl_file_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ovl_compat_ioctl,
#endif
.splice_read = ovl_splice_read,
.splice_write = ovl_splice_write,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.copy_file_range = ovl_copy_file_range,
.remap_file_range = ovl_remap_file_range,
......
......@@ -329,8 +329,14 @@ static const char *ovl_get_link(struct dentry *dentry,
bool ovl_is_private_xattr(struct super_block *sb, const char *name)
{
return strncmp(name, OVL_XATTR_PREFIX,
sizeof(OVL_XATTR_PREFIX) - 1) == 0;
struct ovl_fs *ofs = sb->s_fs_info;
if (ofs->config.userxattr)
return strncmp(name, OVL_XATTR_USER_PREFIX,
sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
else
return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
}
int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
......@@ -476,7 +482,7 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
int err;
struct inode *realinode = ovl_inode_real(inode);
struct inode *realinode = ovl_inode_realdata(inode);
const struct cred *old_cred;
if (!realinode->i_op->fiemap)
......@@ -690,7 +696,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
* For the first, copy up case, the union nlink does not change, whether the
* operation succeeds or fails, but the upper inode nlink may change.
* Therefore, before copy up, we store the union nlink value relative to the
* lower inode nlink in the index inode xattr trusted.overlay.nlink.
* lower inode nlink in the index inode xattr .overlay.nlink.
*
* For the second, upper hardlink case, the union nlink should be incremented
* or decremented IFF the operation succeeds, aligned with nlink change of the
......
......@@ -150,17 +150,22 @@ static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *dentry,
goto out;
}
struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
bool connected)
struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
struct vfsmount *mnt, bool connected)
{
struct dentry *real;
int bytes;
if (!capable(CAP_DAC_READ_SEARCH))
return NULL;
/*
* Make sure that the stored uuid matches the uuid of the lower
* layer where file handle will be decoded.
* In case of uuid=off option just make sure that stored uuid is null.
*/
if (!uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid))
if (ofs->config.uuid ? !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
!uuid_is_null(&fh->fb.uuid))
return NULL;
bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
......@@ -354,7 +359,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
ofs->layers[i].fs->bad_uuid)
continue;
origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt,
origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
connected);
if (origin)
break;
......@@ -450,7 +455,7 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
struct ovl_fh *fh;
int err;
fh = ovl_encode_real_fh(real, is_upper);
fh = ovl_encode_real_fh(ofs, real, is_upper);
err = PTR_ERR(fh);
if (IS_ERR(fh)) {
fh = NULL;
......@@ -488,7 +493,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
if (IS_ERR_OR_NULL(fh))
return ERR_CAST(fh);
upper = ovl_decode_real_fh(fh, ovl_upper_mnt(ofs), true);
upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true);
kfree(fh);
if (IS_ERR_OR_NULL(upper))
......@@ -640,12 +645,13 @@ static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
* index dir was cleared. Either way, that index cannot be used to indentify
* the overlay inode.
*/
int ovl_get_index_name(struct dentry *origin, struct qstr *name)
int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
struct qstr *name)
{
struct ovl_fh *fh;
int err;
fh = ovl_encode_real_fh(origin, false);
fh = ovl_encode_real_fh(ofs, origin, false);
if (IS_ERR(fh))
return PTR_ERR(fh);
......@@ -694,7 +700,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
bool is_dir = d_is_dir(origin);
int err;
err = ovl_get_index_name(origin, &name);
err = ovl_get_index_name(ofs, origin, &name);
if (err)
return ERR_PTR(err);
......@@ -805,7 +811,7 @@ static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
if (err)
return err;
err = ovl_set_origin(dentry, lower, upper);
err = ovl_set_origin(ofs, dentry, lower, upper);
if (!err)
err = ovl_set_impure(dentry->d_parent, upper->d_parent);
......@@ -1003,6 +1009,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
* Just make sure a corresponding data dentry has been found.
*/
if (d.metacopy || (uppermetacopy && !ctr)) {
pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
dentry);
err = -EIO;
goto out_put;
} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
......
......@@ -22,7 +22,9 @@ enum ovl_path_type {
#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE)
#define OVL_TYPE_ORIGIN(type) ((type) & __OVL_PATH_ORIGIN)
#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
#define OVL_XATTR_NAMESPACE "overlay."
#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
#define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE
enum ovl_xattr {
OVL_XATTR_OPAQUE,
......@@ -113,10 +115,10 @@ struct ovl_fh {
#define OVL_FH_FID_OFFSET (OVL_FH_WIRE_OFFSET + \
offsetof(struct ovl_fb, fid))
extern const char *ovl_xattr_table[];
extern const char *const ovl_xattr_table[][2];
static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox)
{
return ovl_xattr_table[ox];
return ovl_xattr_table[ox][ofs->config.userxattr];
}
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
......@@ -383,8 +385,8 @@ static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET);
}
struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
bool connected);
struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
struct vfsmount *mnt, bool connected);
int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
struct dentry *upperdentry, struct ovl_path **stackp);
int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
......@@ -392,7 +394,8 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
bool set);
struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
int ovl_get_index_name(struct dentry *origin, struct qstr *name);
int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
struct qstr *name);
struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
struct dentry *origin, bool verify);
......@@ -514,9 +517,10 @@ int ovl_maybe_copy_up(struct dentry *dentry, int flags);
int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
struct dentry *new);
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
struct dentry *upper);
struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
bool is_upper);
int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry,
struct dentry *lower, struct dentry *upper);
/* export.c */
extern const struct export_operations ovl_export_operations;
......@@ -14,9 +14,11 @@ struct ovl_config {
bool redirect_follow;
const char *redirect_mode;
bool index;
bool uuid;
bool nfs_export;
int xino;
bool metacopy;
bool userxattr;
bool ovl_volatile;
};
......
......@@ -79,7 +79,7 @@ static void ovl_dentry_release(struct dentry *dentry)
static struct dentry *ovl_d_real(struct dentry *dentry,
const struct inode *inode)
{
struct dentry *real;
struct dentry *real = NULL, *lower;
/* It's an overlay file */
if (inode && d_inode(dentry) == inode)
......@@ -98,9 +98,10 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
return real;
real = ovl_dentry_lowerdata(dentry);
if (!real)
lower = ovl_dentry_lowerdata(dentry);
if (!lower)
goto bug;
real = lower;
/* Handle recursion */
real = d_real(real, inode);
......@@ -108,8 +109,10 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
if (!inode || inode == d_inode(real))
return real;
bug:
WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
__func__, dentry, inode ? inode->i_sb->s_id : "NULL",
inode ? inode->i_ino : 0, real,
real && d_inode(real) ? d_inode(real)->i_ino : 0);
return dentry;
}
......@@ -356,6 +359,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
if (ofs->config.index != ovl_index_def)
seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
if (!ofs->config.uuid)
seq_puts(m, ",uuid=off");
if (ofs->config.nfs_export != ovl_nfs_export_def)
seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
"on" : "off");
......@@ -410,7 +415,10 @@ enum {
OPT_REDIRECT_DIR,
OPT_INDEX_ON,
OPT_INDEX_OFF,
OPT_UUID_ON,
OPT_UUID_OFF,
OPT_NFS_EXPORT_ON,
OPT_USERXATTR,
OPT_NFS_EXPORT_OFF,
OPT_XINO_ON,
OPT_XINO_OFF,
......@@ -429,6 +437,9 @@ static const match_table_t ovl_tokens = {
{OPT_REDIRECT_DIR, "redirect_dir=%s"},
{OPT_INDEX_ON, "index=on"},
{OPT_INDEX_OFF, "index=off"},
{OPT_USERXATTR, "userxattr"},
{OPT_UUID_ON, "uuid=on"},
{OPT_UUID_OFF, "uuid=off"},
{OPT_NFS_EXPORT_ON, "nfs_export=on"},
{OPT_NFS_EXPORT_OFF, "nfs_export=off"},
{OPT_XINO_ON, "xino=on"},
......@@ -549,6 +560,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
index_opt = true;
break;
case OPT_UUID_ON:
config->uuid = true;
break;
case OPT_UUID_OFF:
config->uuid = false;
break;
case OPT_NFS_EXPORT_ON:
config->nfs_export = true;
nfs_export_opt = true;
......@@ -585,6 +604,10 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
config->ovl_volatile = true;
break;
case OPT_USERXATTR:
config->userxattr = true;
break;
default:
pr_err("unrecognized mount option \"%s\" or missing value\n",
p);
......@@ -688,6 +711,28 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
}
}
/* Resolve userxattr -> !redirect && !metacopy dependency */
if (config->userxattr) {
if (config->redirect_follow && redirect_opt) {
pr_err("conflicting options: userxattr,redirect_dir=%s\n",
config->redirect_mode);
return -EINVAL;
}
if (config->metacopy && metacopy_opt) {
pr_err("conflicting options: userxattr,metacopy=on\n");
return -EINVAL;
}
/*
* Silently disable default setting of redirect and metacopy.
* This shall be the default in the future as well: these
* options must be explicitly enabled if used together with
* userxattr.
*/
config->redirect_dir = config->redirect_follow = false;
config->metacopy = false;
}
return 0;
}
......@@ -1037,8 +1082,14 @@ ovl_posix_acl_default_xattr_handler = {
.set = ovl_posix_acl_xattr_set,
};
static const struct xattr_handler ovl_own_xattr_handler = {
.prefix = OVL_XATTR_PREFIX,
static const struct xattr_handler ovl_own_trusted_xattr_handler = {
.prefix = OVL_XATTR_TRUSTED_PREFIX,
.get = ovl_own_xattr_get,
.set = ovl_own_xattr_set,
};
static const struct xattr_handler ovl_own_user_xattr_handler = {
.prefix = OVL_XATTR_USER_PREFIX,
.get = ovl_own_xattr_get,
.set = ovl_own_xattr_set,
};
......@@ -1049,12 +1100,22 @@ static const struct xattr_handler ovl_other_xattr_handler = {
.set = ovl_other_xattr_set,
};
static const struct xattr_handler *ovl_xattr_handlers[] = {
static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
#ifdef CONFIG_FS_POSIX_ACL
&ovl_posix_acl_access_xattr_handler,
&ovl_posix_acl_default_xattr_handler,
#endif
&ovl_own_xattr_handler,
&ovl_own_trusted_xattr_handler,
&ovl_other_xattr_handler,
NULL
};
static const struct xattr_handler *ovl_user_xattr_handlers[] = {
#ifdef CONFIG_FS_POSIX_ACL
&ovl_posix_acl_access_xattr_handler,
&ovl_posix_acl_default_xattr_handler,
#endif
&ovl_own_user_xattr_handler,
&ovl_other_xattr_handler,
NULL
};
......@@ -1317,7 +1378,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
pr_warn("upper fs does not support RENAME_WHITEOUT.\n");
/*
* Check if upper/work fs supports trusted.overlay.* xattr
* Check if upper/work fs supports (trusted|user).overlay.* xattr
*/
err = ovl_do_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
if (err) {
......@@ -1456,10 +1517,10 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
/*
* Verify upper root is exclusively associated with index dir.
* Older kernels stored upper fh in "trusted.overlay.origin"
* Older kernels stored upper fh in ".overlay.origin"
* xattr. If that xattr exists, verify that it is a match to
* upper dir file handle. In any case, verify or set xattr
* "trusted.overlay.upper" to indicate that index may have
* ".overlay.upper" to indicate that index may have
* directory entries.
*/
if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
......@@ -1877,6 +1938,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
ofs->share_whiteout = true;
ofs->config.index = ovl_index_def;
ofs->config.uuid = true;
ofs->config.nfs_export = ovl_nfs_export_def;
ofs->config.xino = ovl_xino_def();
ofs->config.metacopy = ovl_metacopy_def;
......@@ -1956,6 +2018,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
if (!ovl_upper_mnt(ofs))
sb->s_flags |= SB_RDONLY;
if (!ofs->config.uuid && ofs->numfs > 1) {
pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=on.\n");
ofs->config.uuid = true;
}
if (!ovl_force_readonly(ofs) && ofs->config.index) {
err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
if (err)
......@@ -1991,7 +2058,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
sb->s_magic = OVERLAYFS_SUPER_MAGIC;
sb->s_xattr = ovl_xattr_handlers;
sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
ovl_trusted_xattr_handlers;
sb->s_fs_info = ofs;
sb->s_flags |= SB_POSIXACL;
sb->s_iflags |= SB_I_SKIP_SYNC;
......@@ -2028,6 +2096,7 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
static struct file_system_type ovl_fs_type = {
.owner = THIS_MODULE,
.name = "overlay",
.fs_flags = FS_USERNS_MOUNT,
.mount = ovl_mount,
.kill_sb = kill_anon_super,
};
......
......@@ -50,6 +50,9 @@ const struct cred *ovl_override_creds(struct super_block *sb)
*/
int ovl_can_decode_fh(struct super_block *sb)
{
if (!capable(CAP_DAC_READ_SEARCH))
return 0;
if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry)
return 0;
......@@ -582,9 +585,10 @@ bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry,
#define OVL_XATTR_METACOPY_POSTFIX "metacopy"
#define OVL_XATTR_TAB_ENTRY(x) \
[x] = OVL_XATTR_PREFIX x ## _POSTFIX
[x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
[true] = OVL_XATTR_USER_PREFIX x ## _POSTFIX }
const char *ovl_xattr_table[] = {
const char *const ovl_xattr_table[][2] = {
OVL_XATTR_TAB_ENTRY(OVL_XATTR_OPAQUE),
OVL_XATTR_TAB_ENTRY(OVL_XATTR_REDIRECT),
OVL_XATTR_TAB_ENTRY(OVL_XATTR_ORIGIN),
......@@ -716,6 +720,7 @@ bool ovl_need_index(struct dentry *dentry)
/* Caller must hold OVL_I(inode)->lock */
static void ovl_cleanup_index(struct dentry *dentry)
{
struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
struct inode *dir = indexdir->d_inode;
struct dentry *lowerdentry = ovl_dentry_lower(dentry);
......@@ -725,7 +730,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
struct qstr name = { };
int err;
err = ovl_get_index_name(lowerdentry, &name);
err = ovl_get_index_name(ofs, lowerdentry, &name);
if (err)
goto fail;
......@@ -879,6 +884,13 @@ int ovl_check_metacopy_xattr(struct ovl_fs *ofs, struct dentry *dentry)
if (res < 0) {
if (res == -ENODATA || res == -EOPNOTSUPP)
return 0;
/*
* getxattr on user.* may fail with EACCES in case there's no
* read permission on the inode. Not much we can do, other than
* tell the caller that this is not a metacopy inode.
*/
if (ofs->config.userxattr && res == -EACCES)
return 0;
goto out;
}
......
......@@ -456,8 +456,16 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
if (ret)
return ret;
/*
* This is redundant if called from vfs_dedupe_file_range(), but other
* callers need it and it's not performance sesitive...
*/
ret = remap_verify_area(src_file, src_pos, len, false);
if (ret)
goto out_drop_write;
ret = remap_verify_area(dst_file, dst_pos, len, true);
if (ret < 0)
if (ret)
goto out_drop_write;
ret = -EPERM;
......
......@@ -276,8 +276,16 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
{
struct inode *inode = dentry->d_inode;
struct inode *delegated_inode = NULL;
const void *orig_value = value;
int error;
if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
error = cap_convert_nscap(dentry, &value, size);
if (error < 0)
return error;
size = error;
}
retry_deleg:
inode_lock(inode);
error = __vfs_setxattr_locked(dentry, name, value, size, flags,
......@@ -289,6 +297,9 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
if (!error)
goto retry_deleg;
}
if (value != orig_value)
kfree(value);
return error;
}
EXPORT_SYMBOL_GPL(vfs_setxattr);
......@@ -537,12 +548,6 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
posix_acl_fix_xattr_from_user(kvalue, size);
else if (strcmp(kname, XATTR_NAME_CAPS) == 0) {
error = cap_convert_nscap(d, &kvalue, size);
if (error < 0)
goto out;
size = error;
}
}
error = vfs_setxattr(d, kname, kvalue, size, flags);
......
......@@ -270,6 +270,6 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns)
/* audit system wants to get cap info from files as well */
extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
extern int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size);
extern int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size);
#endif /* !_LINUX_CAPABILITY_H */
......@@ -473,7 +473,7 @@ static bool validheader(size_t size, const struct vfs_cap_data *cap)
*
* If all is ok, we return the new size, on error return < 0.
*/
int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size)
{
struct vfs_ns_cap_data *nscap;
uid_t nsrootid;
......@@ -516,7 +516,6 @@ int cap_convert_nscap(struct dentry *dentry, void **ivalue, size_t size)
nscap->magic_etc = cpu_to_le32(nsmagic);
memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
kvfree(*ivalue);
*ivalue = nscap;
return newsize;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment