Commit 47d83c19 authored by Chandan Babu R

Merge tag 'pptrs-6.10_2024-04-23' of...

Merge tag 'pptrs-6.10_2024-04-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.10-mergeC

xfs: Parent Pointers

This is the latest version of the parent pointer attributes for xfs.  The
goal of this patch set is to add a parent pointer attribute to each inode.
The attribute name contains the parent inode, generation, and directory
offset, while the attribute value contains the file name.  This feature
will enable future optimizations for online scrub, shrink, nfs handles,
verity, or any other feature that could make use of quickly deriving an
inode's path from the mount point.

Directory parent pointers are stored as namespaced extended attributes
of a file.  Because parent pointers are an indivisible tuple of
(dirent_name, parent_ino, parent_gen) we cannot use the usual attr name
lookup functions to find a parent pointer.  This is solvable by
introducing a new lookup mode that checks both the name and the value of
the xattr.

Therefore, introduce this new name-value lookup mode that's gated on the
XFS_ATTR_PARENT namespace.  This requires the introduction of new
opcodes for the extended attribute update log intent items, which
actually means that the parent pointers feature (itself an INCOMPAT
feature) does not depend on the LOGGED_XATTRS log incompat feature bit.

To reduce collisions on the dirent names of parent pointers, introduce a
new attr hash mode that is the dir2 namehash of the dirent name xor'd
with the parent inode number.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'pptrs-6.10_2024-04-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: enable parent pointers
  xfs: drop compatibility minimum log size computations for reflink
  xfs: fix unit conversion error in xfs_log_calc_max_attrsetm_res
  xfs: add a incompat feature bit for parent pointers
  xfs: don't remove the attr fork when parent pointers are enabled
  xfs: add parent pointer ioctls
  xfs: split out handle management helpers a bit
  xfs: move handle ioctl code to xfs_handle.c
  xfs: pass the attr value to put_listent when possible
  xfs: don't return XFS_ATTR_PARENT attributes via listxattr
  xfs: Add parent pointers to xfs_cross_rename
  xfs: Add parent pointers to rename
  xfs: remove parent pointers in unlink
  xfs: add parent attributes to symlink
  xfs: add parent attributes to link
  xfs: parent pointer attribute creation
  xfs: create a hashname function for parent pointers
  xfs: extend transaction reservations for parent attributes
  xfs: add parent pointer validator functions
  xfs: Expose init_xattrs in xfs_create_tmpfile
  xfs: record inode generation in xattr update log intent items
  xfs: create attr log item opcodes and formats for parent pointers
  xfs: refactor xfs_is_using_logged_xattrs checks in attr item recovery
  xfs: allow xattr matching on name and value for parent pointers
  xfs: define parent pointer ondisk extended attribute format
  xfs: add parent pointer support to attribute code
  xfs: create a separate hashname function for extended attributes
  xfs: move xfs_attr_defer_add to xfs_attr_item.c
  xfs: check the flags earlier in xfs_attr_match
  xfs: rearrange xfs_attr_match parameters
parents d7d02f75 67ac7091
......@@ -42,6 +42,7 @@ xfs-y += $(addprefix libxfs/, \
xfs_inode_buf.o \
xfs_log_rlimit.o \
xfs_ag_resv.o \
xfs_parent.o \
xfs_rmap.o \
xfs_rmap_btree.o \
xfs_refcount.o \
......@@ -50,6 +51,7 @@ xfs-y += $(addprefix libxfs/, \
xfs_symlink_remote.o \
xfs_trans_inode.o \
xfs_trans_resv.o \
xfs_trans_space.o \
xfs_types.o \
)
# xfs_rtbitmap is shared with libxfs
......@@ -76,6 +78,7 @@ xfs-y += xfs_aops.o \
xfs_fsmap.o \
xfs_fsops.o \
xfs_globals.o \
xfs_handle.o \
xfs_health.o \
xfs_icache.o \
xfs_ioctl.o \
......
......@@ -26,6 +26,7 @@
#include "xfs_trace.h"
#include "xfs_attr_item.h"
#include "xfs_xattr.h"
#include "xfs_parent.h"
struct kmem_cache *xfs_attr_intent_cache;
......@@ -280,7 +281,7 @@ xfs_attr_get(
args->owner = args->dp->i_ino;
args->geo = args->dp->i_mount->m_attr_geo;
args->whichfork = XFS_ATTR_FORK;
args->hashval = xfs_da_hashname(args->name, args->namelen);
xfs_attr_sethash(args);
/* Entirely possible to look up a name which doesn't exist */
args->op_flags = XFS_DA_OP_OKNOENT;
......@@ -415,6 +416,50 @@ xfs_attr_sf_addname(
return error;
}
/*
 * Hash the name of a user/root/secure extended attribute.  Only the name
 * bytes take part in the hash; the work is done by xfs_da_hashname().
 */
xfs_dahash_t
xfs_attr_hashname(
	const uint8_t		*name,
	int			namelen)
{
	xfs_dahash_t		hash = xfs_da_hashname(name, namelen);

	return hash;
}
/*
 * Hash an extended attribute from any namespace.
 *
 * Attributes without XFS_ATTR_PARENT set are hashed by name alone via
 * xfs_attr_hashname().  Parent pointer attributes are handed to
 * xfs_parent_hashattr(), which also receives the attribute value.
 */
xfs_dahash_t
xfs_attr_hashval(
	struct xfs_mount	*mp,
	unsigned int		attr_flags,
	const uint8_t		*name,
	int			namelen,
	const void		*value,
	int			valuelen)
{
	ASSERT(xfs_attr_check_namespace(attr_flags));

	/* Everything that is not a parent pointer hashes the name only. */
	if (!(attr_flags & XFS_ATTR_PARENT))
		return xfs_attr_hashname(name, namelen);

	return xfs_parent_hashattr(mp, name, namelen, value, valuelen);
}
/*
 * A PPTR_REPLACE operation carries both the old and the new name/value in
 * @args.  Once the removal phase has finished, promote the new name and value
 * into the canonical name/value fields and recompute the hash so that the
 * rest of the operation works on the replacement attribute.
 */
static void
xfs_attr_update_pptr_replace_args(
	struct xfs_da_args	*args)
{
	ASSERT(args->new_namelen > 0);

	/* Promote the replacement value, then the replacement name. */
	args->valuelen = args->new_valuelen;
	args->value = args->new_value;
	args->namelen = args->new_namelen;
	args->name = args->new_name;

	/* The hash is derived from the fields updated above. */
	xfs_attr_sethash(args);
}
/*
* Handle the state change on completion of a multi-state attr operation.
*
......@@ -435,6 +480,8 @@ xfs_attr_complete_op(
if (!(args->op_flags & XFS_DA_OP_REPLACE))
replace_state = XFS_DAS_DONE;
else if (xfs_attr_intent_op(attr) == XFS_ATTRI_OP_FLAGS_PPTR_REPLACE)
xfs_attr_update_pptr_replace_args(args);
args->op_flags &= ~XFS_DA_OP_REPLACE;
args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
......@@ -901,37 +948,6 @@ xfs_attr_lookup(
return error;
}
/*
 * Allocate a zeroed attr intent for @args, record the requested operation in
 * it, choose the initial state for that operation, and queue the intent on
 * the transaction's deferred work list.
 */
static void
xfs_attr_defer_add(
	struct xfs_da_args	*args,
	unsigned int		op_flags)
{
	struct xfs_attr_intent	*attr;

	attr = kmem_cache_zalloc(xfs_attr_intent_cache,
			GFP_KERNEL | __GFP_NOFAIL);
	attr->xattri_da_args = args;
	attr->xattri_op_flags = op_flags;

	/* Pick the starting state that matches the requested operation. */
	if (op_flags == XFS_ATTRI_OP_FLAGS_SET) {
		attr->xattri_dela_state = xfs_attr_init_add_state(args);
	} else if (op_flags == XFS_ATTRI_OP_FLAGS_REPLACE) {
		attr->xattri_dela_state = xfs_attr_init_replace_state(args);
	} else if (op_flags == XFS_ATTRI_OP_FLAGS_REMOVE) {
		attr->xattri_dela_state = xfs_attr_init_remove_state(args);
	} else {
		ASSERT(0);
	}

	xfs_defer_add(args->trans, &attr->xattri_list, &xfs_attr_defer_type);
	trace_xfs_attr_defer_add(attr->xattri_dela_state, args->dp);
}
int
xfs_attr_set(
struct xfs_da_args *args,
......@@ -956,7 +972,7 @@ xfs_attr_set(
args->owner = args->dp->i_ino;
args->geo = mp->m_attr_geo;
args->whichfork = XFS_ATTR_FORK;
args->hashval = xfs_da_hashname(args->name, args->namelen);
xfs_attr_sethash(args);
/*
* We have no control over the attribute names that userspace passes us
......@@ -1021,14 +1037,14 @@ xfs_attr_set(
case -EEXIST:
if (op == XFS_ATTRUPDATE_REMOVE) {
/* if no value, we are performing a remove operation */
xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REMOVE);
xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE);
break;
}
/* Pure create fails if the attr already exists */
if (op == XFS_ATTRUPDATE_CREATE)
goto out_trans_cancel;
xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REPLACE);
xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE);
break;
case -ENOATTR:
/* Can't remove what isn't there. */
......@@ -1038,7 +1054,7 @@ xfs_attr_set(
/* Pure replace fails if no existing attr to replace. */
if (op == XFS_ATTRUPDATE_REPLACE)
goto out_trans_cancel;
xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_SET);
xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET);
break;
default:
goto out_trans_cancel;
......@@ -1556,6 +1572,10 @@ xfs_attr_namecheck(
if (length >= MAXNAMELEN)
return false;
/* Parent pointers have their own validation. */
if (attr_flags & XFS_ATTR_PARENT)
return xfs_parent_namecheck(attr_flags, name, length);
/* There shouldn't be any nulls here */
return !memchr(name, 0, length);
}
......
......@@ -47,8 +47,9 @@ struct xfs_attrlist_cursor_kern {
/* void; state communicated via *context */
typedef void (*put_listent_func_t)(struct xfs_attr_list_context *, int,
unsigned char *, int, int);
typedef void (*put_listent_func_t)(struct xfs_attr_list_context *context,
int flags, unsigned char *name, int namelen, void *value,
int valuelen);
struct xfs_attr_list_context {
struct xfs_trans *tp;
......@@ -510,8 +511,8 @@ struct xfs_attr_intent {
struct xfs_da_args *xattri_da_args;
/*
* Shared buffer containing the attr name and value so that the logging
* code can share large memory buffers between log items.
* Shared buffer containing the attr name, new name, and value so that
* the logging code can share large memory buffers between log items.
*/
struct xfs_attri_log_nameval *xattri_nameval;
......@@ -628,6 +629,20 @@ xfs_attr_init_replace_state(struct xfs_da_args *args)
return xfs_attr_init_add_state(args);
}
xfs_dahash_t xfs_attr_hashname(const uint8_t *name, int namelen);
xfs_dahash_t xfs_attr_hashval(struct xfs_mount *mp, unsigned int attr_flags,
const uint8_t *name, int namelen, const void *value,
int valuelen);
/*
 * Recompute args->hashval from the namespace filter, name and value currently
 * stored in @args.  Works for attributes from any namespace.
 */
static inline void xfs_attr_sethash(struct xfs_da_args *args)
{
	struct xfs_mount	*mp = args->dp->i_mount;

	args->hashval = xfs_attr_hashval(mp, args->attr_filter, args->name,
					 args->namelen, args->value,
					 args->valuelen);
}
extern struct kmem_cache *xfs_attr_intent_cache;
int __init xfs_attr_intent_init_cache(void);
void xfs_attr_intent_destroy_cache(void);
......
......@@ -507,28 +507,57 @@ xfs_attr3_leaf_read(
* INCOMPLETE flag will not be set in attr->attr_filter, but rather
* XFS_DA_OP_RECOVERY will be set in args->op_flags.
*/
static inline unsigned int xfs_attr_match_mask(const struct xfs_da_args *args)
{
	/* The ondisk namespace bits are always compared. */
	unsigned int		mask = XFS_ATTR_NSP_ONDISK_MASK;

	/* Outside of recovery, the INCOMPLETE bit must match as well. */
	if (!(args->op_flags & XFS_DA_OP_RECOVERY))
		mask |= XFS_ATTR_INCOMPLETE;

	return mask;
}
/*
 * Compare the value of a candidate parent pointer attribute against the value
 * the caller is looking for in @args.
 *
 * @value is NULL when the candidate stores its value remotely; parent pointers
 * never use remote values, so that is never a match.  Any valuelen is
 * accepted, not just that of a parent record, so that offline repair can
 * delete ATTR_PARENT values that are not parent pointers.
 */
static inline bool
xfs_attr_parent_match(
	const struct xfs_da_args	*args,
	const void			*value,
	unsigned int			valuelen)
{
	ASSERT(args->value != NULL);

	/* No local value, or a length mismatch, can never match. */
	if (value == NULL || valuelen != args->valuelen)
		return false;

	return !memcmp(args->value, value, valuelen);
}
static bool
xfs_attr_match(
struct xfs_da_args *args,
uint8_t namelen,
unsigned char *name,
int flags)
unsigned int attr_flags,
const unsigned char *name,
unsigned int namelen,
const void *value,
unsigned int valuelen)
{
unsigned int mask = xfs_attr_match_mask(args);
if (args->namelen != namelen)
return false;
if ((args->attr_filter & mask) != (attr_flags & mask))
return false;
if (memcmp(args->name, name, namelen) != 0)
return false;
/* Recovery ignores the INCOMPLETE flag. */
if ((args->op_flags & XFS_DA_OP_RECOVERY) &&
args->attr_filter == (flags & XFS_ATTR_NSP_ONDISK_MASK))
return true;
if (attr_flags & XFS_ATTR_PARENT)
return xfs_attr_parent_match(args, value, valuelen);
/* All remaining matches need to be filtered by INCOMPLETE state. */
if (args->attr_filter !=
(flags & (XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE)))
return false;
return true;
}
......@@ -538,6 +567,13 @@ xfs_attr_copy_value(
unsigned char *value,
int valuelen)
{
/*
* Parent pointer lookups require the caller to specify the name and
* value, so don't copy anything.
*/
if (args->attr_filter & XFS_ATTR_PARENT)
return 0;
/*
* No copy if all we have to do is get the length
*/
......@@ -746,8 +782,9 @@ xfs_attr_sf_findname(
for (sfe = xfs_attr_sf_firstentry(sf);
sfe < xfs_attr_sf_endptr(sf);
sfe = xfs_attr_sf_nextentry(sfe)) {
if (xfs_attr_match(args, sfe->namelen, sfe->nameval,
sfe->flags))
if (xfs_attr_match(args, sfe->flags, sfe->nameval,
sfe->namelen, &sfe->nameval[sfe->namelen],
sfe->valuelen))
return sfe;
}
......@@ -854,7 +891,8 @@ xfs_attr_sf_removename(
*/
if (totsize == sizeof(struct xfs_attr_sf_hdr) && xfs_has_attr2(mp) &&
(dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
!(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE))) {
!(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE)) &&
!xfs_has_parent(mp)) {
xfs_attr_fork_remove(dp, args->trans);
} else {
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
......@@ -863,7 +901,8 @@ xfs_attr_sf_removename(
ASSERT(totsize > sizeof(struct xfs_attr_sf_hdr) ||
(args->op_flags & XFS_DA_OP_ADDNAME) ||
!xfs_has_attr2(mp) ||
dp->i_df.if_format == XFS_DINODE_FMT_BTREE);
dp->i_df.if_format == XFS_DINODE_FMT_BTREE ||
xfs_has_parent(mp));
xfs_trans_log_inode(args->trans, dp,
XFS_ILOG_CORE | XFS_ILOG_ADATA);
}
......@@ -947,14 +986,13 @@ xfs_attr_shortform_to_leaf(
nargs.namelen = sfe->namelen;
nargs.value = &sfe->nameval[nargs.namelen];
nargs.valuelen = sfe->valuelen;
nargs.hashval = xfs_da_hashname(sfe->nameval,
sfe->namelen);
nargs.attr_filter = sfe->flags & XFS_ATTR_NSP_ONDISK_MASK;
if (!xfs_attr_check_namespace(sfe->flags)) {
xfs_da_mark_sick(args);
error = -EFSCORRUPTED;
goto out;
}
xfs_attr_sethash(&nargs);
error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
ASSERT(error == -ENOATTR);
error = xfs_attr3_leaf_add(bp, &nargs);
......@@ -2443,18 +2481,23 @@ xfs_attr3_leaf_lookup_int(
*/
if (entry->flags & XFS_ATTR_LOCAL) {
name_loc = xfs_attr3_leaf_name_local(leaf, probe);
if (!xfs_attr_match(args, name_loc->namelen,
name_loc->nameval, entry->flags))
if (!xfs_attr_match(args, entry->flags,
name_loc->nameval, name_loc->namelen,
&name_loc->nameval[name_loc->namelen],
be16_to_cpu(name_loc->valuelen)))
continue;
args->index = probe;
return -EEXIST;
} else {
unsigned int valuelen;
name_rmt = xfs_attr3_leaf_name_remote(leaf, probe);
if (!xfs_attr_match(args, name_rmt->namelen,
name_rmt->name, entry->flags))
valuelen = be32_to_cpu(name_rmt->valuelen);
if (!xfs_attr_match(args, entry->flags, name_rmt->name,
name_rmt->namelen, NULL, valuelen))
continue;
args->index = probe;
args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
args->rmtvaluelen = valuelen;
args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
args->rmtblkcnt = xfs_attr3_rmt_blocks(
args->dp->i_mount,
......
......@@ -16,6 +16,7 @@ typedef struct xfs_attr_sf_sort {
uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
xfs_dahash_t hash; /* this entry's hash value */
unsigned char *name; /* name value, pointer into buffer */
void *value;
} xfs_attr_sf_sort_t;
#define XFS_ATTR_SF_ENTSIZE_MAX /* max space for name&value */ \
......
......@@ -55,7 +55,9 @@ enum xfs_dacmp {
typedef struct xfs_da_args {
struct xfs_da_geometry *geo; /* da block geometry */
const uint8_t *name; /* string (maybe not NULL terminated) */
const uint8_t *new_name; /* new attr name */
void *value; /* set of bytes (maybe contain NULLs) */
void *new_value; /* new xattr value (may contain NULLs) */
struct xfs_inode *dp; /* directory inode to manipulate */
struct xfs_trans *trans; /* current trans (changes over time) */
......@@ -63,10 +65,12 @@ typedef struct xfs_da_args {
xfs_ino_t owner; /* inode that owns the dir/attr data */
int valuelen; /* length of value */
int new_valuelen; /* length of new_value */
uint8_t filetype; /* filetype of inode for directories */
uint8_t op_flags; /* operation flags */
uint8_t attr_filter; /* XFS_ATTR_{ROOT,SECURE,INCOMPLETE} */
short namelen; /* length of string (maybe no NULL) */
short new_namelen; /* length of new attr name */
xfs_dahash_t hashval; /* hash value of name */
xfs_extlen_t total; /* total blocks needed, for 1st bmap */
int whichfork; /* data or attribute fork */
......
......@@ -714,13 +714,20 @@ struct xfs_attr3_leafblock {
#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */
#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */
#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */
#define XFS_ATTR_PARENT_BIT 3 /* parent pointer attrs */
#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */
#define XFS_ATTR_LOCAL (1u << XFS_ATTR_LOCAL_BIT)
#define XFS_ATTR_ROOT (1u << XFS_ATTR_ROOT_BIT)
#define XFS_ATTR_SECURE (1u << XFS_ATTR_SECURE_BIT)
#define XFS_ATTR_PARENT (1u << XFS_ATTR_PARENT_BIT)
#define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT)
#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | \
XFS_ATTR_SECURE | \
XFS_ATTR_PARENT)
/* Private attr namespaces not exposed to userspace */
#define XFS_ATTR_PRIVATE_NSP_MASK (XFS_ATTR_PARENT)
#define XFS_ATTR_ONDISK_MASK (XFS_ATTR_NSP_ONDISK_MASK | \
XFS_ATTR_LOCAL | \
......@@ -729,7 +736,8 @@ struct xfs_attr3_leafblock {
#define XFS_ATTR_NAMESPACE_STR \
{ XFS_ATTR_LOCAL, "local" }, \
{ XFS_ATTR_ROOT, "root" }, \
{ XFS_ATTR_SECURE, "secure" }
{ XFS_ATTR_SECURE, "secure" }, \
{ XFS_ATTR_PARENT, "parent" }
/*
* Alignment for namelist and valuelist entries (since they are mixed
......@@ -885,4 +893,17 @@ static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
xfs_failaddr_t xfs_da3_blkinfo_verify(struct xfs_buf *bp,
struct xfs_da3_blkinfo *hdr3);
/*
 * Parent pointer attribute format definition
 *
 * The xattr name contains the dirent name.
 * The xattr value encodes the parent inode number and generation to ease
 * opening parents by handle.
 * The xattr hashval is the dirent name hash xor'd with the upper and lower
 * 32 bits of p_ino (see xfs_parent_hashval()).
 *
 * The structure is packed; xfs_check_ondisk_structs() expects it to be
 * exactly 12 bytes.
 */
struct xfs_parent_rec {
__be64 p_ino; /* parent inode number, big-endian */
__be32 p_gen; /* parent inode generation, big-endian */
} __packed;
#endif /* __XFS_DA_FORMAT_H__ */
......@@ -374,6 +374,7 @@ xfs_sb_has_ro_compat_feature(
#define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4) /* needs xfs_repair */
#define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */
#define XFS_SB_FEAT_INCOMPAT_EXCHRANGE (1 << 6) /* exchangerange supported */
#define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 7) /* parent pointers */
#define XFS_SB_FEAT_INCOMPAT_ALL \
(XFS_SB_FEAT_INCOMPAT_FTYPE | \
XFS_SB_FEAT_INCOMPAT_SPINODES | \
......@@ -381,7 +382,8 @@ xfs_sb_has_ro_compat_feature(
XFS_SB_FEAT_INCOMPAT_BIGTIME | \
XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR | \
XFS_SB_FEAT_INCOMPAT_NREXT64 | \
XFS_SB_FEAT_INCOMPAT_EXCHRANGE)
XFS_SB_FEAT_INCOMPAT_EXCHRANGE | \
XFS_SB_FEAT_INCOMPAT_PARENT)
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool
......
......@@ -240,6 +240,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_INOBTCNT (1 << 22) /* inobt btree counter */
#define XFS_FSOP_GEOM_FLAGS_NREXT64 (1 << 23) /* large extent counters */
#define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24) /* exchange range */
#define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */
/*
* Minimum and maximum sizes need for growth checks.
......@@ -633,7 +634,9 @@ typedef struct xfs_fsop_attrmulti_handlereq {
/*
* per machine unique filesystem identifier types.
*/
typedef struct { __u32 val[2]; } xfs_fsid_t; /* file system id type */
typedef struct xfs_fsid {
__u32 val[2]; /* file system id type */
} xfs_fsid_t;
typedef struct xfs_fid {
__u16 fid_len; /* length of remainder */
......@@ -814,6 +817,78 @@ struct xfs_exchange_range {
XFS_EXCHANGE_RANGE_DRY_RUN | \
XFS_EXCHANGE_RANGE_FILE1_WRITTEN)
/* Iterating parent pointers of files. */
/* target was the root directory */
#define XFS_GETPARENTS_OFLAG_ROOT (1U << 0)
/* Cursor is done iterating pptrs */
#define XFS_GETPARENTS_OFLAG_DONE (1U << 1)
#define XFS_GETPARENTS_OFLAGS_ALL (XFS_GETPARENTS_OFLAG_ROOT | \
XFS_GETPARENTS_OFLAG_DONE)
#define XFS_GETPARENTS_IFLAGS_ALL (0)
/*
 * One parent pointer record in the buffer referenced by
 * xfs_getparents.gp_buffer.  Records are variable length:
 * xfs_getparents_next_rec() finds the following record by stepping
 * gpr_reclen bytes from the start of the current one.
 */
struct xfs_getparents_rec {
struct xfs_handle gpr_parent; /* Handle to parent */
__u32 gpr_reclen; /* Length of entire record */
__u32 gpr_reserved; /* zero */
char gpr_name[]; /* Null-terminated filename */
};
/*
 * Iterate through this file's directory parent pointers.
 *
 * This is the argument structure for XFS_IOC_GETPARENTS.
 */
struct xfs_getparents {
/*
* Structure to track progress in iterating the parent pointers.
* Must be initialized to zeroes before the first ioctl call, and
* not touched by callers after that.
*/
struct xfs_attrlist_cursor gp_cursor;
/* Input flags: XFS_GETPARENTS_IFLAG* */
__u16 gp_iflags;
/* Output flags: XFS_GETPARENTS_OFLAG* */
__u16 gp_oflags;
/* Size of the gp_buffer in bytes */
__u32 gp_bufsize;
/* Must be set to zero */
__u64 gp_reserved;
/*
* Pointer to a buffer in which to place xfs_getparents_rec.  The
* pointer is carried as a 64-bit integer; xfs_getparents_first_rec()
* converts it back into a record pointer.
*/
__u64 gp_buffer;
};
/*
 * Return a pointer to the first record in the buffer described by @gp.  The
 * buffer address is stored in gp_buffer as a 64-bit integer and is converted
 * back to a pointer here.
 */
static inline struct xfs_getparents_rec *
xfs_getparents_first_rec(struct xfs_getparents *gp)
{
	uintptr_t		addr = (uintptr_t)gp->gp_buffer;

	return (struct xfs_getparents_rec *)addr;
}
/*
 * Return the record that follows @gpr in the buffer described by @gp, or NULL
 * if stepping gpr_reclen bytes past the start of @gpr reaches or passes the
 * end of the buffer (gp_buffer + gp_bufsize).
 *
 * The byte offset is applied through a char pointer and the result is cast
 * explicitly: arithmetic on void pointers is a GNU extension, and implicit
 * conversion from void * is not accepted by C++ callers of this header.
 */
static inline struct xfs_getparents_rec *
xfs_getparents_next_rec(struct xfs_getparents *gp,
			struct xfs_getparents_rec *gpr)
{
	void *next = ((char *)gpr + gpr->gpr_reclen);
	void *end = (void *)(uintptr_t)(gp->gp_buffer + gp->gp_bufsize);

	if (next >= end)
		return NULL;

	return (struct xfs_getparents_rec *)next;
}
/*
 * Iterate through this file handle's directory parent pointers.
 *
 * This is the argument structure for XFS_IOC_GETPARENTS_BY_HANDLE.
 */
struct xfs_getparents_by_handle {
/* Handle to file whose parents we want. */
struct xfs_handle gph_handle;
/* Iteration request, laid out as for XFS_IOC_GETPARENTS. */
struct xfs_getparents gph_request;
};
/*
* ioctl commands that are used by Linux filesystems
*/
......@@ -849,6 +924,8 @@ struct xfs_exchange_range {
/* XFS_IOC_GETFSMAP ------ hoisted 59 */
#define XFS_IOC_SCRUB_METADATA _IOWR('X', 60, struct xfs_scrub_metadata)
#define XFS_IOC_AG_GEOMETRY _IOWR('X', 61, struct xfs_ag_geometry)
#define XFS_IOC_GETPARENTS _IOWR('X', 62, struct xfs_getparents)
#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle)
/*
* ioctl commands that replace IRIX syssgi()'s
......
......@@ -119,7 +119,9 @@ struct xfs_unmount_log_format {
#define XLOG_REG_TYPE_ATTR_VALUE 30
#define XLOG_REG_TYPE_XMI_FORMAT 31
#define XLOG_REG_TYPE_XMD_FORMAT 32
#define XLOG_REG_TYPE_MAX 32
#define XLOG_REG_TYPE_ATTR_NEWNAME 33
#define XLOG_REG_TYPE_ATTR_NEWVALUE 34
#define XLOG_REG_TYPE_MAX 34
/*
* Flags to log operation header
......@@ -1026,6 +1028,9 @@ struct xfs_icreate_log {
#define XFS_ATTRI_OP_FLAGS_SET 1 /* Set the attribute */
#define XFS_ATTRI_OP_FLAGS_REMOVE 2 /* Remove the attribute */
#define XFS_ATTRI_OP_FLAGS_REPLACE 3 /* Replace the attribute */
#define XFS_ATTRI_OP_FLAGS_PPTR_SET 4 /* Set parent pointer */
#define XFS_ATTRI_OP_FLAGS_PPTR_REMOVE 5 /* Remove parent pointer */
#define XFS_ATTRI_OP_FLAGS_PPTR_REPLACE 6 /* Replace parent pointer */
#define XFS_ATTRI_OP_FLAGS_TYPE_MASK 0xFF /* Flags type mask */
/*
......@@ -1034,6 +1039,7 @@ struct xfs_icreate_log {
*/
#define XFS_ATTRI_FILTER_MASK (XFS_ATTR_ROOT | \
XFS_ATTR_SECURE | \
XFS_ATTR_PARENT | \
XFS_ATTR_INCOMPLETE)
/*
......@@ -1043,11 +1049,22 @@ struct xfs_icreate_log {
struct xfs_attri_log_format {
uint16_t alfi_type; /* attri log item type */
uint16_t alfi_size; /* size of this item */
uint32_t __pad; /* pad to 64 bit aligned */
uint32_t alfi_igen; /* generation of alfi_ino for pptr ops */
uint64_t alfi_id; /* attri identifier */
uint64_t alfi_ino; /* the inode for this attr operation */
uint32_t alfi_op_flags; /* marks the op as a set or remove */
union {
uint32_t alfi_name_len; /* attr name length */
struct {
/*
* For PPTR_REPLACE, these are the lengths of the old
* and new attr names. The new and old values must
* have the same length.
*/
uint16_t alfi_old_name_len;
uint16_t alfi_new_name_len;
};
};
uint32_t alfi_value_len; /* attr value length */
uint32_t alfi_attr_filter;/* attr filter flags */
};
......
......@@ -16,6 +16,34 @@
#include "xfs_bmap_btree.h"
#include "xfs_trace.h"
/*
* Shortly after enabling the large extents count feature in 2023, longstanding
* bugs were found in the code that computes the minimum log size. Luckily,
* the bugs resulted in over-estimates of that size, so there's no impact to
* existing users. However, we don't want to reduce the minimum log size
* because that can create the situation where a newer mkfs writes a new
* filesystem that an older kernel won't mount.
*
* Several years prior, we also discovered that the transaction reservations
* for rmap and reflink operations were unnecessarily large. That was fixed,
* but the minimum log size computation was left alone to avoid the
* compatibility problems noted above. Fix that too.
*
* Therefore, we only may correct the computation starting with filesystem
* features that didn't exist in 2023. In other words, only turn this on if
* the filesystem has parent pointers.
*
* This function can be called before the XFS_HAS_* flags have been set up,
* (e.g. mkfs) so we must check the ondisk superblock.
*/
static inline bool
xfs_want_minlogsize_fixes(
	struct xfs_sb	*sb)
{
	/* Only V5 superblocks carry incompat feature bits. */
	if (!xfs_sb_is_v5(sb))
		return false;

	/* Parent pointers mark a feature set new enough for the fixes. */
	return xfs_sb_has_incompat_feature(sb, XFS_SB_FEAT_INCOMPAT_PARENT);
}
/*
* Calculate the maximum length in bytes that would be required for a local
* attribute value as large attributes out of line are not logged.
......@@ -31,6 +59,15 @@ xfs_log_calc_max_attrsetm_res(
MAXNAMELEN - 1;
nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
nblks += XFS_B_TO_FSB(mp, size);
/*
* If the feature set is new enough, correct a unit conversion error in
* the xattr transaction reservation code that resulted in oversized
* minimum log size computations.
*/
if (xfs_want_minlogsize_fixes(&mp->m_sb))
size = XFS_B_TO_FSB(mp, size);
nblks += XFS_NEXTENTADD_SPACE_RES(mp, size, XFS_ATTR_FORK);
return M_RES(mp)->tr_attrsetm.tr_logres +
......@@ -48,6 +85,15 @@ xfs_log_calc_trans_resv_for_minlogblocks(
{
unsigned int rmap_maxlevels = mp->m_rmap_maxlevels;
/*
* If the feature set is new enough, drop the oversized minimum log
* size computation introduced by the original reflink code.
*/
if (xfs_want_minlogsize_fixes(&mp->m_sb)) {
xfs_trans_resv_calc(mp, resv);
return;
}
/*
* In the early days of rmap+reflink, we always set the rmap maxlevels
* to 9 even if the AG was small enough that it would never grow to
......
......@@ -119,6 +119,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, offset, 1);
XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, name, 3);
XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t, 10);
XFS_CHECK_STRUCT_SIZE(struct xfs_parent_rec, 12);
/* log structures */
XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88);
......@@ -155,6 +156,11 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16);
XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16);
/* parent pointer ioctls */
XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_rec, 32);
XFS_CHECK_STRUCT_SIZE(struct xfs_getparents, 40);
XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_by_handle, 64);
/*
* The v5 superblock format extended several v4 header structures with
* additional data. While new fields are only accessible on v5
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2022-2024 Oracle.
* All rights reserved.
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_da_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_attr_sf.h"
#include "xfs_bmap.h"
#include "xfs_defer.h"
#include "xfs_log.h"
#include "xfs_xattr.h"
#include "xfs_parent.h"
#include "xfs_trans_space.h"
#include "xfs_attr_item.h"
#include "xfs_health.h"
struct kmem_cache *xfs_parent_args_cache;
/*
* Parent pointer attribute handling.
*
* Because the attribute name is a filename component, it will never be longer
* than 255 bytes and must not contain nulls or slashes. These are roughly the
* same constraints that apply to attribute names.
*
* The attribute value must always be a struct xfs_parent_rec. This means the
* attribute will never be in remote format because 12 bytes is nowhere near
* xfs_attr_leaf_entsize_local_max() (~75% of block size).
*
* Creating a new parent attribute will always create a new attribute - there
* should never, ever be an existing attribute in the tree for a new inode.
* ENOSPC behavior is problematic - creating the inode without the parent
* pointer is effectively a corruption, so we allow parent attribute creation
* to dip into the reserve block pool to avoid unexpected ENOSPC errors from
* occurring.
*/
/*
 * Validate the name of a parent pointer attribute.
 *
 * Parent pointers always use logged operations, so an attribute flagged
 * INCOMPLETE is never valid.  Otherwise the name must pass the directory
 * entry name check, xfs_dir2_namecheck().
 */
bool
xfs_parent_namecheck(
	unsigned int			attr_flags,
	const void			*name,
	size_t				length)
{
	bool				incomplete;

	incomplete = (attr_flags & XFS_ATTR_INCOMPLETE) != 0;

	return !incomplete && xfs_dir2_namecheck(name, length);
}
/*
 * Validate the value of a parent pointer attribute.
 *
 * The value is valid only if the filesystem has parent pointers enabled, the
 * value is exactly one struct xfs_parent_rec, the value is local (non-NULL),
 * and the recorded parent inode number passes xfs_verify_dir_ino().
 */
bool
xfs_parent_valuecheck(
	struct xfs_mount		*mp,
	const void			*value,
	size_t				valuelen)
{
	const struct xfs_parent_rec	*rec = value;

	if (!xfs_has_parent(mp) ||
	    valuelen != sizeof(struct xfs_parent_rec) ||
	    value == NULL)
		return false;

	/* The parent inumber must be valid. */
	return xfs_verify_dir_ino(mp, be64_to_cpu(rec->p_ino));
}
/*
 * Compute the attribute hash for a parent pointer.
 *
 * Start from the same dirent name hash that the directory would use, then
 * fold in both halves of the parent inode number so that hardlinked files
 * with identical names but different parents do not collide.
 */
xfs_dahash_t
xfs_parent_hashval(
	struct xfs_mount		*mp,
	const uint8_t			*name,
	int				namelen,
	xfs_ino_t			parent_ino)
{
	struct xfs_name			xname = {
		.name			= name,
		.len			= namelen,
	};
	xfs_dahash_t			hash;

	hash = xfs_dir2_hashname(mp, &xname);
	hash ^= upper_32_bits(parent_ino);
	hash ^= lower_32_bits(parent_ino);

	return hash;
}
/* Compute the attribute name hash from the xattr components. */
xfs_dahash_t
xfs_parent_hashattr(
struct xfs_mount *mp,
const uint8_t *name,
int namelen,
const void *value,
int valuelen)
{
const struct xfs_parent_rec *rec = value;
/* Requires a local attr value in xfs_parent_rec format */
if (valuelen != sizeof(struct xfs_parent_rec)) {
ASSERT(valuelen == sizeof(struct xfs_parent_rec));
return 0;
}
if (!value) {
ASSERT(value != NULL);
return 0;
}
return xfs_parent_hashval(mp, name, namelen, be64_to_cpu(rec->p_ino));
}
/*
 * Fill out @args for a parent pointer lookup or update on @child.
 *
 * The caller must have zeroed @args beforehand.  @tp is only needed for
 * updates.  The attribute name is taken from @parent_name and the attribute
 * value is @rec; the hash is computed once both are in place.
 */
static void
xfs_parent_da_args_init(
	struct xfs_da_args	*args,
	struct xfs_trans	*tp,
	struct xfs_parent_rec	*rec,
	struct xfs_inode	*child,
	xfs_ino_t		owner,
	const struct xfs_name	*parent_name)
{
	struct xfs_mount	*mp = child->i_mount;

	/* Which inode and transaction are we working with? */
	args->dp = child;
	args->owner = owner;
	args->trans = tp;

	/* Parent pointers live in the attr fork, in their own namespace. */
	args->geo = mp->m_attr_geo;
	args->whichfork = XFS_ATTR_FORK;
	args->attr_filter = XFS_ATTR_PARENT;
	args->op_flags = XFS_DA_OP_LOGGED | XFS_DA_OP_OKNOENT;

	/* The dirent name is the xattr name; the parent rec is the value. */
	args->name = parent_name->name;
	args->namelen = parent_name->len;
	args->value = rec;
	args->valuelen = sizeof(struct xfs_parent_rec);

	/* Hash last: it reads the fields set above. */
	xfs_attr_sethash(args);
}
/*
 * Prepare the incore state of @child for a parent pointer query or update.
 *
 * Parent pointers require an attr fork.  If @child has none, mark the inode
 * sick and return -EFSCORRUPTED; otherwise return the result of reading in
 * the attr fork extents.
 */
static inline int
xfs_parent_iread_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*child)
{
	if (!XFS_IS_CORRUPT(child->i_mount, !xfs_inode_has_attr_fork(child)))
		return xfs_iread_extents(tp, child, XFS_ATTR_FORK);

	xfs_inode_mark_sick(child, XFS_SICK_INO_PARENT);
	return -EFSCORRUPTED;
}
/*
 * Queue the addition of a parent pointer on @child to reflect a new dirent
 * named @parent_name in directory @dp.
 *
 * Returns 0 once the deferred attr set has been queued, or a negative errno
 * if the attr fork of @child could not be prepared.
 */
int
xfs_parent_addname(
	struct xfs_trans	*tp,
	struct xfs_parent_args	*ppargs,
	struct xfs_inode	*dp,
	const struct xfs_name	*parent_name,
	struct xfs_inode	*child)
{
	struct xfs_da_args	*args = &ppargs->args;
	int			ret = xfs_parent_iread_extents(tp, child);

	if (ret)
		return ret;

	/* The xattr value records the parent directory. */
	xfs_inode_to_parent_rec(&ppargs->rec, dp);
	xfs_parent_da_args_init(args, tp, &ppargs->rec, child, child->i_ino,
			parent_name);
	xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET);

	return 0;
}
/*
 * Queue the removal of the parent pointer on @child that corresponds to the
 * dirent named @parent_name in directory @dp.
 *
 * Returns 0 once the deferred attr removal has been queued, or a negative
 * errno if the attr fork of @child could not be prepared.
 */
int
xfs_parent_removename(
	struct xfs_trans	*tp,
	struct xfs_parent_args	*ppargs,
	struct xfs_inode	*dp,
	const struct xfs_name	*parent_name,
	struct xfs_inode	*child)
{
	struct xfs_da_args	*args = &ppargs->args;
	int			ret = xfs_parent_iread_extents(tp, child);

	if (ret)
		return ret;

	/* Name and parent record together identify the pointer to remove. */
	xfs_inode_to_parent_rec(&ppargs->rec, dp);
	xfs_parent_da_args_init(args, tp, &ppargs->rec, child, child->i_ino,
			parent_name);
	xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE);

	return 0;
}
/* Replace one parent pointer with another to reflect a rename. */
int
xfs_parent_replacename(
struct xfs_trans *tp,
struct xfs_parent_args *ppargs,
struct xfs_inode *old_dp,
const struct xfs_name *old_name,
struct xfs_inode *new_dp,
const struct xfs_name *new_name,
struct xfs_inode *child)
{
int error;
error = xfs_parent_iread_extents(tp, child);
if (error)
return error;
xfs_inode_to_parent_rec(&ppargs->rec, old_dp);
xfs_parent_da_args_init(&ppargs->args, tp, &ppargs->rec, child,
child->i_ino, old_name);
xfs_inode_to_parent_rec(&ppargs->new_rec, new_dp);
ppargs->args.new_name = new_name->name;
ppargs->args.new_namelen = new_name->len;
ppargs->args.new_value = &ppargs->new_rec;
ppargs->args.new_valuelen = sizeof(struct xfs_parent_rec);
xfs_attr_defer_add(&ppargs->args, XFS_ATTR_DEFER_REPLACE);
return 0;
}
/*
 * Extract parent pointer information from a parent pointer xattr into
 * @parent_ino and @parent_gen.  Either of those two parameters can be a NULL
 * pointer if the caller does not want that piece of information.
 *
 * Callers must only pass in xattrs that carry the XFS_ATTR_PARENT flag in
 * @attr_flags.  Returns 0 if the name and value pass the parent pointer
 * checks (in which case the requested outputs have been filled out), or
 * -EFSCORRUPTED for garbage.
 */
int
xfs_parent_from_attr(
	struct xfs_mount	*mp,
	unsigned int		attr_flags,
	const unsigned char	*name,
	unsigned int		namelen,
	const void		*value,
	unsigned int		valuelen,
	xfs_ino_t		*parent_ino,
	uint32_t		*parent_gen)
{
	const struct xfs_parent_rec *rec = value;

	ASSERT(attr_flags & XFS_ATTR_PARENT);

	/* Do not decode anything until both the name and value check out. */
	if (!xfs_parent_namecheck(attr_flags, name, namelen))
		return -EFSCORRUPTED;
	if (!xfs_parent_valuecheck(mp, value, valuelen))
		return -EFSCORRUPTED;

	/* The record is stored big-endian; convert for the caller. */
	if (parent_ino)
		*parent_ino = be64_to_cpu(rec->p_ino);
	if (parent_gen)
		*parent_gen = be32_to_cpu(rec->p_gen);
	return 0;
}
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2022-2024 Oracle.
* All Rights Reserved.
*/
#ifndef __XFS_PARENT_H__
#define __XFS_PARENT_H__
/* Metadata validators */
bool xfs_parent_namecheck(unsigned int attr_flags, const void *name,
size_t length);
bool xfs_parent_valuecheck(struct xfs_mount *mp, const void *value,
size_t valuelen);
xfs_dahash_t xfs_parent_hashval(struct xfs_mount *mp, const uint8_t *name,
int namelen, xfs_ino_t parent_ino);
xfs_dahash_t xfs_parent_hashattr(struct xfs_mount *mp, const uint8_t *name,
int namelen, const void *value, int valuelen);
/*
 * Initialize a xfs_parent_rec, which is stored as the value of a parent
 * pointer xattr (the xattr name is the dirent name).  @ino and @gen are the
 * inode number and generation of the parent directory; both are converted to
 * big-endian format.
 */
static inline void
xfs_parent_rec_init(
	struct xfs_parent_rec	*rec,
	xfs_ino_t		ino,
	uint32_t		gen)
{
	rec->p_ino = cpu_to_be64(ino);
	rec->p_gen = cpu_to_be32(gen);
}
/*
 * Initialize a xfs_parent_rec from the incore parent directory @dp.  The
 * record holds the inode number and generation of @dp and is stored as the
 * value of a parent pointer xattr.
 */
static inline void
xfs_inode_to_parent_rec(
	struct xfs_parent_rec	*rec,
	const struct xfs_inode	*dp)
{
	xfs_parent_rec_init(rec, dp->i_ino, VFS_IC(dp)->i_generation);
}
extern struct kmem_cache *xfs_parent_args_cache;
/*
* Parent pointer information needed to pass around the deferred xattr update
* machinery.
*/
struct xfs_parent_args {
/* Parent record of the pointer being added, removed, or replaced. */
struct xfs_parent_rec rec;
/* Parent record of the replacement pointer; only set for a replace. */
struct xfs_parent_rec new_rec;
/* xattr operation arguments; value and new_value point at the records. */
struct xfs_da_args args;
};
/*
 * Begin a parent pointer update by allocating the zeroed context object that
 * the update needs.  If the filesystem does not have parent pointers, no
 * object is allocated and *@ppargsp is set to NULL.  Returns 0 on success or
 * -ENOMEM if the allocation fails (*@ppargsp is NULL in that case too).
 */
static inline int
xfs_parent_start(
	struct xfs_mount	*mp,
	struct xfs_parent_args	**ppargsp)
{
	struct xfs_parent_args	*ppargs = NULL;

	if (xfs_has_parent(mp))
		ppargs = kmem_cache_zalloc(xfs_parent_args_cache, GFP_KERNEL);

	*ppargsp = ppargs;

	/* Only a failed allocation on a parent pointer fs is an error. */
	if (!ppargs && xfs_has_parent(mp))
		return -ENOMEM;
	return 0;
}
/*
 * Finish a parent pointer update by freeing the context object, if one was
 * allocated.  @mp is not used here.
 */
static inline void
xfs_parent_finish(
	struct xfs_mount	*mp,
	struct xfs_parent_args	*ppargs)
{
	/* Nothing was allocated, so there is nothing to free. */
	if (!ppargs)
		return;

	kmem_cache_free(xfs_parent_args_cache, ppargs);
}
int xfs_parent_addname(struct xfs_trans *tp, struct xfs_parent_args *ppargs,
struct xfs_inode *dp, const struct xfs_name *parent_name,
struct xfs_inode *child);
int xfs_parent_removename(struct xfs_trans *tp, struct xfs_parent_args *ppargs,
struct xfs_inode *dp, const struct xfs_name *parent_name,
struct xfs_inode *child);
int xfs_parent_replacename(struct xfs_trans *tp,
struct xfs_parent_args *ppargs,
struct xfs_inode *old_dp, const struct xfs_name *old_name,
struct xfs_inode *new_dp, const struct xfs_name *new_name,
struct xfs_inode *child);
int xfs_parent_from_attr(struct xfs_mount *mp, unsigned int attr_flags,
const unsigned char *name, unsigned int namelen,
const void *value, unsigned int valuelen,
xfs_ino_t *parent_ino, uint32_t *parent_gen);
#endif /* __XFS_PARENT_H__ */
......@@ -178,6 +178,8 @@ xfs_sb_version_to_features(
features |= XFS_FEAT_NREXT64;
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE)
features |= XFS_FEAT_EXCHANGE_RANGE;
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT)
features |= XFS_FEAT_PARENT;
return features;
}
......@@ -1254,6 +1256,8 @@ xfs_fs_geometry(
geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME;
if (xfs_has_inobtcounts(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_INOBTCNT;
if (xfs_has_parent(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_PARENT;
if (xfs_has_sector(mp)) {
geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR;
geo->logsectsize = sbp->sb_logsectsize;
......
......@@ -20,6 +20,9 @@
#include "xfs_qm.h"
#include "xfs_trans_space.h"
#include "xfs_rtbitmap.h"
#include "xfs_attr_item.h"
#include "xfs_log.h"
#include "xfs_da_format.h"
#define _ALLOC true
#define _FREE false
......@@ -422,29 +425,110 @@ xfs_calc_itruncate_reservation_minlogsize(
return xfs_calc_itruncate_reservation(mp, true);
}
/*
 * Log overhead of the xattr intent item for adding one parent pointer: the
 * intent format structure, one parent record, and a dirent name of the
 * maximum length.
 */
static inline unsigned int xfs_calc_pptr_link_overhead(void)
{
	const unsigned int	rec_bytes =
			xlog_calc_iovec_len(sizeof(struct xfs_parent_rec));
	const unsigned int	name_bytes =
			xlog_calc_iovec_len(MAXNAMELEN - 1);

	return sizeof(struct xfs_attri_log_format) + rec_bytes + name_bytes;
}
/*
 * Log overhead of the xattr intent item for removing one parent pointer: the
 * intent format structure, one parent record, and a dirent name of the
 * maximum length.
 */
static inline unsigned int xfs_calc_pptr_unlink_overhead(void)
{
	const unsigned int	rec_bytes =
			xlog_calc_iovec_len(sizeof(struct xfs_parent_rec));
	const unsigned int	name_bytes =
			xlog_calc_iovec_len(MAXNAMELEN - 1);

	return sizeof(struct xfs_attri_log_format) + rec_bytes + name_bytes;
}
/*
 * Log overhead of the xattr intent item for replacing one parent pointer: the
 * intent format structure plus two (parent record, maximum length dirent
 * name) pairs, one for the old pointer and one for the new.
 */
static inline unsigned int xfs_calc_pptr_replace_overhead(void)
{
	const unsigned int	rec_bytes =
			xlog_calc_iovec_len(sizeof(struct xfs_parent_rec));
	const unsigned int	name_bytes =
			xlog_calc_iovec_len(MAXNAMELEN - 1);

	return sizeof(struct xfs_attri_log_format) +
			2 * (rec_bytes + name_bytes);
}
/*
* In renaming a file we can modify:
* the five inodes involved: 5 * inode size
* the two directory btrees: 2 * (max depth + v2) * dir block size
* the two directory bmap btrees: 2 * max depth * block size
* And the bmap_finish transaction can free dir and bmap blocks (two sets
* of bmap blocks) giving:
* of bmap blocks) giving (t2):
* the agf for the ags in which the blocks live: 3 * sector size
* the agfl for the ags in which the blocks live: 3 * sector size
* the superblock for the free block count: sector size
* the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
* If parent pointers are enabled (t3), then each transaction in the chain
* must be capable of setting or removing the extended attribute
* containing the parent information. It must also be able to handle
* the three xattr intent items that track the progress of the parent
* pointer update.
*/
STATIC uint
xfs_calc_rename_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
max((xfs_calc_inode_res(mp, 5) +
unsigned int overhead = XFS_DQUOT_LOGRES(mp);
struct xfs_trans_resv *resp = M_RES(mp);
unsigned int t1, t2, t3 = 0;
t1 = xfs_calc_inode_res(mp, 5) +
xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
XFS_FSB_TO_B(mp, 1));
t2 = xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
XFS_FSB_TO_B(mp, 1))));
XFS_FSB_TO_B(mp, 1));
if (xfs_has_parent(mp)) {
unsigned int rename_overhead, exchange_overhead;
t3 = max(resp->tr_attrsetm.tr_logres,
resp->tr_attrrm.tr_logres);
/*
* For a standard rename, the three xattr intent log items
* are (1) replacing the pptr for the source file; (2)
* removing the pptr on the dest file; and (3) adding a
* pptr for the whiteout file in the src dir.
*
* For an RENAME_EXCHANGE, there are two xattr intent
* items to replace the pptr for both src and dest
* files. Link counts don't change and there is no
* whiteout.
*
* In the worst case we can end up relogging all log
* intent items to allow the log tail to move ahead, so
* they become overhead added to each transaction in a
* processing chain.
*/
rename_overhead = xfs_calc_pptr_replace_overhead() +
xfs_calc_pptr_unlink_overhead() +
xfs_calc_pptr_link_overhead();
exchange_overhead = 2 * xfs_calc_pptr_replace_overhead();
overhead += max(rename_overhead, exchange_overhead);
}
return overhead + max3(t1, t2, t3);
}
/*
 * Compute the log count for a rename transaction.  @resp must already have
 * the xattr set and remove log counts filled out.
 */
static inline unsigned int
xfs_rename_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	/* One for the rename, one more for freeing blocks */
	unsigned int		count = XFS_RENAME_LOG_COUNT;

	if (!xfs_has_parent(mp))
		return count;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to remove or add one parent pointer.
	 */
	count += max(resp->tr_attrsetm.tr_logcount,
		     resp->tr_attrrm.tr_logcount);
	return count;
}
/*
......@@ -461,6 +545,23 @@ xfs_calc_iunlink_remove_reservation(
2 * M_IGEO(mp)->inode_cluster_size;
}
/*
 * Compute the log count for a link transaction.  @resp must already have the
 * xattr set log count filled out.
 */
static inline unsigned int
xfs_link_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	unsigned int		count = XFS_LINK_LOG_COUNT;

	if (!xfs_has_parent(mp))
		return count;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to add one parent pointer.
	 */
	count += resp->tr_attrsetm.tr_logcount;
	return count;
}
/*
* For creating a link to an inode:
* the parent directory inode: inode size
......@@ -477,14 +578,23 @@ STATIC uint
xfs_calc_link_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_iunlink_remove_reservation(mp) +
max((xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
unsigned int overhead = XFS_DQUOT_LOGRES(mp);
struct xfs_trans_resv *resp = M_RES(mp);
unsigned int t1, t2, t3 = 0;
overhead += xfs_calc_iunlink_remove_reservation(mp);
t1 = xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
t2 = xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
XFS_FSB_TO_B(mp, 1))));
XFS_FSB_TO_B(mp, 1));
if (xfs_has_parent(mp)) {
t3 = resp->tr_attrsetm.tr_logres;
overhead += xfs_calc_pptr_link_overhead();
}
return overhead + max3(t1, t2, t3);
}
/*
......@@ -499,6 +609,23 @@ xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
M_IGEO(mp)->inode_cluster_size;
}
/*
 * Compute the log count for a remove transaction.  @resp must already have
 * the xattr remove log count filled out.
 */
static inline unsigned int
xfs_remove_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	unsigned int		ret = XFS_REMOVE_LOG_COUNT;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to remove one parent pointer.
	 */
	if (xfs_has_parent(mp))
		ret += resp->tr_attrrm.tr_logcount;

	return ret;
}
/*
* For removing a directory entry we can modify:
* the parent directory inode: inode size
......@@ -515,14 +642,24 @@ STATIC uint
xfs_calc_remove_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_iunlink_add_reservation(mp) +
max((xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
unsigned int overhead = XFS_DQUOT_LOGRES(mp);
struct xfs_trans_resv *resp = M_RES(mp);
unsigned int t1, t2, t3 = 0;
overhead += xfs_calc_iunlink_add_reservation(mp);
t1 = xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
t2 = xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
XFS_FSB_TO_B(mp, 1))));
XFS_FSB_TO_B(mp, 1));
if (xfs_has_parent(mp)) {
t3 = resp->tr_attrrm.tr_logres;
overhead += xfs_calc_pptr_unlink_overhead();
}
return overhead + max3(t1, t2, t3);
}
/*
......@@ -571,12 +708,40 @@ xfs_calc_icreate_resv_alloc(
xfs_calc_finobt_res(mp);
}
/*
 * Compute the log count for a file creation transaction.  @resp must already
 * have the xattr set log count filled out.
 */
static inline unsigned int
xfs_icreate_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	unsigned int		count = XFS_CREATE_LOG_COUNT;

	if (!xfs_has_parent(mp))
		return count;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to add one parent pointer.
	 */
	count += resp->tr_attrsetm.tr_logcount;
	return count;
}
STATIC uint
xfs_calc_icreate_reservation(xfs_mount_t *mp)
xfs_calc_icreate_reservation(
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
max(xfs_calc_icreate_resv_alloc(mp),
xfs_calc_create_resv_modify(mp));
struct xfs_trans_resv *resp = M_RES(mp);
unsigned int overhead = XFS_DQUOT_LOGRES(mp);
unsigned int t1, t2, t3 = 0;
t1 = xfs_calc_icreate_resv_alloc(mp);
t2 = xfs_calc_create_resv_modify(mp);
if (xfs_has_parent(mp)) {
t3 = resp->tr_attrsetm.tr_logres;
overhead += xfs_calc_pptr_link_overhead();
}
return overhead + max3(t1, t2, t3);
}
STATIC uint
......@@ -589,6 +754,23 @@ xfs_calc_create_tmpfile_reservation(
return res + xfs_calc_iunlink_add_reservation(mp);
}
/*
 * Compute the log count for a mkdir transaction.  @resp must already have the
 * xattr set log count filled out.
 */
static inline unsigned int
xfs_mkdir_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	unsigned int		count = XFS_MKDIR_LOG_COUNT;

	if (!xfs_has_parent(mp))
		return count;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to add one parent pointer.
	 */
	count += resp->tr_attrsetm.tr_logcount;
	return count;
}
/*
* Making a new directory is the same as creating a new file.
*/
......@@ -599,6 +781,22 @@ xfs_calc_mkdir_reservation(
return xfs_calc_icreate_reservation(mp);
}
/*
 * Compute the log count for a symlink creation transaction.  @resp must
 * already have the xattr set log count filled out.
 */
static inline unsigned int
xfs_symlink_log_count(
	struct xfs_mount	*mp,
	struct xfs_trans_resv	*resp)
{
	unsigned int		count = XFS_SYMLINK_LOG_COUNT;

	if (!xfs_has_parent(mp))
		return count;

	/*
	 * Pre-reserve enough log reservation to handle the transaction
	 * rolling needed to add one parent pointer.
	 */
	count += resp->tr_attrsetm.tr_logcount;
	return count;
}
/*
* Making a new symlink is the same as creating a new file, but
......@@ -911,54 +1109,76 @@ xfs_calc_sb_reservation(
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize);
}
void
xfs_trans_resv_calc(
/*
* Namespace reservations.
*
* These get tricky when parent pointers are enabled as we have attribute
* modifications occurring from within these transactions. Rather than confuse
* each of these reservation calculations with the conditional attribute
* reservations, add them here in a clear and concise manner. This requires that
* the attribute reservations have already been calculated.
*
* Note that we only include the static attribute reservation here; the runtime
* reservation will have to be modified by the size of the attributes being
* added/removed/modified. See the comments on the attribute reservation
* calculations for more details.
*/
STATIC void
xfs_calc_namespace_reservations(
struct xfs_mount *mp,
struct xfs_trans_resv *resp)
{
int logcount_adj = 0;
/*
* The following transactions are logged in physical format and
* require a permanent reservation on space.
*/
resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
ASSERT(resp->tr_attrsetm.tr_logres > 0);
resp->tr_rename.tr_logres = xfs_calc_rename_reservation(mp);
resp->tr_rename.tr_logcount = XFS_RENAME_LOG_COUNT;
resp->tr_rename.tr_logcount = xfs_rename_log_count(mp, resp);
resp->tr_rename.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_link.tr_logres = xfs_calc_link_reservation(mp);
resp->tr_link.tr_logcount = XFS_LINK_LOG_COUNT;
resp->tr_link.tr_logcount = xfs_link_log_count(mp, resp);
resp->tr_link.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_remove.tr_logres = xfs_calc_remove_reservation(mp);
resp->tr_remove.tr_logcount = XFS_REMOVE_LOG_COUNT;
resp->tr_remove.tr_logcount = xfs_remove_log_count(mp, resp);
resp->tr_remove.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_symlink.tr_logres = xfs_calc_symlink_reservation(mp);
resp->tr_symlink.tr_logcount = XFS_SYMLINK_LOG_COUNT;
resp->tr_symlink.tr_logcount = xfs_symlink_log_count(mp, resp);
resp->tr_symlink.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_create.tr_logres = xfs_calc_icreate_reservation(mp);
resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
resp->tr_create.tr_logcount = xfs_icreate_log_count(mp, resp);
resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
resp->tr_mkdir.tr_logcount = xfs_mkdir_log_count(mp, resp);
resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
}
void
xfs_trans_resv_calc(
struct xfs_mount *mp,
struct xfs_trans_resv *resp)
{
int logcount_adj = 0;
/*
* The following transactions are logged in physical format and
* require a permanent reservation on space.
*/
resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_create_tmpfile.tr_logres =
xfs_calc_create_tmpfile_reservation(mp);
resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_ifree.tr_logres = xfs_calc_ifree_reservation(mp);
resp->tr_ifree.tr_logcount = XFS_INACTIVE_LOG_COUNT;
resp->tr_ifree.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
......@@ -988,6 +1208,8 @@ xfs_trans_resv_calc(
resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
xfs_calc_namespace_reservations(mp, resp);
/*
* The following transactions are logged in logical format with
* a default log count.
......
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_da_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
/*
 * Calculate the disk space reservation required to add a parent pointer
 * whose dirent name is @namelen bytes long.
 */
unsigned int
xfs_parent_calc_space_res(
	struct xfs_mount	*mp,
	unsigned int		namelen)
{
	unsigned int		res;

	/*
	 * Parent pointers are always the first attr in an attr tree, and never
	 * larger than a block
	 */
	res = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
	res += XFS_NEXTENTADD_SPACE_RES(mp, namelen, XFS_ATTR_FORK);
	return res;
}
/*
 * Calculate the disk space reservation required to create a file with a
 * @namelen byte dirent name: inode allocation, the new directory entry, and
 * the parent pointer if the filesystem has them.
 */
unsigned int
xfs_create_space_res(
	struct xfs_mount	*mp,
	unsigned int		namelen)
{
	unsigned int		res = XFS_IALLOC_SPACE_RES(mp) +
				      XFS_DIRENTER_SPACE_RES(mp, namelen);

	if (!xfs_has_parent(mp))
		return res;

	return res + xfs_parent_calc_space_res(mp, namelen);
}
/*
 * Calculate the disk space reservation required to make a directory with a
 * @namelen byte dirent name.  This is the same as the reservation for
 * creating a file.
 */
unsigned int
xfs_mkdir_space_res(
	struct xfs_mount	*mp,
	unsigned int		namelen)
{
	unsigned int		res = xfs_create_space_res(mp, namelen);

	return res;
}
/*
 * Calculate the disk space reservation required to link a file into a
 * directory under a @namelen byte dirent name: the new directory entry, and
 * the parent pointer if the filesystem has them.
 */
unsigned int
xfs_link_space_res(
	struct xfs_mount	*mp,
	unsigned int		namelen)
{
	unsigned int		res = XFS_DIRENTER_SPACE_RES(mp, namelen);

	if (!xfs_has_parent(mp))
		return res;

	return res + xfs_parent_calc_space_res(mp, namelen);
}
/*
 * Calculate the disk space reservation required to create a symlink with a
 * @namelen byte dirent name: inode allocation, the new directory entry, the
 * @fsblocks that the caller wants on top of that, and the parent pointer if
 * the filesystem has them.
 */
unsigned int
xfs_symlink_space_res(
	struct xfs_mount	*mp,
	unsigned int		namelen,
	unsigned int		fsblocks)
{
	unsigned int		res = XFS_IALLOC_SPACE_RES(mp) +
				      XFS_DIRENTER_SPACE_RES(mp, namelen) +
				      fsblocks;

	if (!xfs_has_parent(mp))
		return res;

	return res + xfs_parent_calc_space_res(mp, namelen);
}
/*
 * Calculate the disk space reservation required to remove a directory entry
 * with a @namelen byte name: the directory removal itself, plus a parent
 * pointer xattr update if the filesystem has parent pointers.
 */
unsigned int
xfs_remove_space_res(
	struct xfs_mount	*mp,
	unsigned int		namelen)
{
	unsigned int		res = XFS_DIRREMOVE_SPACE_RES(mp);

	if (!xfs_has_parent(mp))
		return res;

	return res + xfs_parent_calc_space_res(mp, namelen);
}
/*
 * Calculate the disk space reservation required for a rename.
 *
 * @src_namelen is the length of the source dirent name, which is only needed
 * to size the parent pointer update when @has_whiteout is set.
 * @target_namelen is the length of the target dirent name, and @target_exists
 * is true if the rename lands on an existing dirent.
 *
 * The directory part of the reservation covers removing one entry and adding
 * one entry of @target_namelen bytes.  All of the parent pointer xattr
 * reservations, including the one for an existing target, are only added when
 * the filesystem has parent pointers; without the feature no xattr update
 * happens, so nothing extra is reserved.
 */
unsigned int
xfs_rename_space_res(
	struct xfs_mount	*mp,
	unsigned int		src_namelen,
	bool			target_exists,
	unsigned int		target_namelen,
	bool			has_whiteout)
{
	unsigned int		ret;

	ret = XFS_DIRREMOVE_SPACE_RES(mp) +
			XFS_DIRENTER_SPACE_RES(mp, target_namelen);

	/* No parent pointers means no xattr updates to reserve space for. */
	if (!xfs_has_parent(mp))
		return ret;

	if (has_whiteout)
		ret += xfs_parent_calc_space_res(mp, src_namelen);
	ret += 2 * xfs_parent_calc_space_res(mp, target_namelen);
	if (target_exists)
		ret += xfs_parent_calc_space_res(mp, target_namelen);

	return ret;
}
......@@ -80,31 +80,32 @@
/* This macro is not used - see inline code in xfs_attr_set */
#define XFS_ATTRSET_SPACE_RES(mp, v) \
(XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK) + XFS_B_TO_FSB(mp, v))
#define XFS_CREATE_SPACE_RES(mp,nl) \
(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
#define XFS_DIOSTRAT_SPACE_RES(mp, v) \
(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + (v))
#define XFS_GROWFS_SPACE_RES(mp) \
(2 * (mp)->m_alloc_maxlevels)
#define XFS_GROWFSRT_SPACE_RES(mp,b) \
((b) + XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK))
#define XFS_LINK_SPACE_RES(mp,nl) \
XFS_DIRENTER_SPACE_RES(mp,nl)
#define XFS_MKDIR_SPACE_RES(mp,nl) \
(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
#define XFS_QM_DQALLOC_SPACE_RES(mp) \
(XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK) + \
XFS_DQUOT_CLUSTER_SIZE_FSB)
#define XFS_QM_QINOCREATE_SPACE_RES(mp) \
XFS_IALLOC_SPACE_RES(mp)
#define XFS_REMOVE_SPACE_RES(mp) \
XFS_DIRREMOVE_SPACE_RES(mp)
#define XFS_RENAME_SPACE_RES(mp,nl) \
(XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \
(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
#define XFS_IFREE_SPACE_RES(mp) \
(xfs_has_finobt(mp) ? M_IGEO(mp)->inobt_maxlevels : 0)
unsigned int xfs_parent_calc_space_res(struct xfs_mount *mp,
unsigned int namelen);
unsigned int xfs_create_space_res(struct xfs_mount *mp, unsigned int namelen);
unsigned int xfs_mkdir_space_res(struct xfs_mount *mp, unsigned int namelen);
unsigned int xfs_link_space_res(struct xfs_mount *mp, unsigned int namelen);
unsigned int xfs_symlink_space_res(struct xfs_mount *mp, unsigned int namelen,
unsigned int fsblocks);
unsigned int xfs_remove_space_res(struct xfs_mount *mp, unsigned int namelen);
unsigned int xfs_rename_space_res(struct xfs_mount *mp,
unsigned int src_namelen, bool target_exists,
unsigned int target_namelen, bool has_whiteout);
#endif /* __XFS_TRANS_SPACE_H__ */
......@@ -179,7 +179,6 @@ xchk_xattr_actor(
.dp = ip,
.name = name,
.namelen = namelen,
.hashval = xfs_da_hashname(name, namelen),
.trans = sc->tp,
.valuelen = valuelen,
.owner = ip->i_ino,
......@@ -230,6 +229,7 @@ xchk_xattr_actor(
args.value = ab->value;
xfs_attr_sethash(&args);
error = xfs_attr_get_ilocked(&args);
/* ENODATA means the hash lookup failed and the attr is bad */
if (error == -ENODATA)
......@@ -525,7 +525,10 @@ xchk_xattr_rec(
xchk_da_set_corrupt(ds, level);
goto out;
}
calc_hash = xfs_da_hashname(lentry->nameval, lentry->namelen);
calc_hash = xfs_attr_hashval(mp, ent->flags, lentry->nameval,
lentry->namelen,
lentry->nameval + lentry->namelen,
be16_to_cpu(lentry->valuelen));
} else {
rentry = (struct xfs_attr_leaf_name_remote *)
(((char *)bp->b_addr) + nameidx);
......@@ -533,7 +536,13 @@ xchk_xattr_rec(
xchk_da_set_corrupt(ds, level);
goto out;
}
calc_hash = xfs_da_hashname(rentry->name, rentry->namelen);
if (ent->flags & XFS_ATTR_PARENT) {
xchk_da_set_corrupt(ds, level);
goto out;
}
calc_hash = xfs_attr_hashval(mp, ent->flags, rentry->name,
rentry->namelen, NULL,
be32_to_cpu(rentry->valuelen));
}
if (calc_hash != hash)
xchk_da_set_corrupt(ds, level);
......
......@@ -704,7 +704,7 @@ xrep_dir_replay_update(
uint resblks;
int error;
resblks = XFS_LINK_SPACE_RES(mp, xname->len);
resblks = xfs_link_space_res(mp, xname->len);
error = xchk_trans_alloc(rd->sc, resblks);
if (error)
return error;
......
......@@ -326,9 +326,10 @@ xrep_adoption_trans_alloc(
/* Compute the worst case space reservation that we need. */
adopt->sc = sc;
adopt->orphanage_blkres = XFS_LINK_SPACE_RES(mp, MAXNAMELEN);
adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN);
if (S_ISDIR(VFS_I(sc->ip)->i_mode))
child_blkres = XFS_RENAME_SPACE_RES(mp, xfs_name_dotdot.len);
child_blkres = xfs_rename_space_res(mp, 0, false,
xfs_name_dotdot.len, false);
adopt->child_blkres = child_blkres;
/*
......
......@@ -171,7 +171,8 @@ xrep_parent_reset_dotdot(
* Reserve more space just in case we have to expand the dir. We're
* allowed to exceed quota to repair inconsistent metadata.
*/
spaceres = XFS_RENAME_SPACE_RES(sc->mp, xfs_name_dotdot.len);
spaceres = xfs_rename_space_res(sc->mp, 0, false, xfs_name_dotdot.len,
false);
error = xfs_trans_reserve_more_inode(sc->tp, sc->ip, spaceres, 0,
true);
if (error)
......
......@@ -421,7 +421,7 @@ xrep_symlink_rebuild(
* unlikely.
*/
fs_blocks = xfs_symlink_blocks(sc->mp, target_len);
resblks = XFS_SYMLINK_SPACE_RES(sc->mp, target_len, fs_blocks);
resblks = xfs_symlink_space_res(sc->mp, target_len, fs_blocks);
error = xfs_trans_reserve_quota_nblks(sc->tp, sc->tempip, resblks, 0,
true);
if (error)
......
......@@ -71,7 +71,7 @@ xrep_tempfile_create(
return error;
if (is_dir) {
resblks = XFS_MKDIR_SPACE_RES(mp, 0);
resblks = xfs_mkdir_space_res(mp, 0);
tres = &M_RES(mp)->tr_mkdir;
} else {
resblks = XFS_IALLOC_SPACE_RES(mp);
......
......@@ -27,6 +27,7 @@
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
#include "xfs_parent.h"
struct kmem_cache *xfs_attri_cache;
struct kmem_cache *xfs_attrd_cache;
......@@ -73,8 +74,12 @@ static inline struct xfs_attri_log_nameval *
xfs_attri_log_nameval_alloc(
const void *name,
unsigned int name_len,
const void *new_name,
unsigned int new_name_len,
const void *value,
unsigned int value_len)
unsigned int value_len,
const void *new_value,
unsigned int new_value_len)
{
struct xfs_attri_log_nameval *nv;
......@@ -83,15 +88,26 @@ xfs_attri_log_nameval_alloc(
* this. But kvmalloc() utterly sucks, so we use our own version.
*/
nv = xlog_kvmalloc(sizeof(struct xfs_attri_log_nameval) +
name_len + value_len);
name_len + new_name_len + value_len +
new_value_len);
nv->name.i_addr = nv + 1;
nv->name.i_len = name_len;
nv->name.i_type = XLOG_REG_TYPE_ATTR_NAME;
memcpy(nv->name.i_addr, name, name_len);
if (new_name_len) {
nv->new_name.i_addr = nv->name.i_addr + name_len;
nv->new_name.i_len = new_name_len;
memcpy(nv->new_name.i_addr, new_name, new_name_len);
} else {
nv->new_name.i_addr = NULL;
nv->new_name.i_len = 0;
}
nv->new_name.i_type = XLOG_REG_TYPE_ATTR_NEWNAME;
if (value_len) {
nv->value.i_addr = nv->name.i_addr + name_len;
nv->value.i_addr = nv->name.i_addr + name_len + new_name_len;
nv->value.i_len = value_len;
memcpy(nv->value.i_addr, value, value_len);
} else {
......@@ -100,6 +116,17 @@ xfs_attri_log_nameval_alloc(
}
nv->value.i_type = XLOG_REG_TYPE_ATTR_VALUE;
if (new_value_len) {
nv->new_value.i_addr = nv->name.i_addr + name_len +
new_name_len + value_len;
nv->new_value.i_len = new_value_len;
memcpy(nv->new_value.i_addr, new_value, new_value_len);
} else {
nv->new_value.i_addr = NULL;
nv->new_value.i_len = 0;
}
nv->new_value.i_type = XLOG_REG_TYPE_ATTR_NEWVALUE;
refcount_set(&nv->refcount, 1);
return nv;
}
......@@ -145,11 +172,20 @@ xfs_attri_item_size(
*nbytes += sizeof(struct xfs_attri_log_format) +
xlog_calc_iovec_len(nv->name.i_len);
if (!nv->value.i_len)
return;
if (nv->new_name.i_len) {
*nvecs += 1;
*nbytes += xlog_calc_iovec_len(nv->new_name.i_len);
}
if (nv->value.i_len) {
*nvecs += 1;
*nbytes += xlog_calc_iovec_len(nv->value.i_len);
}
if (nv->new_value.i_len) {
*nvecs += 1;
*nbytes += xlog_calc_iovec_len(nv->new_value.i_len);
}
}
/*
......@@ -179,15 +215,28 @@ xfs_attri_item_format(
ASSERT(nv->name.i_len > 0);
attrip->attri_format.alfi_size++;
if (nv->new_name.i_len > 0)
attrip->attri_format.alfi_size++;
if (nv->value.i_len > 0)
attrip->attri_format.alfi_size++;
if (nv->new_value.i_len > 0)
attrip->attri_format.alfi_size++;
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT,
&attrip->attri_format,
sizeof(struct xfs_attri_log_format));
xlog_copy_from_iovec(lv, &vecp, &nv->name);
if (nv->new_name.i_len > 0)
xlog_copy_from_iovec(lv, &vecp, &nv->new_name);
if (nv->value.i_len > 0)
xlog_copy_from_iovec(lv, &vecp, &nv->value);
if (nv->new_value.i_len > 0)
xlog_copy_from_iovec(lv, &vecp, &nv->new_value);
}
/*
......@@ -322,6 +371,8 @@ xfs_attr_log_item(
const struct xfs_attr_intent *attr)
{
struct xfs_attri_log_format *attrp;
struct xfs_attri_log_nameval *nv = attr->xattri_nameval;
struct xfs_da_args *args = attr->xattri_da_args;
/*
* At this point the xfs_attr_intent has been constructed, and we've
......@@ -329,13 +380,30 @@ xfs_attr_log_item(
* structure with fields from this xfs_attr_intent
*/
attrp = &attrip->attri_format;
attrp->alfi_ino = attr->xattri_da_args->dp->i_ino;
attrp->alfi_ino = args->dp->i_ino;
ASSERT(!(attr->xattri_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK));
attrp->alfi_op_flags = attr->xattri_op_flags;
attrp->alfi_value_len = attr->xattri_nameval->value.i_len;
attrp->alfi_name_len = attr->xattri_nameval->name.i_len;
ASSERT(!(attr->xattri_da_args->attr_filter & ~XFS_ATTRI_FILTER_MASK));
attrp->alfi_attr_filter = attr->xattri_da_args->attr_filter;
attrp->alfi_value_len = nv->value.i_len;
switch (xfs_attr_log_item_op(attrp)) {
case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
ASSERT(nv->value.i_len == nv->new_value.i_len);
attrp->alfi_igen = VFS_I(args->dp)->i_generation;
attrp->alfi_old_name_len = nv->name.i_len;
attrp->alfi_new_name_len = nv->new_name.i_len;
break;
case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
case XFS_ATTRI_OP_FLAGS_PPTR_SET:
attrp->alfi_igen = VFS_I(args->dp)->i_generation;
fallthrough;
default:
attrp->alfi_name_len = nv->name.i_len;
break;
}
ASSERT(!(args->attr_filter & ~XFS_ATTRI_FILTER_MASK));
attrp->alfi_attr_filter = args->attr_filter;
}
/* Get an ATTRI. */
......@@ -374,8 +442,11 @@ xfs_attr_create_intent(
* Transfer our reference to the name/value buffer to the
* deferred work state structure.
*/
attr->xattri_nameval = xfs_attri_log_nameval_alloc(args->name,
args->namelen, args->value, args->valuelen);
attr->xattri_nameval = xfs_attri_log_nameval_alloc(
args->name, args->namelen,
args->new_name, args->new_namelen,
args->value, args->valuelen,
args->new_value, args->new_valuelen);
}
attrip = xfs_attri_init(mp, attr->xattri_nameval);
......@@ -480,12 +551,6 @@ xfs_attri_validate(
{
unsigned int op = xfs_attr_log_item_op(attrp);
if (!xfs_is_using_logged_xattrs(mp))
return false;
if (attrp->__pad != 0)
return false;
if (attrp->alfi_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK)
return false;
......@@ -497,19 +562,46 @@ xfs_attri_validate(
return false;
switch (op) {
case XFS_ATTRI_OP_FLAGS_PPTR_SET:
case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
if (!xfs_has_parent(mp))
return false;
if (attrp->alfi_value_len != sizeof(struct xfs_parent_rec))
return false;
if (!xfs_attri_validate_namelen(attrp->alfi_name_len))
return false;
if (!(attrp->alfi_attr_filter & XFS_ATTR_PARENT))
return false;
break;
case XFS_ATTRI_OP_FLAGS_SET:
case XFS_ATTRI_OP_FLAGS_REPLACE:
if (!xfs_is_using_logged_xattrs(mp))
return false;
if (attrp->alfi_value_len > XATTR_SIZE_MAX)
return false;
if (!xfs_attri_validate_namelen(attrp->alfi_name_len))
return false;
break;
case XFS_ATTRI_OP_FLAGS_REMOVE:
if (!xfs_is_using_logged_xattrs(mp))
return false;
if (attrp->alfi_value_len != 0)
return false;
if (!xfs_attri_validate_namelen(attrp->alfi_name_len))
return false;
break;
case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
if (!xfs_has_parent(mp))
return false;
if (!xfs_attri_validate_namelen(attrp->alfi_old_name_len))
return false;
if (!xfs_attri_validate_namelen(attrp->alfi_new_name_len))
return false;
if (attrp->alfi_value_len != sizeof(struct xfs_parent_rec))
return false;
if (!(attrp->alfi_attr_filter & XFS_ATTR_PARENT))
return false;
break;
default:
return false;
}
......@@ -550,9 +642,27 @@ xfs_attri_recover_work(
int local;
int error;
/*
* Parent pointer attr items record the generation but regular logged
* xattrs do not; select the right iget function.
*/
switch (xfs_attr_log_item_op(attrp)) {
case XFS_ATTRI_OP_FLAGS_PPTR_SET:
case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
error = xlog_recover_iget_handle(mp, attrp->alfi_ino,
attrp->alfi_igen, &ip);
break;
default:
error = xlog_recover_iget(mp, attrp->alfi_ino, &ip);
if (error)
return ERR_PTR(error);
break;
}
if (error) {
xfs_irele(ip);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attrp,
sizeof(*attrp));
return ERR_PTR(-EFSCORRUPTED);
}
if (xfs_inode_has_attr_fork(ip)) {
error = xfs_attri_iread_extents(ip);
......@@ -582,15 +692,21 @@ xfs_attri_recover_work(
args->whichfork = XFS_ATTR_FORK;
args->name = nv->name.i_addr;
args->namelen = nv->name.i_len;
args->hashval = xfs_da_hashname(args->name, args->namelen);
args->new_name = nv->new_name.i_addr;
args->new_namelen = nv->new_name.i_len;
args->value = nv->value.i_addr;
args->valuelen = nv->value.i_len;
args->new_value = nv->new_value.i_addr;
args->new_valuelen = nv->new_value.i_len;
args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK;
args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT |
XFS_DA_OP_LOGGED;
args->owner = args->dp->i_ino;
xfs_attr_sethash(args);
switch (xfs_attr_intent_op(attr)) {
case XFS_ATTRI_OP_FLAGS_PPTR_SET:
case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
case XFS_ATTRI_OP_FLAGS_SET:
case XFS_ATTRI_OP_FLAGS_REPLACE:
args->total = xfs_attr_calc_size(args, &local);
......@@ -599,6 +715,7 @@ xfs_attri_recover_work(
else
attr->xattri_dela_state = xfs_attr_init_add_state(args);
break;
case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
case XFS_ATTRI_OP_FLAGS_REMOVE:
attr->xattri_dela_state = xfs_attr_init_remove_state(args);
break;
......@@ -697,9 +814,20 @@ xfs_attr_relog_intent(
new_attrp = &new_attrip->attri_format;
new_attrp->alfi_ino = old_attrp->alfi_ino;
new_attrp->alfi_igen = old_attrp->alfi_igen;
new_attrp->alfi_op_flags = old_attrp->alfi_op_flags;
new_attrp->alfi_value_len = old_attrp->alfi_value_len;
switch (xfs_attr_log_item_op(old_attrp)) {
case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
new_attrp->alfi_new_name_len = old_attrp->alfi_new_name_len;
new_attrp->alfi_old_name_len = old_attrp->alfi_old_name_len;
break;
default:
new_attrp->alfi_name_len = old_attrp->alfi_name_len;
break;
}
new_attrp->alfi_attr_filter = old_attrp->alfi_attr_filter;
return &new_attrip->attri_item;
......@@ -727,6 +855,75 @@ xfs_attr_create_done(
return &attrdp->attrd_item;
}
void
xfs_attr_defer_add(
struct xfs_da_args *args,
enum xfs_attr_defer_op op)
{
struct xfs_attr_intent *new;
unsigned int log_op = 0;
bool is_pptr = args->attr_filter & XFS_ATTR_PARENT;
if (is_pptr) {
ASSERT(xfs_has_parent(args->dp->i_mount));
ASSERT((args->attr_filter & ~XFS_ATTR_PARENT) == 0);
ASSERT(args->op_flags & XFS_DA_OP_LOGGED);
ASSERT(args->valuelen == sizeof(struct xfs_parent_rec));
}
new = kmem_cache_zalloc(xfs_attr_intent_cache,
GFP_NOFS | __GFP_NOFAIL);
new->xattri_da_args = args;
/* Compute log operation from the higher level op and namespace. */
switch (op) {
case XFS_ATTR_DEFER_SET:
if (is_pptr)
log_op = XFS_ATTRI_OP_FLAGS_PPTR_SET;
else
log_op = XFS_ATTRI_OP_FLAGS_SET;
break;
case XFS_ATTR_DEFER_REPLACE:
if (is_pptr)
log_op = XFS_ATTRI_OP_FLAGS_PPTR_REPLACE;
else
log_op = XFS_ATTRI_OP_FLAGS_REPLACE;
break;
case XFS_ATTR_DEFER_REMOVE:
if (is_pptr)
log_op = XFS_ATTRI_OP_FLAGS_PPTR_REMOVE;
else
log_op = XFS_ATTRI_OP_FLAGS_REMOVE;
break;
default:
ASSERT(0);
break;
}
new->xattri_op_flags = log_op;
/* Set up initial attr operation state. */
switch (log_op) {
case XFS_ATTRI_OP_FLAGS_PPTR_SET:
case XFS_ATTRI_OP_FLAGS_SET:
new->xattri_dela_state = xfs_attr_init_add_state(args);
break;
case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
ASSERT(args->new_valuelen == args->valuelen);
new->xattri_dela_state = xfs_attr_init_replace_state(args);
break;
case XFS_ATTRI_OP_FLAGS_REPLACE:
new->xattri_dela_state = xfs_attr_init_replace_state(args);
break;
case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
case XFS_ATTRI_OP_FLAGS_REMOVE:
new->xattri_dela_state = xfs_attr_init_remove_state(args);
break;
}
xfs_defer_add(args->trans, &new->xattri_list, &xfs_attr_defer_type);
trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp);
}
const struct xfs_defer_op_type xfs_attr_defer_type = {
.name = "attr",
.max_items = 1,
......@@ -777,6 +974,15 @@ xfs_attri_validate_value_iovec(
return NULL;
}
if ((attri_formatp->alfi_attr_filter & XFS_ATTR_PARENT) &&
!xfs_parent_valuecheck(mp, iovec->i_addr, value_len)) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, sizeof(*attri_formatp));
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
iovec->i_addr, iovec->i_len);
return NULL;
}
return iovec->i_addr;
}
......@@ -793,9 +999,13 @@ xlog_recover_attri_commit_pass2(
struct xfs_attri_log_nameval *nv;
const void *attr_name;
const void *attr_value = NULL;
const void *attr_new_name = NULL;
const void *attr_new_value = NULL;
size_t len;
unsigned int name_len = 0;
unsigned int value_len = 0;
unsigned int new_name_len = 0;
unsigned int new_value_len = 0;
unsigned int op, i = 0;
/* Validate xfs_attri_log_format before the large memory allocation */
......@@ -816,6 +1026,17 @@ xlog_recover_attri_commit_pass2(
/* Check the number of log iovecs makes sense for the op code. */
op = xfs_attr_log_item_op(attri_formatp);
switch (op) {
case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
case XFS_ATTRI_OP_FLAGS_PPTR_SET:
/* Log item, attr name, attr value */
if (item->ri_total != 3) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, len);
return -EFSCORRUPTED;
}
name_len = attri_formatp->alfi_name_len;
value_len = attri_formatp->alfi_value_len;
break;
case XFS_ATTRI_OP_FLAGS_SET:
case XFS_ATTRI_OP_FLAGS_REPLACE:
/* Log item, attr name, attr value */
......@@ -836,6 +1057,20 @@ xlog_recover_attri_commit_pass2(
}
name_len = attri_formatp->alfi_name_len;
break;
case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
/*
* Log item, attr name, new attr name, attr value, new attr
* value
*/
if (item->ri_total != 5) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, len);
return -EFSCORRUPTED;
}
name_len = attri_formatp->alfi_old_name_len;
new_name_len = attri_formatp->alfi_new_name_len;
new_value_len = value_len = attri_formatp->alfi_value_len;
break;
default:
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, len);
......@@ -850,6 +1085,16 @@ xlog_recover_attri_commit_pass2(
return -EFSCORRUPTED;
i++;
/* Validate the new attr name */
if (new_name_len > 0) {
attr_new_name = xfs_attri_validate_name_iovec(mp,
attri_formatp, &item->ri_buf[i],
new_name_len);
if (!attr_new_name)
return -EFSCORRUPTED;
i++;
}
/* Validate the attr value, if present */
if (value_len != 0) {
attr_value = xfs_attri_validate_value_iovec(mp, attri_formatp,
......@@ -859,6 +1104,16 @@ xlog_recover_attri_commit_pass2(
i++;
}
/* Validate the new attr value, if present */
if (new_value_len != 0) {
attr_new_value = xfs_attri_validate_value_iovec(mp,
attri_formatp, &item->ri_buf[i],
new_value_len);
if (!attr_new_value)
return -EFSCORRUPTED;
i++;
}
/*
* Make sure we got the correct number of buffers for the operation
* that we just loaded.
......@@ -878,12 +1133,17 @@ xlog_recover_attri_commit_pass2(
return -EFSCORRUPTED;
}
fallthrough;
case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
case XFS_ATTRI_OP_FLAGS_PPTR_SET:
case XFS_ATTRI_OP_FLAGS_SET:
case XFS_ATTRI_OP_FLAGS_REPLACE:
/*
* Regular xattr set/remove/replace operations require a name
* and do not take a newname. Values are optional for set and
* replace.
*
* Name-value set/remove operations must have a name, do not
* take a newname, and can take a value.
*/
if (attr_name == NULL || name_len == 0) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
......@@ -891,6 +1151,23 @@ xlog_recover_attri_commit_pass2(
return -EFSCORRUPTED;
}
break;
case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
/*
* Name-value replace operations require the caller to
* specify the old and new names and values explicitly.
* Values are optional.
*/
if (attr_name == NULL || name_len == 0) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, len);
return -EFSCORRUPTED;
}
if (attr_new_name == NULL || new_name_len == 0) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, len);
return -EFSCORRUPTED;
}
break;
}
/*
......@@ -899,7 +1176,9 @@ xlog_recover_attri_commit_pass2(
* reference.
*/
nv = xfs_attri_log_nameval_alloc(attr_name, name_len,
attr_value, value_len);
attr_new_name, new_name_len,
attr_value, value_len,
attr_new_value, new_value_len);
attrip = xfs_attri_init(mp, nv);
memcpy(&attrip->attri_format, attri_formatp, len);
......
......@@ -13,7 +13,9 @@ struct kmem_zone;
struct xfs_attri_log_nameval {
struct xfs_log_iovec name;
struct xfs_log_iovec new_name; /* PPTR_REPLACE only */
struct xfs_log_iovec value;
struct xfs_log_iovec new_value; /* PPTR_REPLACE only */
refcount_t refcount;
/* name and value follow the end of this struct */
......@@ -51,4 +53,12 @@ struct xfs_attrd_log_item {
extern struct kmem_cache *xfs_attri_cache;
extern struct kmem_cache *xfs_attrd_cache;
/*
 * High level xattr operations accepted by xfs_attr_defer_add(), which maps
 * each one to a log intent opcode depending on the attr namespace.
 */
enum xfs_attr_defer_op {
XFS_ATTR_DEFER_SET,
XFS_ATTR_DEFER_REMOVE,
XFS_ATTR_DEFER_REPLACE,
};
void xfs_attr_defer_add(struct xfs_da_args *args, enum xfs_attr_defer_op op);
#endif /* __XFS_ATTR_ITEM_H__ */
......@@ -92,6 +92,7 @@ xfs_attr_shortform_list(
sfe->flags,
sfe->nameval,
(int)sfe->namelen,
&sfe->nameval[sfe->namelen],
(int)sfe->valuelen);
/*
* Either search callback finished early or
......@@ -135,12 +136,16 @@ xfs_attr_shortform_list(
}
sbp->entno = i;
sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen);
sbp->name = sfe->nameval;
sbp->namelen = sfe->namelen;
/* These are bytes, and both on-disk, don't endian-flip */
sbp->value = &sfe->nameval[sfe->namelen],
sbp->valuelen = sfe->valuelen;
sbp->flags = sfe->flags;
sbp->hash = xfs_attr_hashval(dp->i_mount, sfe->flags,
sfe->nameval, sfe->namelen,
sfe->nameval + sfe->namelen,
sfe->valuelen);
sfe = xfs_attr_sf_nextentry(sfe);
sbp++;
nsbuf++;
......@@ -189,6 +194,7 @@ xfs_attr_shortform_list(
sbp->flags,
sbp->name,
sbp->namelen,
sbp->value,
sbp->valuelen);
if (context->seen_enough)
break;
......@@ -476,6 +482,7 @@ xfs_attr3_leaf_list_int(
*/
for (; i < ichdr.count; entry++, i++) {
char *name;
void *value;
int namelen, valuelen;
if (be32_to_cpu(entry->hashval) != cursor->hashval) {
......@@ -493,6 +500,7 @@ xfs_attr3_leaf_list_int(
name_loc = xfs_attr3_leaf_name_local(leaf, i);
name = name_loc->nameval;
namelen = name_loc->namelen;
value = &name_loc->nameval[name_loc->namelen];
valuelen = be16_to_cpu(name_loc->valuelen);
} else {
xfs_attr_leaf_name_remote_t *name_rmt;
......@@ -500,6 +508,7 @@ xfs_attr3_leaf_list_int(
name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
name = name_rmt->name;
namelen = name_rmt->namelen;
value = NULL;
valuelen = be32_to_cpu(name_rmt->valuelen);
}
......@@ -510,7 +519,7 @@ xfs_attr3_leaf_list_int(
return -EFSCORRUPTED;
}
context->put_listent(context, entry->flags,
name, namelen, valuelen);
name, namelen, value, valuelen);
if (context->seen_enough)
break;
cursor->offset++;
......
......@@ -102,7 +102,7 @@ xfs_fs_encode_fh(
return fileid_type;
}
STATIC struct inode *
struct inode *
xfs_nfs_get_inode(
struct super_block *sb,
u64 ino,
......
......@@ -57,4 +57,6 @@ struct xfs_fid64 {
/* This flag goes on the wire. Don't play with it. */
#define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */
struct inode *xfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 gen);
#endif /* __XFS_EXPORT_H__ */
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* Copyright (c) 2022-2024 Oracle.
* All rights reserved.
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_ioctl.h"
#include "xfs_parent.h"
#include "xfs_da_btree.h"
#include "xfs_handle.h"
#include "xfs_health.h"
#include "xfs_icache.h"
#include "xfs_export.h"
#include "xfs_xattr.h"
#include "xfs_acl.h"
#include <linux/namei.h>
/*
 * Value stored in (and expected from) the fid_len field of a full file
 * handle: the size of struct xfs_fid minus the fid_len field itself.
 */
static inline size_t
xfs_filehandle_fid_len(void)
{
	return sizeof(struct xfs_fid) -
	       sizeof(((struct xfs_handle *)NULL)->ha_fid.fid_len);
}
/*
 * Fill out @handle as a full file handle naming inode @ino, generation @gen,
 * on the filesystem @mp.  Returns the size of the whole handle structure.
 */
static inline size_t
xfs_filehandle_init(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	uint32_t		gen,
	struct xfs_handle	*handle)
{
	/* File identity: inode number and generation, no padding bits set. */
	handle->ha_fid.fid_ino = ino;
	handle->ha_fid.fid_gen = gen;
	handle->ha_fid.fid_pad = 0;
	handle->ha_fid.fid_len = xfs_filehandle_fid_len();

	/* Filesystem identity. */
	memcpy(&handle->ha_fsid, mp->m_fixedfsid, sizeof(struct xfs_fsid));

	return sizeof(struct xfs_handle);
}
/*
 * Fill out @handle as a filesystem handle for @mp: the fsid is set and the
 * fid portion is zeroed.  Returns the handle length to report, which covers
 * only the fsid.
 */
static inline size_t
xfs_fshandle_init(
	struct xfs_mount	*mp,
	struct xfs_handle	*handle)
{
	/* An fs handle names no particular file, so blank out the fid. */
	memset(&handle->ha_fid, 0, sizeof(handle->ha_fid));
	memcpy(&handle->ha_fsid, mp->m_fixedfsid, sizeof(struct xfs_fsid));

	return sizeof(struct xfs_fsid);
}
/*
 * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
 * a file or fs handle.
 *
 * XFS_IOC_PATH_TO_FSHANDLE
 *    returns fs handle for a mount point or path within that mount point
 * XFS_IOC_FD_TO_HANDLE
 *    returns full handle for a FD opened in user space
 * XFS_IOC_PATH_TO_HANDLE
 *    returns full handle for a path
 *
 * The handle and its length are copied out through hreq->ohandle and
 * hreq->ohandlen.  Returns 0 on success or a negative errno: -EBADF for a
 * bad fd or a file type that cannot have a handle, -EINVAL if the inode does
 * not live on an XFS filesystem, -EFAULT if the copy to userspace fails, or
 * the error returned by user_path_at().
 */
int
xfs_find_handle(
	unsigned int		cmd,
	xfs_fsop_handlereq_t	*hreq)
{
	int			hsize;
	xfs_handle_t		handle;
	struct inode		*inode;
	struct fd		f = {NULL};
	struct path		path;
	int			error;
	struct xfs_inode	*ip;

	/* Pin the target either through the fd or by resolving the path. */
	if (cmd == XFS_IOC_FD_TO_HANDLE) {
		f = fdget(hreq->fd);
		if (!f.file)
			return -EBADF;
		inode = file_inode(f.file);
	} else {
		error = user_path_at(AT_FDCWD, hreq->path, 0, &path);
		if (error)
			return error;
		inode = d_inode(path.dentry);
	}
	ip = XFS_I(inode);

	/*
	 * We can only generate handles for inodes residing on a XFS filesystem,
	 * and only for regular files, directories or symbolic links.
	 */
	error = -EINVAL;
	if (inode->i_sb->s_magic != XFS_SB_MAGIC)
		goto out_put;

	error = -EBADF;
	if (!S_ISREG(inode->i_mode) &&
	    !S_ISDIR(inode->i_mode) &&
	    !S_ISLNK(inode->i_mode))
		goto out_put;

	/*
	 * Both init helpers fill in the fsid themselves, so it does not need
	 * to be copied into the handle separately beforehand.
	 */
	if (cmd == XFS_IOC_PATH_TO_FSHANDLE)
		hsize = xfs_fshandle_init(ip->i_mount, &handle);
	else
		hsize = xfs_filehandle_init(ip->i_mount, ip->i_ino,
				inode->i_generation, &handle);

	/* Only the first hsize bytes of the handle are meaningful. */
	error = -EFAULT;
	if (copy_to_user(hreq->ohandle, &handle, hsize) ||
	    copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
		goto out_put;

	error = 0;

out_put:
	/* Drop whichever reference was taken at the top. */
	if (cmd == XFS_IOC_FD_TO_HANDLE)
		fdput(f);
	else
		path_put(&path);
	return error;
}
/*
 * Acceptability callback passed to exportfs_decode_fh().  Handle operations
 * are privileged, so no permission checks are needed on the various pathname
 * components and every candidate dentry is accepted.
 */
STATIC int
xfs_handle_acceptable(
	void			*context,
	struct dentry		*dentry)
{
	return 1;
}
/* Convert handle already copied to kernel space into a dentry. */
static struct dentry *
xfs_khandle_to_dentry(
struct file *file,
struct xfs_handle *handle)
{
struct xfs_fid64 fid = {
.ino = handle->ha_fid.fid_ino,
.gen = handle->ha_fid.fid_gen,
};
/*
* Only allow handle opens under a directory.
*/
if (!S_ISDIR(file_inode(file)->i_mode))
return ERR_PTR(-ENOTDIR);
if (handle->ha_fid.fid_len != xfs_filehandle_fid_len())
return ERR_PTR(-EINVAL);
return exportfs_decode_fh(file->f_path.mnt, (struct fid *)&fid, 3,
FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG,
xfs_handle_acceptable, NULL);
}
/* Convert handle already copied to kernel space into an xfs_inode. */
static struct xfs_inode *
xfs_khandle_to_inode(
struct file *file,
struct xfs_handle *handle)
{
struct xfs_inode *ip = XFS_I(file_inode(file));
struct xfs_mount *mp = ip->i_mount;
struct inode *inode;
if (!S_ISDIR(VFS_I(ip)->i_mode))
return ERR_PTR(-ENOTDIR);
if (handle->ha_fid.fid_len != xfs_filehandle_fid_len())
return ERR_PTR(-EINVAL);
inode = xfs_nfs_get_inode(mp->m_super, handle->ha_fid.fid_ino,
handle->ha_fid.fid_gen);
if (IS_ERR(inode))
return ERR_CAST(inode);
return XFS_I(inode);
}
/*
 * Convert userspace handle data into a dentry.
 *
 * @hlen must be exactly the size of a full handle; anything else is rejected
 * with -EINVAL before the user buffer is touched.
 */
struct dentry *
xfs_handle_to_dentry(
	struct file		*parfilp,
	void __user		*uhandle,
	u32			hlen)
{
	xfs_handle_t		khandle;

	if (hlen != sizeof(khandle))
		return ERR_PTR(-EINVAL);

	if (copy_from_user(&khandle, uhandle, sizeof(khandle)))
		return ERR_PTR(-EFAULT);

	return xfs_khandle_to_dentry(parfilp, &khandle);
}
/* Resolve the input handle of a handle request (ihandle/ihandlen) to a dentry. */
STATIC struct dentry *
xfs_handlereq_to_dentry(
	struct file		*parfilp,
	xfs_fsop_handlereq_t	*hreq)
{
	void __user		*uhandle = hreq->ihandle;
	u32			hlen = hreq->ihandlen;

	return xfs_handle_to_dentry(parfilp, uhandle, hlen);
}
/*
 * Open the file named by the input handle in @hreq and install it as a new
 * file descriptor.  @parfilp supplies the vfsmount for the open; the handle
 * lookup helpers require it to be a directory.
 *
 * Returns the new fd on success or a negative errno.  As a side effect,
 * hreq->oflags gains O_LARGEFILE unless BITS_PER_LONG is 32.
 */
int
xfs_open_by_handle(
struct file *parfilp,
xfs_fsop_handlereq_t *hreq)
{
const struct cred *cred = current_cred();
int error;
int fd;
int permflag;
struct file *filp;
struct inode *inode;
struct dentry *dentry;
fmode_t fmode;
struct path path;
/* Handle lookups skip pathname permission checks, so require privilege. */
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
dentry = xfs_handlereq_to_dentry(parfilp, hreq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
inode = d_inode(dentry);
/* Restrict xfs_open_by_handle to directories & regular files. */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
error = -EPERM;
goto out_dput;
}
#if BITS_PER_LONG != 32
hreq->oflags |= O_LARGEFILE;
#endif
permflag = hreq->oflags;
fmode = OPEN_FMODE(permflag);
/*
 * A write open of an append-only inode is refused unless O_APPEND is set
 * and O_TRUNC is not.
 */
if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
(fmode & FMODE_WRITE) && IS_APPEND(inode)) {
error = -EPERM;
goto out_dput;
}
/* Immutable inodes can never be opened for write. */
if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
error = -EPERM;
goto out_dput;
}
/* Can't write directories. */
if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) {
error = -EISDIR;
goto out_dput;
}
/* Reserve the fd number first; it is only published by fd_install(). */
fd = get_unused_fd_flags(0);
if (fd < 0) {
error = fd;
goto out_dput;
}
path.mnt = parfilp->f_path.mnt;
path.dentry = dentry;
filp = dentry_open(&path, hreq->oflags, cred);
/*
 * Our dentry reference is dropped here regardless of the outcome, so the
 * failure path below must not jump to out_dput.
 */
dput(dentry);
if (IS_ERR(filp)) {
put_unused_fd(fd);
return PTR_ERR(filp);
}
/* Regular files opened by handle get O_NOATIME and FMODE_NOCMTIME set. */
if (S_ISREG(inode->i_mode)) {
filp->f_flags |= O_NOATIME;
filp->f_mode |= FMODE_NOCMTIME;
}
fd_install(fd, filp);
return fd;
out_dput:
dput(dentry);
return error;
}
/*
 * Read the target of the symlink named by the input handle in @hreq into the
 * user buffer hreq->ohandle.  The buffer length is read from the user word
 * that hreq->ohandlen points at.  Returns whatever vfs_readlink() returns,
 * or a negative errno if the request is rejected earlier.
 */
int
xfs_readlink_by_handle(
	struct file		*parfilp,
	xfs_fsop_handlereq_t	*hreq)
{
	struct dentry		*dentry;
	__u32			olen;
	int			error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	dentry = xfs_handlereq_to_dentry(parfilp, hreq);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	/* Restrict this handle operation to symlinks only. */
	if (!d_is_symlink(dentry))
		error = -EINVAL;
	else if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32)))
		error = -EFAULT;
	else
		error = vfs_readlink(dentry, hreq->ohandle, olen);

	dput(dentry);
	return error;
}
/*
 * Attr list callback for the attrlist ioctls: format one attribute into the
 * kernel staging buffer.
 *
 * Entries are packed downwards from the end of the buffer (context->firstu
 * tracks the lowest used byte) while their offsets are appended to the
 * al_offset[] array at the front.  When the next entry would collide with
 * the offset array, al_more is set and the walk is told to stop.
 * Take care to check values and protect against them changing later,
 * we may be reading them directly out of a user buffer.
 */
static void
xfs_ioc_attr_put_listent(
	struct xfs_attr_list_context *context,
	int			flags,
	unsigned char		*name,
	int			namelen,
	void			*value,
	int			valuelen)
{
	struct xfs_attrlist	*alist = context->buffer;
	struct xfs_attrlist_ent	*aep;
	int			arraytop;

	ASSERT(!context->seen_enough);
	ASSERT(context->count >= 0);
	ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
	ASSERT(context->firstu >= sizeof(*alist));
	ASSERT(context->firstu <= context->bufsize);

	/* Only list entries in the right namespace. */
	if ((flags & XFS_ATTR_NSP_ONDISK_MASK) != context->attr_filter)
		return;

	/* decrement by the actual bytes used by the attr */
	context->firstu -= round_up(offsetof(struct xfs_attrlist_ent, a_name) +
			namelen + 1, sizeof(uint32_t));

	/* Entry data must stay above the header and offset array. */
	arraytop = sizeof(*alist) +
			context->count * sizeof(alist->al_offset[0]);
	if (context->firstu < arraytop) {
		trace_xfs_attr_list_full(context);
		alist->al_more = 1;
		context->seen_enough = 1;
		return;
	}

	/* Emit the entry: value length plus NUL-terminated name. */
	aep = context->buffer + context->firstu;
	aep->a_valuelen = valuelen;
	memcpy(aep->a_name, name, namelen);
	aep->a_name[namelen] = 0;

	/* Record where the entry lives and bump the entry count. */
	alist->al_offset[context->count] = context->firstu;
	alist->al_count = ++context->count;
	trace_xfs_attr_list_add(context);
}
/*
 * Map the ioctl namespace flags onto an attr filter value.  ROOT takes
 * precedence over SECURE; with neither flag set the filter is 0.
 */
static unsigned int
xfs_attr_filter(
	u32			ioc_flags)
{
	unsigned int		filter = 0;

	if (ioc_flags & XFS_IOC_ATTR_ROOT)
		filter = XFS_ATTR_ROOT;
	else if (ioc_flags & XFS_IOC_ATTR_SECURE)
		filter = XFS_ATTR_SECURE;

	return filter;
}
/*
 * Pick the xattr update mode from the ioctl flags and the presence of a
 * value: no value means remove; otherwise CREATE and REPLACE are honoured in
 * that order, and the default is upsert.
 */
static inline enum xfs_attr_update
xfs_xattr_flags(
	u32			ioc_flags,
	void			*value)
{
	enum xfs_attr_update	mode = XFS_ATTRUPDATE_UPSERT;

	if (value == NULL)
		mode = XFS_ATTRUPDATE_REMOVE;
	else if (ioc_flags & XFS_IOC_ATTR_CREATE)
		mode = XFS_ATTRUPDATE_CREATE;
	else if (ioc_flags & XFS_IOC_ATTR_REPLACE)
		mode = XFS_ATTRUPDATE_REPLACE;

	return mode;
}
/*
 * List the extended attributes of @dp in one namespace into the user buffer
 * @ubuf of @bufsize bytes, resuming from and updating the user cursor
 * @ucursor.  The list is built in a zeroed kernel buffer and copied out in
 * one go.  Returns 0 or a negative errno.
 */
int
xfs_ioc_attr_list(
	struct xfs_inode		*dp,
	void __user			*ubuf,
	size_t				bufsize,
	int				flags,
	struct xfs_attrlist_cursor __user *ucursor)
{
	struct xfs_attr_list_context	context = { };
	struct xfs_attrlist		*alist;
	void				*buffer;
	int				error;

	if (bufsize < sizeof(struct xfs_attrlist) ||
	    bufsize > XFS_XATTR_LIST_MAX)
		return -EINVAL;

	/* Reject flags, only allow namespaces, and at most one of them. */
	if ((flags & ~(XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE)) ||
	    flags == (XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE))
		return -EINVAL;

	/* Validate the cursor. */
	if (copy_from_user(&context.cursor, ucursor, sizeof(context.cursor)))
		return -EFAULT;
	if (context.cursor.pad1 || context.cursor.pad2)
		return -EINVAL;
	/* An uninitialised cursor must not carry a position. */
	if (!context.cursor.initted &&
	    (context.cursor.hashval || context.cursor.blkno ||
	     context.cursor.offset))
		return -EINVAL;

	buffer = kvzalloc(bufsize, GFP_KERNEL);
	if (!buffer)
		return -ENOMEM;

	/* Initialize the output buffer. */
	context.dp = dp;
	context.resynch = 1;
	context.attr_filter = xfs_attr_filter(flags);
	context.buffer = buffer;
	context.bufsize = round_down(bufsize, sizeof(uint32_t));
	context.firstu = context.bufsize;
	context.put_listent = xfs_ioc_attr_put_listent;

	alist = context.buffer;
	alist->al_count = 0;
	alist->al_more = 0;
	alist->al_offset[0] = context.bufsize;

	error = xfs_attr_list(&context);
	if (!error &&
	    (copy_to_user(ubuf, buffer, bufsize) ||
	     copy_to_user(ucursor, &context.cursor, sizeof(context.cursor))))
		error = -EFAULT;

	kvfree(buffer);
	return error;
}
/*
 * List the extended attributes of the file named by the handle in the user
 * request @p.  The cursor embedded in the user structure (p->pos) is read
 * and updated in place by xfs_ioc_attr_list().
 */
int
xfs_attrlist_by_handle(
	struct file		*parfilp,
	struct xfs_fsop_attrlist_handlereq __user *p)
{
	struct xfs_fsop_attrlist_handlereq al_hreq;
	struct xfs_inode	*ip;
	struct dentry		*dentry;
	int			error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(&al_hreq, p, sizeof(al_hreq)))
		return -EFAULT;

	dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	ip = XFS_I(d_inode(dentry));
	error = xfs_ioc_attr_list(ip, al_hreq.buffer, al_hreq.buflen,
			al_hreq.flags, &p->pos);

	dput(dentry);
	return error;
}
/*
 * Fetch the value of attribute @name on @inode and copy it to the user
 * buffer @ubuf.  On entry *@len is the size of the user buffer; on success
 * it is updated to the length of the value.  Returns 0 or a negative errno.
 */
static int
xfs_attrmulti_attr_get(
	struct inode		*inode,
	unsigned char		*name,
	unsigned char __user	*ubuf,
	uint32_t		*len,
	uint32_t		flags)
{
	struct xfs_da_args	args = {
		.dp		= XFS_I(inode),
		.attr_filter	= xfs_attr_filter(flags),
		.name		= name,
		.namelen	= strlen(name),
		.valuelen	= *len,
	};
	int			error;

	if (*len > XFS_XATTR_SIZE_MAX)
		return -EINVAL;

	error = xfs_attr_get(&args);
	if (!error) {
		*len = args.valuelen;
		if (copy_to_user(ubuf, args.value, args.valuelen))
			error = -EFAULT;
	}

	/* args.value is freed on both the success and the error path. */
	kvfree(args.value);
	return error;
}
/*
 * Set or remove attribute @name on @inode.  A NULL @ubuf requests removal
 * (see xfs_xattr_flags()); otherwise @len bytes are duplicated from
 * userspace and used as the new value.  Returns 0 or a negative errno.
 */
static int
xfs_attrmulti_attr_set(
	struct inode		*inode,
	unsigned char		*name,
	const unsigned char __user *ubuf,
	uint32_t		len,
	uint32_t		flags)
{
	struct xfs_da_args	args = {
		.dp		= XFS_I(inode),
		.attr_filter	= xfs_attr_filter(flags),
		.name		= name,
		.namelen	= strlen(name),
	};
	int			error;

	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
		return -EPERM;

	if (ubuf != NULL) {
		if (len > XFS_XATTR_SIZE_MAX)
			return -EINVAL;

		args.value = memdup_user(ubuf, len);
		if (IS_ERR(args.value))
			return PTR_ERR(args.value);
		args.valuelen = len;
	}

	error = xfs_attr_change(&args, xfs_xattr_flags(flags, args.value));

	/* A successful change in the ROOT namespace also calls xfs_forget_acl(). */
	if (error == 0 && (flags & XFS_IOC_ATTR_ROOT))
		xfs_forget_acl(inode, name);

	kfree(args.value);
	return error;
}
/*
 * Execute a single attr-multi operation (@opcode is one of ATTR_OP_GET,
 * ATTR_OP_SET or ATTR_OP_REMOVE) against @inode.  The attribute name is
 * copied in from @uname.  For removals the value pointer and *@len are
 * cleared before the update is attempted.  Returns 0 or a negative errno.
 */
int
xfs_ioc_attrmulti_one(
	struct file		*parfilp,
	struct inode		*inode,
	uint32_t		opcode,
	void __user		*uname,
	void __user		*value,
	uint32_t		*len,
	uint32_t		flags)
{
	unsigned char		*name;
	int			error;

	/* At most one namespace may be selected. */
	if ((flags & XFS_IOC_ATTR_ROOT) && (flags & XFS_IOC_ATTR_SECURE))
		return -EINVAL;

	name = strndup_user(uname, MAXNAMELEN);
	if (IS_ERR(name))
		return PTR_ERR(name);

	if (opcode == ATTR_OP_GET) {
		error = xfs_attrmulti_attr_get(inode, name, value, len, flags);
	} else if (opcode == ATTR_OP_SET || opcode == ATTR_OP_REMOVE) {
		if (opcode == ATTR_OP_REMOVE) {
			value = NULL;
			*len = 0;
		}

		/* Updates need write access to the mount. */
		error = mnt_want_write_file(parfilp);
		if (!error) {
			error = xfs_attrmulti_attr_set(inode, name, value,
					*len, flags);
			mnt_drop_write_file(parfilp);
		}
	} else {
		error = -EINVAL;
	}

	kfree(name);
	return error;
}
/*
 * Run an array of attr operations against the file named by the handle in
 * the user request @arg.  Each operation's result is stored in its am_error
 * field and the whole array is copied back to userspace; the return value
 * only reflects failures of the request itself.
 */
int
xfs_attrmulti_by_handle(
	struct file		*parfilp,
	void __user		*arg)
{
	int			error;
	xfs_attr_multiop_t	*ops;
	xfs_fsop_attrmulti_handlereq_t am_hreq;
	struct dentry		*dentry;
	unsigned int		i, size;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(&am_hreq, arg, sizeof(am_hreq)))
		return -EFAULT;

	/* overflow check */
	if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
		return -E2BIG;

	dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	/* Refuse empty requests and anything larger than 16 pages of ops. */
	size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
	if (size == 0 || size > 16 * PAGE_SIZE) {
		error = -E2BIG;
		goto out_dput;
	}

	ops = memdup_user(am_hreq.ops, size);
	if (IS_ERR(ops)) {
		error = PTR_ERR(ops);
		goto out_dput;
	}

	for (i = 0; i < am_hreq.opcount; i++) {
		xfs_attr_multiop_t	*op = &ops[i];

		op->am_error = xfs_ioc_attrmulti_one(parfilp, d_inode(dentry),
				op->am_opcode, op->am_attrname,
				op->am_attrvalue, &op->am_length,
				op->am_flags);
	}

	error = 0;
	if (copy_to_user(am_hreq.ops, ops, size))
		error = -EFAULT;

	kfree(ops);
out_dput:
	dput(dentry);
	return error;
}
/*
 * State for one GETPARENTS call.  The attr list context is embedded so that
 * the put_listent callback can recover this structure with container_of().
 */
struct xfs_getparents_ctx {
/* Attr list walk state handed to xfs_attr_list() */
struct xfs_attr_list_context context;
/* Kernel copy of the user request; the plain ioctl only uses gph_request */
struct xfs_getparents_by_handle gph;
/* File to target */
struct xfs_inode *ip;
/* Internal buffer where we format records */
void *krecords;
/* Last record filled out */
struct xfs_getparents_rec *lastrec;
/* Number of records formatted into krecords so far */
unsigned int count;
};
static inline unsigned int
xfs_getparents_rec_sizeof(
unsigned int namelen)
{
return round_up(sizeof(struct xfs_getparents_rec) + namelen + 1,
sizeof(uint64_t));
}
/*
 * Attr list callback for GETPARENTS: turn one parent pointer xattr into a
 * record in the kernel staging buffer.
 *
 * Attrs outside the parent namespace are skipped.  A parent pointer that
 * fails to decode marks the inode sick and stores -EFSCORRUPTED in
 * context->seen_enough; a full buffer stores 1 there instead.
 */
static void
xfs_getparents_put_listent(
	struct xfs_attr_list_context	*context,
	int				flags,
	unsigned char			*name,
	int				namelen,
	void				*value,
	int				valuelen)
{
	struct xfs_getparents_ctx	*gpx =
		container_of(context, struct xfs_getparents_ctx, context);
	struct xfs_inode		*ip = context->dp;
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_getparents_rec	*gpr;
	unsigned short			reclen;
	xfs_ino_t			ino;
	uint32_t			gen;
	int				error;

	if (!(flags & XFS_ATTR_PARENT))
		return;

	error = xfs_parent_from_attr(mp, flags, name, namelen, value, valuelen,
			&ino, &gen);
	if (error) {
		xfs_inode_mark_sick(ip, XFS_SICK_INO_PARENT);
		context->seen_enough = -EFSCORRUPTED;
		return;
	}

	/*
	 * We found a parent pointer, but we've filled up the buffer.  Signal
	 * to the caller that we did /not/ reach the end of the parent pointer
	 * recordset.
	 */
	reclen = xfs_getparents_rec_sizeof(namelen);
	if (context->firstu > context->bufsize - reclen) {
		context->seen_enough = 1;
		return;
	}

	/* Format the parent pointer directly into the caller buffer. */
	gpr = gpx->krecords + context->firstu;
	gpr->gpr_reclen = reclen;
	xfs_filehandle_init(mp, ino, gen, &gpr->gpr_parent);
	memcpy(gpr->gpr_name, name, namelen);
	gpr->gpr_name[namelen] = 0;

	trace_xfs_getparents_put_listent(ip, &gpx->gph.gph_request, context,
			gpr);

	/* Account for the new record. */
	gpx->lastrec = gpr;
	gpx->count++;
	context->firstu += reclen;
}
/*
 * Expand the last record to fill the rest of the caller's buffer.  If no
 * record was formatted at all, the start of the staging buffer is treated as
 * the last record.
 */
static inline void
xfs_getparents_expand_lastrec(
	struct xfs_getparents_ctx	*gpx)
{
	struct xfs_getparents		*gp = &gpx->gph.gph_request;
	struct xfs_getparents_rec	*gpr;

	gpr = gpx->lastrec ? gpx->lastrec : gpx->krecords;

	/* Record length now spans from this record to the end of the buffer. */
	gpr->gpr_reclen = gp->gp_bufsize - ((void *)gpr - gpx->krecords);

	trace_xfs_getparents_expand_lastrec(gpx->ip, gp, &gpx->context, gpr);
}
/* Convert a 64-bit integer carried in an ioctl structure to a user pointer. */
static inline void __user *u64_to_uptr(u64 val)
{
	uintptr_t		addr = (uintptr_t)val;

	return (void __user *)addr;
}
/*
 * Retrieve the parent pointers for a given inode.
 *
 * The request in gpx->gph.gph_request is validated, parent records are
 * formatted into a temporary kernel buffer by walking the inode's xattrs,
 * and the filled part of that buffer is copied to the user buffer named by
 * gp_buffer.  The cursor and output flags in the request are updated in
 * place; copying the request itself back to userspace is left to the caller.
 *
 * Returns 0 or a negative errno.  -EMSGSIZE means the buffer could not hold
 * even one record.
 */
STATIC int
xfs_getparents(
struct xfs_getparents_ctx *gpx)
{
struct xfs_getparents *gp = &gpx->gph.gph_request;
struct xfs_inode *ip = gpx->ip;
struct xfs_mount *mp = ip->i_mount;
size_t bufsize;
int error;
/* Check size of buffer requested by user */
if (gp->gp_bufsize > XFS_XATTR_LIST_MAX)
return -ENOMEM;
if (gp->gp_bufsize < xfs_getparents_rec_sizeof(1))
return -EINVAL;
/* Unknown input flags and a nonzero reserved field are rejected. */
if (gp->gp_iflags & ~XFS_GETPARENTS_IFLAGS_ALL)
return -EINVAL;
if (gp->gp_reserved)
return -EINVAL;
bufsize = round_down(gp->gp_bufsize, sizeof(uint64_t));
gpx->krecords = kvzalloc(bufsize, GFP_KERNEL);
/*
 * If the full-size allocation fails, retry with a buffer capped at
 * PAGE_SIZE; the walk below is then limited to that smaller size.
 */
if (!gpx->krecords) {
bufsize = min(bufsize, PAGE_SIZE);
gpx->krecords = kvzalloc(bufsize, GFP_KERNEL);
if (!gpx->krecords)
return -ENOMEM;
}
gpx->context.dp = ip;
gpx->context.resynch = 1;
gpx->context.put_listent = xfs_getparents_put_listent;
gpx->context.bufsize = bufsize;
/* firstu is used to track the bytes filled in the buffer */
gpx->context.firstu = 0;
/* Copy the cursor provided by caller */
memcpy(&gpx->context.cursor, &gp->gp_cursor,
sizeof(struct xfs_attrlist_cursor));
gpx->count = 0;
gp->gp_oflags = 0;
trace_xfs_getparents_begin(ip, gp, &gpx->context.cursor);
error = xfs_attr_list(&gpx->context);
if (error)
goto out_free_buf;
/* The put_listent callback reports corruption as a negative seen_enough. */
if (gpx->context.seen_enough < 0) {
error = gpx->context.seen_enough;
goto out_free_buf;
}
xfs_getparents_expand_lastrec(gpx);
/* Update the caller with the current cursor position */
memcpy(&gp->gp_cursor, &gpx->context.cursor,
sizeof(struct xfs_attrlist_cursor));
/* Is this the root directory? */
if (ip->i_ino == mp->m_sb.sb_rootino)
gp->gp_oflags |= XFS_GETPARENTS_OFLAG_ROOT;
if (gpx->context.seen_enough == 0) {
/*
* If we did not run out of buffer space, then we reached the
* end of the pptr recordset, so set the DONE flag.
*/
gp->gp_oflags |= XFS_GETPARENTS_OFLAG_DONE;
} else if (gpx->count == 0) {
/*
* If we ran out of buffer space before copying any parent
* pointers at all, the caller's buffer was too short. Tell
* userspace that, erm, the message is too long.
*/
error = -EMSGSIZE;
goto out_free_buf;
}
trace_xfs_getparents_end(ip, gp, &gpx->context.cursor);
ASSERT(gpx->context.firstu <= gpx->gph.gph_request.gp_bufsize);
/* Copy the records to userspace. */
if (copy_to_user(u64_to_uptr(gpx->gph.gph_request.gp_buffer),
gpx->krecords, gpx->context.firstu))
error = -EFAULT;
out_free_buf:
/* The staging buffer is released on success and on failure alike. */
kvfree(gpx->krecords);
gpx->krecords = NULL;
return error;
}
/*
 * Retrieve the parents of the open file @file and pass them back to
 * userspace.  The request is copied in from @ureq and, on success, copied
 * back with the updated cursor and output flags.
 */
int
xfs_ioc_getparents(
	struct file			*file,
	struct xfs_getparents __user	*ureq)
{
	struct xfs_getparents_ctx	gpx = {
		.ip			= XFS_I(file_inode(file)),
	};
	struct xfs_getparents		*kreq = &gpx.gph.gph_request;
	struct xfs_mount		*mp = gpx.ip->i_mount;
	int				error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Only filesystems with the parent pointer feature can answer. */
	if (!xfs_has_parent(mp))
		return -EOPNOTSUPP;

	if (copy_from_user(kreq, ureq, sizeof(*kreq)))
		return -EFAULT;

	error = xfs_getparents(&gpx);
	if (error)
		return error;

	return copy_to_user(ureq, kreq, sizeof(*kreq)) ? -EFAULT : 0;
}
/*
 * Retrieve the parents of the file named by the handle in @ureq and pass
 * them back to userspace.  @file only identifies the filesystem (and must be
 * a directory, as required by xfs_khandle_to_inode()).
 */
int
xfs_ioc_getparents_by_handle(
	struct file			*file,
	struct xfs_getparents_by_handle __user	*ureq)
{
	struct xfs_getparents_ctx	gpx = { };
	struct xfs_inode		*ip = XFS_I(file_inode(file));
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_getparents_by_handle	*kreq = &gpx.gph;
	struct xfs_handle		*handle = &kreq->gph_handle;
	int				error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!xfs_has_parent(mp))
		return -EOPNOTSUPP;

	if (copy_from_user(kreq, ureq, sizeof(*kreq)))
		return -EFAULT;

	/*
	 * We don't use exportfs_decode_fh because it does too much work here.
	 * If the handle refers to a directory, the exportfs code will walk
	 * upwards through the directory tree to connect the dentries to the
	 * root directory dentry.  For GETPARENTS we don't care about that
	 * because we're not actually going to open a file descriptor; we only
	 * want to open an inode and read its parent pointers.
	 *
	 * Note that xfs_scrub uses GETPARENTS to log that it will try to fix a
	 * corrupted file's metadata.  For this usecase we would really rather
	 * userspace single-step the path reconstruction to avoid loops or
	 * other strange things if the directory tree is corrupt.
	 */
	gpx.ip = xfs_khandle_to_inode(file, handle);
	if (IS_ERR(gpx.ip))
		return PTR_ERR(gpx.ip);

	error = xfs_getparents(&gpx);
	if (!error && copy_to_user(ureq, kreq, sizeof(*kreq)))
		error = -EFAULT;

	/* Drop the inode reference obtained from the handle lookup. */
	xfs_irele(gpx.ip);
	return error;
}
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* Copyright (c) 2022-2024 Oracle.
* All rights reserved.
*/
#ifndef __XFS_HANDLE_H__
#define __XFS_HANDLE_H__
/* Extended attribute listing and batched attr operations on a handle. */
int xfs_attrlist_by_handle(struct file *parfilp,
struct xfs_fsop_attrlist_handlereq __user *p);
int xfs_attrmulti_by_handle(struct file *parfilp, void __user *arg);
/* Build a file or filesystem handle for an fd or path (selected by cmd). */
int xfs_find_handle(unsigned int cmd, struct xfs_fsop_handlereq *hreq);
/* Open, or read the symlink target of, the file a handle refers to. */
int xfs_open_by_handle(struct file *parfilp, struct xfs_fsop_handlereq *hreq);
int xfs_readlink_by_handle(struct file *parfilp,
struct xfs_fsop_handlereq *hreq);
/* Perform one get/set/remove attr operation selected by opcode. */
int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode,
uint32_t opcode, void __user *uname, void __user *value,
uint32_t *len, uint32_t flags);
/* List the attrs of dp into a user buffer, resuming from a user cursor. */
int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf,
size_t bufsize, int flags,
struct xfs_attrlist_cursor __user *ucursor);
/* Convert a userspace handle of hlen bytes into a dentry (or ERR_PTR). */
struct dentry *xfs_handle_to_dentry(struct file *parfilp, void __user *uhandle,
u32 hlen);
/* Parent pointer retrieval, by open file or by file handle. */
int xfs_ioc_getparents(struct file *file, struct xfs_getparents __user *arg);
int xfs_ioc_getparents_by_handle(struct file *file,
struct xfs_getparents_by_handle __user *arg);
#endif /* __XFS_HANDLE_H__ */
......@@ -40,6 +40,8 @@
#include "xfs_log_priv.h"
#include "xfs_health.h"
#include "xfs_pnfs.h"
#include "xfs_parent.h"
#include "xfs_xattr.h"
struct kmem_cache *xfs_inode_cache;
......@@ -1017,7 +1019,7 @@ xfs_dir_hook_setup(
int
xfs_create(
struct mnt_idmap *idmap,
xfs_inode_t *dp,
struct xfs_inode *dp,
struct xfs_name *name,
umode_t mode,
dev_t rdev,
......@@ -1037,6 +1039,7 @@ xfs_create(
struct xfs_trans_res *tres;
uint resblks;
xfs_ino_t ino;
struct xfs_parent_args *ppargs;
trace_xfs_create(dp, name);
......@@ -1058,13 +1061,17 @@ xfs_create(
return error;
if (is_dir) {
resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
resblks = xfs_mkdir_space_res(mp, name->len);
tres = &M_RES(mp)->tr_mkdir;
} else {
resblks = XFS_CREATE_SPACE_RES(mp, name->len);
resblks = xfs_create_space_res(mp, name->len);
tres = &M_RES(mp)->tr_create;
}
error = xfs_parent_start(mp, &ppargs);
if (error)
goto out_release_dquots;
/*
* Initially assume that the file does not exist and
* reserve the resources for that case. If that is not
......@@ -1080,7 +1087,7 @@ xfs_create(
resblks, &tp);
}
if (error)
goto out_release_dquots;
goto out_parent;
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
......@@ -1123,6 +1130,16 @@ xfs_create(
xfs_bumplink(tp, dp);
}
/*
* If we have parent pointers, we need to add the attribute containing
* the parent information now.
*/
if (ppargs) {
error = xfs_parent_addname(tp, ppargs, dp, name, ip);
if (error)
goto out_trans_cancel;
}
/*
* Create ip with a reference from dp, and add '.' and '..' references
* if it's a directory.
......@@ -1155,6 +1172,7 @@ xfs_create(
*ipp = ip;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
xfs_parent_finish(mp, ppargs);
return 0;
out_trans_cancel:
......@@ -1170,6 +1188,8 @@ xfs_create(
xfs_finish_inode_setup(ip);
xfs_irele(ip);
}
out_parent:
xfs_parent_finish(mp, ppargs);
out_release_dquots:
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
......@@ -1185,6 +1205,7 @@ xfs_create_tmpfile(
struct mnt_idmap *idmap,
struct xfs_inode *dp,
umode_t mode,
bool init_xattrs,
struct xfs_inode **ipp)
{
struct xfs_mount *mp = dp->i_mount;
......@@ -1225,7 +1246,7 @@ xfs_create_tmpfile(
error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
if (!error)
error = xfs_init_new_inode(idmap, tp, dp, ino, mode,
0, 0, prid, false, &ip);
0, 0, prid, init_xattrs, &ip);
if (error)
goto out_trans_cancel;
......@@ -1278,14 +1299,15 @@ xfs_create_tmpfile(
int
xfs_link(
xfs_inode_t *tdp,
xfs_inode_t *sip,
struct xfs_inode *tdp,
struct xfs_inode *sip,
struct xfs_name *target_name)
{
xfs_mount_t *mp = tdp->i_mount;
xfs_trans_t *tp;
struct xfs_mount *mp = tdp->i_mount;
struct xfs_trans *tp;
int error, nospace_error = 0;
int resblks;
struct xfs_parent_args *ppargs;
trace_xfs_link(tdp, target_name);
......@@ -1304,11 +1326,25 @@ xfs_link(
if (error)
goto std_return;
resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
error = xfs_parent_start(mp, &ppargs);
if (error)
goto std_return;
resblks = xfs_link_space_res(mp, target_name->len);
error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks,
&tp, &nospace_error);
if (error)
goto std_return;
goto out_parent;
/*
* We don't allow reservationless or quotaless hardlinking when parent
* pointers are enabled because we can't back out if the xattrs must
* grow.
*/
if (ppargs && nospace_error) {
error = nospace_error;
goto error_return;
}
/*
* If we are using project inheritance, we only allow hard link
......@@ -1359,6 +1395,19 @@ xfs_link(
xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
xfs_bumplink(tp, sip);
/*
* If we have parent pointers, we now need to add the parent record to
* the attribute fork of the inode. If this is the initial parent
* attribute, we need to create it correctly, otherwise we can just add
* the parent to the inode.
*/
if (ppargs) {
error = xfs_parent_addname(tp, ppargs, tdp, target_name, sip);
if (error)
goto error_return;
}
xfs_dir_update_hook(tdp, sip, 1, target_name);
/*
......@@ -1372,12 +1421,15 @@ xfs_link(
error = xfs_trans_commit(tp);
xfs_iunlock(tdp, XFS_ILOCK_EXCL);
xfs_iunlock(sip, XFS_ILOCK_EXCL);
xfs_parent_finish(mp, ppargs);
return error;
error_return:
xfs_trans_cancel(tp);
xfs_iunlock(tdp, XFS_ILOCK_EXCL);
xfs_iunlock(sip, XFS_ILOCK_EXCL);
out_parent:
xfs_parent_finish(mp, ppargs);
std_return:
if (error == -ENOSPC && nospace_error)
error = nospace_error;
......@@ -2669,16 +2721,17 @@ xfs_iunpin_wait(
*/
int
xfs_remove(
xfs_inode_t *dp,
struct xfs_inode *dp,
struct xfs_name *name,
xfs_inode_t *ip)
struct xfs_inode *ip)
{
xfs_mount_t *mp = dp->i_mount;
xfs_trans_t *tp = NULL;
struct xfs_mount *mp = dp->i_mount;
struct xfs_trans *tp = NULL;
int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
int dontcare;
int error = 0;
uint resblks;
struct xfs_parent_args *ppargs;
trace_xfs_remove(dp, name);
......@@ -2695,6 +2748,10 @@ xfs_remove(
if (error)
goto std_return;
error = xfs_parent_start(mp, &ppargs);
if (error)
goto std_return;
/*
* We try to get the real space reservation first, allowing for
* directory btree deletion(s) implying possible bmap insert(s). If we
......@@ -2706,12 +2763,12 @@ xfs_remove(
* the directory code can handle a reservationless update and we don't
* want to prevent a user from trying to free space by deleting things.
*/
resblks = XFS_REMOVE_SPACE_RES(mp);
resblks = xfs_remove_space_res(mp, name->len);
error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks,
&tp, &dontcare);
if (error) {
ASSERT(error != -ENOSPC);
goto std_return;
goto out_parent;
}
/*
......@@ -2771,6 +2828,13 @@ xfs_remove(
goto out_trans_cancel;
}
/* Remove parent pointer. */
if (ppargs) {
error = xfs_parent_removename(tp, ppargs, dp, name, ip);
if (error)
goto out_trans_cancel;
}
/*
* Drop the link from dp to ip, and if ip was a directory, remove the
* '.' and '..' references since we freed the directory.
......@@ -2794,6 +2858,7 @@ xfs_remove(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
xfs_parent_finish(mp, ppargs);
return 0;
out_trans_cancel:
......@@ -2801,6 +2866,8 @@ xfs_remove(
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
out_parent:
xfs_parent_finish(mp, ppargs);
std_return:
return error;
}
......@@ -2905,9 +2972,11 @@ xfs_cross_rename(
struct xfs_inode *dp1,
struct xfs_name *name1,
struct xfs_inode *ip1,
struct xfs_parent_args *ip1_ppargs,
struct xfs_inode *dp2,
struct xfs_name *name2,
struct xfs_inode *ip2,
struct xfs_parent_args *ip2_ppargs,
int spaceres)
{
int error = 0;
......@@ -2982,6 +3051,21 @@ xfs_cross_rename(
}
}
/* Schedule parent pointer replacements */
if (ip1_ppargs) {
error = xfs_parent_replacename(tp, ip1_ppargs, dp1, name1, dp2,
name2, ip1);
if (error)
goto out_trans_abort;
}
if (ip2_ppargs) {
error = xfs_parent_replacename(tp, ip2_ppargs, dp2, name2, dp1,
name1, ip2);
if (error)
goto out_trans_abort;
}
if (ip1_flags) {
xfs_trans_ichgtime(tp, ip1, ip1_flags);
xfs_trans_log_inode(tp, ip1, XFS_ILOG_CORE);
......@@ -3037,7 +3121,7 @@ xfs_rename_alloc_whiteout(
int error;
error = xfs_create_tmpfile(idmap, dp, S_IFCHR | WHITEOUT_MODE,
&tmpfile);
xfs_has_parent(dp->i_mount), &tmpfile);
if (error)
return error;
......@@ -3081,6 +3165,9 @@ xfs_rename(
struct xfs_trans *tp;
struct xfs_inode *wip = NULL; /* whiteout inode */
struct xfs_inode *inodes[__XFS_SORT_INODES];
struct xfs_parent_args *src_ppargs = NULL;
struct xfs_parent_args *tgt_ppargs = NULL;
struct xfs_parent_args *wip_ppargs = NULL;
int i;
int num_inodes = __XFS_SORT_INODES;
bool new_parent = (src_dp != target_dp);
......@@ -3112,9 +3199,26 @@ xfs_rename(
xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
inodes, &num_inodes);
error = xfs_parent_start(mp, &src_ppargs);
if (error)
goto out_release_wip;
if (wip) {
error = xfs_parent_start(mp, &wip_ppargs);
if (error)
goto out_src_ppargs;
}
if (target_ip) {
error = xfs_parent_start(mp, &tgt_ppargs);
if (error)
goto out_wip_ppargs;
}
retry:
nospace_error = 0;
spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
spaceres = xfs_rename_space_res(mp, src_name->len, target_ip != NULL,
target_name->len, wip != NULL);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
if (error == -ENOSPC) {
nospace_error = error;
......@@ -3123,7 +3227,17 @@ xfs_rename(
&tp);
}
if (error)
goto out_release_wip;
goto out_tgt_ppargs;
/*
* We don't allow reservationless renaming when parent pointers are
* enabled because we can't back out if the xattrs must grow.
*/
if (src_ppargs && nospace_error) {
error = nospace_error;
xfs_trans_cancel(tp);
goto out_tgt_ppargs;
}
/*
* Attach the dquots to the inodes
......@@ -3131,7 +3245,7 @@ xfs_rename(
error = xfs_qm_vop_rename_dqattach(inodes);
if (error) {
xfs_trans_cancel(tp);
goto out_release_wip;
goto out_tgt_ppargs;
}
/*
......@@ -3168,10 +3282,10 @@ xfs_rename(
/* RENAME_EXCHANGE is unique from here on. */
if (flags & RENAME_EXCHANGE) {
error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
target_dp, target_name, target_ip,
spaceres);
xfs_iunlock_rename(inodes, num_inodes);
return error;
src_ppargs, target_dp, target_name, target_ip,
tgt_ppargs, spaceres);
nospace_error = 0;
goto out_unlock;
}
/*
......@@ -3200,6 +3314,15 @@ xfs_rename(
goto out_trans_cancel;
}
/*
* We don't allow quotaless renaming when parent pointers are enabled
* because we can't back out if the xattrs must grow.
*/
if (src_ppargs && nospace_error) {
error = nospace_error;
goto out_trans_cancel;
}
/*
* Check for expected errors before we dirty the transaction
* so we can return an error without a transaction abort.
......@@ -3392,6 +3515,28 @@ xfs_rename(
if (error)
goto out_trans_cancel;
/* Schedule parent pointer updates. */
if (wip_ppargs) {
error = xfs_parent_addname(tp, wip_ppargs, src_dp, src_name,
wip);
if (error)
goto out_trans_cancel;
}
if (src_ppargs) {
error = xfs_parent_replacename(tp, src_ppargs, src_dp,
src_name, target_dp, target_name, src_ip);
if (error)
goto out_trans_cancel;
}
if (tgt_ppargs) {
error = xfs_parent_removename(tp, tgt_ppargs, target_dp,
target_name, target_ip);
if (error)
goto out_trans_cancel;
}
xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
if (new_parent)
......@@ -3413,14 +3558,19 @@ xfs_rename(
xfs_dir_update_hook(src_dp, wip, 1, src_name);
error = xfs_finish_rename(tp);
xfs_iunlock_rename(inodes, num_inodes);
if (wip)
xfs_irele(wip);
return error;
nospace_error = 0;
goto out_unlock;
out_trans_cancel:
xfs_trans_cancel(tp);
out_unlock:
xfs_iunlock_rename(inodes, num_inodes);
out_tgt_ppargs:
xfs_parent_finish(mp, tgt_ppargs);
out_wip_ppargs:
xfs_parent_finish(mp, wip_ppargs);
out_src_ppargs:
xfs_parent_finish(mp, src_ppargs);
out_release_wip:
if (wip)
xfs_irele(wip);
......
......@@ -522,7 +522,7 @@ int xfs_create(struct mnt_idmap *idmap,
umode_t mode, dev_t rdev, bool need_xattr,
struct xfs_inode **ipp);
int xfs_create_tmpfile(struct mnt_idmap *idmap,
struct xfs_inode *dp, umode_t mode,
struct xfs_inode *dp, umode_t mode, bool init_xattrs,
struct xfs_inode **ipp);
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode *ip);
......
......@@ -23,11 +23,9 @@
#include "xfs_fsops.h"
#include "xfs_discard.h"
#include "xfs_quota.h"
#include "xfs_export.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_trans.h"
#include "xfs_acl.h"
#include "xfs_btree.h"
#include <linux/fsmap.h>
#include "xfs_fsmap.h"
......@@ -41,597 +39,11 @@
#include "xfs_rtbitmap.h"
#include "xfs_file.h"
#include "xfs_exchrange.h"
#include "xfs_handle.h"
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/fileattr.h>
/*
 * Turn a userspace xfs_fsop_handlereq into a file or filesystem handle and
 * copy the handle and its size out to the caller's buffers.
 *
 * XFS_IOC_PATH_TO_FSHANDLE
 *    yields the fs handle for a mount point or a path within that mount
 * XFS_IOC_FD_TO_HANDLE
 *    yields the full handle for a file descriptor opened in userspace
 * XFS_IOC_PATH_TO_HANDLE
 *    yields the full handle for a path
 *
 * Returns 0 or a negative errno.
 */
int
xfs_find_handle(
	unsigned int		cmd,
	xfs_fsop_handlereq_t	*hreq)
{
	const bool		by_fd = (cmd == XFS_IOC_FD_TO_HANDLE);
	struct fd		f = {NULL};
	struct path		path;
	struct inode		*inode;
	struct xfs_inode	*ip;
	xfs_handle_t		handle;
	int			hsize;
	int			error;

	/* Pin the target either through the fd or through a path lookup. */
	if (by_fd) {
		f = fdget(hreq->fd);
		if (!f.file)
			return -EBADF;
		inode = file_inode(f.file);
	} else {
		error = user_path_at(AT_FDCWD, hreq->path, 0, &path);
		if (error)
			return error;
		inode = d_inode(path.dentry);
	}
	ip = XFS_I(inode);

	/*
	 * Handles can only be generated for inodes that live on an XFS
	 * filesystem, and only for regular files, directories and symlinks.
	 */
	if (inode->i_sb->s_magic != XFS_SB_MAGIC) {
		error = -EINVAL;
		goto out_put;
	}
	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
	      S_ISLNK(inode->i_mode))) {
		error = -EBADF;
		goto out_put;
	}

	memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));

	if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
		/* An fs handle carries only the fsid; zero everything else. */
		memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
		hsize = sizeof(xfs_fsid_t);
	} else {
		handle.ha_fid.fid_ino = ip->i_ino;
		handle.ha_fid.fid_gen = inode->i_generation;
		handle.ha_fid.fid_pad = 0;
		handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
					sizeof(handle.ha_fid.fid_len);
		hsize = sizeof(xfs_handle_t);
	}

	if (copy_to_user(hreq->ohandle, &handle, hsize) ||
	    copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
		error = -EFAULT;
	else
		error = 0;

out_put:
	/* Release whichever reference was taken at the top. */
	if (by_fd)
		fdput(f);
	else
		path_put(&path);
	return error;
}
/*
 * Acceptance callback used when decoding a handle: every dentry is accepted.
 * Handle operations are privileged, so there is no need to run permission
 * checks on the individual pathname components.
 */
STATIC int
xfs_handle_acceptable(
	void		*context,
	struct dentry	*dentry)
{
	return 1;
}
/*
 * Convert userspace handle data into a dentry.
 *
 * @parfilp: open file the ioctl was issued on; must be a directory
 * @uhandle: user pointer to an xfs_handle_t
 * @hlen:    size of the user handle; must equal sizeof(xfs_handle_t)
 *
 * Returns whatever exportfs_decode_fh() returns for the decoded file id, or
 * ERR_PTR(-ENOTDIR) if @parfilp is not a directory, ERR_PTR(-EINVAL) if
 * @hlen or the embedded fid_len is wrong, or ERR_PTR(-EFAULT) if the handle
 * cannot be copied from userspace.
 */
struct dentry *
xfs_handle_to_dentry(
	struct file		*parfilp,
	void __user		*uhandle,
	u32			hlen)
{
	xfs_handle_t		handle;
	struct xfs_fid64	fid;

	/*
	 * Only allow handle opens under a directory.
	 */
	if (!S_ISDIR(file_inode(parfilp)->i_mode))
		return ERR_PTR(-ENOTDIR);

	if (hlen != sizeof(xfs_handle_t))
		return ERR_PTR(-EINVAL);
	if (copy_from_user(&handle, uhandle, hlen))
		return ERR_PTR(-EFAULT);
	if (handle.ha_fid.fid_len !=
	    sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len))
		return ERR_PTR(-EINVAL);

	/*
	 * Clear the entire file id.  The memset is sized by the object itself
	 * rather than by the unrelated struct fid type, so that no part of
	 * fid is left uninitialized should the two types differ in size.
	 */
	memset(&fid, 0, sizeof(fid));
	fid.ino = handle.ha_fid.fid_ino;
	fid.gen = handle.ha_fid.fid_gen;

	return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3,
			FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG,
			xfs_handle_acceptable, NULL);
}
/*
 * Resolve the input handle (ihandle/ihandlen) carried in a handle request to
 * a dentry by handing it to xfs_handle_to_dentry().
 */
STATIC struct dentry *
xfs_handlereq_to_dentry(
	struct file		*parfilp,
	xfs_fsop_handlereq_t	*hreq)
{
	void __user		*uhandle = hreq->ihandle;
	u32			hlen = hreq->ihandlen;

	return xfs_handle_to_dentry(parfilp, uhandle, hlen);
}
/*
 * Open the file named by the input handle in @hreq and install it in the
 * caller's file descriptor table.
 *
 * Requires CAP_SYS_ADMIN.  Only regular files and directories may be opened.
 * hreq->oflags supplies the open flags (and is modified in place when
 * O_LARGEFILE is forced on).
 *
 * Returns the new file descriptor on success or a negative errno.
 */
int
xfs_open_by_handle(
struct file *parfilp,
xfs_fsop_handlereq_t *hreq)
{
const struct cred *cred = current_cred();
int error;
int fd;
int permflag;
struct file *filp;
struct inode *inode;
struct dentry *dentry;
fmode_t fmode;
struct path path;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
dentry = xfs_handlereq_to_dentry(parfilp, hreq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
inode = d_inode(dentry);
/* Restrict xfs_open_by_handle to directories & regular files. */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
error = -EPERM;
goto out_dput;
}
/* Force O_LARGEFILE on anything but 32-bit builds. */
#if BITS_PER_LONG != 32
hreq->oflags |= O_LARGEFILE;
#endif
permflag = hreq->oflags;
fmode = OPEN_FMODE(permflag);
/*
 * A write open of an append-only inode is refused unless O_APPEND is set
 * and O_TRUNC is not.
 */
if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
(fmode & FMODE_WRITE) && IS_APPEND(inode)) {
error = -EPERM;
goto out_dput;
}
/* Immutable inodes can never be opened for write. */
if ((fmode & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
error = -EPERM;
goto out_dput;
}
/* Can't write directories. */
if (S_ISDIR(inode->i_mode) && (fmode & FMODE_WRITE)) {
error = -EISDIR;
goto out_dput;
}
/* Reserve the descriptor number before opening the file. */
fd = get_unused_fd_flags(0);
if (fd < 0) {
error = fd;
goto out_dput;
}
path.mnt = parfilp->f_path.mnt;
path.dentry = dentry;
filp = dentry_open(&path, hreq->oflags, cred);
/*
 * Our dentry reference is dropped here whether or not the open worked,
 * which is why the failure path below returns directly instead of
 * jumping to out_dput.
 */
dput(dentry);
if (IS_ERR(filp)) {
put_unused_fd(fd);
return PTR_ERR(filp);
}
/* Regular files opened by handle get O_NOATIME and FMODE_NOCMTIME set. */
if (S_ISREG(inode->i_mode)) {
filp->f_flags |= O_NOATIME;
filp->f_mode |= FMODE_NOCMTIME;
}
fd_install(fd, filp);
return fd;
out_dput:
dput(dentry);
return error;
}
/*
 * Read the target of the symlink named by the input handle in @hreq into the
 * user buffer hreq->ohandle.  The buffer length is fetched from the user
 * word at hreq->ohandlen.
 *
 * Requires CAP_SYS_ADMIN.  Returns the result of vfs_readlink() or a
 * negative errno (-EINVAL if the handle does not refer to a symlink).
 */
int
xfs_readlink_by_handle(
	struct file		*parfilp,
	xfs_fsop_handlereq_t	*hreq)
{
	struct dentry		*dentry;
	__u32			olen;
	int			error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	dentry = xfs_handlereq_to_dentry(parfilp, hreq);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	/* Restrict this handle operation to symlinks only. */
	if (!d_is_symlink(dentry))
		error = -EINVAL;
	else if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32)))
		error = -EFAULT;
	else
		error = vfs_readlink(dentry, hreq->ohandle, olen);

	dput(dentry);
	return error;
}
/*
 * Format an attribute and copy it out to the user's buffer.
 * Take care to check values and protect against them changing later,
 * we may be reading them directly out of a user buffer.
 *
 * The buffer at context->buffer starts with a struct xfs_attrlist whose
 * al_offset[] array grows upward, one slot per entry.  The entries
 * themselves are placed from the end of the buffer downward, with
 * context->firstu tracking the offset of the lowest entry written so far.
 * When a new entry would drop below the top of the offset array, al_more
 * is set and the listing stops.
 */
static void
xfs_ioc_attr_put_listent(
struct xfs_attr_list_context *context,
int flags,
unsigned char *name,
int namelen,
int valuelen)
{
struct xfs_attrlist *alist = context->buffer;
struct xfs_attrlist_ent *aep;
int arraytop;
ASSERT(!context->seen_enough);
ASSERT(context->count >= 0);
ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
ASSERT(context->firstu >= sizeof(*alist));
ASSERT(context->firstu <= context->bufsize);
/*
* Only list entries in the right namespace.
*/
if (context->attr_filter != (flags & XFS_ATTR_NSP_ONDISK_MASK))
return;
/* Byte offset just past the al_offset[] slots already in use. */
arraytop = sizeof(*alist) +
context->count * sizeof(alist->al_offset[0]);
/* decrement by the actual bytes used by the attr */
context->firstu -= round_up(offsetof(struct xfs_attrlist_ent, a_name) +
namelen + 1, sizeof(uint32_t));
/* No room left: flag that more entries exist and stop the walk. */
if (context->firstu < arraytop) {
trace_xfs_attr_list_full(context);
alist->al_more = 1;
context->seen_enough = 1;
return;
}
/* Write the entry (value length plus NUL-terminated name) at firstu. */
aep = context->buffer + context->firstu;
aep->a_valuelen = valuelen;
memcpy(aep->a_name, name, namelen);
aep->a_name[namelen] = 0;
/* Record where the entry lives and publish the new count. */
alist->al_offset[context->count++] = context->firstu;
alist->al_count = context->count;
trace_xfs_attr_list_add(context);
}
/*
 * Map the ioctl namespace flags onto an attr filter value.  ROOT takes
 * precedence over SECURE; with neither flag set the filter is 0.
 */
static unsigned int
xfs_attr_filter(
	u32			ioc_flags)
{
	unsigned int		filter = 0;

	if (ioc_flags & XFS_IOC_ATTR_ROOT)
		filter = XFS_ATTR_ROOT;
	else if (ioc_flags & XFS_IOC_ATTR_SECURE)
		filter = XFS_ATTR_SECURE;

	return filter;
}
/*
 * Choose the attr update operation for an ioctl request.  A NULL value
 * always means removal; otherwise CREATE is checked before REPLACE, and an
 * upsert is used when neither flag is set.
 */
static inline enum xfs_attr_update
xfs_xattr_flags(
	u32			ioc_flags,
	void			*value)
{
	enum xfs_attr_update	op = XFS_ATTRUPDATE_UPSERT;

	if (!value)
		op = XFS_ATTRUPDATE_REMOVE;
	else if (ioc_flags & XFS_IOC_ATTR_CREATE)
		op = XFS_ATTRUPDATE_CREATE;
	else if (ioc_flags & XFS_IOC_ATTR_REPLACE)
		op = XFS_ATTRUPDATE_REPLACE;

	return op;
}
/*
 * List the extended attributes of @dp in one namespace into a user buffer.
 *
 * @dp:      inode whose attributes are listed
 * @ubuf:    user buffer that receives the formatted struct xfs_attrlist
 * @bufsize: size of @ubuf; must be at least sizeof(struct xfs_attrlist) and
 *           no more than XFS_XATTR_LIST_MAX
 * @flags:   at most one of XFS_IOC_ATTR_ROOT / XFS_IOC_ATTR_SECURE
 * @ucursor: user cursor, read before the listing and written back after it
 *
 * Returns 0 on success, -EINVAL for bad sizes, flags or cursor contents,
 * -EFAULT if user memory cannot be accessed, -ENOMEM if the kernel buffer
 * cannot be allocated, or the error from xfs_attr_list().
 */
int
xfs_ioc_attr_list(
struct xfs_inode *dp,
void __user *ubuf,
size_t bufsize,
int flags,
struct xfs_attrlist_cursor __user *ucursor)
{
struct xfs_attr_list_context context = { };
struct xfs_attrlist *alist;
void *buffer;
int error;
if (bufsize < sizeof(struct xfs_attrlist) ||
bufsize > XFS_XATTR_LIST_MAX)
return -EINVAL;
/*
* Reject flags, only allow namespaces.
*/
if (flags & ~(XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE))
return -EINVAL;
/* The two namespace flags are mutually exclusive. */
if (flags == (XFS_IOC_ATTR_ROOT | XFS_IOC_ATTR_SECURE))
return -EINVAL;
/*
* Validate the cursor.
*/
if (copy_from_user(&context.cursor, ucursor, sizeof(context.cursor)))
return -EFAULT;
if (context.cursor.pad1 || context.cursor.pad2)
return -EINVAL;
/* A cursor not marked initted must not carry any position. */
if (!context.cursor.initted &&
(context.cursor.hashval || context.cursor.blkno ||
context.cursor.offset))
return -EINVAL;
/* Zeroed bounce buffer; the full bufsize is copied out to the user below. */
buffer = kvzalloc(bufsize, GFP_KERNEL);
if (!buffer)
return -ENOMEM;
/*
* Initialize the output buffer.
*/
context.dp = dp;
context.resynch = 1;
context.attr_filter = xfs_attr_filter(flags);
context.buffer = buffer;
/* Entries are placed from the (4-byte aligned) end of the buffer downward. */
context.bufsize = round_down(bufsize, sizeof(uint32_t));
context.firstu = context.bufsize;
context.put_listent = xfs_ioc_attr_put_listent;
alist = context.buffer;
alist->al_count = 0;
alist->al_more = 0;
alist->al_offset[0] = context.bufsize;
error = xfs_attr_list(&context);
if (error)
goto out_free;
/* Hand back both the formatted list and the updated cursor. */
if (copy_to_user(ubuf, buffer, bufsize) ||
copy_to_user(ucursor, &context.cursor, sizeof(context.cursor)))
error = -EFAULT;
out_free:
kvfree(buffer);
return error;
}
/*
 * List the extended attributes of the file named by the handle in the user's
 * request.  The listing goes to the buffer described by the request and the
 * cursor embedded in the user structure (p->pos) is updated in place.
 *
 * Requires CAP_SYS_ADMIN.  Returns 0 or a negative errno.
 */
STATIC int
xfs_attrlist_by_handle(
	struct file				*parfilp,
	struct xfs_fsop_attrlist_handlereq __user *p)
{
	struct xfs_fsop_attrlist_handlereq	al_hreq;
	struct dentry				*dentry;
	struct xfs_inode			*ip;
	int					error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (copy_from_user(&al_hreq, p, sizeof(al_hreq)))
		return -EFAULT;

	dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	ip = XFS_I(d_inode(dentry));
	error = xfs_ioc_attr_list(ip, al_hreq.buffer, al_hreq.buflen,
			al_hreq.flags, &p->pos);

	dput(dentry);
	return error;
}
/*
 * Fetch one extended attribute value and copy it to the user buffer.
 *
 * On entry *len is the largest value length the caller will accept (capped
 * at XFS_XATTR_SIZE_MAX); after a successful lookup it is overwritten with
 * the actual value length.  Returns 0 or a negative errno.
 */
static int
xfs_attrmulti_attr_get(
	struct inode		*inode,
	unsigned char		*name,
	unsigned char __user	*ubuf,
	uint32_t		*len,
	uint32_t		flags)
{
	struct xfs_da_args	args = {
		.dp		= XFS_I(inode),
		.attr_filter	= xfs_attr_filter(flags),
		.name		= name,
		.namelen	= strlen(name),
		.valuelen	= *len,
	};
	int			error;

	if (*len > XFS_XATTR_SIZE_MAX)
		return -EINVAL;

	error = xfs_attr_get(&args);
	if (!error) {
		*len = args.valuelen;
		if (copy_to_user(ubuf, args.value, args.valuelen))
			error = -EFAULT;
	}

	/* args.value is released whether or not the lookup succeeded. */
	kvfree(args.value);
	return error;
}
/*
 * Set or remove one extended attribute.  A NULL @ubuf leaves the value
 * unset, which xfs_xattr_flags() turns into a removal; otherwise @len bytes
 * (at most XFS_XATTR_SIZE_MAX) are duplicated from userspace as the value.
 *
 * Immutable and append-only inodes are refused with -EPERM.  After a
 * successful change in the ROOT namespace, xfs_forget_acl() is called for
 * the attribute name.  Returns 0 or a negative errno.
 */
static int
xfs_attrmulti_attr_set(
	struct inode		*inode,
	unsigned char		*name,
	const unsigned char __user *ubuf,
	uint32_t		len,
	uint32_t		flags)
{
	struct xfs_da_args	args = {
		.dp		= XFS_I(inode),
		.attr_filter	= xfs_attr_filter(flags),
		.name		= name,
		.namelen	= strlen(name),
	};
	enum xfs_attr_update	op;
	int			error;

	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
		return -EPERM;

	if (ubuf != NULL) {
		if (len > XFS_XATTR_SIZE_MAX)
			return -EINVAL;

		args.value = memdup_user(ubuf, len);
		if (IS_ERR(args.value))
			return PTR_ERR(args.value);
		args.valuelen = len;
	}

	op = xfs_xattr_flags(flags, args.value);
	error = xfs_attr_change(&args, op);
	if (error == 0 && (flags & XFS_IOC_ATTR_ROOT))
		xfs_forget_acl(inode, name);

	kfree(args.value);
	return error;
}
/*
 * Carry out a single attr operation (get, set or remove) on @inode.
 *
 * @uname is the user pointer to the attribute name, duplicated here with a
 * MAXNAMELEN limit.  @value and @len describe the user value buffer; for a
 * remove they are overridden with NULL and 0.  Set and remove run with
 * write access to the mount of @parfilp held.
 *
 * Returns 0 or a negative errno; unknown opcodes and requests naming both
 * the ROOT and SECURE namespaces get -EINVAL.
 */
int
xfs_ioc_attrmulti_one(
	struct file		*parfilp,
	struct inode		*inode,
	uint32_t		opcode,
	void __user		*uname,
	void __user		*value,
	uint32_t		*len,
	uint32_t		flags)
{
	unsigned char		*name;
	int			error;

	if ((flags & XFS_IOC_ATTR_ROOT) && (flags & XFS_IOC_ATTR_SECURE))
		return -EINVAL;

	name = strndup_user(uname, MAXNAMELEN);
	if (IS_ERR(name))
		return PTR_ERR(name);

	if (opcode == ATTR_OP_GET) {
		error = xfs_attrmulti_attr_get(inode, name, value, len, flags);
	} else if (opcode == ATTR_OP_REMOVE || opcode == ATTR_OP_SET) {
		/* A remove is a set with no value. */
		if (opcode == ATTR_OP_REMOVE) {
			value = NULL;
			*len = 0;
		}

		error = mnt_want_write_file(parfilp);
		if (!error) {
			error = xfs_attrmulti_attr_set(inode, name, value,
					*len, flags);
			mnt_drop_write_file(parfilp);
		}
	} else {
		error = -EINVAL;
	}

	kfree(name);
	return error;
}
/*
 * Run a batch of attr operations against the file named by a handle.
 *
 * @arg points to a user xfs_fsop_attrmulti_handlereq_t holding the handle
 * request, an operation count and a user array of xfs_attr_multiop_t.  Each
 * operation's outcome is stored in its am_error field and the whole array
 * is copied back to userspace, so a failing operation does not by itself
 * make this function fail.
 *
 * Requires CAP_SYS_ADMIN.  Returns 0, -E2BIG for an empty or oversized
 * operation array, -EFAULT on a failed user copy, or the error from the
 * handle lookup or from duplicating the array.
 */
STATIC int
xfs_attrmulti_by_handle(
struct file *parfilp,
void __user *arg)
{
int error;
xfs_attr_multiop_t *ops;
xfs_fsop_attrmulti_handlereq_t am_hreq;
struct dentry *dentry;
unsigned int i, size;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
return -EFAULT;
/* overflow check */
if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
return -E2BIG;
dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
/* Refuse empty batches and arrays larger than 16 pages. */
error = -E2BIG;
size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
if (!size || size > 16 * PAGE_SIZE)
goto out_dput;
ops = memdup_user(am_hreq.ops, size);
if (IS_ERR(ops)) {
error = PTR_ERR(ops);
goto out_dput;
}
/* Per-operation results go into am_error; the return value stays 0. */
error = 0;
for (i = 0; i < am_hreq.opcount; i++) {
ops[i].am_error = xfs_ioc_attrmulti_one(parfilp,
d_inode(dentry), ops[i].am_opcode,
ops[i].am_attrname, ops[i].am_attrvalue,
&ops[i].am_length, ops[i].am_flags);
}
/* Copy the array back so the caller sees am_error and am_length updates. */
if (copy_to_user(am_hreq.ops, ops, size))
error = -EFAULT;
kfree(ops);
out_dput:
dput(dentry);
return error;
}
/* Return 0 on success or positive error */
int
xfs_fsbulkstat_one_fmt(
......@@ -2013,7 +1425,10 @@ xfs_file_ioctl(
case XFS_IOC_FSGETXATTRA:
return xfs_ioc_fsgetxattra(ip, arg);
case XFS_IOC_GETPARENTS:
return xfs_ioc_getparents(filp, arg);
case XFS_IOC_GETPARENTS_BY_HANDLE:
return xfs_ioc_getparents_by_handle(filp, arg);
case XFS_IOC_GETBMAP:
case XFS_IOC_GETBMAPA:
case XFS_IOC_GETBMAPX:
......
......@@ -14,34 +14,6 @@ int
xfs_ioc_swapext(
xfs_swapext_t *sxp);
extern int
xfs_find_handle(
unsigned int cmd,
xfs_fsop_handlereq_t *hreq);
extern int
xfs_open_by_handle(
struct file *parfilp,
xfs_fsop_handlereq_t *hreq);
extern int
xfs_readlink_by_handle(
struct file *parfilp,
xfs_fsop_handlereq_t *hreq);
int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode,
uint32_t opcode, void __user *uname, void __user *value,
uint32_t *len, uint32_t flags);
int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf,
size_t bufsize, int flags,
struct xfs_attrlist_cursor __user *ucursor);
extern struct dentry *
xfs_handle_to_dentry(
struct file *parfilp,
void __user *uhandle,
u32 hlen);
extern int
xfs_fileattr_get(
struct dentry *dentry,
......
......@@ -24,6 +24,7 @@
#include "xfs_ioctl32.h"
#include "xfs_trace.h"
#include "xfs_sb.h"
#include "xfs_handle.h"
#define _NATIVE_IOC(cmd, type) \
_IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
......
......@@ -157,6 +157,8 @@ xfs_create_need_xattr(
if (dir->i_sb->s_security)
return true;
#endif
if (xfs_has_parent(XFS_I(dir)->i_mount))
return true;
return false;
}
......@@ -201,7 +203,18 @@ xfs_generic_create(
xfs_create_need_xattr(dir, default_acl, acl),
&ip);
} else {
error = xfs_create_tmpfile(idmap, XFS_I(dir), mode, &ip);
bool init_xattrs = false;
/*
* If this temporary file will be linkable, set up the file
* with an attr fork to receive a parent pointer.
*/
if (!(tmpfile->f_flags & O_EXCL) &&
xfs_has_parent(XFS_I(dir)->i_mount))
init_xattrs = true;
error = xfs_create_tmpfile(idmap, XFS_I(dir), mode,
init_xattrs, &ip);
}
if (unlikely(error))
goto out_free_acl;
......
......@@ -44,6 +44,7 @@
#include "xfs_dahash_test.h"
#include "xfs_rtbitmap.h"
#include "xfs_exchmaps_item.h"
#include "xfs_parent.h"
#include "scrub/stats.h"
#include "scrub/rcbag_btree.h"
......@@ -1745,6 +1746,10 @@ xfs_fs_fill_super(
xfs_warn(mp,
"EXPERIMENTAL exchange-range feature enabled. Use at your own risk!");
if (xfs_has_parent(mp))
xfs_warn(mp,
"EXPERIMENTAL parent pointer feature enabled. Use at your own risk!");
error = xfs_mountfs(mp);
if (error)
goto out_filestream_unmount;
......@@ -2211,8 +2216,16 @@ xfs_init_caches(void)
if (!xfs_xmi_cache)
goto out_destroy_xmd_cache;
xfs_parent_args_cache = kmem_cache_create("xfs_parent_args",
sizeof(struct xfs_parent_args),
0, 0, NULL);
if (!xfs_parent_args_cache)
goto out_destroy_xmi_cache;
return 0;
out_destroy_xmi_cache:
kmem_cache_destroy(xfs_xmi_cache);
out_destroy_xmd_cache:
kmem_cache_destroy(xfs_xmd_cache);
out_destroy_iul_cache:
......@@ -2273,6 +2286,7 @@ xfs_destroy_caches(void)
* destroy caches.
*/
rcu_barrier();
kmem_cache_destroy(xfs_parent_args_cache);
kmem_cache_destroy(xfs_xmd_cache);
kmem_cache_destroy(xfs_xmi_cache);
kmem_cache_destroy(xfs_iunlink_cache);
......
......@@ -25,6 +25,8 @@
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_symlink_remote.h"
#include "xfs_parent.h"
#include "xfs_defer.h"
int
xfs_readlink(
......@@ -100,6 +102,7 @@ xfs_symlink(
struct xfs_dquot *pdqp = NULL;
uint resblks;
xfs_ino_t ino;
struct xfs_parent_args *ppargs;
*ipp = NULL;
......@@ -130,18 +133,24 @@ xfs_symlink(
/*
* The symlink will fit into the inode data fork?
* There can't be any attributes so we get the whole variable part.
* If there are no parent pointers, then there won't be any attributes.
* So we get the whole variable part, and do not need to reserve extra
* blocks. Otherwise, we need to reserve the blocks.
*/
if (pathlen <= XFS_LITINO(mp))
if (pathlen <= XFS_LITINO(mp) && !xfs_has_parent(mp))
fs_blocks = 0;
else
fs_blocks = xfs_symlink_blocks(mp, pathlen);
resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
resblks = xfs_symlink_space_res(mp, link_name->len, fs_blocks);
error = xfs_parent_start(mp, &ppargs);
if (error)
goto out_release_dquots;
error = xfs_trans_alloc_icreate(mp, &M_RES(mp)->tr_symlink, udqp, gdqp,
pdqp, resblks, &tp);
if (error)
goto out_release_dquots;
goto out_parent;
xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
......@@ -161,7 +170,7 @@ xfs_symlink(
if (!error)
error = xfs_init_new_inode(idmap, tp, dp, ino,
S_IFLNK | (mode & ~S_IFMT), 1, 0, prid,
false, &ip);
xfs_has_parent(mp), &ip);
if (error)
goto out_trans_cancel;
......@@ -195,6 +204,14 @@ xfs_symlink(
goto out_trans_cancel;
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
/* Add parent pointer for the new symlink. */
if (ppargs) {
error = xfs_parent_addname(tp, ppargs, dp, link_name, ip);
if (error)
goto out_trans_cancel;
}
xfs_dir_update_hook(dp, ip, 1, link_name);
/*
......@@ -216,6 +233,7 @@ xfs_symlink(
*ipp = ip;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
xfs_parent_finish(mp, ppargs);
return 0;
out_trans_cancel:
......@@ -231,6 +249,8 @@ xfs_symlink(
xfs_finish_inode_setup(ip);
xfs_irele(ip);
}
out_parent:
xfs_parent_finish(mp, ppargs);
out_release_dquots:
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
......
......@@ -41,6 +41,7 @@
#include "xfs_bmap.h"
#include "xfs_exchmaps.h"
#include "xfs_exchrange.h"
#include "xfs_parent.h"
/*
* We include this last to have the helpers above available for the trace
......
......@@ -87,11 +87,15 @@ struct xfs_bmap_intent;
struct xfs_exchmaps_intent;
struct xfs_exchmaps_req;
struct xfs_exchrange;
struct xfs_getparents;
struct xfs_parent_irec;
struct xfs_attrlist_cursor_kern;
#define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \
{ XFS_ATTR_SECURE, "SECURE" }, \
{ XFS_ATTR_INCOMPLETE, "INCOMPLETE" }
{ XFS_ATTR_INCOMPLETE, "INCOMPLETE" }, \
{ XFS_ATTR_PARENT, "PARENT" }
DECLARE_EVENT_CLASS(xfs_attr_list_class,
TP_PROTO(struct xfs_attr_list_context *ctx),
......@@ -5095,6 +5099,95 @@ TRACE_EVENT(xfs_exchmaps_delta_nextents,
__entry->d_nexts1, __entry->d_nexts2)
);
DECLARE_EVENT_CLASS(xfs_getparents_rec_class,
TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi,
const struct xfs_attr_list_context *context,
const struct xfs_getparents_rec *pptr),
TP_ARGS(ip, ppi, context, pptr),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(unsigned int, firstu)
__field(unsigned short, reclen)
__field(unsigned int, bufsize)
__field(xfs_ino_t, parent_ino)
__field(unsigned int, parent_gen)
__string(name, pptr->gpr_name)
),
TP_fast_assign(
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->firstu = context->firstu;
__entry->reclen = pptr->gpr_reclen;
__entry->bufsize = ppi->gp_bufsize;
__entry->parent_ino = pptr->gpr_parent.ha_fid.fid_ino;
__entry->parent_gen = pptr->gpr_parent.ha_fid.fid_gen;
__assign_str(name, pptr->gpr_name);
),
TP_printk("dev %d:%d ino 0x%llx firstu %u reclen %u bufsize %u parent_ino 0x%llx parent_gen 0x%x name '%s'",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->firstu,
__entry->reclen,
__entry->bufsize,
__entry->parent_ino,
__entry->parent_gen,
__get_str(name))
)
/*
 * Define a named event in xfs_getparents_rec_class.  The prototype and
 * argument list repeat those of the class: (ip, ppi, context, pptr).
 */
#define DEFINE_XFS_GETPARENTS_REC_EVENT(name) \
DEFINE_EVENT(xfs_getparents_rec_class, name, \
TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, \
const struct xfs_attr_list_context *context, \
const struct xfs_getparents_rec *pptr), \
TP_ARGS(ip, ppi, context, pptr))
/* Events that share the per-record layout above. */
DEFINE_XFS_GETPARENTS_REC_EVENT(xfs_getparents_put_listent);
DEFINE_XFS_GETPARENTS_REC_EVENT(xfs_getparents_expand_lastrec);
/*
 * Event class for tracing a getparents call as a whole.
 *
 * Each event records the device and inode number of @ip, the iflags, oflags
 * and buffer size taken from @ppi, and the hashval, blkno, offset and
 * initted fields of the attr list cursor @cur.
 */
DECLARE_EVENT_CLASS(xfs_getparents_class,
TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi,
const struct xfs_attrlist_cursor_kern *cur),
TP_ARGS(ip, ppi, cur),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(unsigned short, iflags)
__field(unsigned short, oflags)
__field(unsigned int, bufsize)
__field(unsigned int, hashval)
__field(unsigned int, blkno)
__field(unsigned int, offset)
__field(int, initted)
),
TP_fast_assign(
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->iflags = ppi->gp_iflags;
__entry->oflags = ppi->gp_oflags;
__entry->bufsize = ppi->gp_bufsize;
__entry->hashval = cur->hashval;
__entry->blkno = cur->blkno;
__entry->offset = cur->offset;
__entry->initted = cur->initted;
),
/*
 * Print order differs from storage order: initted ("cur_init?") is printed
 * ahead of hashval/blkno/offset although it is the last field in the entry.
 */
TP_printk("dev %d:%d ino 0x%llx iflags 0x%x oflags 0x%x bufsize %u cur_init? %d hashval 0x%x blkno %u offset %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->iflags,
__entry->oflags,
__entry->bufsize,
__entry->initted,
__entry->hashval,
__entry->blkno,
__entry->offset)
)
/*
 * Define a named event in xfs_getparents_class.  The prototype and argument
 * list repeat those of the class: (ip, ppi, cur).
 */
#define DEFINE_XFS_GETPARENTS_EVENT(name) \
DEFINE_EVENT(xfs_getparents_class, name, \
TP_PROTO(struct xfs_inode *ip, const struct xfs_getparents *ppi, \
const struct xfs_attrlist_cursor_kern *cur), \
TP_ARGS(ip, ppi, cur))
/* Events that share the per-call layout above. */
DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_begin);
DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_end);
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
......
......@@ -222,6 +222,7 @@ xfs_xattr_put_listent(
int flags,
unsigned char *name,
int namelen,
void *value,
int valuelen)
{
char *prefix;
......@@ -229,6 +230,10 @@ xfs_xattr_put_listent(
ASSERT(context->count >= 0);
/* Don't expose private xattr namespaces. */
if (flags & XFS_ATTR_PRIVATE_NSP_MASK)
return;
if (flags & XFS_ATTR_ROOT) {
#ifdef CONFIG_XFS_POSIX_ACL
if (namelen == SGI_ACL_FILE_SIZE &&
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment