Commit 1da824b0 authored by Chandan Babu R

Merge tag 'repair-pptrs-6.10_2024-04-23' of...

Merge tag 'repair-pptrs-6.10_2024-04-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.10-mergeC

xfs: online repair for parent pointers

This series implements online repair for directory parent pointer
metadata.  The checking half is fairly straightforward -- for each
outgoing directory link (forward or backwards), grab the inode at the
other end, and confirm that there's a corresponding link.  If we can't
grab an inode or lock it, we'll save that link for a slower loop that
cycles all the locks, confirms the continued existence of the link, and
rechecks the link if it's actually still there.

Repairs are a bit more involved -- for directories, we walk the entire
filesystem to rebuild the dirents from parent pointer information.
Parent pointer repairs do the same walk but rebuild the pptrs from the
dirent information, with the added twist that they duplicate all the
xattrs so that they can use the atomic extent swapping code to commit
the repairs atomically.

This introduces an added twist to the xattr repair code -- we use dirent
hooks to detect a colliding update to the pptr data while we're not
holding the ILOCKs; if one is detected, we restart the xattr salvaging
process but this time hold all the ILOCKs until the end of the scan.

For offline repair, the phase6 directory connectivity scan generates an
index of all the expected parent pointers in the filesystem.  Then it
walks each file and compares the parent pointers attached to that file
against the index generated, and resyncs the results as necessary.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'repair-pptrs-6.10_2024-04-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: inode repair should ensure there's an attr fork to store parent pointers
  xfs: repair link count of nondirectories after rebuilding parent pointers
  xfs: adapt the orphanage code to handle parent pointers
  xfs: actually rebuild the parent pointer xattrs
  xfs: add a per-leaf block callback to xchk_xattr_walk
  xfs: split xfs_bmap_add_attrfork into two pieces
  xfs: remove pointless unlocked assertion
  xfs: implement live updates for parent pointer repairs
  xfs: repair directory parent pointers by scanning for dirents
  xfs: replay unlocked parent pointer updates that accrue during xattr repair
  xfs: implement live updates for directory repairs
  xfs: repair directories by scanning directory parent pointers
  xfs: add raw parent pointer apis to support repair
  xfs: salvage parent pointers when rebuilding xattr structures
  xfs: make the reserved block permission flag explicit in xfs_attr_set
  xfs: remove some boilerplate from xfs_attr_set
parents 0d2dd382 327ed702
......@@ -948,40 +948,67 @@ xfs_attr_lookup(
return error;
}
int
xfs_attr_add_fork(
struct xfs_inode *ip, /* incore inode pointer */
int size, /* space new attribute needs */
int rsvd) /* xact may use reserved blks */
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp; /* transaction pointer */
unsigned int blks; /* space reservation */
int error; /* error return value */
ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
blks = XFS_ADDAFORK_SPACE_RES(mp);
error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
rsvd, &tp);
if (error)
return error;
if (xfs_inode_has_attr_fork(ip))
goto trans_cancel;
error = xfs_bmap_add_attrfork(tp, ip, size, rsvd);
if (error)
goto trans_cancel;
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
trans_cancel:
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
/*
* Make a change to the xattr structure.
*
* The caller must have initialized @args, attached dquots, and must not hold
* any ILOCKs. Reserved data blocks may be used if @rsvd is set.
*
* Returns -EEXIST for XFS_ATTRUPDATE_CREATE if the name already exists.
* Returns -ENOATTR for XFS_ATTRUPDATE_REMOVE if the name does not exist.
* Returns 0 on success, or a negative errno if something else went wrong.
*/
int
xfs_attr_set(
struct xfs_da_args *args,
enum xfs_attr_update op)
enum xfs_attr_update op,
bool rsvd)
{
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
struct xfs_trans_res tres;
bool rsvd = (args->attr_filter & XFS_ATTR_ROOT);
int error, local;
int rmt_blks = 0;
unsigned int total;
if (xfs_is_shutdown(dp->i_mount))
return -EIO;
error = xfs_qm_dqattach(dp);
if (error)
return error;
if (!args->owner)
args->owner = args->dp->i_ino;
args->geo = mp->m_attr_geo;
args->whichfork = XFS_ATTR_FORK;
xfs_attr_sethash(args);
/*
* We have no control over the attribute names that userspace passes us
* to remove, so we have to allow the name lookup prior to attribute
* removal to fail as well. Preserve the logged flag, since we need
* to pass that through to the logging code.
*/
args->op_flags = XFS_DA_OP_OKNOENT |
(args->op_flags & XFS_DA_OP_LOGGED);
ASSERT(!args->trans);
switch (op) {
case XFS_ATTRUPDATE_UPSERT:
......@@ -999,7 +1026,7 @@ xfs_attr_set(
xfs_attr_sf_entsize_byname(args->namelen,
args->valuelen);
error = xfs_bmap_add_attrfork(dp, sf_size, rsvd);
error = xfs_attr_add_fork(dp, sf_size, rsvd);
if (error)
return error;
}
......@@ -1076,6 +1103,7 @@ xfs_attr_set(
error = xfs_trans_commit(args->trans);
out_unlock:
xfs_iunlock(dp, XFS_ILOCK_EXCL);
args->trans = NULL;
return error;
out_trans_cancel:
......
......@@ -558,7 +558,7 @@ enum xfs_attr_update {
XFS_ATTRUPDATE_REPLACE, /* set value, fail if attr does not exist */
};
int xfs_attr_set(struct xfs_da_args *args, enum xfs_attr_update op);
int xfs_attr_set(struct xfs_da_args *args, enum xfs_attr_update op, bool rsvd);
int xfs_attr_set_iter(struct xfs_attr_intent *attr);
int xfs_attr_remove_iter(struct xfs_attr_intent *attr);
bool xfs_attr_check_namespace(unsigned int attr_flags);
......@@ -648,5 +648,6 @@ int __init xfs_attr_intent_init_cache(void);
void xfs_attr_intent_destroy_cache(void);
int xfs_attr_sf_totsize(struct xfs_inode *dp);
int xfs_attr_add_fork(struct xfs_inode *ip, int size, int rsvd);
#endif /* __XFS_ATTR_H__ */
......@@ -1025,40 +1025,29 @@ xfs_bmap_set_attrforkoff(
}
/*
* Convert inode from non-attributed to attributed.
* Must not be in a transaction, ip must not be locked.
* Convert inode from non-attributed to attributed. Caller must hold the
* ILOCK_EXCL and the file cannot have an attr fork.
*/
int /* error code */
xfs_bmap_add_attrfork(
xfs_inode_t *ip, /* incore inode pointer */
struct xfs_trans *tp,
struct xfs_inode *ip, /* incore inode pointer */
int size, /* space new attribute needs */
int rsvd) /* xact may use reserved blks */
{
xfs_mount_t *mp; /* mount structure */
xfs_trans_t *tp; /* transaction pointer */
int blks; /* space reservation */
struct xfs_mount *mp = tp->t_mountp;
int version = 1; /* superblock attr version */
int logflags; /* logging flags */
int error; /* error return value */
ASSERT(xfs_inode_has_attr_fork(ip) == 0);
mp = ip->i_mount;
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
blks = XFS_ADDAFORK_SPACE_RES(mp);
error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0,
rsvd, &tp);
if (error)
return error;
if (xfs_inode_has_attr_fork(ip))
goto trans_cancel;
ASSERT(!xfs_inode_has_attr_fork(ip));
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_bmap_set_attrforkoff(ip, size, &version);
if (error)
goto trans_cancel;
return error;
xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0);
logflags = 0;
......@@ -1079,7 +1068,7 @@ xfs_bmap_add_attrfork(
if (logflags)
xfs_trans_log_inode(tp, ip, logflags);
if (error)
goto trans_cancel;
return error;
if (!xfs_has_attr(mp) ||
(!xfs_has_attr2(mp) && version == 2)) {
bool log_sb = false;
......@@ -1098,14 +1087,7 @@ xfs_bmap_add_attrfork(
xfs_log_sb(tp);
}
error = xfs_trans_commit(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
trans_cancel:
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
return 0;
}
/*
......
......@@ -176,7 +176,8 @@ int xfs_bmap_longest_free_extent(struct xfs_perag *pag,
void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len);
unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
int xfs_bmap_add_attrfork(struct xfs_trans *tp, struct xfs_inode *ip,
int size, int rsvd);
void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp,
struct xfs_inode *ip, int whichfork);
int xfs_bmap_local_to_extents(struct xfs_trans *tp, struct xfs_inode *ip,
......
......@@ -434,7 +434,7 @@ int
xfs_dir_removename(
struct xfs_trans *tp,
struct xfs_inode *dp,
struct xfs_name *name,
const struct xfs_name *name,
xfs_ino_t ino,
xfs_extlen_t total) /* bmap's total block count */
{
......
......@@ -58,7 +58,7 @@ extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
const struct xfs_name *name, xfs_ino_t *inum,
struct xfs_name *ci_name);
extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t ino,
const struct xfs_name *name, xfs_ino_t ino,
xfs_extlen_t tot);
extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
const struct xfs_name *name, xfs_ino_t inum,
......
......@@ -313,3 +313,67 @@ xfs_parent_lookup(
xfs_parent_da_args_init(scratch, tp, pptr, ip, ip->i_ino, parent_name);
return xfs_attr_get_ilocked(scratch);
}
/*
 * Validate a parent pointer before repair code acts on it.  Returns true only
 * when both the name check and the value check pass; the value check is not
 * evaluated if the name check fails.
 */
static inline bool
xfs_parent_sanity_check(
	struct xfs_mount		*mp,
	const struct xfs_name		*parent_name,
	const struct xfs_parent_rec	*pptr)
{
	return xfs_parent_namecheck(XFS_ATTR_PARENT, parent_name->name,
				parent_name->len) &&
	       xfs_parent_valuecheck(mp, pptr, sizeof(*pptr));
}
/*
 * Immediately attach the parent pointer (@parent_name -> @pptr) to @ip.
 *
 * For specialized repair functions only.  The caller must not be in
 * transaction context and must not hold the ILOCK.  @scratch is zeroed and
 * fully initialized here, so the caller need not prepare it.
 *
 * Returns -EFSCORRUPTED if the name or record fails the sanity check;
 * otherwise returns whatever xfs_attr_set returns for a CREATE operation.
 */
int
xfs_parent_set(
	struct xfs_inode	*ip,
	xfs_ino_t		owner,
	const struct xfs_name	*parent_name,
	struct xfs_parent_rec	*pptr,
	struct xfs_da_args	*scratch)
{
	bool			sane;

	sane = xfs_parent_sanity_check(ip->i_mount, parent_name, pptr);
	if (!sane) {
		ASSERT(0);
		return -EFSCORRUPTED;
	}

	/* Start from a clean scratchpad; no transaction is passed in. */
	memset(scratch, 0, sizeof(*scratch));
	xfs_parent_da_args_init(scratch, NULL, pptr, ip, owner, parent_name);

	return xfs_attr_set(scratch, XFS_ATTRUPDATE_CREATE, false);
}
/*
 * Immediately remove the parent pointer (@parent_name -> @pptr) from @ip.
 *
 * For specialized repair functions only.  The caller must not be in
 * transaction context and must not hold the ILOCK.  @scratch is zeroed and
 * fully initialized here, so the caller need not prepare it.
 *
 * Returns -EFSCORRUPTED if the name or record fails the sanity check;
 * otherwise returns whatever xfs_attr_set returns for a REMOVE operation.
 */
int
xfs_parent_unset(
	struct xfs_inode	*ip,
	xfs_ino_t		owner,
	const struct xfs_name	*parent_name,
	struct xfs_parent_rec	*pptr,
	struct xfs_da_args	*scratch)
{
	bool			sane;

	sane = xfs_parent_sanity_check(ip->i_mount, parent_name, pptr);
	if (!sane) {
		ASSERT(0);
		return -EFSCORRUPTED;
	}

	/* Start from a clean scratchpad; no transaction is passed in. */
	memset(scratch, 0, sizeof(*scratch));
	xfs_parent_da_args_init(scratch, NULL, pptr, ip, owner, parent_name);

	return xfs_attr_set(scratch, XFS_ATTRUPDATE_REMOVE, false);
}
......@@ -100,5 +100,11 @@ int xfs_parent_from_attr(struct xfs_mount *mp, unsigned int attr_flags,
int xfs_parent_lookup(struct xfs_trans *tp, struct xfs_inode *ip,
const struct xfs_name *name, struct xfs_parent_rec *pptr,
struct xfs_da_args *scratch);
int xfs_parent_set(struct xfs_inode *ip, xfs_ino_t owner,
const struct xfs_name *name, struct xfs_parent_rec *pptr,
struct xfs_da_args *scratch);
int xfs_parent_unset(struct xfs_inode *ip, xfs_ino_t owner,
const struct xfs_name *name, struct xfs_parent_rec *pptr,
struct xfs_da_args *scratch);
#endif /* __XFS_PARENT_H__ */
......@@ -675,7 +675,7 @@ xchk_xattr(
* iteration, which doesn't really follow the usual buffer
* locking order.
*/
error = xchk_xattr_walk(sc, sc->ip, xchk_xattr_actor, NULL);
error = xchk_xattr_walk(sc, sc->ip, xchk_xattr_actor, NULL, NULL);
if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
return error;
......
......@@ -28,6 +28,7 @@
#include "xfs_exchmaps.h"
#include "xfs_exchrange.h"
#include "xfs_acl.h"
#include "xfs_parent.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
......@@ -95,6 +96,52 @@ struct xrep_xattr {
/* Number of attributes that we are salvaging. */
unsigned long long attrs_found;
/* Can we flush stashed attrs to the tempfile? */
bool can_flush;
/* Did the live update fail, and hence the repair is now out of date? */
bool live_update_aborted;
/* Lock protecting parent pointer updates */
struct mutex lock;
/* Fixed-size array of xrep_xattr_pptr structures. */
struct xfarray *pptr_recs;
/* Blobs containing parent pointer names. */
struct xfblob *pptr_names;
/* Hook to capture parent pointer updates. */
struct xfs_dir_hook dhook;
/* Scratch buffer for capturing parent pointers. */
struct xfs_da_args pptr_args;
/* Name buffer */
struct xfs_name xname;
char namebuf[MAXNAMELEN];
};
/*
* Values for xrep_xattr_pptr.action. xrep_xattr_replay_pptr_update switches
* on these to decide which operation to apply to the tempfile.
*/
/* Create a parent pointer in the tempfile. */
#define XREP_XATTR_PPTR_ADD (1)
/* Remove a parent pointer from the tempfile. */
#define XREP_XATTR_PPTR_REMOVE (2)
/*
* A stashed parent pointer update. Records of this type are appended to the
* pptr_recs xfarray; the name itself is stored separately in the pptr_names
* xfblob and is found again through @name_cookie.
*/
struct xrep_xattr_pptr {
/* Cookie for retrieval of the pptr name. */
xfblob_cookie name_cookie;
/* Parent pointer record. */
struct xfs_parent_rec pptr_rec;
/* Length of the pptr name. */
uint8_t namelen;
/* XREP_XATTR_PPTR_{ADD,REMOVE} */
uint8_t action;
};
/* Set up to recreate the extended attributes. */
......@@ -102,6 +149,9 @@ int
xrep_setup_xattr(
struct xfs_scrub *sc)
{
if (xfs_has_parent(sc->mp))
xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
return xrep_tempfile_create(sc, S_IFREG);
}
......@@ -127,6 +177,9 @@ xrep_xattr_want_salvage(
return false;
if (valuelen > XATTR_SIZE_MAX || valuelen < 0)
return false;
if (attr_flags & XFS_ATTR_PARENT)
return xfs_parent_valuecheck(rx->sc->mp, value, valuelen);
return true;
}
......@@ -154,14 +207,21 @@ xrep_xattr_salvage_key(
* Truncate the name to the first character that would trip namecheck.
* If we no longer have a name after that, ignore this attribute.
*/
while (i < namelen && name[i] != 0)
i++;
if (i == 0)
return 0;
key.namelen = i;
if (flags & XFS_ATTR_PARENT) {
key.namelen = namelen;
trace_xrep_xattr_salvage_pptr(rx->sc->ip, flags, name,
key.namelen, value, valuelen);
} else {
while (i < namelen && name[i] != 0)
i++;
if (i == 0)
return 0;
key.namelen = i;
trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name, key.namelen,
valuelen);
trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name,
key.namelen, valuelen);
}
error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name,
key.namelen);
......@@ -558,6 +618,9 @@ xrep_xattr_insert_rec(
.namelen = key->namelen,
.valuelen = key->valuelen,
.owner = rx->sc->ip->i_ino,
.geo = rx->sc->mp->m_attr_geo,
.whichfork = XFS_ATTR_FORK,
.op_flags = XFS_DA_OP_OKNOENT,
};
struct xchk_xattr_buf *ab = rx->sc->buf;
int error;
......@@ -595,14 +658,22 @@ xrep_xattr_insert_rec(
ab->name[key->namelen] = 0;
trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags, ab->name,
key->namelen, key->valuelen);
if (key->flags & XFS_ATTR_PARENT) {
trace_xrep_xattr_insert_pptr(rx->sc->tempip, key->flags,
ab->name, key->namelen, ab->value,
key->valuelen);
args.op_flags |= XFS_DA_OP_LOGGED;
} else {
trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags,
ab->name, key->namelen, key->valuelen);
}
/*
* xfs_attr_set creates and commits its own transaction. If the attr
* already exists, we'll just drop it during the rebuild.
*/
error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE);
xfs_attr_sethash(&args);
error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE, false);
if (error == -EEXIST)
error = 0;
......@@ -689,11 +760,122 @@ xrep_xattr_want_flush_stashed(
{
unsigned long long bytes;
if (!rx->can_flush)
return false;
bytes = xfarray_bytes(rx->xattr_records) +
xfblob_bytes(rx->xattr_blobs);
return bytes > XREP_XATTR_MAX_STASH_BYTES;
}
/*
 * Report whether any parent pointer updates were stashed in rx->pptr_recs
 * while salvaged attrs were being flushed.  Always false on filesystems
 * without parent pointers.  Only valid while flushing is still permitted,
 * and sc->ip must be ILOCKed exclusively.
 */
static inline bool
xrep_xattr_saw_pptr_conflict(
	struct xrep_xattr	*rx)
{
	bool			conflict = false;

	ASSERT(rx->can_flush);

	if (xfs_has_parent(rx->sc->mp)) {
		xfs_assert_ilocked(rx->sc->ip, XFS_ILOCK_EXCL);

		/* The stash is updated under rx->lock, so sample it there. */
		mutex_lock(&rx->lock);
		conflict = xfarray_bytes(rx->pptr_recs) > 0;
		mutex_unlock(&rx->lock);
	}

	return conflict;
}
/*
* Reset the entire repair state back to initial conditions, now that we've
* detected a parent pointer update to the attr structure while we were
* flushing salvaged attrs. See the locking notes in dir_repair.c for more
* information on why this is all necessary.
*
* On success this returns 0 with the tempfile's attr fork reset to an empty
* shortform structure, the stashed parent pointer updates discarded,
* rx->can_flush cleared and rx->attrs_found zeroed. Returns -EIO if the
* tempfile's data fork is in btree format, or the negative errno from reaping
* the attr fork or rolling the transaction.
*/
STATIC int
xrep_xattr_full_reset(
struct xrep_xattr *rx)
{
struct xfs_scrub *sc = rx->sc;
struct xfs_attr_sf_hdr *hdr;
struct xfs_ifork *ifp = &sc->tempip->i_af;
int error;
trace_xrep_xattr_full_reset(sc->ip, sc->tempip);
/* The temporary file's data fork had better not be in btree format. */
if (sc->tempip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
ASSERT(0);
return -EIO;
}
/*
* We begin in transaction context with sc->ip ILOCKed but not joined
* to the transaction. To reset to the initial state, we must hold
* sc->ip's ILOCK to prevent rename from updating parent pointer
* information and the tempfile's ILOCK to clear its contents.
*/
xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
xrep_tempfile_ilock_both(sc);
xfs_trans_ijoin(sc->tp, sc->ip, 0);
xfs_trans_ijoin(sc->tp, sc->tempip, 0);
/*
* Free all the blocks of the attr fork of the temp file, and reset
* it back to local format.
*
* NOTE(review): the error returns below leave the function without
* calling xrep_tempfile_iunlock; presumably the caller's teardown
* releases the locks -- confirm.
*/
if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
if (error)
return error;
ASSERT(ifp->if_bytes == 0);
ifp->if_format = XFS_DINODE_FMT_LOCAL;
/* Size the incore attr fork data to hold just a shortform header. */
xfs_idata_realloc(sc->tempip, sizeof(*hdr), XFS_ATTR_FORK);
}
/* Reinitialize the attr fork to an empty shortform structure. */
hdr = ifp->if_data;
memset(hdr, 0, sizeof(*hdr));
/* totsize covers only the header itself, i.e. there are no entries. */
hdr->totsize = cpu_to_be16(sizeof(*hdr));
xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE | XFS_ILOG_ADATA);
/*
* Roll this transaction to commit our reset ondisk. The tempfile
* should no longer be joined to the transaction, so we drop its ILOCK.
* This should leave us in transaction context with sc->ip ILOCKed but
* not joined to the transaction.
*/
error = xrep_roll_trans(sc);
if (error)
return error;
xrep_tempfile_iunlock(sc);
/*
* Erase any accumulated parent pointer updates now that we've erased
* the tempfile's attr fork. We're resetting the entire repair state
* back to where we were initially, except now we won't flush salvaged
* xattrs until the very end.
*/
mutex_lock(&rx->lock);
xfarray_truncate(rx->pptr_recs);
xfblob_truncate(rx->pptr_names);
mutex_unlock(&rx->lock);
rx->can_flush = false;
rx->attrs_found = 0;
/* The salvage stash is expected to be empty already at this point. */
ASSERT(xfarray_bytes(rx->xattr_records) == 0);
ASSERT(xfblob_bytes(rx->xattr_blobs) == 0);
return 0;
}
/* Extract as many attribute keys and values as we can. */
STATIC int
xrep_xattr_recover(
......@@ -708,6 +890,7 @@ xrep_xattr_recover(
int nmap;
int error;
restart:
/*
* Iterate each xattr leaf block in the attr fork to scan them for any
* attributes that we might salvage.
......@@ -746,6 +929,14 @@ xrep_xattr_recover(
error = xrep_xattr_flush_stashed(rx);
if (error)
return error;
if (xrep_xattr_saw_pptr_conflict(rx)) {
error = xrep_xattr_full_reset(rx);
if (error)
return error;
goto restart;
}
}
}
}
......@@ -839,7 +1030,7 @@ xrep_xattr_reset_fork(
* fork. The caller must ILOCK the tempfile and join it to the transaction.
* This function returns with the inode joined to a clean scrub transaction.
*/
STATIC int
int
xrep_xattr_reset_tempfile_fork(
struct xfs_scrub *sc)
{
......@@ -905,6 +1096,180 @@ xrep_xattr_salvage_attributes(
return xrep_xattr_flush_stashed(rx);
}
/*
* Add this stashed incore parent pointer to the temporary file. The caller
* must hold the tempdir's IOLOCK, must not hold any ILOCKs, and must not be in
* transaction context.
*/
STATIC int
xrep_xattr_replay_pptr_update(
struct xrep_xattr *rx,
const struct xfs_name *xname,
struct xrep_xattr_pptr *pptr)
{
struct xfs_scrub *sc = rx->sc;
int error;
switch (pptr->action) {
case XREP_XATTR_PPTR_ADD:
/* Create parent pointer. */
trace_xrep_xattr_replay_parentadd(sc->tempip, xname,
&pptr->pptr_rec);
error = xfs_parent_set(sc->tempip, sc->ip->i_ino, xname,
&pptr->pptr_rec, &rx->pptr_args);
ASSERT(error != -EEXIST);
return error;
case XREP_XATTR_PPTR_REMOVE:
/* Remove parent pointer. */
trace_xrep_xattr_replay_parentremove(sc->tempip, xname,
&pptr->pptr_rec);
error = xfs_parent_unset(sc->tempip, sc->ip->i_ino, xname,
&pptr->pptr_rec, &rx->pptr_args);
ASSERT(error != -ENOATTR);
return error;
}
ASSERT(0);
return -EIO;
}
/*
* Flush stashed parent pointer updates that have been recorded by the scanner.
* This is done to reduce the memory requirements of the xattr rebuild, since
* files can have a lot of hardlinks and the fs can be busy.
*
* Caller must not hold transactions or ILOCKs. Caller must hold the tempfile
* IOLOCK.
*
* Returns 0 once every stashed update has been replayed and both stashes have
* been truncated, or the first negative errno encountered. rx->lock is not
* held on return in either case.
*/
STATIC int
xrep_xattr_replay_pptr_updates(
struct xrep_xattr *rx)
{
xfarray_idx_t array_cur;
int error;
/* The live update hook appends to these stashes under rx->lock. */
mutex_lock(&rx->lock);
foreach_xfarray_idx(rx->pptr_recs, array_cur) {
struct xrep_xattr_pptr pptr;
error = xfarray_load(rx->pptr_recs, array_cur, &pptr);
if (error)
goto out_unlock;
/* The name is loaded into rx->xname, which is backed by rx->namebuf. */
error = xfblob_loadname(rx->pptr_names, pptr.name_cookie,
&rx->xname, pptr.namelen);
if (error)
goto out_unlock;
/*
* Drop the lock while replaying this record and retake it before the
* next iteration.
* NOTE(review): presumably this keeps the hook from blocking on
* rx->lock while the xattr update runs -- confirm.
*/
mutex_unlock(&rx->lock);
error = xrep_xattr_replay_pptr_update(rx, &rx->xname, &pptr);
if (error)
return error;
mutex_lock(&rx->lock);
}
/* Empty out both arrays now that we've added the entries. */
xfarray_truncate(rx->pptr_recs);
xfblob_truncate(rx->pptr_names);
mutex_unlock(&rx->lock);
return 0;
out_unlock:
mutex_unlock(&rx->lock);
return error;
}
/*
* Remember that we want to create a parent pointer in the tempfile. These
* stashed actions will be replayed later.
*/
STATIC int
xrep_xattr_stash_parentadd(
struct xrep_xattr *rx,
const struct xfs_name *name,
const struct xfs_inode *dp)
{
struct xrep_xattr_pptr pptr = {
.action = XREP_XATTR_PPTR_ADD,
.namelen = name->len,
};
int error;
trace_xrep_xattr_stash_parentadd(rx->sc->tempip, dp, name);
xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
if (error)
return error;
return xfarray_append(rx->pptr_recs, &pptr);
}
/*
* Remember that we want to remove a parent pointer from the tempfile. These
* stashed actions will be replayed later.
*/
STATIC int
xrep_xattr_stash_parentremove(
struct xrep_xattr *rx,
const struct xfs_name *name,
const struct xfs_inode *dp)
{
struct xrep_xattr_pptr pptr = {
.action = XREP_XATTR_PPTR_REMOVE,
.namelen = name->len,
};
int error;
trace_xrep_xattr_stash_parentremove(rx->sc->tempip, dp, name);
xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
if (error)
return error;
return xfarray_append(rx->pptr_recs, &pptr);
}
/*
 * Directory update hook: capture dirent updates made by other threads so that
 * the matching parent pointer changes can be replayed into the tempfile
 * before the attr forks are exchanged.
 *
 * Always returns NOTIFY_DONE.  A failure to stash an update is recorded in
 * rx->live_update_aborted rather than reported to the notifier chain.
 */
STATIC int
xrep_xattr_live_dirent_update(
	struct notifier_block		*nb,
	unsigned long			action,
	void				*data)
{
	struct xfs_dir_update_params	*p = data;
	struct xrep_xattr		*rx = container_of(nb,
						struct xrep_xattr,
						dhook.dirent_hook.nb);
	struct xfs_scrub		*sc = rx->sc;
	int				error;

	/* Ignore updates whose child is not the file being repaired. */
	if (p->ip->i_ino != sc->ip->i_ino)
		return NOTIFY_DONE;

	/*
	 * The dirent points at the file under repair, so stash the update
	 * for replay against the temporary file.  A positive delta is stashed
	 * as an add, anything else as a remove.
	 */
	mutex_lock(&rx->lock);
	if (p->delta > 0)
		error = xrep_xattr_stash_parentadd(rx, p->name, p->dp);
	else
		error = xrep_xattr_stash_parentremove(rx, p->name, p->dp);
	if (error)
		rx->live_update_aborted = true;
	mutex_unlock(&rx->lock);

	return NOTIFY_DONE;
}
/*
* Prepare both inodes' attribute forks for an exchange. Promote the tempfile
* from short format to leaf format, and if the file being repaired has a short
......@@ -971,7 +1336,7 @@ xrep_xattr_swap_prep(
}
/* Exchange the temporary file's attribute fork with the one being repaired. */
STATIC int
int
xrep_xattr_swap(
struct xfs_scrub *sc,
struct xrep_tempexch *tx)
......@@ -1008,6 +1373,45 @@ xrep_xattr_swap(
return xrep_tempexch_contents(sc, tx);
}
/*
* Finish replaying stashed parent pointer updates, allocate a transaction for
* exchanging extent mappings, and take the ILOCKs of both files before we
* commit the new extended attribute structure.
*/
STATIC int
xrep_xattr_finalize_tempfile(
struct xrep_xattr *rx)
{
struct xfs_scrub *sc = rx->sc;
int error;
if (!xfs_has_parent(sc->mp))
return xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
/*
* Repair relies on the ILOCK to quiesce all possible xattr updates.
* Replay all queued parent pointer updates into the tempfile before
* exchanging the contents, even if that means dropping the ILOCKs and
* the transaction.
*/
do {
error = xrep_xattr_replay_pptr_updates(rx);
if (error)
return error;
error = xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
if (error)
return error;
if (xfarray_length(rx->pptr_recs) == 0)
break;
xchk_trans_cancel(sc);
xrep_tempfile_iunlock_both(sc);
} while (!xchk_should_terminate(sc, &error));
return error;
}
/*
* Exchange the new extended attribute data (which we created in the tempfile)
* with the file being repaired.
......@@ -1060,8 +1464,12 @@ xrep_xattr_rebuild_tree(
if (error)
return error;
/* Allocate exchange transaction and lock both inodes. */
error = xrep_tempexch_trans_alloc(rx->sc, XFS_ATTR_FORK, &rx->tx);
/*
* Allocate transaction, lock inodes, and make sure that we've replayed
* all the stashed parent pointer updates to the temp file. After this
* point, we're ready to exchange attr fork mappings.
*/
error = xrep_xattr_finalize_tempfile(rx);
if (error)
return error;
......@@ -1102,8 +1510,15 @@ STATIC void
xrep_xattr_teardown(
struct xrep_xattr *rx)
{
if (xfs_has_parent(rx->sc->mp))
xfs_dir_hook_del(rx->sc->mp, &rx->dhook);
if (rx->pptr_names)
xfblob_destroy(rx->pptr_names);
if (rx->pptr_recs)
xfarray_destroy(rx->pptr_recs);
xfblob_destroy(rx->xattr_blobs);
xfarray_destroy(rx->xattr_records);
mutex_destroy(&rx->lock);
kfree(rx);
}
......@@ -1122,6 +1537,10 @@ xrep_xattr_setup_scan(
if (!rx)
return -ENOMEM;
rx->sc = sc;
rx->can_flush = true;
rx->xname.name = rx->namebuf;
mutex_init(&rx->lock);
/*
* Allocate enough memory to handle loading local attr values from the
......@@ -1149,11 +1568,43 @@ xrep_xattr_setup_scan(
if (error)
goto out_keys;
if (xfs_has_parent(sc->mp)) {
ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
descr = xchk_xfile_ino_descr(sc,
"xattr retained parent pointer entries");
error = xfarray_create(descr, 0,
sizeof(struct xrep_xattr_pptr),
&rx->pptr_recs);
kfree(descr);
if (error)
goto out_values;
descr = xchk_xfile_ino_descr(sc,
"xattr retained parent pointer names");
error = xfblob_create(descr, &rx->pptr_names);
kfree(descr);
if (error)
goto out_pprecs;
xfs_dir_hook_setup(&rx->dhook, xrep_xattr_live_dirent_update);
error = xfs_dir_hook_add(sc->mp, &rx->dhook);
if (error)
goto out_ppnames;
}
*rxp = rx;
return 0;
out_ppnames:
xfblob_destroy(rx->pptr_names);
out_pprecs:
xfarray_destroy(rx->pptr_recs);
out_values:
xfblob_destroy(rx->xattr_blobs);
out_keys:
xfarray_destroy(rx->xattr_records);
out_rx:
mutex_destroy(&rx->lock);
kfree(rx);
return error;
}
......@@ -1190,6 +1641,11 @@ xrep_xattr(
if (error)
goto out_scan;
if (rx->live_update_aborted) {
error = -EIO;
goto out_scan;
}
/* Last chance to abort before we start committing fixes. */
if (xchk_should_terminate(sc, &error))
goto out_scan;
......
......@@ -6,6 +6,10 @@
#ifndef __XFS_SCRUB_ATTR_REPAIR_H__
#define __XFS_SCRUB_ATTR_REPAIR_H__
struct xrep_tempexch;
int xrep_xattr_swap(struct xfs_scrub *sc, struct xrep_tempexch *tx);
int xrep_xattr_reset_fork(struct xfs_scrub *sc);
int xrep_xattr_reset_tempfile_fork(struct xfs_scrub *sc);
#endif /* __XFS_SCRUB_ATTR_REPAIR_H__ */
......@@ -28,6 +28,7 @@
#include "xfs_exchmaps.h"
#include "xfs_exchrange.h"
#include "xfs_ag.h"
#include "xfs_parent.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
......@@ -43,6 +44,7 @@
#include "scrub/reap.h"
#include "scrub/findparent.h"
#include "scrub/orphanage.h"
#include "scrub/listxattr.h"
/*
* Directory Repair
......@@ -57,6 +59,15 @@
* being repaired and the temporary directory, and will later become important
* for parent pointer scanning.
*
* If parent pointers are enabled on this filesystem, we instead reconstruct
* the directory by visiting each parent pointer of each file in the filesystem
* and translating the relevant parent pointer records into dirents. In this
* case, it is advantageous to stash all directory entries created from parent
* pointers for a single child file before replaying them into the temporary
* directory. To save memory, the live filesystem scan reuses the findparent
* fields. Directory repair chooses either parent pointer scanning or
* directory entry salvaging, but not both.
*
* Directory entries added to the temporary directory do not elevate the link
* counts of the inodes found. When salvaging completes, the remaining stashed
* entries are replayed to the temporary directory. An atomic mapping exchange
......@@ -74,6 +85,12 @@
* other threads.
*/
/* Create a dirent in the tempdir. */
#define XREP_DIRENT_ADD (1)
/* Remove a dirent from the tempdir. */
#define XREP_DIRENT_REMOVE (2)
/* Directory entry to be restored in the new directory. */
struct xrep_dirent {
/* Cookie for retrieval of the dirent name. */
......@@ -87,6 +104,9 @@ struct xrep_dirent {
/* File type of the dirent. */
uint8_t ftype;
/* XREP_DIRENT_{ADD,REMOVE} */
uint8_t action;
};
/*
......@@ -112,7 +132,15 @@ struct xrep_dir {
/*
* Information used to scan the filesystem to find the inumber of the
* dotdot entry for this directory.
* dotdot entry for this directory. For directory salvaging when
* parent pointers are not enabled, we use the findparent_* functions
* on this object and access only the parent_ino field directly.
*
* When parent pointers are enabled, however, the pptr scanner uses the
* iscan, hooks, lock, and parent_ino fields of this object directly.
* @pscan.lock coordinates access to dir_entries, dir_names,
* parent_ino, subdirs, dirents, and args. This reduces the memory
* requirements of this structure.
*/
struct xrep_parent_scan_info pscan;
......@@ -320,6 +348,7 @@ xrep_dir_stash_createname(
xfs_ino_t ino)
{
struct xrep_dirent dirent = {
.action = XREP_DIRENT_ADD,
.ino = ino,
.namelen = name->len,
.ftype = name->type,
......@@ -335,6 +364,33 @@ xrep_dir_stash_createname(
return xfarray_append(rd->dir_entries, &dirent);
}
/*
 * Queue a "remove dirent" action for later replay against the tempdir.  The
 * name goes into rd->dir_names and the record into rd->dir_entries.
 *
 * Returns 0 or a negative errno from storing the name or the record.
 */
STATIC int
xrep_dir_stash_removename(
	struct xrep_dir		*rd,
	const struct xfs_name	*name,
	xfs_ino_t		ino)
{
	struct xrep_dirent	update = {
		.action		= XREP_DIRENT_REMOVE,
		.ino		= ino,
		.namelen	= name->len,
		.ftype		= name->type,
	};
	int			error;

	trace_xrep_dir_stash_removename(rd->sc->tempip, name, ino);

	/* Only append the record if its name was stored successfully. */
	error = xfblob_storename(rd->dir_names, &update.name_cookie, name);
	if (!error)
		error = xfarray_append(rd->dir_entries, &update);

	return error;
}
/* Allocate an in-core record to hold entries while we rebuild the dir data. */
STATIC int
xrep_dir_salvage_entry(
......@@ -686,6 +742,43 @@ xrep_dir_replay_createname(
return xfs_dir2_node_addname(&rd->args);
}
/*
 * Replay a stashed removename onto the temporary directory, using the
 * removal routine that matches the directory's current format (shortform,
 * block, leaf, or node).
 *
 * Returns 0 or a negative errno from the format probe or the removal.
 */
STATIC int
xrep_dir_replay_removename(
	struct xrep_dir		*rd,
	const struct xfs_name	*name,
	xfs_extlen_t		total)
{
	/* Capture the directory before the args are reinitialized below. */
	struct xfs_inode	*dir = rd->args.dp;
	bool			is_block;
	bool			is_leaf;
	int			error;

	ASSERT(S_ISDIR(VFS_I(dir)->i_mode));

	xrep_dir_init_args(rd, dir, name);
	rd->args.op_flags = 0;
	rd->args.total = total;

	trace_xrep_dir_replay_removename(dir, name, 0);

	/* Shortform directories live in the inode fork. */
	if (dir->i_df.if_format == XFS_DINODE_FMT_LOCAL)
		return xfs_dir2_sf_removename(&rd->args);

	error = xfs_dir2_isblock(&rd->args, &is_block);
	if (error)
		return error;
	if (is_block)
		return xfs_dir2_block_removename(&rd->args);

	error = xfs_dir2_isleaf(&rd->args, &is_leaf);
	if (error)
		return error;
	if (is_leaf)
		return xfs_dir2_leaf_removename(&rd->args);

	/* Anything else is treated as a node-format directory. */
	return xfs_dir2_node_removename(&rd->args);
}
/*
* Add this stashed incore directory entry to the temporary directory.
* The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and
......@@ -713,26 +806,64 @@ xrep_dir_replay_update(
xrep_tempfile_ilock(rd->sc);
xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0);
/*
* Create a replacement dirent in the temporary directory. Note that
* _createname doesn't check for existing entries. There shouldn't be
* any in the temporary dir, but we'll verify this in debug mode.
*/
switch (dirent->action) {
case XREP_DIRENT_ADD:
/*
* Create a replacement dirent in the temporary directory.
* Note that _createname doesn't check for existing entries.
* There shouldn't be any in the temporary dir, but we'll
* verify this in debug mode.
*/
#ifdef DEBUG
error = xchk_dir_lookup(rd->sc, rd->sc->tempip, xname, &ino);
if (error != -ENOENT) {
ASSERT(error != -ENOENT);
goto out_cancel;
}
error = xchk_dir_lookup(rd->sc, rd->sc->tempip, xname, &ino);
if (error != -ENOENT) {
ASSERT(error != -ENOENT);
goto out_cancel;
}
#endif
error = xrep_dir_replay_createname(rd, xname, dirent->ino, resblks);
if (error)
goto out_cancel;
error = xrep_dir_replay_createname(rd, xname, dirent->ino,
resblks);
if (error)
goto out_cancel;
if (xname->type == XFS_DIR3_FT_DIR)
rd->subdirs++;
rd->dirents++;
break;
case XREP_DIRENT_REMOVE:
/*
* Remove a dirent from the temporary directory. Note that
* _removename doesn't check the inode target of the existing
* entry. There should be a perfect match in the temporary
* dir, but we'll verify this in debug mode.
*/
#ifdef DEBUG
error = xchk_dir_lookup(rd->sc, rd->sc->tempip, xname, &ino);
if (error) {
ASSERT(error != 0);
goto out_cancel;
}
if (ino != dirent->ino) {
ASSERT(ino == dirent->ino);
error = -EIO;
goto out_cancel;
}
#endif
error = xrep_dir_replay_removename(rd, xname, resblks);
if (error)
goto out_cancel;
if (xname->type == XFS_DIR3_FT_DIR)
rd->subdirs++;
rd->dirents++;
if (xname->type == XFS_DIR3_FT_DIR)
rd->subdirs--;
rd->dirents--;
break;
default:
ASSERT(0);
error = -EIO;
goto out_cancel;
}
/* Commit and unlock. */
error = xrep_trans_commit(rd->sc);
......@@ -763,28 +894,35 @@ xrep_dir_replay_updates(
int error;
/* Add all the salvaged dirents to the temporary directory. */
mutex_lock(&rd->pscan.lock);
foreach_xfarray_idx(rd->dir_entries, array_cur) {
struct xrep_dirent dirent;
error = xfarray_load(rd->dir_entries, array_cur, &dirent);
if (error)
return error;
goto out_unlock;
error = xfblob_loadname(rd->dir_names, dirent.name_cookie,
&rd->xname, dirent.namelen);
if (error)
return error;
goto out_unlock;
rd->xname.type = dirent.ftype;
mutex_unlock(&rd->pscan.lock);
error = xrep_dir_replay_update(rd, &rd->xname, &dirent);
if (error)
return error;
mutex_lock(&rd->pscan.lock);
}
/* Empty out both arrays now that we've added the entries. */
xfarray_truncate(rd->dir_entries);
xfblob_truncate(rd->dir_names);
mutex_unlock(&rd->pscan.lock);
return 0;
out_unlock:
mutex_unlock(&rd->pscan.lock);
return error;
}
/*
......@@ -995,6 +1133,334 @@ xrep_dir_salvage_entries(
}
/*
* Examine a parent pointer of a file. If it leads us back to the directory
* that we're rebuilding, create an incore dirent from the parent pointer and
* stash it.
*/
STATIC int
xrep_dir_scan_pptr(
struct xfs_scrub *sc,
struct xfs_inode *ip,
unsigned int attr_flags,
const unsigned char *name,
unsigned int namelen,
const void *value,
unsigned int valuelen,
void *priv)
{
struct xfs_name xname = {
.name = name,
.len = namelen,
.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode),
};
xfs_ino_t parent_ino;
uint32_t parent_gen;
struct xrep_dir *rd = priv;
int error;
if (!(attr_flags & XFS_ATTR_PARENT))
return 0;
/*
* Ignore parent pointers that point back to a different dir, list the
* wrong generation number, or are invalid.
*/
error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value,
valuelen, &parent_ino, &parent_gen);
if (error)
return error;
if (parent_ino != sc->ip->i_ino ||
parent_gen != VFS_I(sc->ip)->i_generation)
return 0;
mutex_lock(&rd->pscan.lock);
error = xrep_dir_stash_createname(rd, &xname, ip->i_ino);
mutex_unlock(&rd->pscan.lock);
return error;
}
/*
 * Directory walk callback: if this dirent of @dp is a child entry pointing at
 * the directory being repaired, record @dp as the parent so that the dotdot
 * entry can be reset if necessary.  Always returns 0.
 */
STATIC int
xrep_dir_scan_dirent(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xfs_dir2_dataptr_t	dapos,
	const struct xfs_name	*name,
	xfs_ino_t		ino,
	void			*priv)
{
	struct xrep_dir		*rd = priv;
	struct xfs_scrub	*rsc = rd->sc;

	/* This dirent leads somewhere other than the dir being repaired. */
	if (rsc->ip->i_ino != ino)
		return 0;

	/* Skip inode numbers that fail verification. */
	if (!xfs_verify_dir_ino(rsc->mp, ino))
		return 0;

	/* Skip names whose length is out of range. */
	if (name->len <= 0 || name->len >= MAXNAMELEN)
		return 0;

	/* Only child dirents count; dot and dotdot entries are ignored. */
	if (xfs_dir2_samename(name, &xfs_name_dotdot))
		return 0;
	if (xfs_dir2_samename(name, &xfs_name_dot))
		return 0;

	trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot,
			dp->i_ino);

	xrep_findparent_scan_found(&rd->pscan, dp->i_ino);
	return 0;
}
/*
* Decide if we want to look for child dirents or parent pointers in this file.
* Skip the dir being repaired and any files being used to stage repairs.
*/
static inline bool
xrep_dir_want_scan(
struct xrep_dir *rd,
const struct xfs_inode *ip)
{
return ip != rd->sc->ip && !xrep_is_tempfile(ip);
}
/*
 * Lock the ILOCK of a file that the scan is about to visit and return the
 * lock mode that was taken.
 *
 * ILOCK_EXCL is chosen when the file will be scanned and either it is a
 * directory whose data fork extents still need to be read in, or it has an
 * attr fork whose extents still need to be read in.  In every other case,
 * including files that will not be scanned, ILOCK_SHARED is taken, because
 * the shared lock is still needed to advance the iscan cursor.
 */
static inline unsigned int
xrep_dir_scan_ilock(
	struct xrep_dir		*rd,
	struct xfs_inode	*ip)
{
	uint			lock_mode = XFS_ILOCK_SHARED;

	if (xrep_dir_want_scan(rd, ip)) {
		if (S_ISDIR(VFS_I(ip)->i_mode) &&
		    xfs_need_iread_extents(&ip->i_df))
			lock_mode = XFS_ILOCK_EXCL;
		else if (xfs_inode_has_attr_fork(ip) &&
			 xfs_need_iread_extents(&ip->i_af))
			lock_mode = XFS_ILOCK_EXCL;
	}

	xfs_ilock(ip, lock_mode);
	return lock_mode;
}
/*
 * Visit one file during the filesystem scan: walk its extended attributes
 * for parent pointers that refer to the directory being rebuilt and, if the
 * file is itself a directory, walk its dirents for entries that point to
 * that directory.
 *
 * The file is always marked visited in the iscan and unlocked before
 * returning.  Returns -EBUSY if the attr fork or directory looks zapped,
 * otherwise 0 or the error from the walk.
 */
STATIC int
xrep_dir_scan_file(
	struct xrep_dir		*rd,
	struct xfs_inode	*ip)
{
	unsigned int		lock_mode;
	int			ret = 0;

	lock_mode = xrep_dir_scan_ilock(rd, ip);

	if (!xrep_dir_want_scan(rd, ip))
		goto out_visited;

	/*
	 * If the extended attributes look as though they have been zapped by
	 * the inode record repair code, we cannot scan for parent pointers.
	 */
	if (xchk_pptr_looks_zapped(ip)) {
		ret = -EBUSY;
		goto out_visited;
	}

	ret = xchk_xattr_walk(rd->sc, ip, xrep_dir_scan_pptr, NULL, rd);
	if (ret || !S_ISDIR(VFS_I(ip)->i_mode))
		goto out_visited;

	/*
	 * If the directory looks as though it has been zapped by the inode
	 * record repair code, we cannot scan for child dirents.
	 */
	if (xchk_dir_looks_zapped(ip))
		ret = -EBUSY;
	else
		ret = xchk_dir_walk(rd->sc, ip, xrep_dir_scan_dirent, rd);

out_visited:
	xchk_iscan_mark_visited(&rd->pscan.iscan, ip);
	xfs_iunlock(ip, lock_mode);
	return ret;
}
/*
* Scan all files in the filesystem for parent pointers that we can turn into
* replacement dirents, and a dirent that we can use to set the dotdot pointer.
*
* On success, returns 0 after cancelling the scrub transaction.  On failure,
* returns a negative errno; -EBUSY from the scan is reported to the caller as
* -ECANCELED.
*/
STATIC int
xrep_dir_scan_dirtree(
struct xrep_dir *rd)
{
struct xfs_scrub *sc = rd->sc;
struct xfs_inode *ip;
int error;
/* Roots of directory trees are their own parents. */
if (sc->ip == sc->mp->m_rootip)
xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino);
/*
* Filesystem scans are time consuming. Drop the directory ILOCK and
* all other resources for the duration of the scan and hope for the
* best. The live update hooks will keep our scan information up to
* date even though we've dropped the locks.
*/
xchk_trans_cancel(sc);
if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
XFS_ILOCK_EXCL));
error = xchk_trans_alloc_empty(sc);
if (error)
return error;
/* The iterator returns 1 for each inode it hands us; anything else ends the loop. */
while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) {
bool flush;
error = xrep_dir_scan_file(rd, ip);
/* Release the inode whether or not scanning it succeeded. */
xchk_irele(sc, ip);
if (error)
break;
/* Flush stashed dirent updates to constrain memory usage. */
mutex_lock(&rd->pscan.lock);
flush = xrep_dir_want_flush_stashed(rd);
mutex_unlock(&rd->pscan.lock);
if (flush) {
/*
* Replay runs without the empty transaction and with the tempfile
* IOLOCK held; a fresh empty transaction is allocated afterwards so
* that the scan can continue.
*/
xchk_trans_cancel(sc);
error = xrep_tempfile_iolock_polled(sc);
if (error)
break;
error = xrep_dir_replay_updates(rd);
xrep_tempfile_iounlock(sc);
if (error)
break;
error = xchk_trans_alloc_empty(sc);
if (error)
break;
}
if (xchk_should_terminate(sc, &error))
break;
}
/* Always finish the iteration, even when leaving the loop with an error. */
xchk_iscan_iter_finish(&rd->pscan.iscan);
if (error) {
/*
* If we couldn't grab an inode that was busy with a state
* change, change the error code so that we exit to userspace
* as quickly as possible.
*/
if (error == -EBUSY)
return -ECANCELED;
return error;
}
/*
* Cancel the empty transaction so that we can (later) use the atomic
* file mapping exchange functions to lock files and commit the new
* directory.
*/
xchk_trans_cancel(rd->sc);
return 0;
}
/*
* Capture dirent updates being made by other threads which are relevant to the
* directory being repaired.
*
* This is the notifier callback installed through
* __xrep_findparent_scan_start() when parent pointers are enabled.  It always
* returns NOTIFY_DONE; if stashing an update fails, the iscan is aborted
* instead of returning the error.
*/
STATIC int
xrep_dir_live_update(
struct notifier_block *nb,
unsigned long action,
void *data)
{
struct xfs_dir_update_params *p = data;
struct xrep_dir *rd;
struct xfs_scrub *sc;
int error = 0;
/* Recover the repair context from the notifier block embedded in it. */
rd = container_of(nb, struct xrep_dir, pscan.dhook.dirent_hook.nb);
sc = rd->sc;
/*
* This thread updated a child dirent in the directory that we're
* rebuilding. Stash the update for replay against the temporary
* directory.
*/
if (p->dp->i_ino == sc->ip->i_ino &&
xchk_iscan_want_live_update(&rd->pscan.iscan, p->ip->i_ino)) {
mutex_lock(&rd->pscan.lock);
/* A positive delta is stashed as a creation; anything else as a removal. */
if (p->delta > 0)
error = xrep_dir_stash_createname(rd, p->name,
p->ip->i_ino);
else
error = xrep_dir_stash_removename(rd, p->name,
p->ip->i_ino);
mutex_unlock(&rd->pscan.lock);
if (error)
goto out_abort;
}
/*
* This thread updated another directory's child dirent that points to
* the directory that we're rebuilding, so remember the new dotdot
* target.
*/
if (p->ip->i_ino == sc->ip->i_ino &&
xchk_iscan_want_live_update(&rd->pscan.iscan, p->dp->i_ino)) {
if (p->delta > 0) {
trace_xrep_dir_stash_createname(sc->tempip,
&xfs_name_dotdot,
p->dp->i_ino);
xrep_findparent_scan_found(&rd->pscan, p->dp->i_ino);
} else {
/* The link went away, so record that no parent is known. */
trace_xrep_dir_stash_removename(sc->tempip,
&xfs_name_dotdot,
rd->pscan.parent_ino);
xrep_findparent_scan_found(&rd->pscan, NULLFSINO);
}
}
return NOTIFY_DONE;
out_abort:
/* The update could not be stashed, so abort the scan. */
xchk_iscan_abort(&rd->pscan.iscan);
return NOTIFY_DONE;
}
/*
* Free all the directory blocks and reset the data fork. The caller must
* join the inode to the transaction. This function returns with the inode
......@@ -1194,6 +1660,45 @@ xrep_dir_set_nlink(
return 0;
}
/*
* Finish replaying stashed dirent updates, allocate a transaction for
* exchanging data fork mappings, and take the ILOCKs of both directories
* before we commit the new directory structure.
*
* Returns 0 once the exchange transaction is allocated and no stashed
* updates remain, or a negative errno.
*/
STATIC int
xrep_dir_finalize_tempdir(
struct xrep_dir *rd)
{
struct xfs_scrub *sc = rd->sc;
int error;
/* Without parent pointers, only the exchange transaction is needed. */
if (!xfs_has_parent(sc->mp))
return xrep_tempexch_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
/*
* Repair relies on the ILOCK to quiesce all possible dirent updates.
* Replay all queued dirent updates into the tempdir before exchanging
* the contents, even if that means dropping the ILOCKs and the
* transaction.
*/
do {
error = xrep_dir_replay_updates(rd);
if (error)
return error;
error = xrep_tempexch_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
if (error)
return error;
/* Nothing new was stashed in the meantime; keep the transaction and stop. */
if (xfarray_length(rd->dir_entries) == 0)
break;
/* More updates were queued; release everything and replay again. */
xchk_trans_cancel(sc);
xrep_tempfile_iunlock_both(sc);
} while (!xchk_should_terminate(sc, &error));
return error;
}
/* Exchange the temporary directory's data fork with the one being repaired. */
STATIC int
xrep_dir_swap(
......@@ -1296,11 +1801,18 @@ xrep_dir_rebuild_tree(
if (error)
return error;
/* Allocate transaction and ILOCK the scrub file and the temp file. */
error = xrep_tempexch_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
/*
* Allocate transaction, lock inodes, and make sure that we've replayed
* all the stashed dirent updates to the tempdir. After this point,
* we're ready to exchange data fork mappings.
*/
error = xrep_dir_finalize_tempdir(rd);
if (error)
return error;
if (xchk_iscan_aborted(&rd->pscan.iscan))
return -ECANCELED;
/*
* Exchange the tempdir's data fork with the file being repaired. This
* recreates the transaction and re-takes the ILOCK in the scrub
......@@ -1356,7 +1868,11 @@ xrep_dir_setup_scan(
if (error)
goto out_xfarray;
error = xrep_findparent_scan_start(sc, &rd->pscan);
if (xfs_has_parent(sc->mp))
error = __xrep_findparent_scan_start(sc, &rd->pscan,
xrep_dir_live_update);
else
error = xrep_findparent_scan_start(sc, &rd->pscan);
if (error)
goto out_xfblob;
......@@ -1482,7 +1998,10 @@ xrep_directory(
if (error)
return error;
error = xrep_dir_salvage_entries(rd);
if (xfs_has_parent(sc->mp))
error = xrep_dir_scan_dirtree(rd);
else
error = xrep_dir_salvage_entries(rd);
if (error)
goto out_teardown;
......
......@@ -24,6 +24,7 @@
#include "xfs_trans_space.h"
#include "xfs_health.h"
#include "xfs_exchmaps.h"
#include "xfs_parent.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
......@@ -33,6 +34,7 @@
#include "scrub/findparent.h"
#include "scrub/readdir.h"
#include "scrub/tempfile.h"
#include "scrub/listxattr.h"
/*
* Finding the Parent of a Directory
......@@ -238,9 +240,10 @@ xrep_findparent_live_update(
* will be called when there is a dotdot update for the inode being repaired.
*/
int
xrep_findparent_scan_start(
__xrep_findparent_scan_start(
struct xfs_scrub *sc,
struct xrep_parent_scan_info *pscan)
struct xrep_parent_scan_info *pscan,
notifier_fn_t custom_fn)
{
int error;
......@@ -262,7 +265,10 @@ xrep_findparent_scan_start(
* ILOCK, which means that any in-progress inode updates will finish
* before we can scan the inode.
*/
xfs_dir_hook_setup(&pscan->dhook, xrep_findparent_live_update);
if (custom_fn)
xfs_dir_hook_setup(&pscan->dhook, custom_fn);
else
xfs_dir_hook_setup(&pscan->dhook, xrep_findparent_live_update);
error = xfs_dir_hook_add(sc->mp, &pscan->dhook);
if (error)
goto out_iscan;
......
......@@ -24,8 +24,14 @@ struct xrep_parent_scan_info {
bool lookup_parent;
};
int xrep_findparent_scan_start(struct xfs_scrub *sc,
struct xrep_parent_scan_info *pscan);
int __xrep_findparent_scan_start(struct xfs_scrub *sc,
struct xrep_parent_scan_info *pscan,
notifier_fn_t custom_fn);
/*
* Start a parent scan without a custom live update function.  Passing NULL
* makes __xrep_findparent_scan_start() install xrep_findparent_live_update
* as the directory update hook.
*/
static inline int xrep_findparent_scan_start(struct xfs_scrub *sc,
struct xrep_parent_scan_info *pscan)
{
return __xrep_findparent_scan_start(sc, pscan, NULL);
}
int xrep_findparent_scan(struct xrep_parent_scan_info *pscan);
void xrep_findparent_scan_teardown(struct xrep_parent_scan_info *pscan);
......
......@@ -1736,6 +1736,44 @@ xrep_inode_extsize(
}
}
/* Ensure this file has an attr fork if it needs to hold a parent pointer. */
STATIC int
xrep_inode_pptr(
struct xfs_scrub *sc)
{
struct xfs_mount *mp = sc->mp;
struct xfs_inode *ip = sc->ip;
struct inode *inode = VFS_I(ip);
if (!xfs_has_parent(mp))
return 0;
/*
* Unlinked inodes that cannot be added to the directory tree will not
* have a parent pointer.
*/
if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
return 0;
/* The root directory doesn't have a parent pointer. */
if (ip == mp->m_rootip)
return 0;
/*
* Metadata inodes are rooted in the superblock and do not have any
* parents.
*/
if (xfs_is_metadata_inode(ip))
return 0;
/* Inode already has an attr fork; no further work possible here. */
if (xfs_inode_has_attr_fork(ip))
return 0;
return xfs_bmap_add_attrfork(sc->tp, ip,
sizeof(struct xfs_attr_sf_hdr), true);
}
/* Fix any irregularities in an inode that the verifiers don't catch. */
STATIC int
xrep_inode_problems(
......@@ -1744,6 +1782,9 @@ xrep_inode_problems(
int error;
error = xrep_inode_blockcounts(sc);
if (error)
return error;
error = xrep_inode_pptr(sc);
if (error)
return error;
xrep_inode_timestamps(sc->ip);
......
......@@ -221,6 +221,7 @@ xchk_xattr_walk_node(
struct xfs_scrub *sc,
struct xfs_inode *ip,
xchk_xattr_fn attr_fn,
xchk_xattrleaf_fn leaf_fn,
void *priv)
{
struct xfs_attr3_icleaf_hdr leafhdr;
......@@ -252,6 +253,12 @@ xchk_xattr_walk_node(
xfs_trans_brelse(sc->tp, leaf_bp);
if (leaf_fn) {
error = leaf_fn(sc, priv);
if (error)
goto out_bitmap;
}
/* Make sure we haven't seen this new leaf already. */
len = 1;
if (xdab_bitmap_test(&seen_dablks, leafhdr.forw, &len)) {
......@@ -288,6 +295,7 @@ xchk_xattr_walk(
struct xfs_scrub *sc,
struct xfs_inode *ip,
xchk_xattr_fn attr_fn,
xchk_xattrleaf_fn leaf_fn,
void *priv)
{
int error;
......@@ -308,5 +316,5 @@ xchk_xattr_walk(
if (xfs_attr_is_leaf(ip))
return xchk_xattr_walk_leaf(sc, ip, attr_fn, priv);
return xchk_xattr_walk_node(sc, ip, attr_fn, priv);
return xchk_xattr_walk_node(sc, ip, attr_fn, leaf_fn, priv);
}
......@@ -11,7 +11,9 @@ typedef int (*xchk_xattr_fn)(struct xfs_scrub *sc, struct xfs_inode *ip,
unsigned int namelen, const void *value, unsigned int valuelen,
void *priv);
typedef int (*xchk_xattrleaf_fn)(struct xfs_scrub *sc, void *priv);
int xchk_xattr_walk(struct xfs_scrub *sc, struct xfs_inode *ip,
xchk_xattr_fn attr_fn, void *priv);
xchk_xattr_fn attr_fn, xchk_xattrleaf_fn leaf_fn, void *priv);
#endif /* __XFS_SCRUB_LISTXATTR_H__ */
......@@ -434,7 +434,8 @@ xchk_nlinks_collect_dir(
goto out_unlock;
}
error = xchk_xattr_walk(sc, dp, xchk_nlinks_collect_pptr, xnc);
error = xchk_xattr_walk(sc, dp, xchk_nlinks_collect_pptr, NULL,
xnc);
if (error == -ECANCELED) {
error = 0;
goto out_unlock;
......
......@@ -19,6 +19,8 @@
#include "xfs_icache.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_parent.h"
#include "xfs_attr_sf.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
......@@ -330,6 +332,8 @@ xrep_adoption_trans_alloc(
if (S_ISDIR(VFS_I(sc->ip)->i_mode))
child_blkres = xfs_rename_space_res(mp, 0, false,
xfs_name_dotdot.len, false);
if (xfs_has_parent(mp))
child_blkres += XFS_ADDAFORK_SPACE_RES(mp);
adopt->child_blkres = child_blkres;
/*
......@@ -503,6 +507,19 @@ xrep_adoption_zap_dcache(
dput(d_orphanage);
}
/*
* If we have to add an attr fork ahead of a parent pointer update, how much
* space should we ask for?
*/
static inline int
xrep_adoption_attr_sizeof(
const struct xrep_adoption *adopt)
{
return sizeof(struct xfs_attr_sf_hdr) +
xfs_attr_sf_entsize_byname(sizeof(struct xfs_parent_rec),
adopt->xname->len);
}
/*
* Move the current file to the orphanage under the computed name.
*
......@@ -524,6 +541,19 @@ xrep_adoption_move(
if (error)
return error;
/*
* If this filesystem has parent pointers, ensure that the file being
* moved to the orphanage has an attribute fork. This is required
* because the parent pointer code does not itself add attr forks.
*/
if (!xfs_inode_has_attr_fork(sc->ip) && xfs_has_parent(sc->mp)) {
int sf_size = xrep_adoption_attr_sizeof(adopt);
error = xfs_bmap_add_attrfork(sc->tp, sc->ip, sf_size, true);
if (error)
return error;
}
/* Create the new name in the orphanage. */
error = xfs_dir_createname(sc->tp, sc->orphanage, adopt->xname,
sc->ip->i_ino, adopt->orphanage_blkres);
......@@ -548,6 +578,14 @@ xrep_adoption_move(
return error;
}
/* Add a parent pointer from the file back to the lost+found. */
if (xfs_has_parent(sc->mp)) {
error = xfs_parent_addname(sc->tp, &adopt->ppargs,
sc->orphanage, adopt->xname, sc->ip);
if (error)
return error;
}
/*
* Notify dirent hooks that we moved the file to /lost+found, and
* finish all the deferred work so that we know the adoption is fully
......
......@@ -54,6 +54,9 @@ struct xrep_adoption {
/* Name used for the adoption. */
struct xfs_name *xname;
/* Parent pointer context tracking */
struct xfs_parent_args ppargs;
/* Block reservations for orphanage and child (if directory). */
unsigned int orphanage_blkres;
unsigned int child_blkres;
......
......@@ -317,7 +317,7 @@ xchk_parent_pptr_and_dotdot(
return 0;
/* Otherwise, walk the pptrs again, and check. */
error = xchk_xattr_walk(sc, sc->ip, xchk_parent_scan_dotdot, pp);
error = xchk_xattr_walk(sc, sc->ip, xchk_parent_scan_dotdot, NULL, pp);
if (error == -ECANCELED) {
/* Found a parent pointer that matches dotdot. */
return 0;
......@@ -699,7 +699,8 @@ xchk_parent_count_pptrs(
*/
if (pp->need_revalidate) {
pp->pptrs_found = 0;
error = xchk_xattr_walk(sc, sc->ip, xchk_parent_count_pptr, pp);
error = xchk_xattr_walk(sc, sc->ip, xchk_parent_count_pptr,
NULL, pp);
if (error == -EFSCORRUPTED) {
/* Found a bad parent pointer */
xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, 0);
......@@ -758,7 +759,7 @@ xchk_parent_pptr(
if (error)
goto out_entries;
error = xchk_xattr_walk(sc, sc->ip, xchk_parent_scan_attr, pp);
error = xchk_xattr_walk(sc, sc->ip, xchk_parent_scan_attr, NULL, pp);
if (error == -ECANCELED) {
error = 0;
goto out_names;
......
......@@ -24,6 +24,10 @@
#include "xfs_trans_space.h"
#include "xfs_health.h"
#include "xfs_exchmaps.h"
#include "xfs_parent.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
......@@ -33,7 +37,13 @@
#include "scrub/findparent.h"
#include "scrub/readdir.h"
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/orphanage.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/xfblob.h"
#include "scrub/attr_repair.h"
#include "scrub/listxattr.h"
/*
* Repairing The Directory Parent Pointer
......@@ -49,14 +59,89 @@
* See the section on locking issues in dir_repair.c for more information about
* conflicts with the VFS. The findparent code will keep our incore parent
* inode up to date.
*
* If parent pointers are enabled, we instead reconstruct the parent pointer
* information by visiting every directory entry of every directory in the
* system and translating the relevant dirents into parent pointers. In this
* case, it is advantageous to stash all parent pointers created from dirents
* from a single parent file before replaying them into the temporary file. To
* save memory, the live filesystem scan reuses the findparent object. Parent
* pointer repair chooses either directory scanning or findparent, but not
* both.
*
* When salvaging completes, the remaining stashed entries are replayed to the
* temporary file. All non-parent pointer extended attributes are copied to
* the temporary file's extended attributes. An atomic file mapping exchange
* is used to commit the new xattr blocks to the file being repaired. This
* will disrupt attrmulti cursors.
*/
/* Create a parent pointer in the tempfile. */
#define XREP_PPTR_ADD (1)
/* Remove a parent pointer from the tempfile. */
#define XREP_PPTR_REMOVE (2)
/*
* A stashed parent pointer update.  Records of this type are appended to
* pptr_recs; the name is stored in pptr_names and found again through
* name_cookie when the update is replayed.
*/
struct xrep_pptr {
/* Cookie for retrieval of the pptr name. */
xfblob_cookie name_cookie;
/* Parent pointer record. */
struct xfs_parent_rec pptr_rec;
/* Length of the pptr name. */
uint8_t namelen;
/* XREP_PPTR_{ADD,REMOVE} */
uint8_t action;
};
/*
* Stash up to 8 pages of recovered parent pointers in pptr_recs and
* pptr_names before we write them to the temp file.
*/
#define XREP_PARENT_MAX_STASH_BYTES (PAGE_SIZE * 8)
struct xrep_parent {
struct xfs_scrub *sc;
/* Fixed-size array of xrep_pptr structures. */
struct xfarray *pptr_recs;
/* Blobs containing parent pointer names. */
struct xfblob *pptr_names;
/* xattr keys */
struct xfarray *xattr_records;
/* xattr values */
struct xfblob *xattr_blobs;
/* Scratch buffers for saving extended attributes */
unsigned char *xattr_name;
void *xattr_value;
unsigned int xattr_value_sz;
/*
* Information used to exchange the attr fork mappings, if the fs
* supports parent pointers.
*/
struct xrep_tempexch tx;
/*
* Information used to scan the filesystem to find the inumber of the
* dotdot entry for this directory.
* dotdot entry for this directory. On filesystems without parent
* pointers, we use the findparent_* functions on this object and
* access only the parent_ino field directly.
*
* When parent pointers are enabled, the directory entry scanner uses
* the iscan, hooks, and lock fields of this object directly.
* @pscan.lock coordinates access to pptr_recs, pptr_names, pptr, and
* pptr_scratch. This reduces the memory requirements of this
* structure.
*
* The lock also controls access to xattr_records and xattr_blobs(?)
*/
struct xrep_parent_scan_info pscan;
......@@ -66,14 +151,60 @@ struct xrep_parent {
/* Directory entry name, plus the trailing null. */
struct xfs_name xname;
unsigned char namebuf[MAXNAMELEN];
/* Scratch buffer for scanning pptr xattrs */
struct xfs_da_args pptr_args;
/* Have we seen any live updates of parent pointers recently? */
bool saw_pptr_updates;
/* Number of parents we found after all other repairs */
unsigned long long parents;
};
/*
* A stashed extended attribute, described by cookies for its name and value
* plus its flags and lengths.
* NOTE(review): presumably these records live in xattr_records with the name
* and value bytes held in xattr_blobs -- confirm against the users of this
* structure.
*/
struct xrep_parent_xattr {
/* Cookie for retrieval of the xattr name. */
xfblob_cookie name_cookie;
/* Cookie for retrieval of the xattr value. */
xfblob_cookie value_cookie;
/* XFS_ATTR_* flags */
int flags;
/* Length of the value and name. */
uint32_t valuelen;
uint16_t namelen;
};
/*
* Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write
* them to the temp file.
*/
#define XREP_PARENT_XATTR_MAX_STASH_BYTES (PAGE_SIZE * 8)
/*
 * Release every incore resource owned by the parent repair context.  Each
 * pointer is reset to NULL once its object has been released.
 */
static void
xrep_parent_teardown(
	struct xrep_parent	*rp)
{
	xrep_findparent_scan_teardown(&rp->pscan);

	/* Scratch buffers for saving extended attributes. */
	kvfree(rp->xattr_name);
	rp->xattr_name = NULL;
	kvfree(rp->xattr_value);
	rp->xattr_value = NULL;

	/* Stashed xattr values and keys. */
	if (rp->xattr_blobs) {
		xfblob_destroy(rp->xattr_blobs);
		rp->xattr_blobs = NULL;
	}
	if (rp->xattr_records) {
		xfarray_destroy(rp->xattr_records);
		rp->xattr_records = NULL;
	}

	/* Stashed parent pointer names and records. */
	if (rp->pptr_names) {
		xfblob_destroy(rp->pptr_names);
		rp->pptr_names = NULL;
	}
	if (rp->pptr_recs) {
		xfarray_destroy(rp->pptr_recs);
		rp->pptr_recs = NULL;
	}
}
/* Set up for a parent repair. */
......@@ -82,6 +213,7 @@ xrep_setup_parent(
struct xfs_scrub *sc)
{
struct xrep_parent *rp;
int error;
xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
......@@ -92,6 +224,10 @@ xrep_setup_parent(
rp->xname.name = rp->namebuf;
sc->buf = rp;
error = xrep_tempfile_create(sc, S_IFREG);
if (error)
return error;
return xrep_orphanage_try_create(sc);
}
......@@ -147,6 +283,393 @@ xrep_parent_find_dotdot(
return error;
}
/*
 * Apply one stashed parent pointer update to the temporary file, either
 * setting or unsetting the parent pointer according to the stashed action.
 * The caller must hold the temporary file's IOLOCK, must not hold any ILOCKs,
 * and must not be in transaction context.
 *
 * Returns the result of the set/unset operation, or -EIO if the stashed
 * action is not recognized.
 */
STATIC int
xrep_parent_replay_update(
	struct xrep_parent	*rp,
	const struct xfs_name	*xname,
	struct xrep_pptr	*pptr)
{
	struct xfs_scrub	*sc = rp->sc;

	if (pptr->action == XREP_PPTR_ADD) {
		/* Create parent pointer. */
		trace_xrep_parent_replay_parentadd(sc->tempip, xname,
				&pptr->pptr_rec);

		return xfs_parent_set(sc->tempip, sc->ip->i_ino, xname,
				&pptr->pptr_rec, &rp->pptr_args);
	}

	if (pptr->action == XREP_PPTR_REMOVE) {
		/* Remove parent pointer. */
		trace_xrep_parent_replay_parentremove(sc->tempip, xname,
				&pptr->pptr_rec);

		return xfs_parent_unset(sc->tempip, sc->ip->i_ino, xname,
				&pptr->pptr_rec, &rp->pptr_args);
	}

	/* Unknown action; this should never happen. */
	ASSERT(0);
	return -EIO;
}
/*
* Flush stashed parent pointer updates that have been recorded by the scanner.
* This is done to reduce the memory requirements of the parent pointer
* rebuild, since files can have a lot of hardlinks and the fs can be busy.
*
* Caller must not hold transactions or ILOCKs. Caller must hold the tempfile
* IOLOCK.
*
* Returns 0 after replaying every stashed record and emptying both stash
* structures, or the first error encountered.  pscan.lock is not held on
* return.
*/
STATIC int
xrep_parent_replay_updates(
struct xrep_parent *rp)
{
xfarray_idx_t array_cur;
int error;
mutex_lock(&rp->pscan.lock);
foreach_xfarray_idx(rp->pptr_recs, array_cur) {
struct xrep_pptr pptr;
/* Load the record and its name while holding pscan.lock. */
error = xfarray_load(rp->pptr_recs, array_cur, &pptr);
if (error)
goto out_unlock;
error = xfblob_loadname(rp->pptr_names, pptr.name_cookie,
&rp->xname, pptr.namelen);
if (error)
goto out_unlock;
rp->xname.len = pptr.namelen;
/* The lock is dropped for the replay and retaken before the next record. */
mutex_unlock(&rp->pscan.lock);
error = xrep_parent_replay_update(rp, &rp->xname, &pptr);
/* The lock is not held here, so return directly on failure. */
if (error)
return error;
mutex_lock(&rp->pscan.lock);
}
/* Empty out both arrays now that we've added the entries. */
xfarray_truncate(rp->pptr_recs);
xfblob_truncate(rp->pptr_names);
mutex_unlock(&rp->pscan.lock);
return 0;
out_unlock:
mutex_unlock(&rp->pscan.lock);
return error;
}
/*
* Remember that we want to create a parent pointer in the tempfile. These
* stashed actions will be replayed later.
*/
STATIC int
xrep_parent_stash_parentadd(
struct xrep_parent *rp,
const struct xfs_name *name,
const struct xfs_inode *dp)
{
struct xrep_pptr pptr = {
.action = XREP_PPTR_ADD,
.namelen = name->len,
};
int error;
trace_xrep_parent_stash_parentadd(rp->sc->tempip, dp, name);
xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
error = xfblob_storename(rp->pptr_names, &pptr.name_cookie, name);
if (error)
return error;
return xfarray_append(rp->pptr_recs, &pptr);
}
/*
* Remember that we want to remove a parent pointer from the tempfile. These
* stashed actions will be replayed later.
*/
STATIC int
xrep_parent_stash_parentremove(
struct xrep_parent *rp,
const struct xfs_name *name,
const struct xfs_inode *dp)
{
struct xrep_pptr pptr = {
.action = XREP_PPTR_REMOVE,
.namelen = name->len,
};
int error;
trace_xrep_parent_stash_parentremove(rp->sc->tempip, dp, name);
xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
error = xfblob_storename(rp->pptr_names, &pptr.name_cookie, name);
if (error)
return error;
return xfarray_append(rp->pptr_recs, &pptr);
}
/*
 * Directory walk callback: examine one entry of directory @dp.  If the
 * dirent leads back to the file whose parent pointers are being rebuilt,
 * queue a parent pointer for later addition to the temporary file.
 *
 * Returns 0 for dirents that are skipped, -EFSCORRUPTED if a matching dirent
 * has a bad name or a file type that disagrees with the file's mode, or the
 * result of stashing the parent pointer.
 */
STATIC int
xrep_parent_scan_dirent(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xfs_dir2_dataptr_t	dapos,
	const struct xfs_name	*name,
	xfs_ino_t		ino,
	void			*priv)
{
	struct xrep_parent	*rp = priv;
	int			ret;

	/* This dirent doesn't point to the file being repaired. */
	if (rp->sc->ip->i_ino != ino)
		return 0;

	/* Empty names and names that fail the name check are corruption. */
	if (!name->len || !xfs_dir2_namecheck(name->name, name->len))
		return -EFSCORRUPTED;

	/* The dirent's file type must agree with the file's mode. */
	if (xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode) != name->type)
		return -EFSCORRUPTED;

	/* Only child dirents count; dot and dotdot entries are ignored. */
	if (xfs_dir2_samename(name, &xfs_name_dotdot))
		return 0;
	if (xfs_dir2_samename(name, &xfs_name_dot))
		return 0;

	/* Turn the dirent into a stashed parent pointer addition. */
	mutex_lock(&rp->pscan.lock);
	ret = xrep_parent_stash_parentadd(rp, name, dp);
	mutex_unlock(&rp->pscan.lock);
	return ret;
}
/*
* Decide if we want to look for dirents in this directory. Skip the file
* being repaired and any files being used to stage repairs.
*/
static inline bool
xrep_parent_want_scan(
struct xrep_parent *rp,
const struct xfs_inode *ip)
{
return ip != rp->sc->ip && !xrep_is_tempfile(ip);
}
/*
 * Lock the ILOCK of a file that the scan is about to visit and return the
 * lock mode that was taken.
 *
 * ILOCK_EXCL is chosen only when the file will be scanned and is a directory
 * whose data fork extents still need to be read in.  Otherwise ILOCK_SHARED
 * is taken, including for files that will not be scanned, because the shared
 * lock is still needed to advance the iscan cursor.
 */
static inline unsigned int
xrep_parent_scan_ilock(
	struct xrep_parent	*rp,
	struct xfs_inode	*ip)
{
	uint			lock_mode = XFS_ILOCK_SHARED;

	if (xrep_parent_want_scan(rp, ip) &&
	    S_ISDIR(VFS_I(ip)->i_mode) &&
	    xfs_need_iread_extents(&ip->i_df))
		lock_mode = XFS_ILOCK_EXCL;

	xfs_ilock(ip, lock_mode);
	return lock_mode;
}
/*
 * Visit one file during the filesystem scan.  If it is a directory that we
 * want to scan, walk its dirents looking for entries that point to the file
 * whose parent pointers are being rebuilt.
 *
 * The file is always marked visited in the iscan and unlocked before
 * returning.  Returns -EBUSY if the directory looks zapped, otherwise 0 or
 * the error from the directory walk.
 */
STATIC int
xrep_parent_scan_file(
	struct xrep_parent	*rp,
	struct xfs_inode	*ip)
{
	unsigned int		lock_mode = xrep_parent_scan_ilock(rp, ip);
	int			ret = 0;

	if (xrep_parent_want_scan(rp, ip) && S_ISDIR(VFS_I(ip)->i_mode)) {
		/*
		 * If the directory looks as though it has been zapped by the
		 * inode record repair code, we cannot scan for child dirents.
		 */
		if (xchk_dir_looks_zapped(ip))
			ret = -EBUSY;
		else
			ret = xchk_dir_walk(rp->sc, ip,
					xrep_parent_scan_dirent, rp);
	}

	xchk_iscan_mark_visited(&rp->pscan.iscan, ip);
	xfs_iunlock(ip, lock_mode);
	return ret;
}
/* Decide if we've stashed too much pptr data in memory. */
static inline bool
xrep_parent_want_flush_stashed(
struct xrep_parent *rp)
{
unsigned long long bytes;
bytes = xfarray_bytes(rp->pptr_recs) + xfblob_bytes(rp->pptr_names);
return bytes > XREP_PARENT_MAX_STASH_BYTES;
}
/*
 * Scan all directories in the filesystem to look for dirents that we can turn
 * into parent pointers.
 *
 * The current scrub transaction is cancelled and any ILOCK held on sc->ip is
 * dropped before the scan starts.  On success (return 0) the function ends
 * holding an empty transaction and ILOCK_EXCL on sc->ip.  Every error return
 * happens before the ILOCK is retaken; an -EBUSY from the scan is reported to
 * the caller as -ECANCELED.
 */
STATIC int
xrep_parent_scan_dirtree(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
struct xfs_inode *ip;
int error;
/*
 * Filesystem scans are time consuming. Drop the file ILOCK and all
 * other resources for the duration of the scan and hope for the best.
 * The live update hooks will keep our scan information up to date.
 */
xchk_trans_cancel(sc);
if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
XFS_ILOCK_EXCL));
error = xchk_trans_alloc_empty(sc);
if (error)
return error;
/*
 * Keep going for as long as the iterator returns 1 (it handed us an
 * inode); any other return value ends the loop and is left in @error.
 */
while ((error = xchk_iscan_iter(&rp->pscan.iscan, &ip)) == 1) {
bool flush;
error = xrep_parent_scan_file(rp, ip);
/* Release the inode before looking at the scan result. */
xchk_irele(sc, ip);
if (error)
break;
/* Flush stashed pptr updates to constrain memory usage. */
mutex_lock(&rp->pscan.lock);
flush = xrep_parent_want_flush_stashed(rp);
mutex_unlock(&rp->pscan.lock);
if (flush) {
/*
 * Replaying is done without the scan's empty transaction and
 * with the tempfile IOLOCK held; a fresh empty transaction is
 * allocated afterwards so the scan can continue.
 */
xchk_trans_cancel(sc);
error = xrep_tempfile_iolock_polled(sc);
if (error)
break;
error = xrep_parent_replay_updates(rp);
xrep_tempfile_iounlock(sc);
if (error)
break;
error = xchk_trans_alloc_empty(sc);
if (error)
break;
}
if (xchk_should_terminate(sc, &error))
break;
}
/* Finish the iteration on both the normal and the error paths. */
xchk_iscan_iter_finish(&rp->pscan.iscan);
if (error) {
/*
 * If we couldn't grab an inode that was busy with a state
 * change, change the error code so that we exit to userspace
 * as quickly as possible.
 */
if (error == -EBUSY)
return -ECANCELED;
return error;
}
/*
 * Retake sc->ip's ILOCK now that we're done flushing stashed parent
 * pointers. We end this function with an empty transaction and the
 * ILOCK.
 */
xchk_ilock(rp->sc, XFS_ILOCK_EXCL);
return 0;
}
/*
* Capture dirent updates being made by other threads which are relevant to the
* file being repaired.
*/
STATIC int
xrep_parent_live_update(
struct notifier_block *nb,
unsigned long action,
void *data)
{
struct xfs_dir_update_params *p = data;
struct xrep_parent *rp;
struct xfs_scrub *sc;
int error;
rp = container_of(nb, struct xrep_parent, pscan.dhook.dirent_hook.nb);
sc = rp->sc;
/*
* This thread updated a dirent that points to the file that we're
* repairing, so stash the update for replay against the temporary
* file.
*/
if (p->ip->i_ino == sc->ip->i_ino &&
xchk_iscan_want_live_update(&rp->pscan.iscan, p->dp->i_ino)) {
mutex_lock(&rp->pscan.lock);
if (p->delta > 0)
error = xrep_parent_stash_parentadd(rp, p->name, p->dp);
else
error = xrep_parent_stash_parentremove(rp, p->name,
p->dp);
if (!error)
rp->saw_pptr_updates = true;
mutex_unlock(&rp->pscan.lock);
if (error)
goto out_abort;
}
return NOTIFY_DONE;
out_abort:
xchk_iscan_abort(&rp->pscan.iscan);
return NOTIFY_DONE;
}
/* Reset a directory's dotdot entry, if needed. */
STATIC int
xrep_parent_reset_dotdot(
......@@ -190,6 +713,55 @@ xrep_parent_reset_dotdot(
return xfs_trans_roll(&sc->tp);
}
/* Pass back the parent inumber if this a parent pointer */
STATIC int
xrep_parent_lookup_pptr(
struct xfs_scrub *sc,
struct xfs_inode *ip,
unsigned int attr_flags,
const unsigned char *name,
unsigned int namelen,
const void *value,
unsigned int valuelen,
void *priv)
{
xfs_ino_t *inop = priv;
xfs_ino_t parent_ino;
int error;
if (!(attr_flags & XFS_ATTR_PARENT))
return 0;
error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value,
valuelen, &parent_ino, NULL);
if (error)
return error;
*inop = parent_ino;
return -ECANCELED;
}
/*
 * Walk the parent pointers of the scrub target and pass back the first
 * parent found through @inop, or NULLFSINO if there is none.  The result is
 * used to decide whether the file needs to go to the orphanage.  We don't
 * care if the attr fork is zapped.
 */
STATIC int
xrep_parent_lookup_pptrs(
	struct xfs_scrub	*sc,
	xfs_ino_t		*inop)
{
	int			error;

	*inop = NULLFSINO;

	error = xchk_xattr_walk(sc, sc->ip, xrep_parent_lookup_pptr, NULL,
			inop);

	/* -ECANCELED is how the callback reports that it found a parent. */
	if (error == -ECANCELED)
		return 0;
	return error;
}
/*
* Move the current file to the orphanage.
*
......@@ -206,14 +778,26 @@ xrep_parent_move_to_orphanage(
xfs_ino_t orig_parent, new_parent;
int error;
/*
* We are about to drop the ILOCK on sc->ip to lock the orphanage and
* prepare for the adoption. Therefore, look up the old dotdot entry
* for sc->ip so that we can compare it after we re-lock sc->ip.
*/
error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent);
if (error)
return error;
if (S_ISDIR(VFS_I(sc->ip)->i_mode)) {
/*
* We are about to drop the ILOCK on sc->ip to lock the
* orphanage and prepare for the adoption. Therefore, look up
* the old dotdot entry for sc->ip so that we can compare it
* after we re-lock sc->ip.
*/
error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot,
&orig_parent);
if (error)
return error;
} else {
/*
* We haven't dropped the ILOCK since we committed the new
* xattr structure (and hence the new parent pointer records),
* which means that the file cannot have been moved in the
* directory tree, and there are no parents.
*/
orig_parent = NULLFSINO;
}
/*
* Drop the ILOCK on the scrub target and commit the transaction.
......@@ -246,9 +830,14 @@ xrep_parent_move_to_orphanage(
* Now that we've reacquired the ILOCK on sc->ip, look up the dotdot
* entry again. If the parent changed or the child was unlinked while
* the child directory was unlocked, we don't need to move the child to
* the orphanage after all.
* the orphanage after all. For a non-directory, we have to scan for
* the first parent pointer to see if one has been added.
*/
error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent);
if (S_ISDIR(VFS_I(sc->ip)->i_mode))
error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot,
&new_parent);
else
error = xrep_parent_lookup_pptrs(sc, &new_parent);
if (error)
return error;
......@@ -275,55 +864,743 @@ xrep_parent_move_to_orphanage(
return 0;
}
/*
* Commit the new parent pointer structure (currently only the dotdot entry) to
* the file that we're repairing.
*/
/* Ensure that the xattr value buffer is large enough. */
STATIC int
xrep_parent_rebuild_tree(
struct xrep_parent *rp)
xrep_parent_alloc_xattr_value(
struct xrep_parent *rp,
size_t bufsize)
{
if (rp->pscan.parent_ino == NULLFSINO) {
if (xrep_orphanage_can_adopt(rp->sc))
return xrep_parent_move_to_orphanage(rp);
return -EFSCORRUPTED;
void *new_val;
if (rp->xattr_value_sz >= bufsize)
return 0;
if (rp->xattr_value) {
kvfree(rp->xattr_value);
rp->xattr_value = NULL;
rp->xattr_value_sz = 0;
}
return xrep_parent_reset_dotdot(rp);
new_val = kvmalloc(bufsize, XCHK_GFP_FLAGS);
if (!new_val)
return -ENOMEM;
rp->xattr_value = new_val;
rp->xattr_value_sz = bufsize;
return 0;
}
/* Set up the filesystem scan so we can look for parents. */
/* Retrieve the (remote) value of a non-pptr xattr. */
STATIC int
xrep_parent_setup_scan(
struct xrep_parent *rp)
xrep_parent_fetch_xattr_remote(
struct xrep_parent *rp,
struct xfs_inode *ip,
unsigned int attr_flags,
const unsigned char *name,
unsigned int namelen,
unsigned int valuelen)
{
struct xfs_scrub *sc = rp->sc;
struct xfs_da_args args = {
.attr_filter = attr_flags & XFS_ATTR_NSP_ONDISK_MASK,
.geo = sc->mp->m_attr_geo,
.whichfork = XFS_ATTR_FORK,
.dp = ip,
.name = name,
.namelen = namelen,
.trans = sc->tp,
.valuelen = valuelen,
.owner = ip->i_ino,
};
int error;
return xrep_findparent_scan_start(sc, &rp->pscan);
/*
* If we need a larger value buffer, try to allocate one. If that
* fails, return with -EDEADLOCK to try harder.
*/
error = xrep_parent_alloc_xattr_value(rp, valuelen);
if (error == -ENOMEM)
return -EDEADLOCK;
if (error)
return error;
args.value = rp->xattr_value;
xfs_attr_sethash(&args);
return xfs_attr_get_ilocked(&args);
}
int
xrep_parent(
struct xfs_scrub *sc)
/* Stash non-pptr attributes for later replay into the temporary file. */
STATIC int
xrep_parent_stash_xattr(
struct xfs_scrub *sc,
struct xfs_inode *ip,
unsigned int attr_flags,
const unsigned char *name,
unsigned int namelen,
const void *value,
unsigned int valuelen,
void *priv)
{
struct xrep_parent *rp = sc->buf;
struct xrep_parent_xattr key = {
.valuelen = valuelen,
.namelen = namelen,
.flags = attr_flags & XFS_ATTR_NSP_ONDISK_MASK,
};
struct xrep_parent *rp = priv;
int error;
error = xrep_parent_setup_scan(rp);
if (error)
return error;
if (attr_flags & (XFS_ATTR_INCOMPLETE | XFS_ATTR_PARENT))
return 0;
error = xrep_parent_find_dotdot(rp);
if (error)
goto out_teardown;
if (!value) {
error = xrep_parent_fetch_xattr_remote(rp, ip, attr_flags,
name, namelen, valuelen);
if (error)
return error;
value = rp->xattr_value;
}
trace_xrep_parent_stash_xattr(rp->sc->tempip, key.flags, (void *)name,
key.namelen, key.valuelen);
error = xfblob_store(rp->xattr_blobs, &key.name_cookie, name,
key.namelen);
if (error)
return error;
error = xfblob_store(rp->xattr_blobs, &key.value_cookie, value,
key.valuelen);
if (error)
return error;
return xfarray_append(rp->xattr_records, &key);
}
/* Insert one xattr key/value. */
STATIC int
xrep_parent_insert_xattr(
struct xrep_parent *rp,
const struct xrep_parent_xattr *key)
{
struct xfs_da_args args = {
.dp = rp->sc->tempip,
.attr_filter = key->flags,
.namelen = key->namelen,
.valuelen = key->valuelen,
.owner = rp->sc->ip->i_ino,
.geo = rp->sc->mp->m_attr_geo,
.whichfork = XFS_ATTR_FORK,
.op_flags = XFS_DA_OP_OKNOENT,
};
int error;
ASSERT(!(key->flags & XFS_ATTR_PARENT));
/*
* Grab pointers to the scrub buffer so that we can use them to insert
* attrs into the temp file.
*/
args.name = rp->xattr_name;
args.value = rp->xattr_value;
/*
* The attribute name is stored near the end of the in-core buffer,
* though we reserve one more byte to ensure null termination.
*/
rp->xattr_name[XATTR_NAME_MAX] = 0;
error = xfblob_load(rp->xattr_blobs, key->name_cookie, rp->xattr_name,
key->namelen);
if (error)
return error;
error = xfblob_free(rp->xattr_blobs, key->name_cookie);
if (error)
return error;
error = xfblob_load(rp->xattr_blobs, key->value_cookie, args.value,
key->valuelen);
if (error)
return error;
error = xfblob_free(rp->xattr_blobs, key->value_cookie);
if (error)
return error;
rp->xattr_name[key->namelen] = 0;
trace_xrep_parent_insert_xattr(rp->sc->tempip, key->flags,
rp->xattr_name, key->namelen, key->valuelen);
xfs_attr_sethash(&args);
return xfs_attr_set(&args, XFS_ATTRUPDATE_UPSERT, false);
}
/*
 * Periodically flush salvaged attributes to the temporary file. This is done
 * to reduce the memory requirements of the xattr rebuild because files can
 * contain millions of attributes.
 *
 * Returns 0 with a fresh empty transaction and ILOCK_EXCL retaken on the
 * inode being repaired, or a negative errno.  Every error return happens
 * after the ILOCK has been dropped and before it is retaken.
 */
STATIC int
xrep_parent_flush_xattrs(
struct xrep_parent *rp)
{
xfarray_idx_t array_cur;
int error;
/*
 * Entering this function, the scrub context has a reference to the
 * inode being repaired, the temporary file, and the empty scrub
 * transaction that we created for the xattr scan. We hold ILOCK_EXCL
 * on the inode being repaired.
 *
 * To constrain kernel memory use, we occasionally flush salvaged
 * xattrs from the xfarray and xfblob structures into the temporary
 * file in preparation for exchanging the xattr structures at the end.
 * Updating the temporary file requires a transaction, so we cancel the
 * scrub transaction and drop the ILOCK so that xfs_attr_set can
 * allocate whatever transaction it wants.
 *
 * We still hold IOLOCK_EXCL on the inode being repaired, which
 * prevents anyone from adding xattrs (or parent pointers) while we're
 * flushing.
 */
xchk_trans_cancel(rp->sc);
xchk_iunlock(rp->sc, XFS_ILOCK_EXCL);
/*
 * Take the IOLOCK of the temporary file while we modify xattrs. This
 * isn't strictly required because the temporary file is never revealed
 * to userspace, but we follow the same locking rules. We still hold
 * sc->ip's IOLOCK.
 */
error = xrep_tempfile_iolock_polled(rp->sc);
if (error)
return error;
/*
 * Add all the salvaged attrs to the temporary file.
 *
 * NOTE(review): the error returns inside this loop leave the tempfile
 * IOLOCK held; presumably scrub teardown releases it -- confirm.
 */
foreach_xfarray_idx(rp->xattr_records, array_cur) {
struct xrep_parent_xattr key;
error = xfarray_load(rp->xattr_records, array_cur, &key);
if (error)
return error;
error = xrep_parent_insert_xattr(rp, &key);
if (error)
return error;
}
/* Empty out both arrays now that we've added the entries. */
xfarray_truncate(rp->xattr_records);
xfblob_truncate(rp->xattr_blobs);
xrep_tempfile_iounlock(rp->sc);
/* Recreate the empty transaction and relock the inode. */
error = xchk_trans_alloc_empty(rp->sc);
if (error)
return error;
xchk_ilock(rp->sc, XFS_ILOCK_EXCL);
return 0;
}
/* Decide if we've stashed too much xattr data in memory. */
static inline bool
xrep_parent_want_flush_xattrs(
struct xrep_parent *rp)
{
unsigned long long bytes;
bytes = xfarray_bytes(rp->xattr_records) +
xfblob_bytes(rp->xattr_blobs);
return bytes > XREP_PARENT_XATTR_MAX_STASH_BYTES;
}
/* Flush staged attributes to the temporary file if we're over the limit. */
STATIC int
xrep_parent_try_flush_xattrs(
struct xfs_scrub *sc,
void *priv)
{
struct xrep_parent *rp = priv;
int error;
if (!xrep_parent_want_flush_xattrs(rp))
return 0;
error = xrep_parent_flush_xattrs(rp);
if (error)
return error;
/*
* If there were any parent pointer updates to the xattr structure
* while we dropped the ILOCK, the xattr structure is now stale.
* Signal to the attr copy process that we need to start over, but
* this time without opportunistic attr flushing.
*
* This is unlikely to happen, so we're ok with restarting the copy.
*/
mutex_lock(&rp->pscan.lock);
if (rp->saw_pptr_updates)
error = -ESTALE;
mutex_unlock(&rp->pscan.lock);
return error;
}
/* Copy all the non-pptr extended attributes into the temporary file. */
STATIC int
xrep_parent_copy_xattrs(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
int error;
/*
* Clear the pptr updates flag. We hold sc->ip ILOCKed, so there
* can't be any parent pointer updates in progress.
*/
mutex_lock(&rp->pscan.lock);
rp->saw_pptr_updates = false;
mutex_unlock(&rp->pscan.lock);
/* Copy xattrs, stopping periodically to flush the incore buffers. */
error = xchk_xattr_walk(sc, sc->ip, xrep_parent_stash_xattr,
xrep_parent_try_flush_xattrs, rp);
if (error && error != -ESTALE)
return error;
if (error == -ESTALE) {
/*
* The xattr copy collided with a parent pointer update.
* Restart the copy, but this time hold the ILOCK all the way
* to the end to lock out any directory parent pointer updates.
*/
error = xchk_xattr_walk(sc, sc->ip, xrep_parent_stash_xattr,
NULL, rp);
if (error)
return error;
}
/* Flush any remaining stashed xattrs to the temporary file. */
if (xfarray_bytes(rp->xattr_records) == 0)
return 0;
return xrep_parent_flush_xattrs(rp);
}
/*
* Ensure that @sc->ip and @sc->tempip both have attribute forks before we head
* into the attr fork exchange transaction. All files on a filesystem with
* parent pointers must have an attr fork because the parent pointer code does
* not itself add attribute forks.
*
* Note: Unlinkable unlinked files don't need one, but the overhead of having
* an unnecessary attr fork is not justified by the additional code complexity
* that would be needed to track that state correctly.
*/
STATIC int
xrep_parent_ensure_attr_fork(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
int error;
error = xfs_attr_add_fork(sc->tempip,
sizeof(struct xfs_attr_sf_hdr), 1);
if (error)
return error;
return xfs_attr_add_fork(sc->ip, sizeof(struct xfs_attr_sf_hdr), 1);
}
/*
 * Finish replaying stashed parent pointer updates, allocate a transaction for
 * exchanging extent mappings, and take the ILOCKs of both files before we
 * commit the new attribute structure.
 *
 * Returns 0 once a pass ends with no stashed pptr records left; in that case
 * the transaction allocated by xrep_tempexch_trans_alloc is left in place.
 * Otherwise returns the first error encountered, including the one set by
 * xchk_should_terminate().
 */
STATIC int
xrep_parent_finalize_tempfile(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
int error;
/*
 * Repair relies on the ILOCK to quiesce all possible xattr updates.
 * Replay all queued parent pointer updates into the tempfile before
 * exchanging the contents, even if that means dropping the ILOCKs and
 * the transaction.
 */
do {
error = xrep_parent_replay_updates(rp);
if (error)
return error;
error = xrep_parent_ensure_attr_fork(rp);
if (error)
return error;
error = xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rp->tx);
if (error)
return error;
/* Nothing is stashed any more, so we are ready to exchange. */
if (xfarray_length(rp->pptr_recs) == 0)
break;
/*
 * Records are stashed again; give up the transaction and both
 * ILOCKs and go around for another replay.
 */
xchk_trans_cancel(sc);
xrep_tempfile_iunlock_both(sc);
} while (!xchk_should_terminate(sc, &error));
return error;
}
/*
 * Replay all the stashed parent pointers into the temporary file, copy all
 * the non-pptr xattrs from the file being repaired into the temporary file,
 * and exchange the attr fork contents atomically.
 *
 * After the exchange, the first parent found (if any) is reported to the
 * parent scan so that the caller can decide whether the file has to be moved
 * to the orphanage.
 */
STATIC int
xrep_parent_rebuild_pptrs(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
xfs_ino_t parent_ino = NULLFSINO;
int error;
/*
 * Copy non-pptr xattrs from the file being repaired into the
 * temporary file's xattr structure. We hold sc->ip's IOLOCK, which
 * prevents setxattr/removexattr calls from occurring, but renames
 * update the parent pointers without holding IOLOCK. If we detect
 * stale attr structures, we restart the scan but only flush at the
 * end.
 */
error = xrep_parent_copy_xattrs(rp);
if (error)
return error;
/*
 * Cancel the empty transaction that we used to walk and copy attrs,
 * and drop the ILOCK so that we can take the IOLOCK on the temporary
 * file. We still hold sc->ip's IOLOCK.
 */
xchk_trans_cancel(sc);
xchk_iunlock(sc, XFS_ILOCK_EXCL);
error = xrep_tempfile_iolock_polled(sc);
if (error)
return error;
/*
 * Allocate transaction, lock inodes, and make sure that we've replayed
 * all the stashed pptr updates to the tempfile. After this point,
 * we're ready to exchange the attr fork mappings.
 */
error = xrep_parent_finalize_tempfile(rp);
if (error)
return error;
/* Last chance to abort before we start committing pptr fixes. */
if (xchk_should_terminate(sc, &error))
return error;
if (xchk_iscan_aborted(&rp->pscan.iscan))
return -ECANCELED;
/*
 * Exchange the attr fork contents and junk the old attr fork contents,
 * which are now in the tempfile.
 */
error = xrep_xattr_swap(sc, &rp->tx);
if (error)
return error;
error = xrep_xattr_reset_tempfile_fork(sc);
if (error)
return error;
/*
 * Roll to get a transaction without any inodes joined to it. Then we
 * can drop the tempfile's ILOCK and IOLOCK before doing more work on
 * the scrub target file.
 */
error = xfs_trans_roll(&sc->tp);
if (error)
return error;
xrep_tempfile_iunlock(sc);
xrep_tempfile_iounlock(sc);
/*
 * We've committed the new parent pointers. Find at least one parent
 * so that we can decide if we're moving this file to the orphanage.
 * For this purpose, root directories are their own parents.
 */
if (sc->ip == sc->mp->m_rootip) {
xrep_findparent_scan_found(&rp->pscan, sc->ip->i_ino);
} else {
error = xrep_parent_lookup_pptrs(sc, &parent_ino);
if (error)
return error;
/* Report a parent only if the walk actually found one. */
if (parent_ino != NULLFSINO)
xrep_findparent_scan_found(&rp->pscan, parent_ino);
}
return 0;
}
/*
* Commit the new parent pointer structure (currently only the dotdot entry) to
* the file that we're repairing.
*/
STATIC int
xrep_parent_rebuild_tree(
struct xrep_parent *rp)
{
int error;
if (xfs_has_parent(rp->sc->mp)) {
error = xrep_parent_rebuild_pptrs(rp);
if (error)
return error;
}
if (rp->pscan.parent_ino == NULLFSINO) {
if (xrep_orphanage_can_adopt(rp->sc))
return xrep_parent_move_to_orphanage(rp);
return -EFSCORRUPTED;
}
if (S_ISDIR(VFS_I(rp->sc->ip)->i_mode))
return xrep_parent_reset_dotdot(rp);
return 0;
}
/*
 * xattr walk callback: bump the parent count in the repair context for each
 * attribute that is a parent pointer and decodes cleanly.  Attributes that
 * are not parent pointers are skipped; a decoding failure is returned.
 */
STATIC int
xrep_parent_count_pptr(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	unsigned int		attr_flags,
	const unsigned char	*name,
	unsigned int		namelen,
	const void		*value,
	unsigned int		valuelen,
	void			*priv)
{
	struct xrep_parent	*rp = priv;
	int			error;

	if ((attr_flags & XFS_ATTR_PARENT) == 0)
		return 0;

	error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value,
			valuelen, NULL, NULL);
	if (!error)
		rp->parents++;
	return error;
}
/*
 * After all parent pointer rebuilding and adoption activity completes, reset
 * the link count of this nondirectory, having scanned the fs to rebuild all
 * parent pointers.
 *
 * The parent pointers are counted with an xattr walk.  The file is then moved
 * on or off the unlinked list so that list membership agrees with whether any
 * parents were found, and the VFS link count is set from the parent count,
 * capped at XFS_NLINK_PINNED.  The inode is joined to the scrub transaction
 * and logged only if one of these steps had something to change.
 */
STATIC int
xrep_parent_set_nondir_nlink(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
struct xfs_inode *ip = sc->ip;
struct xfs_perag *pag;
bool joined = false;
int error;
/* Count parent pointers so we can reset the file link count. */
rp->parents = 0;
error = xchk_xattr_walk(sc, ip, xrep_parent_count_pptr, NULL, rp);
if (error)
return error;
if (rp->parents > 0 && xfs_inode_on_unlinked_list(ip)) {
xfs_trans_ijoin(sc->tp, sc->ip, 0);
joined = true;
/*
 * The file is on the unlinked list but we found parents.
 * Remove the file from the unlinked list.
 */
pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, ip->i_ino));
if (!pag) {
ASSERT(0);
return -EFSCORRUPTED;
}
error = xfs_iunlink_remove(sc->tp, pag, ip);
/* Drop the perag reference before checking the result. */
xfs_perag_put(pag);
if (error)
return error;
} else if (rp->parents == 0 && !xfs_inode_on_unlinked_list(ip)) {
xfs_trans_ijoin(sc->tp, sc->ip, 0);
joined = true;
/*
 * The file is not on the unlinked list but we found no
 * parents. Add the file to the unlinked list.
 */
error = xfs_iunlink(sc->tp, ip);
if (error)
return error;
}
/* Set the correct link count. */
if (VFS_I(ip)->i_nlink != rp->parents) {
/* Join only once; an unlinked list change above already did it. */
if (!joined) {
xfs_trans_ijoin(sc->tp, sc->ip, 0);
joined = true;
}
set_nlink(VFS_I(ip), min_t(unsigned long long, rp->parents,
XFS_NLINK_PINNED));
}
/* Log the inode to keep it moving forward if we dirtied anything. */
if (joined)
xfs_trans_log_inode(sc->tp, ip, XFS_ILOG_CORE);
return 0;
}
/*
 * Set up the filesystem scan so we can look for parents.
 *
 * Without the parent pointer feature, this only starts the plain findparent
 * scan.  With parent pointers, it first allocates the xattr name and value
 * buffers and the xfarray/xfblob staging structures for parent pointer
 * updates and retained xattrs, then starts the scan with
 * xrep_parent_live_update as the live update callback.  On failure,
 * everything allocated here is released again and the pointers are reset to
 * NULL.
 */
STATIC int
xrep_parent_setup_scan(
struct xrep_parent *rp)
{
struct xfs_scrub *sc = rp->sc;
char *descr;
struct xfs_da_geometry *geo = sc->mp->m_attr_geo;
int max_len;
int error;
if (!xfs_has_parent(sc->mp))
return xrep_findparent_scan_start(sc, &rp->pscan);
/* Buffers for copying non-pptr attrs to the tempfile */
rp->xattr_name = kvmalloc(XATTR_NAME_MAX + 1, XCHK_GFP_FLAGS);
if (!rp->xattr_name)
return -ENOMEM;
/*
 * Allocate enough memory to handle loading local attr values from the
 * xfblob data while flushing stashed attrs to the temporary file.
 * We only realloc the buffer when salvaging remote attr values, so
 * TRY_HARDER means we allocate the maximal attr value size.
 */
if (sc->flags & XCHK_TRY_HARDER)
max_len = XATTR_SIZE_MAX;
else
max_len = xfs_attr_leaf_entsize_local_max(geo->blksize);
error = xrep_parent_alloc_xattr_value(rp, max_len);
if (error)
goto out_xattr_name;
/* Set up some staging memory for logging parent pointer updates. */
descr = xchk_xfile_ino_descr(sc, "parent pointer entries");
error = xfarray_create(descr, 0, sizeof(struct xrep_pptr),
&rp->pptr_recs);
kfree(descr);
if (error)
goto out_xattr_value;
descr = xchk_xfile_ino_descr(sc, "parent pointer names");
error = xfblob_create(descr, &rp->pptr_names);
kfree(descr);
if (error)
goto out_recs;
/* Set up some storage for copying attrs before the mapping exchange */
descr = xchk_xfile_ino_descr(sc,
"parent pointer retained xattr entries");
error = xfarray_create(descr, 0, sizeof(struct xrep_parent_xattr),
&rp->xattr_records);
kfree(descr);
if (error)
goto out_names;
descr = xchk_xfile_ino_descr(sc,
"parent pointer retained xattr values");
error = xfblob_create(descr, &rp->xattr_blobs);
kfree(descr);
if (error)
goto out_attr_keys;
error = __xrep_findparent_scan_start(sc, &rp->pscan,
xrep_parent_live_update);
if (error)
goto out_attr_values;
return 0;
/* Unwind in the reverse order of allocation. */
out_attr_values:
xfblob_destroy(rp->xattr_blobs);
rp->xattr_blobs = NULL;
out_attr_keys:
xfarray_destroy(rp->xattr_records);
rp->xattr_records = NULL;
out_names:
xfblob_destroy(rp->pptr_names);
rp->pptr_names = NULL;
out_recs:
xfarray_destroy(rp->pptr_recs);
rp->pptr_recs = NULL;
out_xattr_value:
kvfree(rp->xattr_value);
rp->xattr_value = NULL;
out_xattr_name:
kvfree(rp->xattr_name);
rp->xattr_name = NULL;
return error;
}
int
xrep_parent(
struct xfs_scrub *sc)
{
struct xrep_parent *rp = sc->buf;
int error;
/*
* When the parent pointers feature is enabled, repairs are committed
* by atomically committing a new xattr structure and reaping the old
* attr fork. Reaping requires rmap to be enabled.
*/
if (xfs_has_parent(sc->mp) && !xfs_has_rmapbt(sc->mp))
return -EOPNOTSUPP;
error = xrep_parent_setup_scan(rp);
if (error)
return error;
if (xfs_has_parent(sc->mp))
error = xrep_parent_scan_dirtree(rp);
else
error = xrep_parent_find_dotdot(rp);
if (error)
goto out_teardown;
/* Last chance to abort before we start committing dotdot fixes. */
if (xchk_should_terminate(sc, &error))
goto out_teardown;
error = xrep_parent_rebuild_tree(rp);
if (error)
goto out_teardown;
if (xfs_has_parent(sc->mp) && !S_ISDIR(VFS_I(sc->ip)->i_mode)) {
error = xrep_parent_set_nondir_nlink(rp);
if (error)
goto out_teardown;
}
error = xrep_defer_finish(sc);
out_teardown:
xrep_parent_teardown(rp);
......
......@@ -19,6 +19,8 @@
#include "xfs_rmap.h"
#include "xfs_exchrange.h"
#include "xfs_exchmaps.h"
#include "xfs_dir2.h"
#include "xfs_parent.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
......
......@@ -2539,6 +2539,46 @@ DEFINE_EVENT(xrep_xattr_salvage_class, name, \
TP_ARGS(ip, flags, name, namelen, valuelen))
DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_salvage_rec);
DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_insert_rec);
DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_parent_stash_xattr);
DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_parent_insert_xattr);
/*
 * Trace event class for parent pointer xattrs.  Records the device and inode
 * number of @ip, the parent inode number and generation decoded from the
 * big-endian xfs_parent_rec that @value points to, and @namelen bytes of
 * @name.  @flags and @valuelen are accepted but not recorded.
 */
DECLARE_EVENT_CLASS(xrep_pptr_salvage_class,
TP_PROTO(struct xfs_inode *ip, unsigned int flags, const void *name,
unsigned int namelen, const void *value, unsigned int valuelen),
TP_ARGS(ip, flags, name, namelen, value, valuelen),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_ino_t, parent_ino)
__field(unsigned int, parent_gen)
__field(unsigned int, namelen)
__dynamic_array(char, name, namelen)
),
TP_fast_assign(
const struct xfs_parent_rec *rec = value;
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->parent_ino = be64_to_cpu(rec->p_ino);
__entry->parent_gen = be32_to_cpu(rec->p_gen);
__entry->namelen = namelen;
memcpy(__get_str(name), name, namelen);
),
TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx parent_gen 0x%x name '%.*s'",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->parent_ino,
__entry->parent_gen,
__entry->namelen,
__get_str(name))
)
/* Define one event that belongs to xrep_pptr_salvage_class. */
#define DEFINE_XREP_PPTR_SALVAGE_EVENT(name) \
DEFINE_EVENT(xrep_pptr_salvage_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned int flags, const void *name, \
unsigned int namelen, const void *value, unsigned int valuelen), \
TP_ARGS(ip, flags, name, namelen, value, valuelen))
DEFINE_XREP_PPTR_SALVAGE_EVENT(xrep_xattr_salvage_pptr);
DEFINE_XREP_PPTR_SALVAGE_EVENT(xrep_xattr_insert_pptr);
TRACE_EVENT(xrep_xattr_class,
TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip),
......@@ -2564,6 +2604,43 @@ DEFINE_EVENT(xrep_xattr_class, name, \
TP_ARGS(ip, arg_ip))
DEFINE_XREP_XATTR_EVENT(xrep_xattr_rebuild_tree);
DEFINE_XREP_XATTR_EVENT(xrep_xattr_reset_fork);
DEFINE_XREP_XATTR_EVENT(xrep_xattr_full_reset);
/*
 * Trace event class for stashing a parent pointer update during xattr repair.
 * Records the device and inode number of @ip, the inode number and generation
 * of the directory @dp, and the dirent @name.
 */
DECLARE_EVENT_CLASS(xrep_xattr_pptr_scan_class,
TP_PROTO(struct xfs_inode *ip, const struct xfs_inode *dp,
const struct xfs_name *name),
TP_ARGS(ip, dp, name),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_ino_t, parent_ino)
__field(unsigned int, parent_gen)
__field(unsigned int, namelen)
__dynamic_array(char, name, name->len)
),
TP_fast_assign(
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->parent_ino = dp->i_ino;
__entry->parent_gen = VFS_IC(dp)->i_generation;
__entry->namelen = name->len;
memcpy(__get_str(name), name->name, name->len);
),
TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx parent_gen 0x%x name '%.*s'",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->parent_ino,
__entry->parent_gen,
__entry->namelen,
__get_str(name))
)
/* Define one event that belongs to xrep_xattr_pptr_scan_class. */
#define DEFINE_XREP_XATTR_PPTR_SCAN_EVENT(name) \
DEFINE_EVENT(xrep_xattr_pptr_scan_class, name, \
TP_PROTO(struct xfs_inode *ip, const struct xfs_inode *dp, \
const struct xfs_name *name), \
TP_ARGS(ip, dp, name))
DEFINE_XREP_XATTR_PPTR_SCAN_EVENT(xrep_xattr_stash_parentadd);
DEFINE_XREP_XATTR_PPTR_SCAN_EVENT(xrep_xattr_stash_parentremove);
TRACE_EVENT(xrep_dir_recover_dirblock,
TP_PROTO(struct xfs_inode *dp, xfs_dablk_t dabno, uint32_t magic,
......@@ -2654,6 +2731,8 @@ DEFINE_XREP_DIRENT_EVENT(xrep_dir_salvage_entry);
DEFINE_XREP_DIRENT_EVENT(xrep_dir_stash_createname);
DEFINE_XREP_DIRENT_EVENT(xrep_dir_replay_createname);
DEFINE_XREP_DIRENT_EVENT(xrep_adoption_reparent);
DEFINE_XREP_DIRENT_EVENT(xrep_dir_stash_removename);
DEFINE_XREP_DIRENT_EVENT(xrep_dir_replay_removename);
DECLARE_EVENT_CLASS(xrep_adoption_class,
TP_PROTO(struct xfs_inode *dp, struct xfs_inode *ip, bool moved),
......@@ -2708,6 +2787,80 @@ DEFINE_XREP_PARENT_SALVAGE_EVENT(xrep_dir_salvaged_parent);
DEFINE_XREP_PARENT_SALVAGE_EVENT(xrep_findparent_dirent);
DEFINE_XREP_PARENT_SALVAGE_EVENT(xrep_findparent_from_dcache);
/*
 * Trace event class for replaying a parent pointer update.  Records the
 * device and inode number of @ip, the parent inode number and generation
 * decoded from the big-endian fields of @pptr, and the dirent @name.
 */
DECLARE_EVENT_CLASS(xrep_pptr_class,
TP_PROTO(struct xfs_inode *ip, const struct xfs_name *name,
const struct xfs_parent_rec *pptr),
TP_ARGS(ip, name, pptr),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_ino_t, parent_ino)
__field(unsigned int, parent_gen)
__field(unsigned int, namelen)
__dynamic_array(char, name, name->len)
),
TP_fast_assign(
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->parent_ino = be64_to_cpu(pptr->p_ino);
__entry->parent_gen = be32_to_cpu(pptr->p_gen);
__entry->namelen = name->len;
memcpy(__get_str(name), name->name, name->len);
),
TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx parent_gen 0x%x name '%.*s'",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->parent_ino,
__entry->parent_gen,
__entry->namelen,
__get_str(name))
)
/* Define one event that belongs to xrep_pptr_class. */
#define DEFINE_XREP_PPTR_EVENT(name) \
DEFINE_EVENT(xrep_pptr_class, name, \
TP_PROTO(struct xfs_inode *ip, const struct xfs_name *name, \
const struct xfs_parent_rec *pptr), \
TP_ARGS(ip, name, pptr))
DEFINE_XREP_PPTR_EVENT(xrep_xattr_replay_parentadd);
DEFINE_XREP_PPTR_EVENT(xrep_xattr_replay_parentremove);
DEFINE_XREP_PPTR_EVENT(xrep_parent_replay_parentadd);
DEFINE_XREP_PPTR_EVENT(xrep_parent_replay_parentremove);
/*
 * Trace event class for stashing a parent pointer update during parent
 * repair.  Records the device and inode number of @ip, the inode number and
 * generation of the directory @dp, and the dirent @name.  The recorded
 * fields and output format are the same as in xrep_xattr_pptr_scan_class.
 */
DECLARE_EVENT_CLASS(xrep_pptr_scan_class,
TP_PROTO(struct xfs_inode *ip, const struct xfs_inode *dp,
const struct xfs_name *name),
TP_ARGS(ip, dp, name),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_ino_t, parent_ino)
__field(unsigned int, parent_gen)
__field(unsigned int, namelen)
__dynamic_array(char, name, name->len)
),
TP_fast_assign(
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->parent_ino = dp->i_ino;
__entry->parent_gen = VFS_IC(dp)->i_generation;
__entry->namelen = name->len;
memcpy(__get_str(name), name->name, name->len);
),
TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx parent_gen 0x%x name '%.*s'",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->parent_ino,
__entry->parent_gen,
__entry->namelen,
__get_str(name))
)
/* Define one event that belongs to xrep_pptr_scan_class. */
#define DEFINE_XREP_PPTR_SCAN_EVENT(name) \
DEFINE_EVENT(xrep_pptr_scan_class, name, \
TP_PROTO(struct xfs_inode *ip, const struct xfs_inode *dp, \
const struct xfs_name *name), \
TP_ARGS(ip, dp, name))
DEFINE_XREP_PPTR_SCAN_EVENT(xrep_parent_stash_parentadd);
DEFINE_XREP_PPTR_SCAN_EVENT(xrep_parent_stash_parentremove);
TRACE_EVENT(xrep_nlinks_set_record,
TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino,
const struct xchk_nlink *obs),
......
......@@ -17,6 +17,7 @@
#include "xfs_acl.h"
#include "xfs_log.h"
#include "xfs_xattr.h"
#include "xfs_quota.h"
#include <linux/posix_acl_xattr.h>
......@@ -70,7 +71,9 @@ xfs_attr_want_log_assist(
/*
* Set or remove an xattr, having grabbed the appropriate logging resources
* prior to calling libxfs.
* prior to calling libxfs. Callers of this function are only required to
* initialize the inode, attr_filter, name, namelen, value, and valuelen fields
* of @args.
*/
int
xfs_attr_change(
......@@ -80,7 +83,19 @@ xfs_attr_change(
struct xfs_mount *mp = args->dp->i_mount;
int error;
ASSERT(!(args->op_flags & XFS_DA_OP_LOGGED));
if (xfs_is_shutdown(mp))
return -EIO;
error = xfs_qm_dqattach(args->dp);
if (error)
return error;
/*
* We have no control over the attribute names that userspace passes us
* to remove, so we have to allow the name lookup prior to attribute
* removal to fail as well.
*/
args->op_flags = XFS_DA_OP_OKNOENT;
if (xfs_attr_want_log_assist(mp)) {
error = xfs_attr_grab_log_assist(mp);
......@@ -90,7 +105,12 @@ xfs_attr_change(
args->op_flags |= XFS_DA_OP_LOGGED;
}
return xfs_attr_set(args, op);
args->owner = args->dp->i_ino;
args->geo = mp->m_attr_geo;
args->whichfork = XFS_ATTR_FORK;
xfs_attr_sethash(args);
return xfs_attr_set(args, op, args->attr_filter & XFS_ATTR_ROOT);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment