Commit f0eb870a authored by Linus Torvalds

Merge tag 'xfs-5.14-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
 "A few fixes for issues in the new online shrink code, additional
  corrections for my recent bug-hunt w.r.t. extent size hints on
  realtime, and improved input checking of the GROWFSRT ioctl.

  IOW, the usual 'I somehow got bored during the merge window and
  resumed auditing the farther reaches of xfs':

   - Fix shrink eligibility checking when sparse inode clusters enabled

   - Reset '..' directory entries when unlinking directories to prevent
     verifier errors if the fs is shrunk later

   - Don't report unusable extent size hints to FSGETXATTR

   - Don't warn when extent size hints are unusable because the sysadmin
     configured them that way

   - Fix insufficient parameter validation in GROWFSRT ioctl

   - Fix integer overflow when adding rt volumes to filesystem"

* tag 'xfs-5.14-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: detect misaligned rtinherit directory extent size hints
  xfs: fix an integer overflow error in xfs_growfs_rt
  xfs: improve FSGROWFSRT precondition checking
  xfs: don't expose misaligned extszinherit hints to userspace
  xfs: correct the narrative around misaligned rtinherit/extszinherit dirs
  xfs: reset child dir '..' entry when unlinking child
  xfs: check for sparse inode clusters that cross new EOAG when shrinking
parents fbf1bddc b102a46c
...@@ -803,6 +803,14 @@ xfs_ag_shrink_space( ...@@ -803,6 +803,14 @@ xfs_ag_shrink_space(
args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta); args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta);
/*
* Make sure that the last inode cluster cannot overlap with the new
* end of the AG, even if it's sparse.
*/
error = xfs_ialloc_check_shrink(*tpp, agno, agibp, aglen - delta);
if (error)
return error;
/* /*
* Disable perag reservations so it doesn't cause the allocation request * Disable perag reservations so it doesn't cause the allocation request
* to fail. We'll reestablish reservation before we return. * to fail. We'll reestablish reservation before we return.
......
...@@ -2928,3 +2928,58 @@ xfs_ialloc_calc_rootino( ...@@ -2928,3 +2928,58 @@ xfs_ialloc_calc_rootino(
return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno)); return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
} }
/*
 * Refuse a shrink that would leave a sparse inode record straddling the new
 * end of the AG.
 *
 * Filesystems without sparse inodes need no check here: their clusters are
 * always fully allocated, so checking the bnobt suffices.  With sparse
 * inodes, a record's upper inodes may be backed by free blocks; if those
 * blocks were removed from the filesystem, the record would extend beyond
 * EOAG, which will be flagged as corruption.
 *
 * Returns 0 if the shrink is acceptable (or the feature is disabled),
 * -ENOSPC if the record at or below the new end reaches past it,
 * -EFSCORRUPTED if the lookup succeeded but the record could not be fetched,
 * or whatever error the inobt lookup / record read returned.
 */
int
xfs_ialloc_check_shrink(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	struct xfs_buf		*agibp,
	xfs_agblock_t		new_length)
{
	struct xfs_mount		*mp = tp->t_mountp;
	struct xfs_inobt_rec_incore	irec;
	struct xfs_btree_cur		*icur;
	struct xfs_perag		*pag;
	xfs_agino_t			new_end_agino;
	int				found;
	int				error;

	if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
		return 0;

	/* First AG inode number that would lie at the new end of the AG. */
	new_end_agino = XFS_AGB_TO_AGINO(mp, new_length);

	pag = xfs_perag_get(mp, agno);
	icur = xfs_inobt_init_cursor(mp, tp, agibp, pag, XFS_BTNUM_INO);

	/* Find the inobt record at or below the new end, if there is one. */
	error = xfs_inobt_lookup(icur, new_end_agino, XFS_LOOKUP_LE, &found);
	if (!error && found) {
		error = xfs_inobt_get_rec(icur, &irec, &found);
		if (!error) {
			if (!found) {
				/* Lookup said a record exists but we got none. */
				error = -EFSCORRUPTED;
			} else if (irec.ir_startino + XFS_INODES_PER_CHUNK >
				   new_end_agino) {
				/* The chunk would poke past the new end. */
				error = -ENOSPC;
			}
		}
	}

	/* Common teardown: the cursor and perag reference are always held here. */
	xfs_btree_del_cursor(icur, error);
	xfs_perag_put(pag);
	return error;
}
...@@ -122,4 +122,7 @@ int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); ...@@ -122,4 +122,7 @@ int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
void xfs_ialloc_setup_geometry(struct xfs_mount *mp); void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit); xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
int xfs_ialloc_check_shrink(struct xfs_trans *tp, xfs_agnumber_t agno,
struct xfs_buf *agibp, xfs_agblock_t new_length);
#endif /* __XFS_IALLOC_H__ */ #endif /* __XFS_IALLOC_H__ */
...@@ -592,23 +592,27 @@ xfs_inode_validate_extsize( ...@@ -592,23 +592,27 @@ xfs_inode_validate_extsize(
/* /*
* This comment describes a historic gap in this verifier function. * This comment describes a historic gap in this verifier function.
* *
* On older kernels, the extent size hint verifier doesn't check that * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this
* the extent size hint is an integer multiple of the realtime extent * function has never checked that the extent size hint is an integer
* size on a directory with both RTINHERIT and EXTSZINHERIT flags set. * multiple of the realtime extent size. Since we allow users to set
* The verifier has always enforced the alignment rule for regular * this combination on non-rt filesystems /and/ to change the rt
* files with the REALTIME flag set. * extent size when adding a rt device to a filesystem, the net effect
* is that users can configure a filesystem anticipating one rt
* geometry and change their minds later. Directories do not use the
* extent size hint, so this is harmless for them.
* *
* If a directory with a misaligned extent size hint is allowed to * If a directory with a misaligned extent size hint is allowed to
* propagate that hint into a new regular realtime file, the result * propagate that hint into a new regular realtime file, the result
* is that the inode cluster buffer verifier will trigger a corruption * is that the inode cluster buffer verifier will trigger a corruption
* shutdown the next time it is run. * shutdown the next time it is run, because the verifier has always
* enforced the alignment rule for regular files.
* *
* Unfortunately, there could be filesystems with these misconfigured * Because we allow administrators to set a new rt extent size when
* directories in the wild, so we cannot add a check to this verifier * adding a rt section, we cannot add a check to this verifier because
* at this time because that will result a new source of directory * that will result a new source of directory corruption errors when
* corruption errors when reading an existing filesystem. Instead, we * reading an existing filesystem. Instead, we rely on callers to
* permit the misconfiguration to pass through the verifiers so that * decide when alignment checks are appropriate, and fix things up as
* callers of this function can correct and mitigate externally. * needed.
*/ */
if (rt_flag) if (rt_flag)
......
...@@ -143,16 +143,14 @@ xfs_trans_log_inode( ...@@ -143,16 +143,14 @@ xfs_trans_log_inode(
} }
/* /*
* Inode verifiers on older kernels don't check that the extent size * Inode verifiers do not check that the extent size hint is an integer
* hint is an integer multiple of the rt extent size on a directory * multiple of the rt extent size on a directory with both rtinherit
* with both rtinherit and extszinherit flags set. If we're logging a * and extszinherit flags set. If we're logging a directory that is
* directory that is misconfigured in this way, clear the hint. * misconfigured in this way, clear the hint.
*/ */
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) && if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
(ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) { (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
xfs_info_once(ip->i_mount,
"Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino);
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE | ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
XFS_DIFLAG_EXTSZINHERIT); XFS_DIFLAG_EXTSZINHERIT);
ip->i_extsize = 0; ip->i_extsize = 0;
......
...@@ -73,11 +73,25 @@ xchk_inode_extsize( ...@@ -73,11 +73,25 @@ xchk_inode_extsize(
uint16_t flags) uint16_t flags)
{ {
xfs_failaddr_t fa; xfs_failaddr_t fa;
uint32_t value = be32_to_cpu(dip->di_extsize);
fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), fa = xfs_inode_validate_extsize(sc->mp, value, mode, flags);
mode, flags);
if (fa) if (fa)
xchk_ino_set_corrupt(sc, ino); xchk_ino_set_corrupt(sc, ino);
/*
* XFS allows a sysadmin to change the rt extent size when adding a rt
* section to a filesystem after formatting. If there are any
* directories with extszinherit and rtinherit set, the hint could
* become misaligned with the new rextsize. The verifier doesn't check
* this, because we allow rtinherit directories even without an rt
* device. Flag this as an administrative warning since we will clean
* this up eventually.
*/
if ((flags & XFS_DIFLAG_RTINHERIT) &&
(flags & XFS_DIFLAG_EXTSZINHERIT) &&
value % sc->mp->m_sb.sb_rextsize > 0)
xchk_ino_set_warning(sc, ino);
} }
/* /*
......
...@@ -2763,6 +2763,19 @@ xfs_remove( ...@@ -2763,6 +2763,19 @@ xfs_remove(
error = xfs_droplink(tp, ip); error = xfs_droplink(tp, ip);
if (error) if (error)
goto out_trans_cancel; goto out_trans_cancel;
/*
 * Point the unlinked child directory's ".." entry to the root
 * directory to eliminate back-references to inodes that may
 * get freed before the child directory is closed.  If the fs
 * gets shrunk, this can lead to dirent inode validation errors.
 */
if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
	error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
			tp->t_mountp->m_sb.sb_rootino, 0);
	/*
	 * Leave through out_trans_cancel, exactly like the
	 * xfs_droplink() failure just above, instead of returning
	 * directly; a bare return would skip that cleanup path.
	 */
	if (error)
		goto out_trans_cancel;
}
} else { } else {
/* /*
* When removing a non-directory we need to log the parent * When removing a non-directory we need to log the parent
......
...@@ -1065,7 +1065,24 @@ xfs_fill_fsxattr( ...@@ -1065,7 +1065,24 @@ xfs_fill_fsxattr(
fileattr_fill_xflags(fa, xfs_ip2xflags(ip)); fileattr_fill_xflags(fa, xfs_ip2xflags(ip));
fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize); if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) {
fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize);
} else if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
/*
* Don't let a misaligned extent size hint on a directory
* escape to userspace if it won't pass the setattr checks
* later.
*/
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
ip->i_extsize % mp->m_sb.sb_rextsize > 0) {
fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE |
FS_XFLAG_EXTSZINHERIT);
fa->fsx_extsize = 0;
} else {
fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize);
}
}
if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
fa->fsx_cowextsize = XFS_FSB_TO_B(mp, ip->i_cowextsize); fa->fsx_cowextsize = XFS_FSB_TO_B(mp, ip->i_cowextsize);
fa->fsx_projid = ip->i_projid; fa->fsx_projid = ip->i_projid;
...@@ -1292,10 +1309,10 @@ xfs_ioctl_setattr_check_extsize( ...@@ -1292,10 +1309,10 @@ xfs_ioctl_setattr_check_extsize(
new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags); new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
/* /*
* Inode verifiers on older kernels don't check that the extent size * Inode verifiers do not check that the extent size hint is an integer
* hint is an integer multiple of the rt extent size on a directory * multiple of the rt extent size on a directory with both rtinherit
* with both rtinherit and extszinherit flags set. Don't let sysadmins * and extszinherit flags set. Don't let sysadmins misconfigure
* misconfigure directories. * directories.
*/ */
if ((new_diflags & XFS_DIFLAG_RTINHERIT) && if ((new_diflags & XFS_DIFLAG_RTINHERIT) &&
(new_diflags & XFS_DIFLAG_EXTSZINHERIT)) { (new_diflags & XFS_DIFLAG_EXTSZINHERIT)) {
......
...@@ -923,16 +923,41 @@ xfs_growfs_rt( ...@@ -923,16 +923,41 @@ xfs_growfs_rt(
uint8_t *rsum_cache; /* old summary cache */ uint8_t *rsum_cache; /* old summary cache */
sbp = &mp->m_sb; sbp = &mp->m_sb;
/*
* Initial error checking.
*/
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
(nrblocks = in->newblocks) <= sbp->sb_rblocks || /* Needs to have been mounted with an rt device. */
(sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize))) if (!XFS_IS_REALTIME_MOUNT(mp))
return -EINVAL;
/*
* Mount should fail if the rt bitmap/summary files don't load, but
* we'll check anyway.
*/
if (!mp->m_rbmip || !mp->m_rsumip)
return -EINVAL;
/* Shrink not supported. */
if (in->newblocks <= sbp->sb_rblocks)
return -EINVAL;
/* Can only change rt extent size when adding rt volume. */
if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize)
return -EINVAL;
/* Range check the extent size. */
if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE ||
XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE)
return -EINVAL; return -EINVAL;
if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
/* Unsupported realtime features. */
if (xfs_sb_version_hasrmapbt(&mp->m_sb) ||
xfs_sb_version_hasreflink(&mp->m_sb))
return -EOPNOTSUPP;
nrblocks = in->newblocks;
error = xfs_sb_validate_fsb_count(sbp, nrblocks);
if (error)
return error; return error;
/* /*
* Read in the last block of the device, make sure it exists. * Read in the last block of the device, make sure it exists.
...@@ -996,7 +1021,8 @@ xfs_growfs_rt( ...@@ -996,7 +1021,8 @@ xfs_growfs_rt(
((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0); ((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
bmbno < nrbmblocks; bmbno < nrbmblocks;
bmbno++) { bmbno++) {
xfs_trans_t *tp; struct xfs_trans *tp;
xfs_rfsblock_t nrblocks_step;
*nmp = *mp; *nmp = *mp;
nsbp = &nmp->m_sb; nsbp = &nmp->m_sb;
...@@ -1005,10 +1031,9 @@ xfs_growfs_rt( ...@@ -1005,10 +1031,9 @@ xfs_growfs_rt(
*/ */
nsbp->sb_rextsize = in->extsize; nsbp->sb_rextsize = in->extsize;
nsbp->sb_rbmblocks = bmbno + 1; nsbp->sb_rbmblocks = bmbno + 1;
nsbp->sb_rblocks = nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize *
XFS_RTMIN(nrblocks, nsbp->sb_rextsize;
nsbp->sb_rbmblocks * NBBY * nsbp->sb_rblocks = min(nrblocks, nrblocks_step);
nsbp->sb_blocksize * nsbp->sb_rextsize);
nsbp->sb_rextents = nsbp->sb_rblocks; nsbp->sb_rextents = nsbp->sb_rblocks;
do_div(nsbp->sb_rextents, nsbp->sb_rextsize); do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
ASSERT(nsbp->sb_rextents != 0); ASSERT(nsbp->sb_rextents != 0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment