Commit 75b9c727 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'xfs-5.13-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
 "This week's pile mitigates some decades-old problems in how extent
  size hints interact with realtime volumes, fixes some failures in
  online shrink, and fixes a problem where directory and symlink
  shrinking on extremely fragmented filesystems could fail.

  The most user-notable change here is to point users at our (new) IRC
  channel on OFTC. Freedom isn't free, it costs folks like you and me;
  and if you don't kowtow, they'll expel everyone and take over your
  channel. (Ok, ok, that didn't fit the song lyrics...)

  Summary:

   - Fix a bug where unmapping operations end earlier than expected,
     which can cause chaos on multi-block directory and symlink shrink
     operations.

   - Fix an erroneous assert that can trigger if we try to transition a
     bmap structure from btree format to extents format with zero
     extents. This was exposed by xfs/538"

* tag 'xfs-5.13-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: bunmapi has unnecessary AG lock ordering issues
  xfs: btree format inode forks can have zero extents
  xfs: add new IRC channel to MAINTAINERS
  xfs: validate extsz hints against rt extent size when rtinherit is set
  xfs: standardize extent size hint validation
  xfs: check free AG space when making per-AG reservations
parents df8c66c4 0fe0bbe0
...@@ -20014,6 +20014,7 @@ F: arch/x86/xen/*swiotlb* ...@@ -20014,6 +20014,7 @@ F: arch/x86/xen/*swiotlb*
F: drivers/xen/*swiotlb* F: drivers/xen/*swiotlb*
XFS FILESYSTEM XFS FILESYSTEM
C: irc://irc.oftc.net/xfs
M: Darrick J. Wong <djwong@kernel.org> M: Darrick J. Wong <djwong@kernel.org>
M: linux-xfs@vger.kernel.org M: linux-xfs@vger.kernel.org
L: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org
......
...@@ -325,10 +325,22 @@ xfs_ag_resv_init( ...@@ -325,10 +325,22 @@ xfs_ag_resv_init(
error2 = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, 0); error2 = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, 0);
if (error2) if (error2)
return error2; return error2;
ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <= /*
pag->pagf_freeblks + pag->pagf_flcount); * If there isn't enough space in the AG to satisfy the
* reservation, let the caller know that there wasn't enough
* space. Callers are responsible for deciding what to do
* next, since (in theory) we can stumble along with
* insufficient reservation if data blocks are being freed to
* replenish the AG's free space.
*/
if (!error &&
xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved >
pag->pagf_freeblks + pag->pagf_flcount)
error = -ENOSPC;
} }
return error; return error;
} }
......
...@@ -605,7 +605,6 @@ xfs_bmap_btree_to_extents( ...@@ -605,7 +605,6 @@ xfs_bmap_btree_to_extents(
ASSERT(cur); ASSERT(cur);
ASSERT(whichfork != XFS_COW_FORK); ASSERT(whichfork != XFS_COW_FORK);
ASSERT(!xfs_need_iread_extents(ifp));
ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
ASSERT(be16_to_cpu(rblock->bb_level) == 1); ASSERT(be16_to_cpu(rblock->bb_level) == 1);
ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1); ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
...@@ -5350,7 +5349,6 @@ __xfs_bunmapi( ...@@ -5350,7 +5349,6 @@ __xfs_bunmapi(
xfs_fsblock_t sum; xfs_fsblock_t sum;
xfs_filblks_t len = *rlen; /* length to unmap in file */ xfs_filblks_t len = *rlen; /* length to unmap in file */
xfs_fileoff_t max_len; xfs_fileoff_t max_len;
xfs_agnumber_t prev_agno = NULLAGNUMBER, agno;
xfs_fileoff_t end; xfs_fileoff_t end;
struct xfs_iext_cursor icur; struct xfs_iext_cursor icur;
bool done = false; bool done = false;
...@@ -5442,16 +5440,6 @@ __xfs_bunmapi( ...@@ -5442,16 +5440,6 @@ __xfs_bunmapi(
del = got; del = got;
wasdel = isnullstartblock(del.br_startblock); wasdel = isnullstartblock(del.br_startblock);
/*
* Make sure we don't touch multiple AGF headers out of order
* in a single transaction, as that could cause AB-BA deadlocks.
*/
if (!wasdel && !isrt) {
agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
if (prev_agno != NULLAGNUMBER && prev_agno > agno)
break;
prev_agno = agno;
}
if (got.br_startoff < start) { if (got.br_startoff < start) {
del.br_startoff = start; del.br_startoff = start;
del.br_blockcount -= start - got.br_startoff; del.br_blockcount -= start - got.br_startoff;
......
...@@ -559,8 +559,17 @@ xfs_dinode_calc_crc( ...@@ -559,8 +559,17 @@ xfs_dinode_calc_crc(
/* /*
* Validate di_extsize hint. * Validate di_extsize hint.
* *
* The rules are documented at xfs_ioctl_setattr_check_extsize(). * 1. Extent size hint is only valid for directories and regular files.
* These functions must be kept in sync with each other. * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
* 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
* 4. Hint cannot be larger than MAXTEXTLEN.
* 5. Can be changed on directories at any time.
* 6. Hint value of 0 turns off hints, clears inode flags.
* 7. Extent size must be a multiple of the appropriate block size.
* For realtime files, this is the rt extent size.
* 8. For non-realtime files, the extent size hint must be limited
* to half the AG size to avoid alignment extending the extent beyond the
* limits of the AG.
*/ */
xfs_failaddr_t xfs_failaddr_t
xfs_inode_validate_extsize( xfs_inode_validate_extsize(
...@@ -580,6 +589,28 @@ xfs_inode_validate_extsize( ...@@ -580,6 +589,28 @@ xfs_inode_validate_extsize(
inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
extsize_bytes = XFS_FSB_TO_B(mp, extsize); extsize_bytes = XFS_FSB_TO_B(mp, extsize);
/*
* This comment describes a historic gap in this verifier function.
*
* On older kernels, the extent size hint verifier doesn't check that
* the extent size hint is an integer multiple of the realtime extent
* size on a directory with both RTINHERIT and EXTSZINHERIT flags set.
* The verifier has always enforced the alignment rule for regular
* files with the REALTIME flag set.
*
* If a directory with a misaligned extent size hint is allowed to
* propagate that hint into a new regular realtime file, the result
* is that the inode cluster buffer verifier will trigger a corruption
* shutdown the next time it is run.
*
* Unfortunately, there could be filesystems with these misconfigured
* directories in the wild, so we cannot add a check to this verifier
* at this time because that will result a new source of directory
* corruption errors when reading an existing filesystem. Instead, we
* permit the misconfiguration to pass through the verifiers so that
* callers of this function can correct and mitigate externally.
*/
if (rt_flag) if (rt_flag)
blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
else else
...@@ -616,8 +647,15 @@ xfs_inode_validate_extsize( ...@@ -616,8 +647,15 @@ xfs_inode_validate_extsize(
/* /*
* Validate di_cowextsize hint. * Validate di_cowextsize hint.
* *
* The rules are documented at xfs_ioctl_setattr_check_cowextsize(). * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
* These functions must be kept in sync with each other. * The inode does not have to have any shared blocks, but it must be a v3.
* 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
* for a directory, the hint is propagated to new files.
* 3. Can be changed on files & directories at any time.
* 4. Hint value of 0 turns off hints, clears inode flags.
* 5. Extent size must be a multiple of the appropriate block size.
* 6. The extent size hint must be limited to half the AG size to avoid
* alignment extending the extent beyond the limits of the AG.
*/ */
xfs_failaddr_t xfs_failaddr_t
xfs_inode_validate_cowextsize( xfs_inode_validate_cowextsize(
......
...@@ -142,6 +142,23 @@ xfs_trans_log_inode( ...@@ -142,6 +142,23 @@ xfs_trans_log_inode(
flags |= XFS_ILOG_CORE; flags |= XFS_ILOG_CORE;
} }
/*
* Inode verifiers on older kernels don't check that the extent size
* hint is an integer multiple of the rt extent size on a directory
* with both rtinherit and extszinherit flags set. If we're logging a
* directory that is misconfigured in this way, clear the hint.
*/
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
(ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
xfs_info_once(ip->i_mount,
"Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino);
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
XFS_DIFLAG_EXTSZINHERIT);
ip->i_extsize = 0;
flags |= XFS_ILOG_CORE;
}
/* /*
* Record the specific change for fdatasync optimisation. This allows * Record the specific change for fdatasync optimisation. This allows
* fdatasync to skip log forces for inodes that are only timestamp * fdatasync to skip log forces for inodes that are only timestamp
......
...@@ -690,6 +690,7 @@ xfs_inode_inherit_flags( ...@@ -690,6 +690,7 @@ xfs_inode_inherit_flags(
const struct xfs_inode *pip) const struct xfs_inode *pip)
{ {
unsigned int di_flags = 0; unsigned int di_flags = 0;
xfs_failaddr_t failaddr;
umode_t mode = VFS_I(ip)->i_mode; umode_t mode = VFS_I(ip)->i_mode;
if (S_ISDIR(mode)) { if (S_ISDIR(mode)) {
...@@ -729,6 +730,24 @@ xfs_inode_inherit_flags( ...@@ -729,6 +730,24 @@ xfs_inode_inherit_flags(
di_flags |= XFS_DIFLAG_FILESTREAM; di_flags |= XFS_DIFLAG_FILESTREAM;
ip->i_diflags |= di_flags; ip->i_diflags |= di_flags;
/*
* Inode verifiers on older kernels only check that the extent size
* hint is an integer multiple of the rt extent size on realtime files.
* They did not check the hint alignment on a directory with both
* rtinherit and extszinherit flags set. If the misaligned hint is
* propagated from a directory into a new realtime file, new file
* allocations will fail due to math errors in the rt allocator and/or
* trip the verifiers. Validate the hint settings in the new file so
* that we don't let broken hints propagate.
*/
failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
VFS_I(ip)->i_mode, ip->i_diflags);
if (failaddr) {
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
XFS_DIFLAG_EXTSZINHERIT);
ip->i_extsize = 0;
}
} }
/* Propagate di_flags2 from a parent inode to a child inode. */ /* Propagate di_flags2 from a parent inode to a child inode. */
...@@ -737,12 +756,22 @@ xfs_inode_inherit_flags2( ...@@ -737,12 +756,22 @@ xfs_inode_inherit_flags2(
struct xfs_inode *ip, struct xfs_inode *ip,
const struct xfs_inode *pip) const struct xfs_inode *pip)
{ {
xfs_failaddr_t failaddr;
if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) { if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE; ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
ip->i_cowextsize = pip->i_cowextsize; ip->i_cowextsize = pip->i_cowextsize;
} }
if (pip->i_diflags2 & XFS_DIFLAG2_DAX) if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
ip->i_diflags2 |= XFS_DIFLAG2_DAX; ip->i_diflags2 |= XFS_DIFLAG2_DAX;
/* Don't let invalid cowextsize hints propagate. */
failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
if (failaddr) {
ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
ip->i_cowextsize = 0;
}
} }
/* /*
......
...@@ -1267,20 +1267,8 @@ xfs_ioctl_setattr_get_trans( ...@@ -1267,20 +1267,8 @@ xfs_ioctl_setattr_get_trans(
} }
/* /*
* extent size hint validation is somewhat cumbersome. Rules are: * Validate a proposed extent size hint. For regular files, the hint can only
* * be changed if no extents are allocated.
* 1. extent size hint is only valid for directories and regular files
* 2. FS_XFLAG_EXTSIZE is only valid for regular files
* 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
* 4. can only be changed on regular files if no extents are allocated
* 5. can be changed on directories at any time
* 6. extsize hint of 0 turns off hints, clears inode flags.
* 7. Extent size must be a multiple of the appropriate block size.
* 8. for non-realtime files, the extent size hint must be limited
* to half the AG size to avoid alignment extending the extent beyond the
* limits of the AG.
*
* Please keep this function in sync with xfs_scrub_inode_extsize.
*/ */
static int static int
xfs_ioctl_setattr_check_extsize( xfs_ioctl_setattr_check_extsize(
...@@ -1288,86 +1276,65 @@ xfs_ioctl_setattr_check_extsize( ...@@ -1288,86 +1276,65 @@ xfs_ioctl_setattr_check_extsize(
struct fileattr *fa) struct fileattr *fa)
{ {
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
xfs_extlen_t size; xfs_failaddr_t failaddr;
xfs_fsblock_t extsize_fsb; uint16_t new_diflags;
if (!fa->fsx_valid) if (!fa->fsx_valid)
return 0; return 0;
if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents && if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents &&
((ip->i_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize)
return -EINVAL; return -EINVAL;
if (fa->fsx_extsize == 0) if (fa->fsx_extsize & mp->m_blockmask)
return 0;
extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
if (extsize_fsb > MAXEXTLEN)
return -EINVAL; return -EINVAL;
if (XFS_IS_REALTIME_INODE(ip) || new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
(fa->fsx_xflags & FS_XFLAG_REALTIME)) {
size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; /*
} else { * Inode verifiers on older kernels don't check that the extent size
size = mp->m_sb.sb_blocksize; * hint is an integer multiple of the rt extent size on a directory
if (extsize_fsb > mp->m_sb.sb_agblocks / 2) * with both rtinherit and extszinherit flags set. Don't let sysadmins
* misconfigure directories.
*/
if ((new_diflags & XFS_DIFLAG_RTINHERIT) &&
(new_diflags & XFS_DIFLAG_EXTSZINHERIT)) {
unsigned int rtextsize_bytes;
rtextsize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
if (fa->fsx_extsize % rtextsize_bytes)
return -EINVAL; return -EINVAL;
} }
if (fa->fsx_extsize % size) failaddr = xfs_inode_validate_extsize(ip->i_mount,
return -EINVAL; XFS_B_TO_FSB(mp, fa->fsx_extsize),
VFS_I(ip)->i_mode, new_diflags);
return 0; return failaddr != NULL ? -EINVAL : 0;
} }
/*
* CoW extent size hint validation rules are:
*
* 1. CoW extent size hint can only be set if reflink is enabled on the fs.
* The inode does not have to have any shared blocks, but it must be a v3.
* 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
* for a directory, the hint is propagated to new files.
* 3. Can be changed on files & directories at any time.
* 4. CoW extsize hint of 0 turns off hints, clears inode flags.
* 5. Extent size must be a multiple of the appropriate block size.
* 6. The extent size hint must be limited to half the AG size to avoid
* alignment extending the extent beyond the limits of the AG.
*
* Please keep this function in sync with xfs_scrub_inode_cowextsize.
*/
static int static int
xfs_ioctl_setattr_check_cowextsize( xfs_ioctl_setattr_check_cowextsize(
struct xfs_inode *ip, struct xfs_inode *ip,
struct fileattr *fa) struct fileattr *fa)
{ {
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
xfs_extlen_t size; xfs_failaddr_t failaddr;
xfs_fsblock_t cowextsize_fsb; uint64_t new_diflags2;
uint16_t new_diflags;
if (!fa->fsx_valid) if (!fa->fsx_valid)
return 0; return 0;
if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)) if (fa->fsx_cowextsize & mp->m_blockmask)
return 0;
if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb))
return -EINVAL; return -EINVAL;
if (fa->fsx_cowextsize == 0) new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
return 0; new_diflags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); failaddr = xfs_inode_validate_cowextsize(ip->i_mount,
if (cowextsize_fsb > MAXEXTLEN) XFS_B_TO_FSB(mp, fa->fsx_cowextsize),
return -EINVAL; VFS_I(ip)->i_mode, new_diflags, new_diflags2);
return failaddr != NULL ? -EINVAL : 0;
size = mp->m_sb.sb_blocksize;
if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2)
return -EINVAL;
if (fa->fsx_cowextsize % size)
return -EINVAL;
return 0;
} }
static int static int
......
...@@ -73,6 +73,8 @@ do { \ ...@@ -73,6 +73,8 @@ do { \
xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__) xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__)
#define xfs_notice_once(dev, fmt, ...) \ #define xfs_notice_once(dev, fmt, ...) \
xfs_printk_once(xfs_notice, dev, fmt, ##__VA_ARGS__) xfs_printk_once(xfs_notice, dev, fmt, ##__VA_ARGS__)
#define xfs_info_once(dev, fmt, ...) \
xfs_printk_once(xfs_info, dev, fmt, ##__VA_ARGS__)
void assfail(struct xfs_mount *mp, char *expr, char *f, int l); void assfail(struct xfs_mount *mp, char *expr, char *f, int l);
void asswarn(struct xfs_mount *mp, char *expr, char *f, int l); void asswarn(struct xfs_mount *mp, char *expr, char *f, int l);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment