Commit 03dc748b authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'xfs-5.12-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull more xfs updates from Darrick Wong:
 "The most notable fixes here prevent premature reuse of freed metadata
  blocks, and add the ability to detect accidental nested
  transactions, which are not allowed here.

   - Restore a disused sysctl control knob that was inadvertently
     dropped during the merge window to avoid fstests regressions.

   - Don't speculatively release freed blocks from the busy list until
     we're actually allocating them, which fixes a rare log recovery
     regression.

   - Don't nest transactions when scanning for free space.

   - Add an idiot^Wmaintainer light to detect nested transactions. ;)"

* tag 'xfs-5.12-merge-6' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: use current->journal_info for detecting transaction recursion
  xfs: don't nest transactions when scanning for eofblocks
  xfs: don't reuse busy extents on extent trim
  xfs: restore speculative_cow_prealloc_lifetime sysctl
parents 3ab6608e 756b1c34
...@@ -284,6 +284,9 @@ The following sysctls are available for the XFS filesystem: ...@@ -284,6 +284,9 @@ The following sysctls are available for the XFS filesystem:
removes unused preallocation from clean inodes and releases removes unused preallocation from clean inodes and releases
the unused space back to the free pool. the unused space back to the free pool.
fs.xfs.speculative_cow_prealloc_lifetime
This is an alias for speculative_prealloc_lifetime.
fs.xfs.error_level (Min: 0 Default: 3 Max: 11) fs.xfs.error_level (Min: 0 Default: 3 Max: 11)
A volume knob for error reporting when internal errors occur. A volume knob for error reporting when internal errors occur.
This will generate detailed messages & backtraces for filesystem This will generate detailed messages & backtraces for filesystem
...@@ -356,12 +359,13 @@ The following sysctls are available for the XFS filesystem: ...@@ -356,12 +359,13 @@ The following sysctls are available for the XFS filesystem:
Deprecated Sysctls Deprecated Sysctls
================== ==================
=========================== ================ =========================================== ================
Name Removal Schedule Name Removal Schedule
=========================== ================ =========================================== ================
fs.xfs.irix_sgid_inherit September 2025 fs.xfs.irix_sgid_inherit September 2025
fs.xfs.irix_symlink_mode September 2025 fs.xfs.irix_symlink_mode September 2025
=========================== ================ fs.xfs.speculative_cow_prealloc_lifetime September 2025
=========================================== ================
Removed Sysctls Removed Sysctls
......
...@@ -1458,13 +1458,6 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) ...@@ -1458,13 +1458,6 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
PF_MEMALLOC)) PF_MEMALLOC))
goto redirty; goto redirty;
/*
* Given that we do not allow direct reclaim to call us, we should
* never be called in a recursive filesystem reclaim context.
*/
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
goto redirty;
/* /*
* Is this page beyond the end of the file? * Is this page beyond the end of the file?
* *
......
...@@ -2805,7 +2805,7 @@ xfs_btree_split_worker( ...@@ -2805,7 +2805,7 @@ xfs_btree_split_worker(
struct xfs_btree_split_args *args = container_of(work, struct xfs_btree_split_args *args = container_of(work,
struct xfs_btree_split_args, work); struct xfs_btree_split_args, work);
unsigned long pflags; unsigned long pflags;
unsigned long new_pflags = PF_MEMALLOC_NOFS; unsigned long new_pflags = 0;
/* /*
* we are in a transaction context here, but may also be doing work * we are in a transaction context here, but may also be doing work
...@@ -2817,12 +2817,20 @@ xfs_btree_split_worker( ...@@ -2817,12 +2817,20 @@ xfs_btree_split_worker(
new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
current_set_flags_nested(&pflags, new_pflags); current_set_flags_nested(&pflags, new_pflags);
xfs_trans_set_context(args->cur->bc_tp);
args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
args->key, args->curp, args->stat); args->key, args->curp, args->stat);
complete(args->done);
xfs_trans_clear_context(args->cur->bc_tp);
current_restore_flags_nested(&pflags, new_pflags); current_restore_flags_nested(&pflags, new_pflags);
/*
* Do not access args after complete() has run here. We don't own args
* and the owner may run and free args before we return here.
*/
complete(args->done);
} }
/* /*
......
...@@ -62,7 +62,7 @@ xfs_setfilesize_trans_alloc( ...@@ -62,7 +62,7 @@ xfs_setfilesize_trans_alloc(
* We hand off the transaction to the completion thread now, so * We hand off the transaction to the completion thread now, so
* clear the flag here. * clear the flag here.
*/ */
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_clear_context(tp);
return 0; return 0;
} }
...@@ -125,7 +125,7 @@ xfs_setfilesize_ioend( ...@@ -125,7 +125,7 @@ xfs_setfilesize_ioend(
* thus we need to mark ourselves as being in a transaction manually. * thus we need to mark ourselves as being in a transaction manually.
* Similarly for freeze protection. * Similarly for freeze protection.
*/ */
current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_set_context(tp);
__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
/* we abort the update if there was an IO error */ /* we abort the update if there was an IO error */
...@@ -568,6 +568,12 @@ xfs_vm_writepage( ...@@ -568,6 +568,12 @@ xfs_vm_writepage(
{ {
struct xfs_writepage_ctx wpc = { }; struct xfs_writepage_ctx wpc = { };
if (WARN_ON_ONCE(current->journal_info)) {
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops); return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
} }
...@@ -578,6 +584,13 @@ xfs_vm_writepages( ...@@ -578,6 +584,13 @@ xfs_vm_writepages(
{ {
struct xfs_writepage_ctx wpc = { }; struct xfs_writepage_ctx wpc = { };
/*
* Writing back data in a transaction context can result in recursive
* transactions. This is bad, so issue a warning and get out of here.
*/
if (WARN_ON_ONCE(current->journal_info))
return 0;
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
} }
......
...@@ -344,7 +344,6 @@ xfs_extent_busy_trim( ...@@ -344,7 +344,6 @@ xfs_extent_busy_trim(
ASSERT(*len > 0); ASSERT(*len > 0);
spin_lock(&args->pag->pagb_lock); spin_lock(&args->pag->pagb_lock);
restart:
fbno = *bno; fbno = *bno;
flen = *len; flen = *len;
rbp = args->pag->pagb_tree.rb_node; rbp = args->pag->pagb_tree.rb_node;
...@@ -363,19 +362,6 @@ xfs_extent_busy_trim( ...@@ -363,19 +362,6 @@ xfs_extent_busy_trim(
continue; continue;
} }
/*
* If this is a metadata allocation, try to reuse the busy
* extent instead of trimming the allocation.
*/
if (!(args->datatype & XFS_ALLOC_USERDATA) &&
!(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
if (!xfs_extent_busy_update_extent(args->mp, args->pag,
busyp, fbno, flen,
false))
goto restart;
continue;
}
if (bbno <= fbno) { if (bbno <= fbno) {
/* start overlap */ /* start overlap */
......
...@@ -51,7 +51,7 @@ xfs_panic_mask_proc_handler( ...@@ -51,7 +51,7 @@ xfs_panic_mask_proc_handler(
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
STATIC int STATIC int
xfs_deprecate_irix_sgid_inherit_proc_handler( xfs_deprecated_dointvec_minmax(
struct ctl_table *ctl, struct ctl_table *ctl,
int write, int write,
void *buffer, void *buffer,
...@@ -59,24 +59,8 @@ xfs_deprecate_irix_sgid_inherit_proc_handler( ...@@ -59,24 +59,8 @@ xfs_deprecate_irix_sgid_inherit_proc_handler(
loff_t *ppos) loff_t *ppos)
{ {
if (write) { if (write) {
printk_once(KERN_WARNING printk_ratelimited(KERN_WARNING
"XFS: " "%s sysctl option is deprecated.\n", "XFS: %s sysctl option is deprecated.\n",
ctl->procname);
}
return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
}
STATIC int
xfs_deprecate_irix_symlink_mode_proc_handler(
struct ctl_table *ctl,
int write,
void *buffer,
size_t *lenp,
loff_t *ppos)
{
if (write) {
printk_once(KERN_WARNING
"XFS: " "%s sysctl option is deprecated.\n",
ctl->procname); ctl->procname);
} }
return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
...@@ -88,7 +72,7 @@ static struct ctl_table xfs_table[] = { ...@@ -88,7 +72,7 @@ static struct ctl_table xfs_table[] = {
.data = &xfs_params.sgid_inherit.val, .data = &xfs_params.sgid_inherit.val,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = xfs_deprecate_irix_sgid_inherit_proc_handler, .proc_handler = xfs_deprecated_dointvec_minmax,
.extra1 = &xfs_params.sgid_inherit.min, .extra1 = &xfs_params.sgid_inherit.min,
.extra2 = &xfs_params.sgid_inherit.max .extra2 = &xfs_params.sgid_inherit.max
}, },
...@@ -97,7 +81,7 @@ static struct ctl_table xfs_table[] = { ...@@ -97,7 +81,7 @@ static struct ctl_table xfs_table[] = {
.data = &xfs_params.symlink_mode.val, .data = &xfs_params.symlink_mode.val,
.maxlen = sizeof(int), .maxlen = sizeof(int),
.mode = 0644, .mode = 0644,
.proc_handler = xfs_deprecate_irix_symlink_mode_proc_handler, .proc_handler = xfs_deprecated_dointvec_minmax,
.extra1 = &xfs_params.symlink_mode.min, .extra1 = &xfs_params.symlink_mode.min,
.extra2 = &xfs_params.symlink_mode.max .extra2 = &xfs_params.symlink_mode.max
}, },
...@@ -201,6 +185,15 @@ static struct ctl_table xfs_table[] = { ...@@ -201,6 +185,15 @@ static struct ctl_table xfs_table[] = {
.extra1 = &xfs_params.blockgc_timer.min, .extra1 = &xfs_params.blockgc_timer.min,
.extra2 = &xfs_params.blockgc_timer.max, .extra2 = &xfs_params.blockgc_timer.max,
}, },
{
.procname = "speculative_cow_prealloc_lifetime",
.data = &xfs_params.blockgc_timer.val,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = xfs_deprecated_dointvec_minmax,
.extra1 = &xfs_params.blockgc_timer.min,
.extra2 = &xfs_params.blockgc_timer.max,
},
/* please keep this the last entry */ /* please keep this the last entry */
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
{ {
......
...@@ -72,6 +72,7 @@ xfs_trans_free( ...@@ -72,6 +72,7 @@ xfs_trans_free(
xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
trace_xfs_trans_free(tp, _RET_IP_); trace_xfs_trans_free(tp, _RET_IP_);
xfs_trans_clear_context(tp);
if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
sb_end_intwrite(tp->t_mountp->m_super); sb_end_intwrite(tp->t_mountp->m_super);
xfs_trans_free_dqinfo(tp); xfs_trans_free_dqinfo(tp);
...@@ -123,7 +124,8 @@ xfs_trans_dup( ...@@ -123,7 +124,8 @@ xfs_trans_dup(
ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
tp->t_rtx_res = tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used;
ntp->t_pflags = tp->t_pflags;
xfs_trans_switch_context(tp, ntp);
/* move deferred ops over to the new tp */ /* move deferred ops over to the new tp */
xfs_defer_move(ntp, tp); xfs_defer_move(ntp, tp);
...@@ -157,9 +159,6 @@ xfs_trans_reserve( ...@@ -157,9 +159,6 @@ xfs_trans_reserve(
int error = 0; int error = 0;
bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
/* Mark this thread as being in a transaction */
current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
/* /*
* Attempt to reserve the needed disk blocks by decrementing * Attempt to reserve the needed disk blocks by decrementing
* the number needed from the number available. This will * the number needed from the number available. This will
...@@ -167,10 +166,8 @@ xfs_trans_reserve( ...@@ -167,10 +166,8 @@ xfs_trans_reserve(
*/ */
if (blocks > 0) { if (blocks > 0) {
error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd); error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
if (error != 0) { if (error != 0)
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
return -ENOSPC; return -ENOSPC;
}
tp->t_blk_res += blocks; tp->t_blk_res += blocks;
} }
...@@ -244,9 +241,6 @@ xfs_trans_reserve( ...@@ -244,9 +241,6 @@ xfs_trans_reserve(
xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd); xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
tp->t_blk_res = 0; tp->t_blk_res = 0;
} }
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
return error; return error;
} }
...@@ -260,6 +254,7 @@ xfs_trans_alloc( ...@@ -260,6 +254,7 @@ xfs_trans_alloc(
struct xfs_trans **tpp) struct xfs_trans **tpp)
{ {
struct xfs_trans *tp; struct xfs_trans *tp;
bool want_retry = true;
int error; int error;
/* /*
...@@ -267,9 +262,11 @@ xfs_trans_alloc( ...@@ -267,9 +262,11 @@ xfs_trans_alloc(
* GFP_NOFS allocation context so that we avoid lockdep false positives * GFP_NOFS allocation context so that we avoid lockdep false positives
* by doing GFP_KERNEL allocations inside sb_start_intwrite(). * by doing GFP_KERNEL allocations inside sb_start_intwrite().
*/ */
retry:
tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL);
if (!(flags & XFS_TRANS_NO_WRITECOUNT)) if (!(flags & XFS_TRANS_NO_WRITECOUNT))
sb_start_intwrite(mp->m_super); sb_start_intwrite(mp->m_super);
xfs_trans_set_context(tp);
/* /*
* Zero-reservation ("empty") transactions can't modify anything, so * Zero-reservation ("empty") transactions can't modify anything, so
...@@ -289,7 +286,9 @@ xfs_trans_alloc( ...@@ -289,7 +286,9 @@ xfs_trans_alloc(
tp->t_firstblock = NULLFSBLOCK; tp->t_firstblock = NULLFSBLOCK;
error = xfs_trans_reserve(tp, resp, blocks, rtextents); error = xfs_trans_reserve(tp, resp, blocks, rtextents);
if (error == -ENOSPC) { if (error == -ENOSPC && want_retry) {
xfs_trans_cancel(tp);
/* /*
* We weren't able to reserve enough space for the transaction. * We weren't able to reserve enough space for the transaction.
* Flush the other speculative space allocations to free space. * Flush the other speculative space allocations to free space.
...@@ -297,8 +296,11 @@ xfs_trans_alloc( ...@@ -297,8 +296,11 @@ xfs_trans_alloc(
* other locks. * other locks.
*/ */
error = xfs_blockgc_free_space(mp, NULL); error = xfs_blockgc_free_space(mp, NULL);
if (!error) if (error)
error = xfs_trans_reserve(tp, resp, blocks, rtextents); return error;
want_retry = false;
goto retry;
} }
if (error) { if (error) {
xfs_trans_cancel(tp); xfs_trans_cancel(tp);
...@@ -893,7 +895,6 @@ __xfs_trans_commit( ...@@ -893,7 +895,6 @@ __xfs_trans_commit(
xfs_log_commit_cil(mp, tp, &commit_lsn, regrant); xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
xfs_trans_free(tp); xfs_trans_free(tp);
/* /*
...@@ -925,7 +926,6 @@ __xfs_trans_commit( ...@@ -925,7 +926,6 @@ __xfs_trans_commit(
xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
tp->t_ticket = NULL; tp->t_ticket = NULL;
} }
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
xfs_trans_free_items(tp, !!error); xfs_trans_free_items(tp, !!error);
xfs_trans_free(tp); xfs_trans_free(tp);
...@@ -985,9 +985,6 @@ xfs_trans_cancel( ...@@ -985,9 +985,6 @@ xfs_trans_cancel(
tp->t_ticket = NULL; tp->t_ticket = NULL;
} }
/* mark this thread as no longer being in a transaction */
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
xfs_trans_free_items(tp, dirty); xfs_trans_free_items(tp, dirty);
xfs_trans_free(tp); xfs_trans_free(tp);
} }
......
...@@ -281,4 +281,34 @@ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp, ...@@ -281,4 +281,34 @@ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp,
struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force,
struct xfs_trans **tpp); struct xfs_trans **tpp);
/*
 * Mark the current task as running inside transaction context.
 *
 * Records the transaction in current->journal_info so that other code
 * (e.g. the writeback paths) can detect and reject recursion into a
 * nested transaction, and enters NOFS allocation scope, stashing the
 * previous allocation flags in tp->t_pflags so they can be restored
 * when the context is cleared or handed off.
 */
static inline void
xfs_trans_set_context(
	struct xfs_trans	*tp)
{
	/* Nested transactions are not allowed: the task must not already own one. */
	ASSERT(current->journal_info == NULL);
	tp->t_pflags = memalloc_nofs_save();
	current->journal_info = tp;
}
/*
 * Leave transaction context for @tp.
 *
 * Restores the allocation flags saved by xfs_trans_set_context() and
 * detaches the transaction from current->journal_info.  A no-op if the
 * current task does not own @tp (e.g. the context was already handed
 * off to another transaction or thread).
 */
static inline void
xfs_trans_clear_context(
	struct xfs_trans	*tp)
{
	/* Only the owning task may tear down the context. */
	if (current->journal_info != tp)
		return;

	memalloc_nofs_restore(tp->t_pflags);
	current->journal_info = NULL;
}
/*
 * Transfer ownership of the task's transaction context from @old_tp to
 * @new_tp (used by xfs_trans_dup() when committing and rolling a
 * transaction).  The saved allocation flags move to the new transaction
 * and @old_tp's copy is zeroed so that a later clear on the old
 * transaction cannot restore stale flags.
 */
static inline void
xfs_trans_switch_context(
	struct xfs_trans	*old_tp,
	struct xfs_trans	*new_tp)
{
	/* The task must currently own the old transaction. */
	ASSERT(current->journal_info == old_tp);
	new_tp->t_pflags = old_tp->t_pflags;
	old_tp->t_pflags = 0;
	current->journal_info = new_tp;
}
#endif /* __XFS_TRANS_H__ */ #endif /* __XFS_TRANS_H__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment