Commit 8c3c0743 authored by Linus Torvalds

Merge tag 'xfs-5.7-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull more xfs updates from Darrick Wong:
 "As promised last week, this batch changes how xfs interacts with
  memory reclaim; how the log batches and throttles log items; how hard
  writes near ENOSPC will try to squeeze more space out of the
  filesystem; and hopefully fix the last of the umount hangs after a
  catastrophic failure.

  Summary:

   - Validate the realtime geometry in the superblock when mounting

   - Refactor a bunch of tricky flag handling in the log code

   - Flush the CIL more judiciously so that we don't wait until there
     are millions of log items consuming a lot of memory.

   - Throttle transaction commits to prevent the xfs frontend from
     flooding the CIL with too many log items.

   - Account metadata buffers correctly for memory reclaim.

   - Mark slabs properly for memory reclaim. These should help reclaim
     run more effectively when XFS is using a lot of memory.

   - Don't write a garbage log record at unmount time if we're trying to
     trigger summary counter recalculation at next mount.

   - Don't block the AIL on locked dquot/inode buffers; instead trigger
     its backoff mechanism to give the lock holder a chance to finish
     up.

   - Ratelimit writeback flushing when buffered writes encounter ENOSPC.

   - Other minor cleanups.

   - Make reflink a synchronous operation when the fs is mounted with
     wsync or sync, which means that now we force the log to disk to
     record the changes"

* tag 'xfs-5.7-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (26 commits)
  xfs: reflink should force the log out if mounted with wsync
  xfs: factor out a new xfs_log_force_inode helper
  xfs: fix inode number overflow in ifree cluster helper
  xfs: remove redundant variable assignment in xfs_symlink()
  xfs: ratelimit inode flush on buffered write ENOSPC
  xfs: return locked status of inode buffer on xfsaild push
  xfs: trylock underlying buffer on dquot flush
  xfs: remove unnecessary ternary from xfs_create
  xfs: don't write a corrupt unmount record to force summary counter recalc
  xfs: factor inode lookup from xfs_ifree_cluster
  xfs: tail updates only need to occur when LSN changes
  xfs: factor common AIL item deletion code
  xfs: correctly acount for reclaimable slabs
  xfs: Improve metadata buffer reclaim accountability
  xfs: don't allow log IO to be throttled
  xfs: Throttle commits on delayed background CIL push
  xfs: Lower CIL flush limit for large logs
  xfs: remove some stale comments from the log code
  xfs: refactor unmount record writing
  xfs: merge xlog_commit_record with xlog_write_done
  ...
parents d3e5e977 5833112d
@@ -328,6 +328,38 @@ xfs_validate_sb_common(
 		return -EFSCORRUPTED;
 	}
 
+	/* Validate the realtime geometry; stolen from xfs_repair */
+	if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||
+	    sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) {
+		xfs_notice(mp,
+			"realtime extent sanity check failed");
+		return -EFSCORRUPTED;
+	}
+
+	if (sbp->sb_rblocks == 0) {
+		if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 ||
+		    sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) {
+			xfs_notice(mp,
+				"realtime zeroed geometry check failed");
+			return -EFSCORRUPTED;
+		}
+	} else {
+		uint64_t	rexts;
+		uint64_t	rbmblocks;
+
+		rexts = div_u64(sbp->sb_rblocks, sbp->sb_rextsize);
+		rbmblocks = howmany_64(sbp->sb_rextents,
+				       NBBY * sbp->sb_blocksize);
+		if (sbp->sb_rextents != rexts ||
+		    sbp->sb_rextslog != xfs_highbit32(sbp->sb_rextents) ||
+		    sbp->sb_rbmblocks != rbmblocks) {
+			xfs_notice(mp,
+				"realtime geometry sanity check failed");
+			return -EFSCORRUPTED;
+		}
+	}
+
 	if (sbp->sb_unit) {
 		if (!xfs_sb_version_hasdalign(sbp) ||
 		    sbp->sb_unit > sbp->sb_width ||
...
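The cross-checks in this hunk reduce to two derived quantities. As a rough
standalone illustration of the same arithmetic (the function below is
hypothetical and not part of the patch; NBBY is 8 bits per byte):

	#include <stdbool.h>
	#include <stdint.h>

	/*
	 * Hypothetical userspace sketch of the rt geometry arithmetic:
	 * sb_rextents must equal sb_rblocks / sb_rextsize, and the rt
	 * bitmap needs one bit per extent, NBBY * blocksize bits per
	 * bitmap block, rounded up (what howmany_64() computes).
	 */
	static bool
	rt_geometry_consistent(uint64_t rblocks, uint32_t rextsize,
			       uint64_t rextents, uint32_t blocksize,
			       uint64_t rbmblocks)
	{
		uint64_t bits_per_block = 8ULL * blocksize;

		if (rextsize == 0 || rextents != rblocks / rextsize)
			return false;
		return rbmblocks ==
			(rextents + bits_per_block - 1) / bits_per_block;
	}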
@@ -327,6 +327,9 @@ xfs_buf_free(
 			__free_page(page);
 		}
+		if (current->reclaim_state)
+			current->reclaim_state->reclaimed_slab +=
+							bp->b_page_count;
 	} else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
 	_xfs_buf_free_pages(bp);
@@ -2114,9 +2117,11 @@ xfs_buf_delwri_pushbuf(
 int __init
 xfs_buf_init(void)
 {
-	xfs_buf_zone = kmem_cache_create("xfs_buf",
-					 sizeof(struct xfs_buf), 0,
-					 SLAB_HWCACHE_ALIGN, NULL);
+	xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
+					 SLAB_HWCACHE_ALIGN |
+					 SLAB_RECLAIM_ACCOUNT |
+					 SLAB_MEM_SPREAD,
+					 NULL);
 	if (!xfs_buf_zone)
 		goto out;
...
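For readers unfamiliar with the slab flag being added here:
SLAB_RECLAIM_ACCOUNT makes a cache's pages count as SReclaimable, so the
shrinker heuristics treat that memory as recoverable. A minimal sketch
(cache name and object size are made up for illustration):

	#include <linux/slab.h>

	/* hypothetical module-local cache of 128-byte objects */
	static struct kmem_cache *demo_zone;

	static int __init demo_init(void)
	{
		/*
		 * SLAB_RECLAIM_ACCOUNT: account the backing pages as
		 * reclaimable, since objects in this cache are freed
		 * under memory pressure by a shrinker.
		 */
		demo_zone = kmem_cache_create("demo_zone", 128, 0,
				SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT,
				NULL);
		return demo_zone ? 0 : -ENOMEM;
	}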
@@ -1105,8 +1105,8 @@ xfs_qm_dqflush(
 	 * Get the buffer containing the on-disk dquot
 	 */
 	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
-				   mp->m_quotainfo->qi_dqchunklen, 0, &bp,
-				   &xfs_dquot_buf_ops);
+				   mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
+				   &bp, &xfs_dquot_buf_ops);
 	if (error)
 		goto out_unlock;
@@ -1177,7 +1177,7 @@ xfs_qm_dqflush(
 out_unlock:
 	xfs_dqfunlock(dqp);
-	return -EIO;
+	return error;
 }
 
 /*
...
@@ -189,7 +189,8 @@ xfs_qm_dquot_logitem_push(
 		if (!xfs_buf_delwri_queue(bp, buffer_list))
 			rval = XFS_ITEM_FLUSHING;
 		xfs_buf_relse(bp);
-	}
+	} else if (error == -EAGAIN)
+		rval = XFS_ITEM_LOCKED;
 
 	spin_lock(&lip->li_ailp->ail_lock);
 out_unlock:
...
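Taken together with the xfs_qm_dqflush() change above, the resulting push
path looks roughly like this (a consolidated sketch, not a verbatim quote
of the final function body):

	uint			rval = XFS_ITEM_SUCCESS;
	struct xfs_buf		*bp = NULL;
	int			error;

	error = xfs_qm_dqflush(dqp, &bp);	/* now uses XBF_TRYLOCK */
	if (!error) {
		if (!xfs_buf_delwri_queue(bp, buffer_list))
			rval = XFS_ITEM_FLUSHING;
		xfs_buf_relse(bp);
	} else if (error == -EAGAIN) {
		/*
		 * The underlying buffer is locked by someone else:
		 * report XFS_ITEM_LOCKED so xfsaild backs off and
		 * retries later instead of blocking the whole push.
		 */
		rval = XFS_ITEM_LOCKED;
	}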
@@ -15,7 +15,6 @@
 #include "xfs_trans.h"
 #include "xfs_inode_item.h"
 #include "xfs_icache.h"
-#include "xfs_log.h"
 #include "xfs_pnfs.h"
 
 /*
@@ -221,18 +220,7 @@ STATIC int
 xfs_fs_nfs_commit_metadata(
 	struct inode		*inode)
 {
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_lsn_t		lsn = 0;
-
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	if (xfs_ipincount(ip))
-		lsn = ip->i_itemp->ili_last_lsn;
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-	if (!lsn)
-		return 0;
-	return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+	return xfs_log_force_inode(XFS_I(inode));
 }
 
 const struct export_operations xfs_export_operations = {
...
@@ -80,19 +80,9 @@ xfs_dir_fsync(
 	int			datasync)
 {
 	struct xfs_inode	*ip = XFS_I(file->f_mapping->host);
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_lsn_t		lsn = 0;
 
 	trace_xfs_dir_fsync(ip);
-
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	if (xfs_ipincount(ip))
-		lsn = ip->i_itemp->ili_last_lsn;
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-	if (!lsn)
-		return 0;
-	return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+	return xfs_log_force_inode(ip);
 }
 
 STATIC int
@@ -1069,7 +1059,11 @@ xfs_file_remap_range(
 	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
 			remap_flags);
+	if (ret)
+		goto out_unlock;
 
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_log_force_inode(dest);
+
 out_unlock:
 	xfs_reflink_remap_unlock(file_in, file_out);
 	if (ret)
...
@@ -1200,8 +1200,7 @@ xfs_create(
 	unlock_dp_on_error = false;
 
 	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
-					resblks ?
-					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
+					resblks - XFS_IALLOC_SPACE_RES(mp));
 	if (error) {
 		ASSERT(error != -ENOSPC);
 		goto out_trans_cancel;
@@ -2503,6 +2502,88 @@ xfs_iunlink_remove(
 	return error;
 }
 
+/*
+ * Look up the inode number specified and mark it stale if it is found. If it
+ * is dirty, return the inode so it can be attached to the cluster buffer and
+ * processed appropriately when the cluster free transaction completes.
+ */
+static struct xfs_inode *
+xfs_ifree_get_one_inode(
+	struct xfs_perag	*pag,
+	struct xfs_inode	*free_ip,
+	xfs_ino_t		inum)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+	struct xfs_inode	*ip;
+
+retry:
+	rcu_read_lock();
+	ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
+
+	/* Inode not in memory, nothing to do */
+	if (!ip)
+		goto out_rcu_unlock;
+
+	/*
+	 * Because this is an RCU protected lookup, we could find a recently
+	 * freed or even reallocated inode during the lookup. We need to check
+	 * under the i_flags_lock for a valid inode here. Skip it if it is not
+	 * valid, the wrong inode or stale.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) {
+		spin_unlock(&ip->i_flags_lock);
+		goto out_rcu_unlock;
+	}
+	spin_unlock(&ip->i_flags_lock);
+
+	/*
+	 * Don't try to lock/unlock the current inode, but we _cannot_ skip the
+	 * other inodes that we did not find in the list attached to the buffer
+	 * and are not already marked stale. If we can't lock it, back off and
+	 * retry.
+	 */
+	if (ip != free_ip) {
+		if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
+			rcu_read_unlock();
+			delay(1);
+			goto retry;
+		}
+
+		/*
+		 * Check the inode number again in case we're racing with
+		 * freeing in xfs_reclaim_inode(). See the comments in that
+		 * function for more information as to why the initial check is
+		 * not sufficient.
+		 */
+		if (ip->i_ino != inum) {
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			goto out_rcu_unlock;
+		}
+	}
+	rcu_read_unlock();
+
+	xfs_iflock(ip);
+	xfs_iflags_set(ip, XFS_ISTALE);
+
+	/*
+	 * We don't need to attach clean inodes or those only with unlogged
+	 * changes (which we throw away, anyway).
+	 */
+	if (!ip->i_itemp || xfs_inode_clean(ip)) {
+		ASSERT(ip != free_ip);
+		xfs_ifunlock(ip);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		goto out_no_inode;
+	}
+	return ip;
+
+out_rcu_unlock:
+	rcu_read_unlock();
+out_no_inode:
+	return NULL;
+}
+
 /*
  * A big issue when freeing the inode cluster is that we _cannot_ skip any
  * inodes that are in memory - they all must be marked stale and attached to
@@ -2603,77 +2684,11 @@ xfs_ifree_cluster(
 		 * even trying to lock them.
 		 */
 		for (i = 0; i < igeo->inodes_per_cluster; i++) {
-retry:
-			rcu_read_lock();
-			ip = radix_tree_lookup(&pag->pag_ici_root,
-					XFS_INO_TO_AGINO(mp, (inum + i)));
-
-			/* Inode not in memory, nothing to do */
-			if (!ip) {
-				rcu_read_unlock();
+			ip = xfs_ifree_get_one_inode(pag, free_ip, inum + i);
+			if (!ip)
 				continue;
-			}
-
-			/*
-			 * because this is an RCU protected lookup, we could
-			 * find a recently freed or even reallocated inode
-			 * during the lookup. We need to check under the
-			 * i_flags_lock for a valid inode here. Skip it if it
-			 * is not valid, the wrong inode or stale.
-			 */
-			spin_lock(&ip->i_flags_lock);
-			if (ip->i_ino != inum + i ||
-			    __xfs_iflags_test(ip, XFS_ISTALE)) {
-				spin_unlock(&ip->i_flags_lock);
-				rcu_read_unlock();
-				continue;
-			}
-			spin_unlock(&ip->i_flags_lock);
-
-			/*
-			 * Don't try to lock/unlock the current inode, but we
-			 * _cannot_ skip the other inodes that we did not find
-			 * in the list attached to the buffer and are not
-			 * already marked stale. If we can't lock it, back off
-			 * and retry.
-			 */
-			if (ip != free_ip) {
-				if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
-					rcu_read_unlock();
-					delay(1);
-					goto retry;
-				}
-
-				/*
-				 * Check the inode number again in case we're
-				 * racing with freeing in xfs_reclaim_inode().
-				 * See the comments in that function for more
-				 * information as to why the initial check is
-				 * not sufficient.
-				 */
-				if (ip->i_ino != inum + i) {
-					xfs_iunlock(ip, XFS_ILOCK_EXCL);
-					rcu_read_unlock();
-					continue;
-				}
-			}
-			rcu_read_unlock();
-
-			xfs_iflock(ip);
-			xfs_iflags_set(ip, XFS_ISTALE);
 
-			/*
-			 * we don't need to attach clean inodes or those only
-			 * with unlogged changes (which we throw away, anyway).
-			 */
 			iip = ip->i_itemp;
-			if (!iip || xfs_inode_clean(ip)) {
-				ASSERT(ip != free_ip);
-				xfs_ifunlock(ip);
-				xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				continue;
-			}
-
 			iip->ili_last_fields = iip->ili_fields;
 			iip->ili_fields = 0;
 			iip->ili_fsync_fields = 0;
@@ -3930,3 +3945,22 @@ xfs_irele(
 	trace_xfs_irele(ip, _RET_IP_);
 	iput(VFS_I(ip));
 }
+
+/*
+ * Ensure all committed transactions touching the inode are written to the log.
+ */
+int
+xfs_log_force_inode(
+	struct xfs_inode	*ip)
+{
+	xfs_lsn_t		lsn = 0;
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	if (xfs_ipincount(ip))
+		lsn = ip->i_itemp->ili_last_lsn;
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+	if (!lsn)
+		return 0;
+	return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
+}
...
@@ -426,6 +426,7 @@ int xfs_itruncate_extents_flags(struct xfs_trans **,
 		struct xfs_inode *, int, xfs_fsize_t, int);
 void		xfs_iext_realloc(xfs_inode_t *, int, int);
 
+int		xfs_log_force_inode(struct xfs_inode *ip);
 void		xfs_iunpin_wait(xfs_inode_t *);
 #define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
...
@@ -552,7 +552,8 @@ xfs_inode_item_push(
 		if (!xfs_buf_delwri_queue(bp, buffer_list))
 			rval = XFS_ITEM_FLUSHING;
 		xfs_buf_relse(bp);
-	}
+	} else if (error == -EAGAIN)
+		rval = XFS_ITEM_LOCKED;
 
 	spin_lock(&lip->li_ailp->ail_lock);
 out_unlock:
@@ -730,29 +731,27 @@ xfs_iflush_done(
 	 * holding the lock before removing the inode from the AIL.
 	 */
 	if (need_ail) {
-		bool			mlip_changed = false;
+		xfs_lsn_t		tail_lsn = 0;
 
 		/* this is an opencoded batch version of xfs_trans_ail_delete */
 		spin_lock(&ailp->ail_lock);
 		list_for_each_entry(blip, &tmp, li_bio_list) {
 			if (INODE_ITEM(blip)->ili_logged &&
-			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
-				mlip_changed |= xfs_ail_delete_one(ailp, blip);
-			else {
+			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) {
+				/*
+				 * xfs_ail_update_finish() only cares about the
+				 * lsn of the first tail item removed, any
+				 * others will be at the same or higher lsn so
+				 * we just ignore them.
+				 */
+				xfs_lsn_t lsn = xfs_ail_delete_one(ailp, blip);
+				if (!tail_lsn && lsn)
+					tail_lsn = lsn;
+			} else {
 				xfs_clear_li_failed(blip);
 			}
 		}
-
-		if (mlip_changed) {
-			if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
-				xlog_assign_tail_lsn_locked(ailp->ail_mount);
-			if (list_empty(&ailp->ail_head))
-				wake_up_all(&ailp->ail_empty);
-		}
-		spin_unlock(&ailp->ail_lock);
-
-		if (mlip_changed)
-			xfs_log_space_wake(ailp->ail_mount);
+		xfs_ail_update_finish(ailp, tail_lsn);
 	}
 
 	/*
...
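The new xfs_ail_update_finish() call replaces the open-coded tail-LSN update
and wakeup logic removed here. Reconstructed from this call site, the helper
does approximately the following (a sketch only; the exact body lives in
xfs_trans_ail.c and may differ):

	void
	xfs_ail_update_finish(
		struct xfs_ail		*ailp,
		xfs_lsn_t		old_lsn) __releases(ailp->ail_lock)
	{
		struct xfs_mount	*mp = ailp->ail_mount;

		/* no tail item was removed: nothing to update or wake */
		if (!old_lsn) {
			spin_unlock(&ailp->ail_lock);
			return;
		}

		if (!XFS_FORCED_SHUTDOWN(mp))
			xlog_assign_tail_lsn_locked(mp);

		if (list_empty(&ailp->ail_head))
			wake_up_all(&ailp->ail_empty);
		spin_unlock(&ailp->ail_lock);

		xfs_log_space_wake(mp);
	}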
@@ -24,13 +24,6 @@
 kmem_zone_t	*xfs_log_ticket_zone;
 
 /* Local miscellaneous function prototypes */
-STATIC int
-xlog_commit_record(
-	struct xlog		*log,
-	struct xlog_ticket	*ticket,
-	struct xlog_in_core	**iclog,
-	xfs_lsn_t		*commitlsnp);
-
 STATIC struct xlog *
 xlog_alloc_log(
 	struct xfs_mount	*mp,
@@ -66,14 +59,6 @@ xlog_grant_push_ail(
 	struct xlog		*log,
 	int			need_bytes);
 STATIC void
-xlog_regrant_reserve_log_space(
-	struct xlog		*log,
-	struct xlog_ticket	*ticket);
-STATIC void
-xlog_ungrant_log_space(
-	struct xlog		*log,
-	struct xlog_ticket	*ticket);
-STATIC void
 xlog_sync(
 	struct xlog		*log,
 	struct xlog_in_core	*iclog);
@@ -478,73 +463,6 @@ xfs_log_reserve(
 	return error;
 }
-/*
- * NOTES:
- *
- * 1. currblock field gets updated at startup and after in-core logs
- *	marked as with WANT_SYNC.
- */
-
-/*
- * This routine is called when a user of a log manager ticket is done with
- * the reservation.  If the ticket was ever used, then a commit record for
- * the associated transaction is written out as a log operation header with
- * no data.  The flag XLOG_TIC_INITED is set when the first write occurs with
- * a given ticket.  If the ticket was one with a permanent reservation, then
- * a few operations are done differently.  Permanent reservation tickets by
- * default don't release the reservation.  They just commit the current
- * transaction with the belief that the reservation is still needed.  A flag
- * must be passed in before permanent reservations are actually released.
- * When these type of tickets are not released, they need to be set into
- * the inited state again.  By doing this, a start record will be written
- * out when the next write occurs.
- */
-xfs_lsn_t
-xfs_log_done(
-	struct xfs_mount	*mp,
-	struct xlog_ticket	*ticket,
-	struct xlog_in_core	**iclog,
-	bool			regrant)
-{
-	struct xlog		*log = mp->m_log;
-	xfs_lsn_t		lsn = 0;
-
-	if (XLOG_FORCED_SHUTDOWN(log) ||
-	    /*
-	     * If nothing was ever written, don't write out commit record.
-	     * If we get an error, just continue and give back the log ticket.
-	     */
-	    (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
-	     (xlog_commit_record(log, ticket, iclog, &lsn)))) {
-		lsn = (xfs_lsn_t) -1;
-		regrant = false;
-	}
-
-	if (!regrant) {
-		trace_xfs_log_done_nonperm(log, ticket);
-
-		/*
-		 * Release ticket if not permanent reservation or a specific
-		 * request has been made to release a permanent reservation.
-		 */
-		xlog_ungrant_log_space(log, ticket);
-	} else {
-		trace_xfs_log_done_perm(log, ticket);
-
-		xlog_regrant_reserve_log_space(log, ticket);
-		/* If this ticket was a permanent reservation and we aren't
-		 * trying to release it, reset the inited flags; so next time
-		 * we write, a start record will be written out.
-		 */
-		ticket->t_flags |= XLOG_TIC_INITED;
-	}
-
-	xfs_log_ticket_put(ticket);
-	return lsn;
-}
-
 static bool
 __xlog_state_release_iclog(
 	struct xlog		*log,
@@ -869,32 +787,44 @@ xlog_wait_on_iclog(
 }
 
 /*
- * Final log writes as part of unmount.
- *
- * Mark the filesystem clean as unmount happens.  Note that during relocation
- * this routine needs to be executed as part of source-bag while the
- * deallocation must not be done until source-end.
+ * Write out an unmount record using the ticket provided. We have to account for
+ * the data space used in the unmount ticket as this write is not done from a
+ * transaction context that has already done the accounting for us.
  */
-
-/* Actually write the unmount record to disk. */
-static void
-xfs_log_write_unmount_record(
-	struct xfs_mount	*mp)
+static int
+xlog_write_unmount_record(
	struct xlog		*log,
+	struct xlog_ticket	*ticket,
+	xfs_lsn_t		*lsn,
+	uint			flags)
 {
-	/* the data section must be 32 bit size aligned */
-	struct xfs_unmount_log_format magic = {
+	struct xfs_unmount_log_format ulf = {
 		.magic = XLOG_UNMOUNT_TYPE,
 	};
 	struct xfs_log_iovec reg = {
-		.i_addr = &magic,
-		.i_len = sizeof(magic),
+		.i_addr = &ulf,
+		.i_len = sizeof(ulf),
 		.i_type = XLOG_REG_TYPE_UNMOUNT,
 	};
 	struct xfs_log_vec vec = {
 		.lv_niovecs = 1,
 		.lv_iovecp = &reg,
 	};
-	struct xlog		*log = mp->m_log;
+
+	/* account for space used by record data */
+	ticket->t_curr_res -= sizeof(ulf);
+	return xlog_write(log, &vec, ticket, lsn, NULL, flags, false);
+}
+
+/*
+ * Mark the filesystem clean by writing an unmount record to the head of the
+ * log.
+ */
+static void
+xlog_unmount_write(
+	struct xlog		*log)
+{
+	struct xfs_mount	*mp = log->l_mp;
 	struct xlog_in_core	*iclog;
 	struct xlog_ticket	*tic = NULL;
 	xfs_lsn_t		lsn;
@@ -905,23 +835,7 @@ xfs_log_write_unmount_record(
 	if (error)
 		goto out_err;
 
-	/*
-	 * If we think the summary counters are bad, clear the unmount header
-	 * flag in the unmount record so that the summary counters will be
-	 * recalculated during log recovery at next mount.  Refer to
-	 * xlog_check_unmount_rec for more details.
-	 */
-	if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
-			XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
-		xfs_alert(mp, "%s: will fix summary counters at next mount",
-				__func__);
-		flags &= ~XLOG_UNMOUNT_TRANS;
-	}
-
-	/* remove inited flag, and account for space used */
-	tic->t_flags = 0;
-	tic->t_curr_res -= sizeof(magic);
-	error = xlog_write(log, &vec, tic, &lsn, NULL, flags);
+	error = xlog_write_unmount_record(log, tic, &lsn, flags);
 	/*
 	 * At this point, we're umounting anyway, so there's no point in
 	 * transitioning log state to IOERROR. Just continue...
@@ -943,8 +857,7 @@ xfs_log_write_unmount_record(
 	if (tic) {
 		trace_xfs_log_umount_write(log, tic);
-		xlog_ungrant_log_space(log, tic);
-		xfs_log_ticket_put(tic);
+		xfs_log_ticket_ungrant(log, tic);
 	}
 }
@@ -987,8 +900,22 @@ xfs_log_unmount_write(
 	if (XLOG_FORCED_SHUTDOWN(log))
 		return;
 
+	/*
+	 * If we think the summary counters are bad, avoid writing the unmount
+	 * record to force log recovery at next mount, after which the summary
+	 * counters will be recalculated.  Refer to xlog_check_unmount_rec for
+	 * more details.
+	 */
+	if (XFS_TEST_ERROR(xfs_fs_has_sickness(mp, XFS_SICK_FS_COUNTERS), mp,
+			XFS_ERRTAG_FORCE_SUMMARY_RECALC)) {
+		xfs_alert(mp, "%s: will fix summary counters at next mount",
+				__func__);
+		return;
+	}
+
 	xfs_log_unmount_verify_iclog(log);
-	xfs_log_write_unmount_record(mp);
+	xlog_unmount_write(log);
 }
 
 /*
@@ -1515,20 +1442,17 @@ xlog_alloc_log(
 	return ERR_PTR(error);
 }	/* xlog_alloc_log */
 
 /*
  * Write out the commit record of a transaction associated with the given
- * ticket.  Return the lsn of the commit record.
+ * ticket to close off a running log write. Return the lsn of the commit record.
  */
-STATIC int
+int
 xlog_commit_record(
 	struct xlog		*log,
 	struct xlog_ticket	*ticket,
 	struct xlog_in_core	**iclog,
-	xfs_lsn_t		*commitlsnp)
+	xfs_lsn_t		*lsn)
 {
-	struct xfs_mount *mp = log->l_mp;
-	int	error;
 	struct xfs_log_iovec reg = {
 		.i_addr = NULL,
 		.i_len = 0,
@@ -1538,12 +1462,15 @@ xlog_commit_record(
 		.lv_niovecs = 1,
 		.lv_iovecp = &reg,
 	};
+	int	error;
+
+	if (XLOG_FORCED_SHUTDOWN(log))
+		return -EIO;
 
-	ASSERT_ALWAYS(iclog);
-	error = xlog_write(log, &vec, ticket, commitlsnp, iclog,
-			   XLOG_COMMIT_TRANS);
+	error = xlog_write(log, &vec, ticket, lsn, iclog, XLOG_COMMIT_TRANS,
+			   false);
 	if (error)
-		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
 	return error;
 }
@@ -1761,7 +1688,15 @@ xlog_write_iclog(
 	iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
 	iclog->ic_bio.bi_end_io = xlog_bio_end_io;
 	iclog->ic_bio.bi_private = iclog;
-	iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA;
+
+	/*
+	 * We use REQ_SYNC | REQ_IDLE here to tell the block layer there are
+	 * more IOs coming immediately after this one. This prevents the block
+	 * layer writeback throttle from throttling log writes behind background
+	 * metadata writeback and causing priority inversions.
+	 */
+	iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC |
+				REQ_IDLE | REQ_FUA;
 	if (need_flush)
 		iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
@@ -1981,7 +1916,7 @@ xlog_dealloc_log(
 	log->l_mp->m_log = NULL;
 	destroy_workqueue(log->l_ioend_workqueue);
 	kmem_free(log);
-}	/* xlog_dealloc_log */
+}
 /*
  * Update counters atomically now that memcpy is done.
@@ -2118,23 +2053,21 @@ xlog_print_trans(
 }
 
 /*
- * Calculate the potential space needed by the log vector.  Each region gets
- * its own xlog_op_header_t and may need to be double word aligned.
+ * Calculate the potential space needed by the log vector. We may need a start
+ * record, and each region gets its own struct xlog_op_header and may need to be
+ * double word aligned.
  */
 static int
 xlog_write_calc_vec_length(
 	struct xlog_ticket	*ticket,
-	struct xfs_log_vec	*log_vector)
+	struct xfs_log_vec	*log_vector,
+	bool			need_start_rec)
 {
 	struct xfs_log_vec	*lv;
-	int			headers = 0;
+	int			headers = need_start_rec ? 1 : 0;
 	int			len = 0;
 	int			i;
 
-	/* acct for start rec of xact */
-	if (ticket->t_flags & XLOG_TIC_INITED)
-		headers++;
-
 	for (lv = log_vector; lv; lv = lv->lv_next) {
 		/* we don't write ordered log vectors */
 		if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED)
@@ -2156,27 +2089,16 @@ xlog_write_calc_vec_length(
 	return len;
 }
 
-/*
- * If first write for transaction, insert start record.  We can't be trying to
- * commit if we are inited.  We can't have any "partial_copy" if we are inited.
- */
-static int
+static void
 xlog_write_start_rec(
 	struct xlog_op_header	*ophdr,
 	struct xlog_ticket	*ticket)
 {
-	if (!(ticket->t_flags & XLOG_TIC_INITED))
-		return 0;
-
 	ophdr->oh_tid	= cpu_to_be32(ticket->t_tid);
 	ophdr->oh_clientid = ticket->t_clientid;
 	ophdr->oh_len = 0;
 	ophdr->oh_flags = XLOG_START_TRANS;
 	ophdr->oh_res2 = 0;
-
-	ticket->t_flags &= ~XLOG_TIC_INITED;
-	return sizeof(struct xlog_op_header);
 }
 
 static xlog_op_header_t *
@@ -2365,13 +2287,14 @@ xlog_write(
 	struct xlog_ticket	*ticket,
 	xfs_lsn_t		*start_lsn,
 	struct xlog_in_core	**commit_iclog,
-	uint			flags)
+	uint			flags,
+	bool			need_start_rec)
 {
 	struct xlog_in_core	*iclog = NULL;
-	struct xfs_log_iovec	*vecp;
-	struct xfs_log_vec	*lv;
+	struct xfs_log_vec	*lv = log_vector;
+	struct xfs_log_iovec	*vecp = lv->lv_iovecp;
+	int			index = 0;
 	int			len;
-	int			index;
 	int			partial_copy = 0;
 	int			partial_copy_len = 0;
 	int			contwr = 0;
@@ -2379,25 +2302,13 @@ xlog_write(
 	int			data_cnt = 0;
 	int			error = 0;
 
-	*start_lsn = 0;
-
-	len = xlog_write_calc_vec_length(ticket, log_vector);
-
 	/*
-	 * Region headers and bytes are already accounted for.
-	 * We only need to take into account start records and
-	 * split regions in this function.
+	 * If this is a commit or unmount transaction, we don't need a start
+	 * record to be written.  We do, however, have to account for the
+	 * commit or unmount header that gets written. Hence we always have
+	 * to account for an extra xlog_op_header here.
 	 */
-	if (ticket->t_flags & XLOG_TIC_INITED)
-		ticket->t_curr_res -= sizeof(xlog_op_header_t);
-
-	/*
-	 * Commit record headers need to be accounted for. These
-	 * come in as separate writes so are easy to detect.
-	 */
-	if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
-		ticket->t_curr_res -= sizeof(xlog_op_header_t);
-
+	ticket->t_curr_res -= sizeof(struct xlog_op_header);
 	if (ticket->t_curr_res < 0) {
 		xfs_alert_tag(log->l_mp, XFS_PTAG_LOGRES,
 		     "ctx ticket reservation ran out. Need to up reservation");
@@ -2405,9 +2316,8 @@ xlog_write(
 		xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
 	}
 
-	index = 0;
-	lv = log_vector;
-	vecp = lv->lv_iovecp;
+	len = xlog_write_calc_vec_length(ticket, log_vector, need_start_rec);
+	*start_lsn = 0;
 	while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
 		void		*ptr;
 		int		log_offset;
@@ -2431,7 +2341,6 @@ xlog_write(
 		while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
 			struct xfs_log_iovec	*reg;
 			struct xlog_op_header	*ophdr;
-			int			start_rec_copy;
 			int			copy_len;
 			int			copy_off;
 			bool			ordered = false;
@@ -2447,11 +2356,15 @@ xlog_write(
 			ASSERT(reg->i_len % sizeof(int32_t) == 0);
 			ASSERT((unsigned long)ptr % sizeof(int32_t) == 0);
 
-			start_rec_copy = xlog_write_start_rec(ptr, ticket);
-			if (start_rec_copy) {
-				record_cnt++;
+			/*
+			 * Before we start formatting log vectors, we need to
+			 * write a start record. Only do this for the first
+			 * iclog we write to.
+			 */
+			if (need_start_rec) {
+				xlog_write_start_rec(ptr, ticket);
 				xlog_write_adv_cnt(&ptr, &len, &log_offset,
-						   start_rec_copy);
+						sizeof(struct xlog_op_header));
 			}
 
 			ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
@@ -2483,8 +2396,13 @@ xlog_write(
 				xlog_write_adv_cnt(&ptr, &len, &log_offset,
 						   copy_len);
 			}
-			copy_len += start_rec_copy + sizeof(xlog_op_header_t);
+			copy_len += sizeof(struct xlog_op_header);
 			record_cnt++;
+			if (need_start_rec) {
+				copy_len += sizeof(struct xlog_op_header);
+				record_cnt++;
+				need_start_rec = false;
+			}
 			data_cnt += contwr ? copy_len : 0;
 
 			error = xlog_write_copy_finish(log, iclog, flags,
@@ -2541,14 +2459,6 @@ xlog_write(
 	return error;
 }
-/*****************************************************************************
- *
- *		State Machine functions
- *
- *****************************************************************************
- */
-
 static void
 xlog_state_activate_iclog(
 	struct xlog_in_core	*iclog,
@@ -2909,7 +2819,7 @@ xlog_state_done_syncing(
 	 */
 	wake_up_all(&iclog->ic_write_wait);
 	spin_unlock(&log->l_icloglock);
-	xlog_state_do_callback(log);	/* also cleans log */
+	xlog_state_do_callback(log);
 }
 /*
@@ -3029,21 +2939,21 @@ xlog_state_get_iclog_space(
 	*logoffsetp = log_offset;
 	return 0;
-}	/* xlog_state_get_iclog_space */
+}
 
-/* The first cnt-1 times through here we don't need to
- * move the grant write head because the permanent
- * reservation has reserved cnt times the unit amount.
- * Release part of current permanent unit reservation and
- * reset current reservation to be one units worth.  Also
- * move grant reservation head forward.
+/*
+ * The first cnt-1 times a ticket goes through here we don't need to move the
+ * grant write head because the permanent reservation has reserved cnt times the
+ * unit amount.  Release part of current permanent unit reservation and reset
+ * current reservation to be one units worth.  Also move grant reservation head
+ * forward.
  */
-STATIC void
-xlog_regrant_reserve_log_space(
+void
+xfs_log_ticket_regrant(
 	struct xlog		*log,
 	struct xlog_ticket	*ticket)
 {
-	trace_xfs_log_regrant_reserve_enter(log, ticket);
+	trace_xfs_log_ticket_regrant(log, ticket);
 
 	if (ticket->t_cnt > 0)
 		ticket->t_cnt--;
@@ -3055,21 +2965,20 @@ xlog_regrant_reserve_log_space(
 	ticket->t_curr_res = ticket->t_unit_res;
 	xlog_tic_reset_res(ticket);
 
-	trace_xfs_log_regrant_reserve_sub(log, ticket);
+	trace_xfs_log_ticket_regrant_sub(log, ticket);
 
 	/* just return if we still have some of the pre-reserved space */
-	if (ticket->t_cnt > 0)
-		return;
+	if (!ticket->t_cnt) {
+		xlog_grant_add_space(log, &log->l_reserve_head.grant,
+				     ticket->t_unit_res);
+		trace_xfs_log_ticket_regrant_exit(log, ticket);
 
-	xlog_grant_add_space(log, &log->l_reserve_head.grant,
-					ticket->t_unit_res);
+		ticket->t_curr_res = ticket->t_unit_res;
+		xlog_tic_reset_res(ticket);
+	}
 
-	trace_xfs_log_regrant_reserve_exit(log, ticket);
-
-	ticket->t_curr_res = ticket->t_unit_res;
-	xlog_tic_reset_res(ticket);
-}	/* xlog_regrant_reserve_log_space */
+	xfs_log_ticket_put(ticket);
+}
 /*
  * Give back the space left from a reservation.
@@ -3085,18 +2994,19 @@ xlog_regrant_reserve_log_space(
  * space, the count will stay at zero and the only space remaining will be
  * in the current reservation field.
  */
-STATIC void
-xlog_ungrant_log_space(
+void
+xfs_log_ticket_ungrant(
 	struct xlog		*log,
 	struct xlog_ticket	*ticket)
 {
 	int			bytes;
 
+	trace_xfs_log_ticket_ungrant(log, ticket);
+
 	if (ticket->t_cnt > 0)
 		ticket->t_cnt--;
 
-	trace_xfs_log_ungrant_enter(log, ticket);
-	trace_xfs_log_ungrant_sub(log, ticket);
+	trace_xfs_log_ticket_ungrant_sub(log, ticket);
 
 	/*
 	 * If this is a permanent reservation ticket, we may be able to free
@@ -3111,18 +3021,15 @@ xlog_ungrant_log_space(
 	xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
 	xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);
 
-	trace_xfs_log_ungrant_exit(log, ticket);
+	trace_xfs_log_ticket_ungrant_exit(log, ticket);
 
 	xfs_log_space_wake(log->l_mp);
+	xfs_log_ticket_put(ticket);
 }
 /*
- * Mark the current iclog in the ring as WANT_SYNC and move the current iclog
- * pointer to the next iclog in the ring.
- *
- * When called from xlog_state_get_iclog_space(), the exact size of the iclog
- * has not yet been determined, all we know is that we have run out of space in
- * the current iclog.
+ * This routine will mark the current iclog in the ring as WANT_SYNC and move
+ * the current iclog pointer to the next iclog in the ring.
  */
 STATIC void
 xlog_state_switch_iclogs(
@@ -3167,7 +3074,7 @@ xlog_state_switch_iclogs(
 	}
 	ASSERT(iclog == log->l_iclog);
 	log->l_iclog = iclog->ic_next;
-}	/* xlog_state_switch_iclogs */
+}
 
 /*
  * Write out all data in the in-core log as of this exact moment in time.
@@ -3374,13 +3281,6 @@ xfs_log_force_lsn(
 	return ret;
 }
-/*****************************************************************************
- *
- *		TICKET functions
- *
- *****************************************************************************
- */
-
 /*
  * Free a used ticket when its refcount falls to zero.
  */
@@ -3529,7 +3429,6 @@ xlog_ticket_alloc(
 	tic->t_ocnt	= cnt;
 	tic->t_tid	= prandom_u32();
 	tic->t_clientid	= client;
-	tic->t_flags	= XLOG_TIC_INITED;
 	if (permanent)
 		tic->t_flags |= XLOG_TIC_PERM_RESERV;
 
@@ -3538,13 +3437,6 @@ xlog_ticket_alloc(
 	return tic;
 }
-/******************************************************************************
- *
- *		Log debug routines
- *
- ******************************************************************************
- */
-
 #if defined(DEBUG)
 /*
  * Make sure that the destination ptr is within the valid data region of
@@ -3630,7 +3522,7 @@ xlog_verify_tail_lsn(
 		if (blocks < BTOBB(iclog->ic_offset) + 1)
 			xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
 	}
-}	/* xlog_verify_tail_lsn */
+}
 
 /*
  * Perform a number of checks on the iclog before writing to disk.
@@ -3733,7 +3625,7 @@ xlog_verify_iclog(
 		}
 		ptr += sizeof(xlog_op_header_t) + op_len;
 	}
-}	/* xlog_verify_iclog */
+}
 #endif
 
 /*
...
@@ -105,10 +105,6 @@ struct xfs_log_item;
 struct xfs_item_ops;
 struct xfs_trans;
 
-xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
-		       struct xlog_ticket *ticket,
-		       struct xlog_in_core **iclog,
-		       bool regrant);
 int	  xfs_log_force(struct xfs_mount *mp, uint flags);
 int	  xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
 		int *log_forced);
...
@@ -668,6 +668,11 @@ xlog_cil_push_work(
 	push_seq = cil->xc_push_seq;
 	ASSERT(push_seq <= ctx->sequence);
 
+	/*
+	 * Wake up any background push waiters now this context is being pushed.
+	 */
+	wake_up_all(&ctx->push_wait);
+
 	/*
 	 * Check if we've anything to push. If there is nothing, then we don't
 	 * move on to a new sequence number and so we have to be able to push
@@ -744,6 +749,7 @@ xlog_cil_push_work(
 	 */
 	INIT_LIST_HEAD(&new_ctx->committing);
 	INIT_LIST_HEAD(&new_ctx->busy_extents);
+	init_waitqueue_head(&new_ctx->push_wait);
 	new_ctx->sequence = ctx->sequence + 1;
 	new_ctx->cil = cil;
 	cil->xc_ctx = new_ctx;
@@ -801,7 +807,7 @@ xlog_cil_push_work(
 	lvhdr.lv_iovecp = &lhdr;
 	lvhdr.lv_next = ctx->lv_chain;
 
-	error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
+	error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0, true);
 	if (error)
 		goto out_abort_free_ticket;
 
@@ -839,10 +845,11 @@ xlog_cil_push_work(
 	}
 	spin_unlock(&cil->xc_push_lock);
 
-	/* xfs_log_done always frees the ticket on error. */
-	commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, false);
-	if (commit_lsn == -1)
-		goto out_abort;
+	error = xlog_commit_record(log, tic, &commit_iclog, &commit_lsn);
+	if (error)
+		goto out_abort_free_ticket;
+
+	xfs_log_ticket_ungrant(log, tic);
 
 	spin_lock(&commit_iclog->ic_callback_lock);
 	if (commit_iclog->ic_state == XLOG_STATE_IOERROR) {
@@ -875,7 +882,7 @@ xlog_cil_push_work(
 	return;
 
 out_abort_free_ticket:
-	xfs_log_ticket_put(tic);
+	xfs_log_ticket_ungrant(log, tic);
 out_abort:
 	ASSERT(XLOG_FORCED_SHUTDOWN(log));
 	xlog_cil_committed(ctx);
@@ -890,7 +897,7 @@ xlog_cil_push_work(
  */
 static void
 xlog_cil_push_background(
-	struct xlog	*log)
+	struct xlog	*log) __releases(cil->xc_ctx_lock)
 {
 	struct xfs_cil	*cil = log->l_cilp;
 
@@ -904,14 +911,36 @@ xlog_cil_push_background(
 	 * don't do a background push if we haven't used up all the
 	 * space available yet.
 	 */
-	if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+	if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) {
+		up_read(&cil->xc_ctx_lock);
 		return;
+	}
 
 	spin_lock(&cil->xc_push_lock);
 	if (cil->xc_push_seq < cil->xc_current_sequence) {
 		cil->xc_push_seq = cil->xc_current_sequence;
 		queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
 	}
+
+	/*
+	 * Drop the context lock now, we can't hold that if we need to sleep
+	 * because we are over the blocking threshold. The push_lock is still
+	 * held, so blocking threshold sleep/wakeup is still correctly
+	 * serialised here.
+	 */
+	up_read(&cil->xc_ctx_lock);
+
+	/*
+	 * If we are well over the space limit, throttle the work that is being
+	 * done until the push work on this context has begun.
+	 */
+	if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
+		trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
+		ASSERT(cil->xc_ctx->space_used < log->l_logsize);
+		xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock);
+		return;
+	}
+
 	spin_unlock(&cil->xc_push_lock);
 }
@@ -1007,7 +1036,10 @@ xfs_log_commit_cil(
 	if (commit_lsn)
 		*commit_lsn = xc_commit_lsn;
 
-	xfs_log_done(mp, tp->t_ticket, NULL, regrant);
+	if (regrant && !XLOG_FORCED_SHUTDOWN(log))
+		xfs_log_ticket_regrant(log, tp->t_ticket);
+	else
+		xfs_log_ticket_ungrant(log, tp->t_ticket);
 	tp->t_ticket = NULL;
 	xfs_trans_unreserve_and_mod_sb(tp);
 
@@ -1028,9 +1060,9 @@ xfs_log_commit_cil(
 		if (lip->li_ops->iop_committing)
 			lip->li_ops->iop_committing(lip, xc_commit_lsn);
 	}
-	xlog_cil_push_background(log);
 
-	up_read(&cil->xc_ctx_lock);
+	/* xlog_cil_push_background() releases cil->xc_ctx_lock */
+	xlog_cil_push_background(log);
 }
 
 /*
@@ -1189,6 +1221,7 @@ xlog_cil_init(
 	INIT_LIST_HEAD(&ctx->committing);
 	INIT_LIST_HEAD(&ctx->busy_extents);
+	init_waitqueue_head(&ctx->push_wait);
 	ctx->sequence = 1;
 	ctx->cil = cil;
 	cil->xc_ctx = ctx;
...
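The early return after xlog_wait() above works because that helper drops the
passed-in lock before sleeping. For context, the pre-existing helper from
xfs_log_priv.h looks approximately like this (quoted from memory, may differ
cosmetically from the tree):

	static inline void
	xlog_wait(
		struct wait_queue_head	*wq,
		struct spinlock		*lock)
			__releases(lock)
	{
		DECLARE_WAITQUEUE(wait, current);

		add_wait_queue_exclusive(wq, &wait);
		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock(lock);	/* lock released before schedule() */
		schedule();
		remove_wait_queue(wq, &wait);
	}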
@@ -51,13 +51,11 @@ enum xlog_iclog_state {
 };
 
 /*
- * Flags to log ticket
+ * Log ticket flags
  */
-#define XLOG_TIC_INITED		0x1	/* has been initialized */
-#define XLOG_TIC_PERM_RESERV	0x2	/* permanent reservation */
+#define XLOG_TIC_PERM_RESERV	0x1	/* permanent reservation */
 
 #define XLOG_TIC_FLAGS \
-	{ XLOG_TIC_INITED,	"XLOG_TIC_INITED" }, \
 	{ XLOG_TIC_PERM_RESERV,	"XLOG_TIC_PERM_RESERV" }
 
 /*
@@ -242,6 +240,7 @@ struct xfs_cil_ctx {
 	struct xfs_log_vec	*lv_chain;	/* logvecs being pushed */
 	struct list_head	iclog_entry;
 	struct list_head	committing;	/* ctx committing list */
+	wait_queue_head_t	push_wait;	/* background push throttle */
 	struct work_struct	discard_endio_work;
 };
 
@@ -318,13 +317,53 @@ struct xfs_cil {
  * tries to keep 25% of the log free, so we need to keep below that limit or we
  * risk running out of free log space to start any new transactions.
  *
- * In order to keep background CIL push efficient, we will set a lower
- * threshold at which background pushing is attempted without blocking current
- * transaction commits. A separate, higher bound defines when CIL pushes are
- * enforced to ensure we stay within our maximum checkpoint size bounds.
- * threshold, yet give us plenty of space for aggregation on large logs.
+ * In order to keep background CIL push efficient, we only need to ensure the
+ * CIL is large enough to maintain sufficient in-memory relogging to avoid
+ * repeated physical writes of frequently modified metadata. If we allow the CIL
+ * to grow to a substantial fraction of the log, then we may be pinning hundreds
+ * of megabytes of metadata in memory until the CIL flushes. This can cause
+ * issues when we are running low on memory - pinned memory cannot be reclaimed,
+ * and the CIL consumes a lot of memory. Hence we need to set an upper physical
+ * size limit for the CIL that limits the maximum amount of memory pinned by the
+ * CIL but does not limit performance by reducing relogging efficiency
+ * significantly.
+ *
+ * As such, the CIL push threshold ends up being the smaller of two thresholds:
+ * - a threshold large enough that it allows CIL to be pushed and progress to be
+ *   made without excessive blocking of incoming transaction commits. This is
+ *   defined to be 12.5% of the log space - half the 25% push threshold of the
+ *   AIL.
+ * - small enough that it doesn't pin excessive amounts of memory but maintains
+ *   close to peak relogging efficiency. This is defined to be 16x the iclog
+ *   buffer window (32MB) as measurements have shown this to be roughly the
+ *   point of diminishing performance increases under highly concurrent
+ *   modification workloads.
+ *
+ * To prevent the CIL from overflowing upper commit size bounds, we introduce a
+ * new threshold at which we block committing transactions until the background
+ * CIL commit commences and switches to a new context. While this is not a hard
+ * limit, it forces the process committing a transaction to the CIL to block and
+ * yield the CPU, giving the CIL push work a chance to be scheduled and start
+ * work. This prevents a process running lots of transactions from overfilling
+ * the CIL because it is not yielding the CPU. We set the blocking limit at
+ * twice the background push space threshold so we keep in line with the AIL
+ * push thresholds.
+ *
+ * Note: this is not a -hard- limit as blocking is applied after the transaction
+ * is inserted into the CIL and the push has been triggered. It is largely a
+ * throttling mechanism that allows the CIL push to be scheduled and run. A hard
+ * limit will be difficult to implement without introducing global serialisation
+ * in the CIL commit fast path, and it's not at all clear that we actually need
+ * such hard limits given the ~7 years we've run without a hard limit before
+ * finding the first situation where a checkpoint size overflow actually
+ * occurred. Hence the simple throttle, and an ASSERT check to tell us that
+ * we've overrun the max size.
  */
-#define XLOG_CIL_SPACE_LIMIT(log)	(log->l_logsize >> 3)
+#define XLOG_CIL_SPACE_LIMIT(log)	\
+	min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4)
+
+#define XLOG_CIL_BLOCKING_SPACE_LIMIT(log)	\
+	(XLOG_CIL_SPACE_LIMIT(log) * 2)
 
 /*
  * ticket grant locks, queues and accounting have their own cachlines
@@ -439,14 +478,14 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
 void	xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
 void	xlog_print_trans(struct xfs_trans *);
-int
-xlog_write(
-	struct xlog		*log,
-	struct xfs_log_vec	*log_vector,
-	struct xlog_ticket	*tic,
-	xfs_lsn_t		*start_lsn,
-	struct xlog_in_core	**commit_iclog,
-	uint			flags);
+int	xlog_write(struct xlog *log, struct xfs_log_vec *log_vector,
+		struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
+		struct xlog_in_core **commit_iclog, uint flags,
+		bool need_start_rec);
+int	xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket,
+		struct xlog_in_core **iclog, xfs_lsn_t *lsn);
+void	xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
+void	xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
 
 /*
  * When we crack an atomic LSN, we sample it first so that the value will not
...
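To put concrete numbers on the two macros (illustrative arithmetic only,
assuming a v2 log where BBTOB(XLOG_TOTAL_REC_SHIFT(log)) works out to a 2MB
iclog record window):

	/*
	 * Large log, e.g. 2GB: the fixed memory cap wins.
	 *   XLOG_CIL_SPACE_LIMIT          = min(2GB >> 3, 2MB << 4)
	 *                                 = min(256MB, 32MB) = 32MB
	 *   XLOG_CIL_BLOCKING_SPACE_LIMIT = 2 * 32MB          = 64MB
	 *
	 * Small log, e.g. 64MB: the 12.5% term wins instead.
	 *   XLOG_CIL_SPACE_LIMIT          = min(64MB >> 3, 32MB) = 8MB
	 *   XLOG_CIL_BLOCKING_SPACE_LIMIT = 2 * 8MB              = 16MB
	 */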
@@ -167,6 +167,7 @@ typedef struct xfs_mount {
 	struct xfs_kobj		m_error_meta_kobj;
 	struct xfs_error_cfg	m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
 	struct xstats		m_stats;	/* per-fs stats */
+	struct ratelimit_state	m_flush_inodes_ratelimit;
 
 	struct workqueue_struct *m_buf_workqueue;
 	struct workqueue_struct	*m_unwritten_workqueue;
...
@@ -121,12 +121,11 @@ xfs_qm_dqpurge(
 {
 	struct xfs_mount	*mp = dqp->q_mount;
 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
+	int			error = -EAGAIN;
 
 	xfs_dqlock(dqp);
-	if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
-		xfs_dqunlock(dqp);
-		return -EAGAIN;
-	}
+	if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0)
+		goto out_unlock;
 
 	dqp->dq_flags |= XFS_DQ_FREEING;
 
@@ -139,7 +138,6 @@ xfs_qm_dqpurge(
 	 */
 	if (XFS_DQ_IS_DIRTY(dqp)) {
 		struct xfs_buf	*bp = NULL;
-		int		error;
 
 		/*
 		 * We don't care about getting disk errors here. We need
@@ -149,6 +147,8 @@ xfs_qm_dqpurge(
 		if (!error) {
 			error = xfs_bwrite(bp);
 			xfs_buf_relse(bp);
+		} else if (error == -EAGAIN) {
+			goto out_unlock;
 		}
 		xfs_dqflock(dqp);
 	}
@@ -174,6 +174,10 @@ xfs_qm_dqpurge(
 	xfs_qm_dqdestroy(dqp);
 
 	return 0;
+
+out_unlock:
+	xfs_dqunlock(dqp);
+	return error;
 }
 
 /*
...
@@ -528,6 +528,9 @@ xfs_flush_inodes(
 {
 	struct super_block	*sb = mp->m_super;
 
+	if (!__ratelimit(&mp->m_flush_inodes_ratelimit))
+		return;
+
 	if (down_read_trylock(&sb->s_umount)) {
 		sync_inodes_sb(sb);
 		up_read(&sb->s_umount);
@@ -1366,6 +1369,17 @@ xfs_fc_fill_super(
 	if (error)
 		goto out_free_names;
 
+	/*
+	 * Cap the number of invocations of xfs_flush_inodes to 16 for every
+	 * quarter of a second. The magic numbers here were determined by
+	 * observation neither to cause stalls in writeback when there are a
+	 * lot of IO threads and the fs is near ENOSPC, nor cause any fstest
+	 * regressions. YMMV.
+	 */
+	ratelimit_state_init(&mp->m_flush_inodes_ratelimit, HZ / 4, 16);
+	ratelimit_set_flags(&mp->m_flush_inodes_ratelimit,
+			RATELIMIT_MSG_ON_RELEASE);
+
 	error = xfs_init_mount_workqueues(mp);
 	if (error)
 		goto out_close_devices;
@@ -1861,7 +1875,8 @@ xfs_init_zones(void)
 	xfs_ili_zone = kmem_cache_create("xfs_ili",
 					 sizeof(struct xfs_inode_log_item), 0,
-					 SLAB_MEM_SPREAD, NULL);
+					 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+					 NULL);
 	if (!xfs_ili_zone)
 		goto out_destroy_inode_zone;
...
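The interval/burst ratelimit used above is the generic kernel helper. A
minimal sketch of the same pattern (names are hypothetical): allow at most
16 calls per HZ/4 jiffies, where __ratelimit() returns nonzero while the
burst budget lasts.

	#include <linux/ratelimit.h>

	static struct ratelimit_state demo_rs;

	static void demo_setup(void)
	{
		/* 16 invocations allowed per quarter second */
		ratelimit_state_init(&demo_rs, HZ / 4, 16);
		/* only report suppressed calls when the window releases */
		ratelimit_set_flags(&demo_rs, RATELIMIT_MSG_ON_RELEASE);
	}

	static void demo_flush(void)
	{
		if (!__ratelimit(&demo_rs))
			return;		/* over budget: skip this flush */
		/* ... expensive flush work here ... */
	}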
@@ -176,7 +176,6 @@ xfs_symlink(
 		return -ENAMETOOLONG;
 	ASSERT(pathlen > 0);
 
-	udqp = gdqp = NULL;
 	prid = xfs_get_initial_prid(dp);
 
 	/*
...
@@ -1001,8 +1001,6 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
 DEFINE_EVENT(xfs_loggrant_class, name, \
 	TP_PROTO(struct xlog *log, struct xlog_ticket *tic), \
 	TP_ARGS(log, tic))
-DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
-DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
 DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
 DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep);
 DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake);
@@ -1011,12 +1009,13 @@ DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
 DEFINE_LOGGRANT_EVENT(xfs_log_reserve_exit);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant_sub);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_sub);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait);
 
 DECLARE_EVENT_CLASS(xfs_log_item_class,
 	TP_PROTO(struct xfs_log_item *lip),
...
@@ -9,6 +9,7 @@
 #include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
+#include "xfs_log_priv.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_extent_busy.h"
@@ -150,8 +151,9 @@ xfs_trans_reserve(
 	uint			blocks,
 	uint			rtextents)
 {
-	int			error = 0;
-	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+	struct xfs_mount	*mp = tp->t_mountp;
+	int			error = 0;
+	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;

 	/* Mark this thread as being in a transaction */
 	current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
@@ -162,7 +164,7 @@ xfs_trans_reserve(
 	 * fail if the count would go below zero.
 	 */
 	if (blocks > 0) {
-		error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
+		error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
 		if (error != 0) {
 			current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
 			return -ENOSPC;
@@ -191,9 +193,9 @@ xfs_trans_reserve(
 		if (tp->t_ticket != NULL) {
 			ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
-			error = xfs_log_regrant(tp->t_mountp, tp->t_ticket);
+			error = xfs_log_regrant(mp, tp->t_ticket);
 		} else {
-			error = xfs_log_reserve(tp->t_mountp,
+			error = xfs_log_reserve(mp,
 					resp->tr_logres,
 					resp->tr_logcount,
 					&tp->t_ticket, XFS_TRANSACTION,
@@ -213,7 +215,7 @@ xfs_trans_reserve(
 	 * fail if the count would go below zero.
 	 */
 	if (rtextents > 0) {
-		error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents));
+		error = xfs_mod_frextents(mp, -((int64_t)rtextents));
 		if (error) {
 			error = -ENOSPC;
 			goto undo_log;
@@ -229,7 +231,7 @@ xfs_trans_reserve(
 	 */
undo_log:
 	if (resp->tr_logres > 0) {
-		xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, false);
+		xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
 		tp->t_ticket = NULL;
 		tp->t_log_res = 0;
 		tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
undo_blocks: undo_blocks:
if (blocks > 0) { if (blocks > 0) {
xfs_mod_fdblocks(tp->t_mountp, (int64_t)blocks, rsvd); xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
tp->t_blk_res = 0; tp->t_blk_res = 0;
} }
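
[Editor's note: the hunks above mostly just hoist tp->t_mountp into a local mp, but they also ride on the function's error paths, which are a classic goto unwind chain: each acquired reservation (blocks, then log space, then rt extents) has a matching undo label, released in reverse order on failure. A compact, self-contained sketch of that idiom follows; all names are hypothetical, not the XFS ones.]

#include <stdio.h>

/* Toy resources standing in for block, log, and rtextent reservations. */
static int take_blocks(int n)	{ return n <= 100 ? 0 : -1; }
static void give_blocks(int n)	{ (void)n; }
static int take_log(void)	{ return 0; }
static void give_log(void)	{ }
static int take_rtextents(int n) { return n <= 10 ? 0 : -1; }

static int reserve(int blocks, int rtextents)
{
	int error;

	error = take_blocks(blocks);
	if (error)
		return error;

	error = take_log();
	if (error)
		goto undo_blocks;

	error = take_rtextents(rtextents);
	if (error)
		goto undo_log;

	return 0;

	/* unwind in reverse order of acquisition */
undo_log:
	give_log();
undo_blocks:
	give_blocks(blocks);
	return error;
}

int main(void)
{
	printf("reserve ok:   %d\n", reserve(50, 5));
	printf("reserve fail: %d\n", reserve(50, 50));
	return 0;
}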
@@ -1004,9 +1006,10 @@ __xfs_trans_commit(
 	 */
 	xfs_trans_unreserve_and_mod_dquots(tp);
 	if (tp->t_ticket) {
-		commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, regrant);
-		if (commit_lsn == -1 && !error)
-			error = -EIO;
+		if (regrant && !XLOG_FORCED_SHUTDOWN(mp->m_log))
+			xfs_log_ticket_regrant(mp->m_log, tp->t_ticket);
+		else
+			xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
 		tp->t_ticket = NULL;
 	}
 	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
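
[Editor's note: the commit path now chooses explicitly between the two ticket operations that replace xfs_log_done(): a rolling (regrant) commit keeps the ticket's reservation for the next transaction in the chain, while the final commit, a cancel, or a shut-down log releases it outright. A toy model of that decision follows; the fields, functions, and grant arithmetic are invented for illustration and are not the XFS implementation.]

#include <stdbool.h>
#include <stdio.h>

/* Toy model of a log ticket. */
struct ticket {
	int	reserved;	/* grant space this ticket holds */
	int	unit_res;	/* space one transaction consumes */
};

static int grant_space;		/* toy stand-in for the log grant head */

/* Rolling commit: top the ticket back up for the next transaction. */
static void ticket_regrant(struct ticket *t)
{
	grant_space += t->unit_res - t->reserved;
	t->reserved = t->unit_res;
}

/* Final commit, cancel, or shut-down log: release everything. */
static void ticket_ungrant(struct ticket *t)
{
	grant_space -= t->reserved;
	t->reserved = 0;
}

/* Mirrors the branch added to __xfs_trans_commit() above. */
static void commit_ticket(struct ticket *t, bool regrant, bool shutdown)
{
	if (regrant && !shutdown)
		ticket_regrant(t);
	else
		ticket_ungrant(t);
}

int main(void)
{
	struct ticket t = { .reserved = 100, .unit_res = 100 };

	grant_space = t.reserved;
	commit_ticket(&t, true, false);		/* rolling: ticket survives */
	printf("rolling commit: reserved=%d\n", t.reserved);
	commit_ticket(&t, false, false);	/* final: reservation dropped */
	printf("final commit:   reserved=%d grant_space=%d\n",
	       t.reserved, grant_space);
	return 0;
}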
@@ -1065,7 +1068,7 @@ xfs_trans_cancel(
 	xfs_trans_unreserve_and_mod_dquots(tp);

 	if (tp->t_ticket) {
-		xfs_log_done(mp, tp->t_ticket, NULL, false);
+		xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
 		tp->t_ticket = NULL;
 	}
...
@@ -109,17 +109,25 @@ xfs_ail_next(
  * We need the AIL lock in order to get a coherent read of the lsn of the last
  * item in the AIL.
  */
+static xfs_lsn_t
+__xfs_ail_min_lsn(
+	struct xfs_ail		*ailp)
+{
+	struct xfs_log_item	*lip = xfs_ail_min(ailp);
+
+	if (lip)
+		return lip->li_lsn;
+	return 0;
+}
+
 xfs_lsn_t
 xfs_ail_min_lsn(
 	struct xfs_ail		*ailp)
 {
-	xfs_lsn_t		lsn = 0;
-	struct xfs_log_item	*lip;
+	xfs_lsn_t		lsn;

 	spin_lock(&ailp->ail_lock);
-	lip = xfs_ail_min(ailp);
-	if (lip)
-		lsn = lip->li_lsn;
+	lsn = __xfs_ail_min_lsn(ailp);
 	spin_unlock(&ailp->ail_lock);

 	return lsn;
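
[Editor's note: the refactor above is the usual locked/unlocked helper split: __xfs_ail_min_lsn() assumes the caller already holds ail_lock (xfs_ail_update_finish() below relies on exactly that), while xfs_ail_min_lsn() wraps it with the lock for external callers. Sketched in userspace with a pthread mutex, purely for illustration; the names are not the XFS ones.]

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ail_lock = PTHREAD_MUTEX_INITIALIZER;
static long min_lsn_value;	/* 0 means "list empty" in this toy */

/* Double-underscore variant: caller must already hold ail_lock. */
static long __min_lsn(void)
{
	return min_lsn_value;
}

/* Public variant: takes the lock so the read is coherent. */
static long min_lsn(void)
{
	long lsn;

	pthread_mutex_lock(&ail_lock);
	lsn = __min_lsn();
	pthread_mutex_unlock(&ail_lock);
	return lsn;
}

int main(void)
{
	min_lsn_value = 42;
	printf("min lsn: %ld\n", min_lsn());
	return 0;
}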
@@ -681,6 +689,28 @@ xfs_ail_push_all_sync(
 	finish_wait(&ailp->ail_empty, &wait);
 }

+void
+xfs_ail_update_finish(
+	struct xfs_ail		*ailp,
+	xfs_lsn_t		old_lsn) __releases(ailp->ail_lock)
+{
+	struct xfs_mount	*mp = ailp->ail_mount;
+
+	/* if the tail lsn hasn't changed, don't do updates or wakeups. */
+	if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
+		spin_unlock(&ailp->ail_lock);
+		return;
+	}
+
+	if (!XFS_FORCED_SHUTDOWN(mp))
+		xlog_assign_tail_lsn_locked(mp);
+
+	if (list_empty(&ailp->ail_head))
+		wake_up_all(&ailp->ail_empty);
+	spin_unlock(&ailp->ail_lock);
+	xfs_log_space_wake(mp);
+}
+
 /*
  * xfs_trans_ail_update - bulk AIL insertion operation.
  *
@@ -712,7 +742,7 @@ xfs_trans_ail_update_bulk(
 	xfs_lsn_t		lsn) __releases(ailp->ail_lock)
 {
 	struct xfs_log_item	*mlip;
-	int			mlip_changed = 0;
+	xfs_lsn_t		tail_lsn = 0;
 	int			i;
 	LIST_HEAD(tmp);

@@ -727,9 +757,10 @@ xfs_trans_ail_update_bulk(
 				continue;

 			trace_xfs_ail_move(lip, lip->li_lsn, lsn);
+			if (mlip == lip && !tail_lsn)
+				tail_lsn = lip->li_lsn;
+
 			xfs_ail_delete(ailp, lip);
-			if (mlip == lip)
-				mlip_changed = 1;
 		} else {
 			trace_xfs_ail_insert(lip, 0, lsn);
 		}

@@ -740,23 +771,23 @@ xfs_trans_ail_update_bulk(
 	if (!list_empty(&tmp))
 		xfs_ail_splice(ailp, cur, &tmp, lsn);

-	if (mlip_changed) {
-		if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
-			xlog_assign_tail_lsn_locked(ailp->ail_mount);
-		spin_unlock(&ailp->ail_lock);
-
-		xfs_log_space_wake(ailp->ail_mount);
-	} else {
-		spin_unlock(&ailp->ail_lock);
-	}
+	xfs_ail_update_finish(ailp, tail_lsn);
 }

-bool
+/*
+ * Delete one log item from the AIL.
+ *
+ * If this item was at the tail of the AIL, return the LSN of the log item so
+ * that we can use it to check if the LSN of the tail of the log has moved
+ * when finishing up the AIL delete process in xfs_ail_update_finish().
+ */
+xfs_lsn_t
 xfs_ail_delete_one(
 	struct xfs_ail		*ailp,
 	struct xfs_log_item	*lip)
 {
 	struct xfs_log_item	*mlip = xfs_ail_min(ailp);
+	xfs_lsn_t		lsn = lip->li_lsn;

 	trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
 	xfs_ail_delete(ailp, lip);

@@ -764,7 +795,9 @@ xfs_ail_delete_one(
 	clear_bit(XFS_LI_IN_AIL, &lip->li_flags);
 	lip->li_lsn = 0;

-	return mlip == lip;
+	if (mlip == lip)
+		return lsn;
+	return 0;
 }

 /**
@@ -792,10 +825,10 @@ void
 xfs_trans_ail_delete(
 	struct xfs_ail		*ailp,
 	struct xfs_log_item	*lip,
-	int			shutdown_type) __releases(ailp->ail_lock)
+	int			shutdown_type)
 {
 	struct xfs_mount	*mp = ailp->ail_mount;
-	bool			mlip_changed;
+	xfs_lsn_t		tail_lsn;

 	if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
 		spin_unlock(&ailp->ail_lock);

@@ -808,17 +841,8 @@ xfs_trans_ail_delete(
 		return;
 	}

-	mlip_changed = xfs_ail_delete_one(ailp, lip);
-	if (mlip_changed) {
-		if (!XFS_FORCED_SHUTDOWN(mp))
-			xlog_assign_tail_lsn_locked(mp);
-		if (list_empty(&ailp->ail_head))
-			wake_up_all(&ailp->ail_empty);
-	}
-
-	spin_unlock(&ailp->ail_lock);
-	if (mlip_changed)
-		xfs_log_space_wake(ailp->ail_mount);
+	tail_lsn = xfs_ail_delete_one(ailp, lip);
+	xfs_ail_update_finish(ailp, tail_lsn);
 }

 int
...
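
[Editor's note: taken together, the AIL hunks above replace the boolean "did the minimum item change" protocol with an LSN-based one: xfs_ail_delete_one() returns the tail item's old LSN (or 0), and xfs_ail_update_finish() only recomputes the log tail and wakes waiters when that LSN differs from the current minimum. A toy sketch of that "skip the expensive work unless the observed value moved" early-out, with illustrative names only:]

#include <stdio.h>

static long current_tail_lsn;	/* stand-in for __xfs_ail_min_lsn() */

/* Only do the expensive tail update and wakeups when the tail item's
 * LSN actually moved; 0 means "the tail item was never touched". */
static void update_finish(long old_lsn)
{
	if (!old_lsn || old_lsn == current_tail_lsn)
		return;		/* nothing moved: skip the work */
	printf("tail moved %ld -> %ld: reassign tail, wake waiters\n",
	       old_lsn, current_tail_lsn);
}

int main(void)
{
	current_tail_lsn = 10;
	update_finish(0);	/* deletion elsewhere in the list */
	update_finish(10);	/* same LSN: another item shares the tail */
	current_tail_lsn = 20;
	update_finish(10);	/* tail advanced: do the update */
	return 0;
}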
@@ -91,9 +91,11 @@ xfs_trans_ail_update(
 	xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
 }

-bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
+xfs_lsn_t xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
+void xfs_ail_update_finish(struct xfs_ail *ailp, xfs_lsn_t old_lsn)
+			__releases(ailp->ail_lock);
 void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
-		int shutdown_type) __releases(ailp->ail_lock);
+		int shutdown_type);

 static inline void
 xfs_trans_ail_remove(
...