Commit 88e88374 authored by Alex Elder's avatar Alex Elder

Merge branch 'delayed-logging-for-2.6.35' into for-linus

parents 7e125f7b ccf7c23f
This diff is collapsed.
......@@ -77,6 +77,7 @@ xfs-y += xfs_alloc.o \
xfs_itable.o \
xfs_dfrag.o \
xfs_log.o \
xfs_log_cil.o \
xfs_log_recover.o \
xfs_mount.o \
xfs_mru_cache.o \
......
......@@ -37,6 +37,7 @@
#include "xfs_sb.h"
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
......@@ -850,6 +851,12 @@ xfs_buf_lock_value(
* Note that this in no way locks the underlying pages, so it is only
* useful for synchronizing concurrent use of buffer objects, not for
* synchronizing independent access to the underlying pages.
*
* If we come across a stale, pinned, locked buffer, we know that we
* are being asked to lock a buffer that has been reallocated. Because
* it is pinned, we know that the log has not been pushed to disk and
* hence it will still be locked. Rather than sleeping until someone
* else pushes the log, push it ourselves before trying to get the lock.
*/
void
xfs_buf_lock(
......@@ -857,6 +864,8 @@ xfs_buf_lock(
{
trace_xfs_buf_lock(bp, _RET_IP_);
if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
xfs_log_force(bp->b_mount, 0);
if (atomic_read(&bp->b_io_remaining))
blk_run_address_space(bp->b_target->bt_mapping);
down(&bp->b_sema);
......
......@@ -19,6 +19,7 @@
#include "xfs_dmapi.h"
#include "xfs_sb.h"
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_quota.h"
......
......@@ -119,6 +119,8 @@ mempool_t *xfs_ioend_pool;
#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */
#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */
#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */
/*
* Table driven mount option parser.
......@@ -374,6 +376,13 @@ xfs_parseargs(
mp->m_flags |= XFS_MOUNT_DMAPI;
} else if (!strcmp(this_char, MNTOPT_DMI)) {
mp->m_flags |= XFS_MOUNT_DMAPI;
} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
mp->m_flags |= XFS_MOUNT_DELAYLOG;
cmn_err(CE_WARN,
"Enabling EXPERIMENTAL delayed logging feature "
"- use at your own risk.\n");
} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
} else if (!strcmp(this_char, "ihashsize")) {
cmn_err(CE_WARN,
"XFS: ihashsize no longer used, option is deprecated.");
......@@ -535,6 +544,7 @@ xfs_showargs(
{ XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
{ XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI },
{ XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
{ XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
{ 0, NULL }
};
static struct proc_xfs_info xfs_info_unset[] = {
......@@ -1755,7 +1765,7 @@ xfs_init_zones(void)
* but it is much faster.
*/
xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
(((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
(((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
NBWORD) * sizeof(int))), "xfs_buf_item");
if (!xfs_buf_item_zone)
goto out_destroy_trans_zone;
......
......@@ -1059,83 +1059,112 @@ TRACE_EVENT(xfs_bunmap,
);
#define XFS_BUSY_SYNC \
{ 0, "async" }, \
{ 1, "sync" }
TRACE_EVENT(xfs_alloc_busy,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_extlen_t len, int slot),
TP_ARGS(mp, agno, agbno, len, slot),
TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len, int sync),
TP_ARGS(trans, agno, agbno, len, sync),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(struct xfs_trans *, tp)
__field(int, tid)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
__field(int, slot)
__field(int, sync)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->dev = trans->t_mountp->m_super->s_dev;
__entry->tp = trans;
__entry->tid = trans->t_ticket->t_tid;
__entry->agno = agno;
__entry->agbno = agbno;
__entry->len = len;
__entry->slot = slot;
__entry->sync = sync;
),
TP_printk("dev %d:%d agno %u agbno %u len %u slot %d",
TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->tp,
__entry->tid,
__entry->agno,
__entry->agbno,
__entry->len,
__entry->slot)
__print_symbolic(__entry->sync, XFS_BUSY_SYNC))
);
#define XFS_BUSY_STATES \
{ 0, "found" }, \
{ 1, "missing" }
TRACE_EVENT(xfs_alloc_unbusy,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
int slot, int found),
TP_ARGS(mp, agno, slot, found),
xfs_agblock_t agbno, xfs_extlen_t len),
TP_ARGS(mp, agno, agbno, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(int, slot)
__field(int, found)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->slot = slot;
__entry->found = found;
__entry->agbno = agbno;
__entry->len = len;
),
TP_printk("dev %d:%d agno %u slot %d %s",
TP_printk("dev %d:%d agno %u agbno %u len %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->slot,
__print_symbolic(__entry->found, XFS_BUSY_STATES))
__entry->agbno,
__entry->len)
);
#define XFS_BUSY_STATES \
{ 0, "missing" }, \
{ 1, "found" }
TRACE_EVENT(xfs_alloc_busysearch,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_extlen_t len, xfs_lsn_t lsn),
TP_ARGS(mp, agno, agbno, len, lsn),
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len, int found),
TP_ARGS(mp, agno, agbno, len, found),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
__field(xfs_lsn_t, lsn)
__field(int, found)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->agbno = agbno;
__entry->len = len;
__entry->lsn = lsn;
__entry->found = found;
),
TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx",
TP_printk("dev %d:%d agno %u agbno %u len %u %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
__entry->len,
__print_symbolic(__entry->found, XFS_BUSY_STATES))
);
TRACE_EVENT(xfs_trans_commit_lsn,
TP_PROTO(struct xfs_trans *trans),
TP_ARGS(trans),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(struct xfs_trans *, tp)
__field(xfs_lsn_t, lsn)
),
TP_fast_assign(
__entry->dev = trans->t_mountp->m_super->s_dev;
__entry->tp = trans;
__entry->lsn = trans->t_commit_lsn;
),
TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->tp,
__entry->lsn)
);
......
......@@ -344,9 +344,9 @@ xfs_qm_init_dquot_blk(
for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
xfs_qm_dqinit_core(curid, type, d);
xfs_trans_dquot_buf(tp, bp,
(type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF :
((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF :
XFS_BLI_GDQUOT_BUF)));
(type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
XFS_BLF_GDQUOT_BUF)));
xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}
......
......@@ -175,14 +175,20 @@ typedef struct xfs_agfl {
} xfs_agfl_t;
/*
* Busy block/extent entry. Used in perag to mark blocks that have been freed
* but whose transactions aren't committed to disk yet.
* Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that
* have been freed but whose transactions aren't committed to disk yet.
*
* Note that we use the transaction ID to record the transaction, not the
* transaction structure itself. See xfs_alloc_busy_insert() for details.
*/
typedef struct xfs_perag_busy {
xfs_agblock_t busy_start;
xfs_extlen_t busy_length;
struct xfs_trans *busy_tp; /* transaction that did the free */
} xfs_perag_busy_t;
struct xfs_busy_extent {
struct rb_node rb_node; /* ag by-bno indexed search tree */
struct list_head list; /* transaction busy extent list */
xfs_agnumber_t agno;
xfs_agblock_t bno;
xfs_extlen_t length;
xlog_tid_t tid; /* transaction that created this */
};
/*
* Per-ag incore structure, copies of information in agf and agi,
......@@ -216,7 +222,8 @@ typedef struct xfs_perag {
xfs_agino_t pagl_leftrec;
xfs_agino_t pagl_rightrec;
#ifdef __KERNEL__
spinlock_t pagb_lock; /* lock for pagb_list */
spinlock_t pagb_lock; /* lock for pagb_tree */
struct rb_root pagb_tree; /* ordered tree of busy extents */
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
......@@ -226,7 +233,6 @@ typedef struct xfs_perag {
int pag_ici_reclaimable; /* reclaimable inodes */
#endif
int pagb_count; /* pagb slots in use */
xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */
} xfs_perag_t;
/*
......
This diff is collapsed.
......@@ -22,6 +22,7 @@ struct xfs_buf;
struct xfs_mount;
struct xfs_perag;
struct xfs_trans;
struct xfs_busy_extent;
/*
* Freespace allocation types. Argument to xfs_alloc_[v]extent.
......@@ -119,15 +120,13 @@ xfs_alloc_longest_free_extent(struct xfs_mount *mp,
#ifdef __KERNEL__
void
xfs_alloc_mark_busy(xfs_trans_t *tp,
xfs_alloc_busy_insert(xfs_trans_t *tp,
xfs_agnumber_t agno,
xfs_agblock_t bno,
xfs_extlen_t len);
void
xfs_alloc_clear_busy(xfs_trans_t *tp,
xfs_agnumber_t ag,
int idx);
xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp);
#endif /* __KERNEL__ */
......
......@@ -134,7 +134,7 @@ xfs_allocbt_free_block(
* disk. If a busy block is allocated, the iclog is pushed up to the
* LSN that freed the block.
*/
xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
xfs_trans_agbtree_delta(cur->bc_tp, -1);
return 0;
}
......
This diff is collapsed.
......@@ -41,22 +41,22 @@ typedef struct xfs_buf_log_format {
* This flag indicates that the buffer contains on disk inodes
* and requires special recovery handling.
*/
#define XFS_BLI_INODE_BUF 0x1
#define XFS_BLF_INODE_BUF 0x1
/*
* This flag indicates that the buffer should not be replayed
* during recovery because its blocks are being freed.
*/
#define XFS_BLI_CANCEL 0x2
#define XFS_BLF_CANCEL 0x2
/*
* This flag indicates that the buffer contains on disk
* user or group dquots and may require special recovery handling.
*/
#define XFS_BLI_UDQUOT_BUF 0x4
#define XFS_BLI_PDQUOT_BUF 0x8
#define XFS_BLI_GDQUOT_BUF 0x10
#define XFS_BLF_UDQUOT_BUF 0x4
#define XFS_BLF_PDQUOT_BUF 0x8
#define XFS_BLF_GDQUOT_BUF 0x10
#define XFS_BLI_CHUNK 128
#define XFS_BLI_SHIFT 7
#define XFS_BLF_CHUNK 128
#define XFS_BLF_SHIFT 7
#define BIT_TO_WORD_SHIFT 5
#define NBWORD (NBBY * sizeof(unsigned int))
......@@ -69,6 +69,7 @@ typedef struct xfs_buf_log_format {
#define XFS_BLI_LOGGED 0x08
#define XFS_BLI_INODE_ALLOC_BUF 0x10
#define XFS_BLI_STALE_INODE 0x20
#define XFS_BLI_INODE_BUF 0x40
#define XFS_BLI_FLAGS \
{ XFS_BLI_HOLD, "HOLD" }, \
......@@ -76,7 +77,8 @@ typedef struct xfs_buf_log_format {
{ XFS_BLI_STALE, "STALE" }, \
{ XFS_BLI_LOGGED, "LOGGED" }, \
{ XFS_BLI_INODE_ALLOC_BUF, "INODE_ALLOC" }, \
{ XFS_BLI_STALE_INODE, "STALE_INODE" }
{ XFS_BLI_STALE_INODE, "STALE_INODE" }, \
{ XFS_BLI_INODE_BUF, "INODE_BUF" }
#ifdef __KERNEL__
......
......@@ -170,7 +170,7 @@ xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...)
va_list ap;
#ifdef DEBUG
xfs_panic_mask |= XFS_PTAG_SHUTDOWN_CORRUPT;
xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
#endif
if (xfs_panic_mask && (xfs_panic_mask & panic_tag)
......
......@@ -54,9 +54,6 @@ STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
STATIC void xlog_dealloc_log(xlog_t *log);
STATIC int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
xlog_in_core_t **commit_iclog, uint flags);
/* local state machine functions */
STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
......@@ -86,14 +83,6 @@ STATIC int xlog_regrant_write_log_space(xlog_t *log,
STATIC void xlog_ungrant_log_space(xlog_t *log,
xlog_ticket_t *ticket);
/* local ticket functions */
STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log,
int unit_bytes,
int count,
char clientid,
uint flags);
#if defined(DEBUG)
STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
STATIC void xlog_verify_grant_head(xlog_t *log, int equals);
......@@ -360,6 +349,15 @@ xfs_log_reserve(
ASSERT(flags & XFS_LOG_PERM_RESERV);
internal_ticket = *ticket;
/*
* this is a new transaction on the ticket, so we need to
* change the transaction ID so that the next transaction has a
* different TID in the log. Just add one to the existing tid
* so that we can see chains of rolling transactions in the log
* easily.
*/
internal_ticket->t_tid++;
trace_xfs_log_reserve(log, internal_ticket);
xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
......@@ -367,7 +365,8 @@ xfs_log_reserve(
} else {
/* may sleep if need to allocate more tickets */
internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt,
client, flags);
client, flags,
KM_SLEEP|KM_MAYFAIL);
if (!internal_ticket)
return XFS_ERROR(ENOMEM);
internal_ticket->t_trans_type = t_type;
......@@ -452,6 +451,13 @@ xfs_log_mount(
/* Normal transactions can now occur */
mp->m_log->l_flags &= ~XLOG_ACTIVE_RECOVERY;
/*
* Now the log has been fully initialised and we know were our
* space grant counters are, we can initialise the permanent ticket
* needed for delayed logging to work.
*/
xlog_cil_init_post_recovery(mp->m_log);
return 0;
out_destroy_ail:
......@@ -658,6 +664,10 @@ xfs_log_item_init(
item->li_ailp = mp->m_ail;
item->li_type = type;
item->li_ops = ops;
item->li_lv = NULL;
INIT_LIST_HEAD(&item->li_ail);
INIT_LIST_HEAD(&item->li_cil);
}
/*
......@@ -1168,6 +1178,9 @@ xlog_alloc_log(xfs_mount_t *mp,
*iclogp = log->l_iclog; /* complete ring */
log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
error = xlog_cil_init(log);
if (error)
goto out_free_iclog;
return log;
out_free_iclog:
......@@ -1494,6 +1507,8 @@ xlog_dealloc_log(xlog_t *log)
xlog_in_core_t *iclog, *next_iclog;
int i;
xlog_cil_destroy(log);
iclog = log->l_iclog;
for (i=0; i<log->l_iclog_bufs; i++) {
sv_destroy(&iclog->ic_force_wait);
......@@ -1536,8 +1551,10 @@ xlog_state_finish_copy(xlog_t *log,
* print out info relating to regions written which consume
* the reservation
*/
STATIC void
xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
void
xlog_print_tic_res(
struct xfs_mount *mp,
struct xlog_ticket *ticket)
{
uint i;
uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
......@@ -1637,6 +1654,10 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
"bad-rtype" : res_type_str[r_type-1]),
ticket->t_res_arr[i].r_len);
}
xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp,
"xfs_log_write: reservation ran out. Need to up reservation");
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
}
/*
......@@ -1865,7 +1886,7 @@ xlog_write_copy_finish(
* we don't update ic_offset until the end when we know exactly how many
* bytes have been written out.
*/
STATIC int
int
xlog_write(
struct log *log,
struct xfs_log_vec *log_vector,
......@@ -1889,23 +1910,27 @@ xlog_write(
*start_lsn = 0;
len = xlog_write_calc_vec_length(ticket, log_vector);
if (ticket->t_curr_res < len) {
xlog_print_tic_res(log->l_mp, ticket);
#ifdef DEBUG
xlog_panic(
"xfs_log_write: reservation ran out. Need to up reservation");
#else
/* Customer configurable panic */
xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, log->l_mp,
"xfs_log_write: reservation ran out. Need to up reservation");
/* If we did not panic, shutdown the filesystem */
xfs_force_shutdown(log->l_mp, SHUTDOWN_CORRUPT_INCORE);
#endif
}
if (log->l_cilp) {
/*
* Region headers and bytes are already accounted for.
* We only need to take into account start records and
* split regions in this function.
*/
if (ticket->t_flags & XLOG_TIC_INITED)
ticket->t_curr_res -= sizeof(xlog_op_header_t);
/*
* Commit record headers need to be accounted for. These
* come in as separate writes so are easy to detect.
*/
if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
ticket->t_curr_res -= sizeof(xlog_op_header_t);
} else
ticket->t_curr_res -= len;
if (ticket->t_curr_res < 0)
xlog_print_tic_res(log->l_mp, ticket);
index = 0;
lv = log_vector;
vecp = lv->lv_iovecp;
......@@ -3000,6 +3025,8 @@ _xfs_log_force(
XFS_STATS_INC(xs_log_force);
xlog_cil_push(log, 1);
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
......@@ -3149,6 +3176,12 @@ _xfs_log_force_lsn(
XFS_STATS_INC(xs_log_force);
if (log->l_cilp) {
lsn = xlog_cil_push_lsn(log, lsn);
if (lsn == NULLCOMMITLSN)
return 0;
}
try_again:
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
......@@ -3313,22 +3346,30 @@ xfs_log_ticket_get(
return ticket;
}
xlog_tid_t
xfs_log_get_trans_ident(
struct xfs_trans *tp)
{
return tp->t_ticket->t_tid;
}
/*
* Allocate and initialise a new log ticket.
*/
STATIC xlog_ticket_t *
xlog_ticket_t *
xlog_ticket_alloc(
struct log *log,
int unit_bytes,
int cnt,
char client,
uint xflags)
uint xflags,
int alloc_flags)
{
struct xlog_ticket *tic;
uint num_headers;
int iclog_space;
tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL);
tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);
if (!tic)
return NULL;
......@@ -3647,6 +3688,11 @@ xlog_state_ioerror(
* c. nothing new gets queued up after (a) and (b) are done.
* d. if !logerror, flush the iclogs to disk, then seal them off
* for business.
*
* Note: for delayed logging the !logerror case needs to flush the regions
* held in memory out to the iclogs before flushing them to disk. This needs
* to be done before the log is marked as shutdown, otherwise the flush to the
* iclogs will fail.
*/
int
xfs_log_force_umount(
......@@ -3680,6 +3726,16 @@ xfs_log_force_umount(
return 1;
}
retval = 0;
/*
* Flush the in memory commit item list before marking the log as
* being shut down. We need to do it in this order to ensure all the
* completed transactions are flushed to disk with the xfs_log_force()
* call below.
*/
if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
xlog_cil_push(log, 1);
/*
* We must hold both the GRANT lock and the LOG lock,
* before we mark the filesystem SHUTDOWN and wake
......
......@@ -19,7 +19,6 @@
#define __XFS_LOG_H__
/* get lsn fields */
#define CYCLE_LSN(lsn) ((uint)((lsn)>>32))
#define BLOCK_LSN(lsn) ((uint)(lsn))
......@@ -114,6 +113,9 @@ struct xfs_log_vec {
struct xfs_log_vec *lv_next; /* next lv in build list */
int lv_niovecs; /* number of iovecs in lv */
struct xfs_log_iovec *lv_iovecp; /* iovec array */
struct xfs_log_item *lv_item; /* owner */
char *lv_buf; /* formatted buffer */
int lv_buf_len; /* size of formatted buffer */
};
/*
......@@ -134,6 +136,7 @@ struct xlog_in_core;
struct xlog_ticket;
struct xfs_log_item;
struct xfs_item_ops;
struct xfs_trans;
void xfs_log_item_init(struct xfs_mount *mp,
struct xfs_log_item *item,
......@@ -187,9 +190,16 @@ int xfs_log_need_covered(struct xfs_mount *mp);
void xlog_iodone(struct xfs_buf *);
struct xlog_ticket * xfs_log_ticket_get(struct xlog_ticket *ticket);
struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
void xfs_log_ticket_put(struct xlog_ticket *ticket);
xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
struct xfs_log_vec *log_vector,
xfs_lsn_t *commit_lsn, int flags);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
#endif
......
This diff is collapsed.
......@@ -152,8 +152,6 @@ static inline uint xlog_get_client_id(__be32 i)
#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */
#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being
shutdown */
typedef __uint32_t xlog_tid_t;
#ifdef __KERNEL__
/*
......@@ -378,6 +376,99 @@ typedef struct xlog_in_core {
#define ic_header ic_data->hic_header
} xlog_in_core_t;
/*
* The CIL context is used to aggregate per-transaction details as well be
* passed to the iclog for checkpoint post-commit processing. After being
* passed to the iclog, another context needs to be allocated for tracking the
* next set of transactions to be aggregated into a checkpoint.
*/
struct xfs_cil;
struct xfs_cil_ctx {
struct xfs_cil *cil;
xfs_lsn_t sequence; /* chkpt sequence # */
xfs_lsn_t start_lsn; /* first LSN of chkpt commit */
xfs_lsn_t commit_lsn; /* chkpt commit record lsn */
struct xlog_ticket *ticket; /* chkpt ticket */
int nvecs; /* number of regions */
int space_used; /* aggregate size of regions */
struct list_head busy_extents; /* busy extents in chkpt */
struct xfs_log_vec *lv_chain; /* logvecs being pushed */
xfs_log_callback_t log_cb; /* completion callback hook. */
struct list_head committing; /* ctx committing list */
};
/*
* Committed Item List structure
*
* This structure is used to track log items that have been committed but not
* yet written into the log. It is used only when the delayed logging mount
* option is enabled.
*
* This structure tracks the list of committing checkpoint contexts so
* we can avoid the problem of having to hold out new transactions during a
* flush until we have a the commit record LSN of the checkpoint. We can
* traverse the list of committing contexts in xlog_cil_push_lsn() to find a
* sequence match and extract the commit LSN directly from there. If the
* checkpoint is still in the process of committing, we can block waiting for
* the commit LSN to be determined as well. This should make synchronous
* operations almost as efficient as the old logging methods.
*/
struct xfs_cil {
struct log *xc_log;
struct list_head xc_cil;
spinlock_t xc_cil_lock;
struct xfs_cil_ctx *xc_ctx;
struct rw_semaphore xc_ctx_lock;
struct list_head xc_committing;
sv_t xc_commit_wait;
};
/*
* The amount of log space we should the CIL to aggregate is difficult to size.
* Whatever we chose we have to make we can get a reservation for the log space
* effectively, that it is large enough to capture sufficient relogging to
* reduce log buffer IO significantly, but it is not too large for the log or
* induces too much latency when writing out through the iclogs. We track both
* space consumed and the number of vectors in the checkpoint context, so we
* need to decide which to use for limiting.
*
* Every log buffer we write out during a push needs a header reserved, which
* is at least one sector and more for v2 logs. Hence we need a reservation of
* at least 512 bytes per 32k of log space just for the LR headers. That means
* 16KB of reservation per megabyte of delayed logging space we will consume,
* plus various headers. The number of headers will vary based on the num of
* io vectors, so limiting on a specific number of vectors is going to result
* in transactions of varying size. IOWs, it is more consistent to track and
* limit space consumed in the log rather than by the number of objects being
* logged in order to prevent checkpoint ticket overruns.
*
* Further, use of static reservations through the log grant mechanism is
* problematic. It introduces a lot of complexity (e.g. reserve grant vs write
* grant) and a significant deadlock potential because regranting write space
* can block on log pushes. Hence if we have to regrant log space during a log
* push, we can deadlock.
*
* However, we can avoid this by use of a dynamic "reservation stealing"
* technique during transaction commit whereby unused reservation space in the
* transaction ticket is transferred to the CIL ctx commit ticket to cover the
* space needed by the checkpoint transaction. This means that we never need to
* specifically reserve space for the CIL checkpoint transaction, nor do we
* need to regrant space once the checkpoint completes. This also means the
* checkpoint transaction ticket is specific to the checkpoint context, rather
* than the CIL itself.
*
* With dynamic reservations, we can basically make up arbitrary limits for the
* checkpoint size so long as they don't violate any other size rules. Hence
* the initial maximum size for the checkpoint transaction will be set to a
* quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit
* right now based on the latency of writing out a large amount of data through
* the circular iclog buffers.
*/
#define XLOG_CIL_SPACE_LIMIT(log) \
(min((log->l_logsize >> 2), (8 * 1024 * 1024)))
/*
* The reservation head lsn is not made up of a cycle number and block number.
* Instead, it uses a cycle number and byte number. Logs don't expect to
......@@ -388,6 +479,7 @@ typedef struct log {
/* The following fields don't need locking */
struct xfs_mount *l_mp; /* mount point */
struct xfs_ail *l_ailp; /* AIL log is working with */
struct xfs_cil *l_cilp; /* CIL log is working with */
struct xfs_buf *l_xbuf; /* extra buffer for log
* wrapping */
struct xfs_buftarg *l_targ; /* buftarg of log */
......@@ -438,7 +530,6 @@ typedef struct log {
#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
/* common routines */
extern xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
extern int xlog_recover(xlog_t *log);
......@@ -446,6 +537,10 @@ extern int xlog_recover_finish(xlog_t *log);
extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
extern kmem_zone_t *xfs_log_ticket_zone;
struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes,
int count, char client, uint xflags,
int alloc_flags);
static inline void
xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
......@@ -455,6 +550,21 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
*off += bytes;
}
void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
int xlog_write(struct log *log, struct xfs_log_vec *log_vector,
struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
xlog_in_core_t **commit_iclog, uint flags);
/*
* Committed Item List interfaces
*/
int xlog_cil_init(struct log *log);
void xlog_cil_init_post_recovery(struct log *log);
void xlog_cil_destroy(struct log *log);
int xlog_cil_push(struct log *log, int push_now);
xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence);
/*
* Unmount record type is used as a pseudo transaction type for the ticket.
* It's value must be outside the range of XFS_TRANS_* values.
......
......@@ -1576,7 +1576,7 @@ xlog_recover_reorder_trans(
switch (ITEM_TYPE(item)) {
case XFS_LI_BUF:
if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) {
if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
trace_xfs_log_recover_item_reorder_head(log,
trans, item, pass);
list_move(&item->ri_list, &trans->r_itemq);
......@@ -1638,7 +1638,7 @@ xlog_recover_do_buffer_pass1(
/*
* If this isn't a cancel buffer item, then just return.
*/
if (!(flags & XFS_BLI_CANCEL)) {
if (!(flags & XFS_BLF_CANCEL)) {
trace_xfs_log_recover_buf_not_cancel(log, buf_f);
return;
}
......@@ -1696,7 +1696,7 @@ xlog_recover_do_buffer_pass1(
* Check to see whether the buffer being recovered has a corresponding
* entry in the buffer cancel record table. If it does then return 1
* so that it will be cancelled, otherwise return 0. If the buffer is
* actually a buffer cancel item (XFS_BLI_CANCEL is set), then decrement
* actually a buffer cancel item (XFS_BLF_CANCEL is set), then decrement
* the refcount on the entry in the table and remove it from the table
* if this is the last reference.
*
......@@ -1721,7 +1721,7 @@ xlog_check_buffer_cancelled(
* There is nothing in the table built in pass one,
* so this buffer must not be cancelled.
*/
ASSERT(!(flags & XFS_BLI_CANCEL));
ASSERT(!(flags & XFS_BLF_CANCEL));
return 0;
}
......@@ -1733,7 +1733,7 @@ xlog_check_buffer_cancelled(
* There is no corresponding entry in the table built
* in pass one, so this buffer has not been cancelled.
*/
ASSERT(!(flags & XFS_BLI_CANCEL));
ASSERT(!(flags & XFS_BLF_CANCEL));
return 0;
}
......@@ -1752,7 +1752,7 @@ xlog_check_buffer_cancelled(
* one in the table and remove it if this is the
* last reference.
*/
if (flags & XFS_BLI_CANCEL) {
if (flags & XFS_BLF_CANCEL) {
bcp->bc_refcount--;
if (bcp->bc_refcount == 0) {
if (prevp == NULL) {
......@@ -1772,7 +1772,7 @@ xlog_check_buffer_cancelled(
* We didn't find a corresponding entry in the table, so
* return 0 so that the buffer is NOT cancelled.
*/
ASSERT(!(flags & XFS_BLI_CANCEL));
ASSERT(!(flags & XFS_BLF_CANCEL));
return 0;
}
......@@ -1874,8 +1874,8 @@ xlog_recover_do_inode_buffer(
nbits = xfs_contig_bits(data_map, map_size,
bit);
ASSERT(nbits > 0);
reg_buf_offset = bit << XFS_BLI_SHIFT;
reg_buf_bytes = nbits << XFS_BLI_SHIFT;
reg_buf_offset = bit << XFS_BLF_SHIFT;
reg_buf_bytes = nbits << XFS_BLF_SHIFT;
item_index++;
}
......@@ -1889,7 +1889,7 @@ xlog_recover_do_inode_buffer(
}
ASSERT(item->ri_buf[item_index].i_addr != NULL);
ASSERT((item->ri_buf[item_index].i_len % XFS_BLI_CHUNK) == 0);
ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
ASSERT((reg_buf_offset + reg_buf_bytes) <= XFS_BUF_COUNT(bp));
/*
......@@ -1955,9 +1955,9 @@ xlog_recover_do_reg_buffer(
nbits = xfs_contig_bits(data_map, map_size, bit);
ASSERT(nbits > 0);
ASSERT(item->ri_buf[i].i_addr != NULL);
ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0);
ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
ASSERT(XFS_BUF_COUNT(bp) >=
((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT));
((uint)bit << XFS_BLF_SHIFT)+(nbits<<XFS_BLF_SHIFT));
/*
* Do a sanity check if this is a dquot buffer. Just checking
......@@ -1966,7 +1966,7 @@ xlog_recover_do_reg_buffer(
*/
error = 0;
if (buf_f->blf_flags &
(XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
if (item->ri_buf[i].i_addr == NULL) {
cmn_err(CE_ALERT,
"XFS: NULL dquot in %s.", __func__);
......@@ -1987,9 +1987,9 @@ xlog_recover_do_reg_buffer(
}
memcpy(xfs_buf_offset(bp,
(uint)bit << XFS_BLI_SHIFT), /* dest */
(uint)bit << XFS_BLF_SHIFT), /* dest */
item->ri_buf[i].i_addr, /* source */
nbits<<XFS_BLI_SHIFT); /* length */
nbits<<XFS_BLF_SHIFT); /* length */
next:
i++;
bit += nbits;
......@@ -2148,11 +2148,11 @@ xlog_recover_do_dquot_buffer(
}
type = 0;
if (buf_f->blf_flags & XFS_BLI_UDQUOT_BUF)
if (buf_f->blf_flags & XFS_BLF_UDQUOT_BUF)
type |= XFS_DQ_USER;
if (buf_f->blf_flags & XFS_BLI_PDQUOT_BUF)
if (buf_f->blf_flags & XFS_BLF_PDQUOT_BUF)
type |= XFS_DQ_PROJ;
if (buf_f->blf_flags & XFS_BLI_GDQUOT_BUF)
if (buf_f->blf_flags & XFS_BLF_GDQUOT_BUF)
type |= XFS_DQ_GROUP;
/*
* This type of quotas was turned off, so ignore this buffer
......@@ -2173,7 +2173,7 @@ xlog_recover_do_dquot_buffer(
* here which overlaps that may be stale.
*
* When meta-data buffers are freed at run time we log a buffer item
* with the XFS_BLI_CANCEL bit set to indicate that previous copies
* with the XFS_BLF_CANCEL bit set to indicate that previous copies
* of the buffer in the log should not be replayed at recovery time.
* This is so that if the blocks covered by the buffer are reused for
* file data before we crash we don't end up replaying old, freed
......@@ -2207,7 +2207,7 @@ xlog_recover_do_buffer_trans(
if (pass == XLOG_RECOVER_PASS1) {
/*
* In this pass we're only looking for buf items
* with the XFS_BLI_CANCEL bit set.
* with the XFS_BLF_CANCEL bit set.
*/
xlog_recover_do_buffer_pass1(log, buf_f);
return 0;
......@@ -2244,7 +2244,7 @@ xlog_recover_do_buffer_trans(
mp = log->l_mp;
buf_flags = XBF_LOCK;
if (!(flags & XFS_BLI_INODE_BUF))
if (!(flags & XFS_BLF_INODE_BUF))
buf_flags |= XBF_MAPPED;
bp = xfs_buf_read(mp->m_ddev_targp, blkno, len, buf_flags);
......@@ -2257,10 +2257,10 @@ xlog_recover_do_buffer_trans(
}
error = 0;
if (flags & XFS_BLI_INODE_BUF) {
if (flags & XFS_BLF_INODE_BUF) {
error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
} else if (flags &
(XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
(XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
} else {
xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
......
......@@ -28,7 +28,7 @@
#define XLOG_RHASH(tid) \
((((__uint32_t)tid)>>XLOG_RHASH_SHIFT) & (XLOG_RHASH_SIZE-1))
#define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK / 2 + 1)
#define XLOG_MAX_REGIONS_IN_ITEM (XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK / 2 + 1)
/*
......
......@@ -268,6 +268,7 @@ typedef struct xfs_mount {
#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
must be synchronous except
for space allocations */
#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */
#define XFS_MOUNT_DMAPI (1ULL << 2) /* dmapi is enabled */
#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
......
......@@ -44,6 +44,7 @@
#include "xfs_trans_priv.h"
#include "xfs_trans_space.h"
#include "xfs_inode_item.h"
#include "xfs_trace.h"
kmem_zone_t *xfs_trans_zone;
......@@ -243,9 +244,8 @@ _xfs_trans_alloc(
tp->t_type = type;
tp->t_mountp = mp;
tp->t_items_free = XFS_LIC_NUM_SLOTS;
tp->t_busy_free = XFS_LBC_NUM_SLOTS;
xfs_lic_init(&(tp->t_items));
XFS_LBC_INIT(&(tp->t_busy));
INIT_LIST_HEAD(&tp->t_busy);
return tp;
}
......@@ -255,8 +255,13 @@ _xfs_trans_alloc(
*/
STATIC void
xfs_trans_free(
xfs_trans_t *tp)
struct xfs_trans *tp)
{
struct xfs_busy_extent *busyp, *n;
list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
xfs_alloc_busy_clear(tp->t_mountp, busyp);
atomic_dec(&tp->t_mountp->m_active_trans);
xfs_trans_free_dqinfo(tp);
kmem_zone_free(xfs_trans_zone, tp);
......@@ -285,9 +290,8 @@ xfs_trans_dup(
ntp->t_type = tp->t_type;
ntp->t_mountp = tp->t_mountp;
ntp->t_items_free = XFS_LIC_NUM_SLOTS;
ntp->t_busy_free = XFS_LBC_NUM_SLOTS;
xfs_lic_init(&(ntp->t_items));
XFS_LBC_INIT(&(ntp->t_busy));
INIT_LIST_HEAD(&ntp->t_busy);
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(tp->t_ticket != NULL);
......@@ -423,7 +427,6 @@ xfs_trans_reserve(
return error;
}
/*
* Record the indicated change to the given field for application
* to the file system's superblock when the transaction commits.
......@@ -652,7 +655,7 @@ xfs_trans_apply_sb_deltas(
* XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
* still need to update the incore superblock with the changes.
*/
STATIC void
void
xfs_trans_unreserve_and_mod_sb(
xfs_trans_t *tp)
{
......@@ -880,7 +883,7 @@ xfs_trans_fill_vecs(
* they could be immediately flushed and we'd have to race with the flusher
* trying to pull the item from the AIL as we add it.
*/
static void
void
xfs_trans_item_committed(
struct xfs_log_item *lip,
xfs_lsn_t commit_lsn,
......@@ -930,26 +933,6 @@ xfs_trans_item_committed(
IOP_UNPIN(lip);
}
/* Clear all the per-AG busy list items listed in this transaction */
static void
xfs_trans_clear_busy_extents(
struct xfs_trans *tp)
{
xfs_log_busy_chunk_t *lbcp;
xfs_log_busy_slot_t *lbsp;
int i;
for (lbcp = &tp->t_busy; lbcp != NULL; lbcp = lbcp->lbc_next) {
i = 0;
for (lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) {
if (XFS_LBC_ISFREE(lbcp, i))
continue;
xfs_alloc_clear_busy(tp, lbsp->lbc_ag, lbsp->lbc_idx);
}
}
xfs_trans_free_busy(tp);
}
/*
* This is typically called by the LM when a transaction has been fully
* committed to disk. It needs to unpin the items which have
......@@ -984,7 +967,6 @@ xfs_trans_committed(
kmem_free(licp);
}
xfs_trans_clear_busy_extents(tp);
xfs_trans_free(tp);
}
......@@ -1012,8 +994,7 @@ xfs_trans_uncommit(
xfs_trans_unreserve_and_mod_sb(tp);
xfs_trans_unreserve_and_mod_dquots(tp);
xfs_trans_free_items(tp, flags);
xfs_trans_free_busy(tp);
xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
xfs_trans_free(tp);
}
......@@ -1075,6 +1056,8 @@ xfs_trans_commit_iclog(
*commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
tp->t_commit_lsn = *commit_lsn;
trace_xfs_trans_commit_lsn(tp);
if (nvec > XFS_TRANS_LOGVEC_COUNT)
kmem_free(log_vector);
......@@ -1161,6 +1144,93 @@ xfs_trans_commit_iclog(
return xfs_log_release_iclog(mp, commit_iclog);
}
/*
* Walk the log items and allocate log vector structures for
* each item large enough to fit all the vectors they require.
* Note that this format differs from the old log vector format in
* that there is no transaction header in these log vectors.
*/
STATIC struct xfs_log_vec *
xfs_trans_alloc_log_vecs(
xfs_trans_t *tp)
{
xfs_log_item_desc_t *lidp;
struct xfs_log_vec *lv = NULL;
struct xfs_log_vec *ret_lv = NULL;
lidp = xfs_trans_first_item(tp);
/* Bail out if we didn't find a log item. */
if (!lidp) {
ASSERT(0);
return NULL;
}
while (lidp != NULL) {
struct xfs_log_vec *new_lv;
/* Skip items which aren't dirty in this transaction. */
if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
lidp = xfs_trans_next_item(tp, lidp);
continue;
}
/* Skip items that do not have any vectors for writing */
lidp->lid_size = IOP_SIZE(lidp->lid_item);
if (!lidp->lid_size) {
lidp = xfs_trans_next_item(tp, lidp);
continue;
}
new_lv = kmem_zalloc(sizeof(*new_lv) +
lidp->lid_size * sizeof(struct xfs_log_iovec),
KM_SLEEP);
/* The allocated iovec region lies beyond the log vector. */
new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
new_lv->lv_niovecs = lidp->lid_size;
new_lv->lv_item = lidp->lid_item;
if (!ret_lv)
ret_lv = new_lv;
else
lv->lv_next = new_lv;
lv = new_lv;
lidp = xfs_trans_next_item(tp, lidp);
}
return ret_lv;
}
static int
xfs_trans_commit_cil(
struct xfs_mount *mp,
struct xfs_trans *tp,
xfs_lsn_t *commit_lsn,
int flags)
{
struct xfs_log_vec *log_vector;
int error;
/*
* Get each log item to allocate a vector structure for
* the log item to to pass to the log write code. The
* CIL commit code will format the vector and save it away.
*/
log_vector = xfs_trans_alloc_log_vecs(tp);
if (!log_vector)
return ENOMEM;
error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
if (error)
return error;
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
/* xfs_trans_free_items() unlocks them first */
xfs_trans_free_items(tp, *commit_lsn, 0);
xfs_trans_free(tp);
return 0;
}
/*
* xfs_trans_commit
......@@ -1221,7 +1291,11 @@ _xfs_trans_commit(
xfs_trans_apply_sb_deltas(tp);
xfs_trans_apply_dquot_deltas(tp);
if (mp->m_flags & XFS_MOUNT_DELAYLOG)
error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags);
else
error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
if (error == ENOMEM) {
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
error = XFS_ERROR(EIO);
......@@ -1259,8 +1333,7 @@ _xfs_trans_commit(
error = XFS_ERROR(EIO);
}
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free_items(tp, error ? XFS_TRANS_ABORT : 0);
xfs_trans_free_busy(tp);
xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0);
xfs_trans_free(tp);
XFS_STATS_INC(xs_trans_empty);
......@@ -1338,8 +1411,7 @@ xfs_trans_cancel(
/* mark this thread as no longer being in a transaction */
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free_items(tp, flags);
xfs_trans_free_busy(tp);
xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
xfs_trans_free(tp);
}
......
......@@ -106,7 +106,8 @@ typedef struct xfs_trans_header {
#define XFS_TRANS_GROWFSRT_FREE 39
#define XFS_TRANS_SWAPEXT 40
#define XFS_TRANS_SB_COUNT 41
#define XFS_TRANS_TYPE_MAX 41
#define XFS_TRANS_CHECKPOINT 42
#define XFS_TRANS_TYPE_MAX 42
/* new transaction types need to be reflected in xfs_logprint(8) */
#define XFS_TRANS_TYPES \
......@@ -148,6 +149,7 @@ typedef struct xfs_trans_header {
{ XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
{ XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
{ XFS_TRANS_SB_COUNT, "SB_COUNT" }, \
{ XFS_TRANS_CHECKPOINT, "CHECKPOINT" }, \
{ XFS_TRANS_DUMMY1, "DUMMY1" }, \
{ XFS_TRANS_DUMMY2, "DUMMY2" }, \
{ XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
......@@ -813,6 +815,7 @@ struct xfs_log_item_desc;
struct xfs_mount;
struct xfs_trans;
struct xfs_dquot_acct;
struct xfs_busy_extent;
typedef struct xfs_log_item {
struct list_head li_ail; /* AIL pointers */
......@@ -828,6 +831,11 @@ typedef struct xfs_log_item {
/* buffer item iodone */
/* callback func */
struct xfs_item_ops *li_ops; /* function list */
/* delayed logging */
struct list_head li_cil; /* CIL pointers */
struct xfs_log_vec *li_lv; /* active log vector */
xfs_lsn_t li_seq; /* CIL commit seq */
} xfs_log_item_t;
#define XFS_LI_IN_AIL 0x1
......@@ -871,34 +879,6 @@ typedef struct xfs_item_ops {
#define XFS_ITEM_LOCKED 2
#define XFS_ITEM_PUSHBUF 3
/*
* This structure is used to maintain a list of block ranges that have been
* freed in the transaction. The ranges are listed in the perag[] busy list
* between when they're freed and the transaction is committed to disk.
*/
typedef struct xfs_log_busy_slot {
xfs_agnumber_t lbc_ag;
ushort lbc_idx; /* index in perag.busy[] */
} xfs_log_busy_slot_t;
#define XFS_LBC_NUM_SLOTS 31
typedef struct xfs_log_busy_chunk {
struct xfs_log_busy_chunk *lbc_next;
uint lbc_free; /* free slots bitmask */
ushort lbc_unused; /* first unused */
xfs_log_busy_slot_t lbc_busy[XFS_LBC_NUM_SLOTS];
} xfs_log_busy_chunk_t;
#define XFS_LBC_MAX_SLOT (XFS_LBC_NUM_SLOTS - 1)
#define XFS_LBC_FREEMASK ((1U << XFS_LBC_NUM_SLOTS) - 1)
#define XFS_LBC_INIT(cp) ((cp)->lbc_free = XFS_LBC_FREEMASK)
#define XFS_LBC_CLAIM(cp, slot) ((cp)->lbc_free &= ~(1 << (slot)))
#define XFS_LBC_SLOT(cp, slot) (&((cp)->lbc_busy[(slot)]))
#define XFS_LBC_VACANCY(cp) (((cp)->lbc_free) & XFS_LBC_FREEMASK)
#define XFS_LBC_ISFREE(cp, slot) ((cp)->lbc_free & (1 << (slot)))
/*
* This is the type of function which can be given to xfs_trans_callback()
* to be called upon the transaction's commit to disk.
......@@ -950,8 +930,7 @@ typedef struct xfs_trans {
unsigned int t_items_free; /* log item descs free */
xfs_log_item_chunk_t t_items; /* first log item desc chunk */
xfs_trans_header_t t_header; /* header for in-log trans */
unsigned int t_busy_free; /* busy descs free */
xfs_log_busy_chunk_t t_busy; /* busy/async free blocks */
struct list_head t_busy; /* list of busy extents */
unsigned long t_pflags; /* saved process flags state */
} xfs_trans_t;
......@@ -1025,9 +1004,6 @@ int _xfs_trans_commit(xfs_trans_t *,
void xfs_trans_cancel(xfs_trans_t *, int);
int xfs_trans_ail_init(struct xfs_mount *);
void xfs_trans_ail_destroy(struct xfs_mount *);
xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
xfs_agnumber_t ag,
xfs_extlen_t idx);
extern kmem_zone_t *xfs_trans_zone;
......
......@@ -114,7 +114,7 @@ _xfs_trans_bjoin(
xfs_buf_item_init(bp, tp->t_mountp);
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
if (reset_recur)
bip->bli_recur = 0;
......@@ -511,7 +511,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
/*
......@@ -619,7 +619,7 @@ xfs_trans_bhold(xfs_trans_t *tp,
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_HOLD;
trace_xfs_trans_bhold(bip);
......@@ -641,7 +641,7 @@ xfs_trans_bhold_release(xfs_trans_t *tp,
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT(bip->bli_flags & XFS_BLI_HOLD);
bip->bli_flags &= ~XFS_BLI_HOLD;
......@@ -704,7 +704,7 @@ xfs_trans_log_buf(xfs_trans_t *tp,
bip->bli_flags &= ~XFS_BLI_STALE;
ASSERT(XFS_BUF_ISSTALE(bp));
XFS_BUF_UNSTALE(bp);
bip->bli_format.blf_flags &= ~XFS_BLI_CANCEL;
bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL;
}
lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)bip);
......@@ -762,8 +762,8 @@ xfs_trans_binval(
ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
ASSERT(XFS_BUF_ISSTALE(bp));
ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_INODE_BUF));
ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF));
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
ASSERT(lidp->lid_flags & XFS_LID_DIRTY);
ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
return;
......@@ -774,7 +774,7 @@ xfs_trans_binval(
* in the buf log item. The STALE flag will be used in
* xfs_buf_item_unpin() to determine if it should clean up
* when the last reference to the buf item is given up.
* We set the XFS_BLI_CANCEL flag in the buf log format structure
* We set the XFS_BLF_CANCEL flag in the buf log format structure
* and log the buf item. This will be used at recovery time
* to determine that copies of the buffer in the log before
* this should not be replayed.
......@@ -792,9 +792,9 @@ xfs_trans_binval(
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_STALE(bp);
bip->bli_flags |= XFS_BLI_STALE;
bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_DIRTY);
bip->bli_format.blf_flags &= ~XFS_BLI_INODE_BUF;
bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
bip->bli_format.blf_flags |= XFS_BLF_CANCEL;
memset((char *)(bip->bli_format.blf_data_map), 0,
(bip->bli_format.blf_map_size * sizeof(uint)));
lidp->lid_flags |= XFS_LID_DIRTY;
......@@ -802,16 +802,16 @@ xfs_trans_binval(
}
/*
* This call is used to indicate that the buffer contains on-disk
* inodes which must be handled specially during recovery. They
* require special handling because only the di_next_unlinked from
* the inodes in the buffer should be recovered. The rest of the
* data in the buffer is logged via the inodes themselves.
* This call is used to indicate that the buffer contains on-disk inodes which
* must be handled specially during recovery. They require special handling
* because only the di_next_unlinked from the inodes in the buffer should be
* recovered. The rest of the data in the buffer is logged via the inodes
* themselves.
*
* All we do is set the XFS_BLI_INODE_BUF flag in the buffer's log
* format structure so that we'll know what to do at recovery time.
* All we do is set the XFS_BLI_INODE_BUF flag in the items flags so it can be
* transferred to the buffer's log format structure so that we'll know what to
* do at recovery time.
*/
/* ARGSUSED */
void
xfs_trans_inode_buf(
xfs_trans_t *tp,
......@@ -826,7 +826,7 @@ xfs_trans_inode_buf(
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF;
bip->bli_flags |= XFS_BLI_INODE_BUF;
}
/*
......@@ -908,9 +908,9 @@ xfs_trans_dquot_buf(
ASSERT(XFS_BUF_ISBUSY(bp));
ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
ASSERT(type == XFS_BLI_UDQUOT_BUF ||
type == XFS_BLI_PDQUOT_BUF ||
type == XFS_BLI_GDQUOT_BUF);
ASSERT(type == XFS_BLF_UDQUOT_BUF ||
type == XFS_BLF_PDQUOT_BUF ||
type == XFS_BLF_GDQUOT_BUF);
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
......
......@@ -299,6 +299,7 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
void
xfs_trans_free_items(
xfs_trans_t *tp,
xfs_lsn_t commit_lsn,
int flags)
{
xfs_log_item_chunk_t *licp;
......@@ -311,7 +312,7 @@ xfs_trans_free_items(
* Special case the embedded chunk so we don't free it below.
*/
if (!xfs_lic_are_all_free(licp)) {
(void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
(void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn);
xfs_lic_all_free(licp);
licp->lic_unused = 0;
}
......@@ -322,7 +323,7 @@ xfs_trans_free_items(
*/
while (licp != NULL) {
ASSERT(!xfs_lic_are_all_free(licp));
(void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
(void) xfs_trans_unlock_chunk(licp, 1, abort, commit_lsn);
next_licp = licp->lic_next;
kmem_free(licp);
licp = next_licp;
......@@ -438,112 +439,3 @@ xfs_trans_unlock_chunk(
return freed;
}
/*
* This is called to add the given busy item to the transaction's
* list of busy items. It must find a free busy item descriptor
* or allocate a new one and add the item to that descriptor.
* The function returns a pointer to busy descriptor used to point
* to the new busy entry. The log busy entry will now point to its new
* descriptor with its ???? field.
*/
xfs_log_busy_slot_t *
xfs_trans_add_busy(xfs_trans_t *tp, xfs_agnumber_t ag, xfs_extlen_t idx)
{
xfs_log_busy_chunk_t *lbcp;
xfs_log_busy_slot_t *lbsp;
int i=0;
/*
* If there are no free descriptors, allocate a new chunk
* of them and put it at the front of the chunk list.
*/
if (tp->t_busy_free == 0) {
lbcp = (xfs_log_busy_chunk_t*)
kmem_alloc(sizeof(xfs_log_busy_chunk_t), KM_SLEEP);
ASSERT(lbcp != NULL);
/*
* Initialize the chunk, and then
* claim the first slot in the newly allocated chunk.
*/
XFS_LBC_INIT(lbcp);
XFS_LBC_CLAIM(lbcp, 0);
lbcp->lbc_unused = 1;
lbsp = XFS_LBC_SLOT(lbcp, 0);
/*
* Link in the new chunk and update the free count.
*/
lbcp->lbc_next = tp->t_busy.lbc_next;
tp->t_busy.lbc_next = lbcp;
tp->t_busy_free = XFS_LIC_NUM_SLOTS - 1;
/*
* Initialize the descriptor and the generic portion
* of the log item.
*
* Point the new slot at this item and return it.
* Also point the log item at its currently active
* descriptor and set the item's mount pointer.
*/
lbsp->lbc_ag = ag;
lbsp->lbc_idx = idx;
return lbsp;
}
/*
* Find the free descriptor. It is somewhere in the chunklist
* of descriptors.
*/
lbcp = &tp->t_busy;
while (lbcp != NULL) {
if (XFS_LBC_VACANCY(lbcp)) {
if (lbcp->lbc_unused <= XFS_LBC_MAX_SLOT) {
i = lbcp->lbc_unused;
break;
} else {
/* out-of-order vacancy */
cmn_err(CE_DEBUG, "OOO vacancy lbcp 0x%p\n", lbcp);
ASSERT(0);
}
}
lbcp = lbcp->lbc_next;
}
ASSERT(lbcp != NULL);
/*
* If we find a free descriptor, claim it,
* initialize it, and return it.
*/
XFS_LBC_CLAIM(lbcp, i);
if (lbcp->lbc_unused <= i) {
lbcp->lbc_unused = i + 1;
}
lbsp = XFS_LBC_SLOT(lbcp, i);
tp->t_busy_free--;
lbsp->lbc_ag = ag;
lbsp->lbc_idx = idx;
return lbsp;
}
/*
* xfs_trans_free_busy
* Free all of the busy lists from a transaction
*/
void
xfs_trans_free_busy(xfs_trans_t *tp)
{
xfs_log_busy_chunk_t *lbcp;
xfs_log_busy_chunk_t *lbcq;
lbcp = tp->t_busy.lbc_next;
while (lbcp != NULL) {
lbcq = lbcp->lbc_next;
kmem_free(lbcp);
lbcp = lbcq;
}
XFS_LBC_INIT(&tp->t_busy);
tp->t_busy.lbc_unused = 0;
}
......@@ -35,13 +35,14 @@ struct xfs_log_item_desc *xfs_trans_find_item(struct xfs_trans *,
struct xfs_log_item_desc *xfs_trans_first_item(struct xfs_trans *);
struct xfs_log_item_desc *xfs_trans_next_item(struct xfs_trans *,
struct xfs_log_item_desc *);
void xfs_trans_free_items(struct xfs_trans *, int);
void xfs_trans_unlock_items(struct xfs_trans *,
xfs_lsn_t);
void xfs_trans_free_busy(xfs_trans_t *tp);
xfs_log_busy_slot_t *xfs_trans_add_busy(xfs_trans_t *tp,
xfs_agnumber_t ag,
xfs_extlen_t idx);
void xfs_trans_unlock_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn);
void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
int flags);
void xfs_trans_item_committed(struct xfs_log_item *lip,
xfs_lsn_t commit_lsn, int aborted);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
/*
* AIL traversal cursor.
......
......@@ -75,6 +75,8 @@ typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */
typedef __uint32_t xlog_tid_t; /* transaction ID type */
/*
* These types are 64 bits on disk but are either 32 or 64 bits in memory.
* Disk based types:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment