Commit 69e1aadd authored by Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates for 3.4 from Ted Ts'o:
 "Ext4 commits for 3.3 merge window; mostly cleanups and bug fixes

  The changes to export dirty_writeback_interval are from Artem's s_dirt
  cleanup patch series.  The same is true of the change to remove the
  s_dirt helper functions which never got used by anyone in-tree.  I've
  run these changes by Al Viro, and am carrying them so that Artem can
  more easily fix up the rest of the file systems during the next merge
  window.  (Originally we had hoped to remove the use of s_dirt from
  ext4 during this merge window, but his patches had some bugs, so I
  ultimately ended up dropping them from the ext4 tree.)"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (66 commits)
  vfs: remove unused superblock helpers
  mm: export dirty_writeback_interval
  ext4: remove useless s_dirt assignment
  ext4: write superblock only once on unmount
  ext4: do not mark superblock as dirty unnecessarily
  ext4: correct ext4_punch_hole return codes
  ext4: remove restrictive checks for EOFBLOCKS_FL
  ext4: always set the trimmed blocks count into len
  ext4: fix trimmed block count accounting
  ext4: fix start and len arguments handling in ext4_trim_fs()
  ext4: update s_free_{inodes,blocks}_count during online resize
  ext4: change some printk() calls to use ext4_msg() instead
  ext4: avoid output message interleaving in ext4_error_<foo>()
  ext4: remove trailing newlines from ext4_msg() and ext4_error() messages
  ext4: add no_printk argument validation, fix fallout
  ext4: remove redundant "EXT4-fs: " from uses of ext4_msg
  ext4: give more helpful error message in ext4_ext_rm_leaf()
  ext4: remove unused code from ext4_ext_map_blocks()
  ext4: rewrite punch hole to use ext4_ext_remove_space()
  jbd2: cleanup journal tail after transaction commit
  ...
parents 56b59b42 9d547c35
......@@ -144,9 +144,6 @@ journal_async_commit Commit block can be written to disk without waiting
mount the device. This will enable 'journal_checksum'
internally.
journal=update Update the ext4 file system's journal to the current
format.
journal_dev=devnum When the external journal device's major/minor numbers
have changed, this option allows the user to specify
the new journal location. The journal device is
......@@ -356,11 +353,6 @@ nouid32 Disables 32-bit UIDs and GIDs. This is for
interoperability with older kernels which only
store and expect 16-bit values.
resize Allows to resize filesystem to the end of the last
existing block group, further resize has to be done
with resize2fs either online, or offline. It can be
used only with conjunction with remount.
block_validity This options allows to enables/disables the in-kernel
noblock_validity facility for tracking filesystem metadata blocks
within internal data structures. This allows multi-
......
......@@ -336,10 +336,10 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
* Return buffer_head on success or NULL in case of failure.
*/
struct buffer_head *
ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
{
struct ext4_group_desc *desc;
struct buffer_head *bh = NULL;
struct buffer_head *bh;
ext4_fsblk_t bitmap_blk;
desc = ext4_get_group_desc(sb, block_group, NULL);
......@@ -348,7 +348,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
bitmap_blk = ext4_block_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
ext4_error(sb, "Cannot read block bitmap - "
ext4_error(sb, "Cannot get buffer for block bitmap - "
"block_group = %u, block_bitmap = %llu",
block_group, bitmap_blk);
return NULL;
......@@ -382,25 +382,50 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
return bh;
}
/*
* submit the buffer_head for read. We can
* safely mark the bitmap as uptodate now.
* We do it here so the bitmap uptodate bit
* get set with buffer lock held.
* submit the buffer_head for reading
*/
set_buffer_new(bh);
trace_ext4_read_block_bitmap_load(sb, block_group);
set_bitmap_uptodate(bh);
if (bh_submit_read(bh) < 0) {
put_bh(bh);
bh->b_end_io = ext4_end_bitmap_read;
get_bh(bh);
submit_bh(READ, bh);
return bh;
}
/* Returns 0 on success, 1 on error */
int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
struct buffer_head *bh)
{
struct ext4_group_desc *desc;
if (!buffer_new(bh))
return 0;
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
return 1;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
ext4_error(sb, "Cannot read block bitmap - "
"block_group = %u, block_bitmap = %llu",
block_group, bitmap_blk);
return NULL;
block_group, (unsigned long long) bh->b_blocknr);
return 1;
}
clear_buffer_new(bh);
/* Panic or remount fs read-only if block bitmap is invalid */
ext4_valid_block_bitmap(sb, desc, block_group, bh);
/*
* file system mounted not to panic on error,
* continue with corrupt bitmap
*/
return 0;
}
struct buffer_head *
ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
{
struct buffer_head *bh;
bh = ext4_read_block_bitmap_nowait(sb, block_group);
if (ext4_wait_block_bitmap(sb, block_group, bh)) {
put_bh(bh);
return NULL;
}
return bh;
}
......
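The split into a submit half and a wait half lets a caller start I/O on several block bitmaps before sleeping on any of them, instead of reading them strictly one at a time. A minimal sketch of that usage pattern, assuming a caller that scans a contiguous range of groups (the helper itself is hypothetical and not part of this series; only ext4_read_block_bitmap_nowait() and ext4_wait_block_bitmap() come from this patch):

/* Hypothetical: submit reads for 'count' bitmaps, then wait on each. */
static int ext4_read_bitmaps_batched(struct super_block *sb,
				     ext4_group_t first, int count,
				     struct buffer_head **bhs)
{
	int i, err = 0;

	for (i = 0; i < count; i++)	/* issue all reads up front */
		bhs[i] = ext4_read_block_bitmap_nowait(sb, first + i);

	for (i = 0; i < count; i++) {	/* now block on each buffer */
		if (!bhs[i] || ext4_wait_block_bitmap(sb, first + i, bhs[i])) {
			brelse(bhs[i]);	/* brelse() tolerates NULL */
			bhs[i] = NULL;
			err = -EIO;
		}
	}
	return err;
}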
......@@ -91,17 +91,17 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
return 0;
if (filp)
ext4_error_file(filp, function, line, bh ? bh->b_blocknr : 0,
ext4_error_file(filp, function, line, bh->b_blocknr,
"bad entry in directory: %s - offset=%u(%u), "
"inode=%u, rec_len=%d, name_len=%d",
error_msg, (unsigned) (offset%bh->b_size),
error_msg, (unsigned) (offset % bh->b_size),
offset, le32_to_cpu(de->inode),
rlen, de->name_len);
else
ext4_error_inode(dir, function, line, bh ? bh->b_blocknr : 0,
ext4_error_inode(dir, function, line, bh->b_blocknr,
"bad entry in directory: %s - offset=%u(%u), "
"inode=%u, rec_len=%d, name_len=%d",
error_msg, (unsigned) (offset%bh->b_size),
error_msg, (unsigned) (offset % bh->b_size),
offset, le32_to_cpu(de->inode),
rlen, de->name_len);
......@@ -425,8 +425,9 @@ static int call_filldir(struct file *filp, void *dirent,
sb = inode->i_sb;
if (!fname) {
printk(KERN_ERR "EXT4-fs: call_filldir: called with "
"null fname?!?\n");
ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: "
"called with null fname?!?", __func__, __LINE__,
inode->i_ino, current->comm);
return 0;
}
curr_pos = hash2pos(fname->hash, fname->minor_hash);
......
......@@ -53,7 +53,7 @@
printk(KERN_DEBUG f, ## a); \
} while (0)
#else
#define ext4_debug(f, a...) do {} while (0)
#define ext4_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
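Defining the disabled case as no_printk() instead of an empty do-while keeps gcc's format-string checking alive in non-debug builds, which is what the "add no_printk argument validation, fix fallout" commit in the shortlog refers to. For reference, no_printk() is an empty printf-annotated inline, roughly as follows (quoted from memory, per <linux/kernel.h> of this era):

static inline __printf(1, 2)
int no_printk(const char *fmt, ...)
{
	return 0;	/* arguments are type-checked, then discarded */
}

The __printf(1, 2) attribute makes the compiler validate the format string against its arguments even though the call compiles to nothing, so disabled debug statements can no longer bit-rot silently.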
#define EXT4_ERROR_INODE(inode, fmt, a...) \
......@@ -184,6 +184,8 @@ struct mpage_da_data {
#define EXT4_IO_END_UNWRITTEN 0x0001
#define EXT4_IO_END_ERROR 0x0002
#define EXT4_IO_END_QUEUED 0x0004
#define EXT4_IO_END_DIRECT 0x0008
#define EXT4_IO_END_IN_FSYNC 0x0010
struct ext4_io_page {
struct page *p_page;
......@@ -192,18 +194,25 @@ struct ext4_io_page {
#define MAX_IO_PAGES 128
/*
* For converting uninitialized extents on a work queue.
*
* 'page' is only used from the writepage() path; 'pages' is only used for
* buffered writes; they are used to keep page references until conversion
* takes place. For AIO/DIO, neither field is filled in.
*/
typedef struct ext4_io_end {
struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */
struct page *page; /* page struct for buffer write */
struct page *page; /* for writepage() path */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
struct work_struct work; /* data work queue */
struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */
int num_io_pages;
struct ext4_io_page *pages[MAX_IO_PAGES];
int num_io_pages; /* for writepages() */
struct ext4_io_page *pages[MAX_IO_PAGES]; /* for writepages() */
} ext4_io_end_t;
struct ext4_io_submit {
......@@ -923,6 +932,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
#define EXT4_MOUNT_ERRORS_MASK 0x00070
#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
......@@ -941,7 +951,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
#define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
......@@ -1142,6 +1151,7 @@ struct ext4_sb_info {
unsigned int s_mount_opt;
unsigned int s_mount_opt2;
unsigned int s_mount_flags;
unsigned int s_def_mount_opt;
ext4_fsblk_t s_sb_block;
uid_t s_resuid;
gid_t s_resgid;
......@@ -1420,8 +1430,9 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */
#define EXT4_FEATURE_INCOMPAT_BG_USE_META_CSUM 0x2000 /* use crc32c for bg */
#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x8000 /* data in inode */
#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
......@@ -1794,7 +1805,13 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
ext4_group_t block_group,
struct buffer_head ** bh);
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
ext4_group_t block_group);
extern int ext4_wait_block_bitmap(struct super_block *sb,
ext4_group_t block_group,
struct buffer_head *bh);
extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
ext4_group_t block_group);
extern void ext4_init_block_bitmap(struct super_block *sb,
struct buffer_head *bh,
......@@ -1841,6 +1858,7 @@ extern void ext4_check_inodes_bitmap(struct super_block *);
extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
extern int ext4_init_inode_table(struct super_block *sb,
ext4_group_t group, int barrier);
extern void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate);
/* mballoc.c */
extern long ext4_mb_stats;
......
......@@ -47,9 +47,9 @@
*/
#define EXT_DEBUG__
#ifdef EXT_DEBUG
#define ext_debug(a...) printk(a)
#define ext_debug(fmt, ...) printk(fmt, ##__VA_ARGS__)
#else
#define ext_debug(a...)
#define ext_debug(fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
/*
......
......@@ -104,6 +104,78 @@
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
/**
* struct ext4_journal_cb_entry - Base structure for callback information.
*
* This struct is a 'seed' structure for use with your own callback
* structs. If you are using callbacks you must allocate one of these
* or another struct of your own definition which has this struct
* as its first element and pass it to ext4_journal_callback_add().
*/
struct ext4_journal_cb_entry {
/* list information for other callbacks attached to the same handle */
struct list_head jce_list;
/* Function to call with this callback structure */
void (*jce_func)(struct super_block *sb,
struct ext4_journal_cb_entry *jce, int error);
/* user data goes here */
};
/**
* ext4_journal_callback_add: add a function to call after transaction commit
* @handle: active journal transaction handle to register callback on
* @func: callback function to call after the transaction has committed:
*        @sb: superblock of current filesystem for transaction
*        @jce: returned journal callback data
*        @rc: journal state at commit (0 = transaction committed properly)
* @jce: journal callback data (internal and function private data struct)
*
* The registered function will be called in the context of the journal thread
* after the transaction for which the handle was created has completed.
*
* No locks are held when the callback function is called, so it is safe to
* call blocking functions from within the callback, but the callback should
* not block or run for too long, or the filesystem will be blocked waiting for
* the next transaction to commit. No journaling functions can be used, or
* there is a risk of deadlock.
*
* There is no guaranteed calling order of multiple registered callbacks on
* the same transaction.
*/
static inline void ext4_journal_callback_add(handle_t *handle,
void (*func)(struct super_block *sb,
struct ext4_journal_cb_entry *jce,
int rc),
struct ext4_journal_cb_entry *jce)
{
struct ext4_sb_info *sbi =
EXT4_SB(handle->h_transaction->t_journal->j_private);
/* Add the jce to transaction's private list */
jce->jce_func = func;
spin_lock(&sbi->s_md_lock);
list_add_tail(&jce->jce_list, &handle->h_transaction->t_private_list);
spin_unlock(&sbi->s_md_lock);
}
/**
* ext4_journal_callback_del: delete a registered callback
* @handle: active journal transaction handle on which callback was registered
* @jce: registered journal callback entry to unregister
*/
static inline void ext4_journal_callback_del(handle_t *handle,
struct ext4_journal_cb_entry *jce)
{
struct ext4_sb_info *sbi =
EXT4_SB(handle->h_transaction->t_journal->j_private);
spin_lock(&sbi->s_md_lock);
list_del_init(&jce->jce_list);
spin_unlock(&sbi->s_md_lock);
}
int
ext4_mark_iloc_dirty(handle_t *handle,
struct inode *inode,
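The first in-tree user of this API is mballoc's struct ext4_free_data (see the mballoc.h hunk below), which embeds the entry as its first member. A hedged sketch of the intended pattern, with made-up names for everything except the API itself:

/* Illustrative only: embed the entry first, then register on a handle. */
struct my_commit_data {
	struct ext4_journal_cb_entry jce;	/* must be the first member */
	ext4_group_t group;
};

static void my_commit_callback(struct super_block *sb,
			       struct ext4_journal_cb_entry *jce, int error)
{
	struct my_commit_data *d =
		container_of(jce, struct my_commit_data, jce);

	/* runs in the journal thread after the transaction commits */
	pr_info("group %u committed, error=%d\n", d->group, error);
	kfree(d);
}

	/* ... in code holding an active handle: */
	d->group = group;
	ext4_journal_callback_add(handle, my_commit_callback, &d->jce);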
......@@ -261,43 +333,45 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle,
/* super.c */
int ext4_force_commit(struct super_block *sb);
static inline int ext4_should_journal_data(struct inode *inode)
/*
* Ext4 inode journal modes
*/
#define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */
#define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */
#define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */
static inline int ext4_inode_journal_mode(struct inode *inode)
{
if (EXT4_JOURNAL(inode) == NULL)
return 0;
if (!S_ISREG(inode->i_mode))
return 1;
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
return 1;
if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
return 1;
return 0;
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
/* We do not support data journalling with delayed allocation */
if (!S_ISREG(inode->i_mode) ||
test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
!test_opt(inode->i_sb, DELALLOC))
return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
else
BUG();
}
static inline int ext4_should_journal_data(struct inode *inode)
{
return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE;
}
static inline int ext4_should_order_data(struct inode *inode)
{
if (EXT4_JOURNAL(inode) == NULL)
return 0;
if (!S_ISREG(inode->i_mode))
return 0;
if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
return 0;
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
return 1;
return 0;
return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE;
}
static inline int ext4_should_writeback_data(struct inode *inode)
{
if (EXT4_JOURNAL(inode) == NULL)
return 1;
if (!S_ISREG(inode->i_mode))
return 0;
if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
return 0;
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
return 1;
return 0;
return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
}
/*
......
......@@ -89,6 +89,7 @@ int ext4_flush_completed_IO(struct inode *inode)
io = list_entry(ei->i_completed_io_list.next,
ext4_io_end_t, list);
list_del_init(&io->list);
io->flag |= EXT4_IO_END_IN_FSYNC;
/*
* Calling ext4_end_io_nolock() to convert completed
* IO to written.
......@@ -108,6 +109,7 @@ int ext4_flush_completed_IO(struct inode *inode)
if (ret < 0)
ret2 = ret;
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
io->flag &= ~EXT4_IO_END_IN_FSYNC;
}
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
return (ret2 < 0) ? ret2 : 0;
......
......@@ -272,7 +272,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
trace_ext4_da_update_reserve_space(inode, used, quota_claim);
if (unlikely(used > ei->i_reserved_data_blocks)) {
ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
"with only %d reserved data blocks\n",
"with only %d reserved data blocks",
__func__, inode->i_ino, used,
ei->i_reserved_data_blocks);
WARN_ON(1);
......@@ -1165,7 +1165,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
*/
ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
"ino %lu, to_free %d with only %d reserved "
"data blocks\n", inode->i_ino, to_free,
"data blocks", inode->i_ino, to_free,
ei->i_reserved_data_blocks);
WARN_ON(1);
to_free = ei->i_reserved_data_blocks;
......@@ -1428,20 +1428,22 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
static void ext4_print_free_blocks(struct inode *inode)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
printk(KERN_CRIT "Total free blocks count %lld\n",
struct super_block *sb = inode->i_sb;
ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld",
EXT4_C2B(EXT4_SB(inode->i_sb),
ext4_count_free_clusters(inode->i_sb)));
printk(KERN_CRIT "Free/Dirty block details\n");
printk(KERN_CRIT "free_blocks=%lld\n",
ext4_msg(sb, KERN_CRIT, "Free/Dirty block details");
ext4_msg(sb, KERN_CRIT, "free_blocks=%lld",
(long long) EXT4_C2B(EXT4_SB(inode->i_sb),
percpu_counter_sum(&sbi->s_freeclusters_counter)));
printk(KERN_CRIT "dirty_blocks=%lld\n",
ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld",
(long long) EXT4_C2B(EXT4_SB(inode->i_sb),
percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
printk(KERN_CRIT "Block reservation details\n");
printk(KERN_CRIT "i_reserved_data_blocks=%u\n",
ext4_msg(sb, KERN_CRIT, "Block reservation details");
ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
EXT4_I(inode)->i_reserved_data_blocks);
printk(KERN_CRIT "i_reserved_meta_blocks=%u\n",
ext4_msg(sb, KERN_CRIT, "i_reserved_meta_blocks=%u",
EXT4_I(inode)->i_reserved_meta_blocks);
return;
}
......@@ -2482,13 +2484,14 @@ static int ext4_da_write_end(struct file *file,
int write_mode = (int)(unsigned long)fsdata;
if (write_mode == FALL_BACK_TO_NONDELALLOC) {
if (ext4_should_order_data(inode)) {
switch (ext4_inode_journal_mode(inode)) {
case EXT4_INODE_ORDERED_DATA_MODE:
return ext4_ordered_write_end(file, mapping, pos,
len, copied, page, fsdata);
} else if (ext4_should_writeback_data(inode)) {
case EXT4_INODE_WRITEBACK_DATA_MODE:
return ext4_writeback_write_end(file, mapping, pos,
len, copied, page, fsdata);
} else {
default:
BUG();
}
}
......@@ -2763,7 +2766,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
goto out;
ext_debug("ext4_end_io_dio(): io_end 0x%p "
"for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
"for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
iocb->private, io_end->inode->i_ino, iocb, offset,
size);
......@@ -2795,9 +2798,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
/* queue the work to convert unwritten extents to written */
queue_work(wq, &io_end->work);
/* XXX: probably should move into the real I/O completion handler */
inode_dio_done(inode);
}
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
......@@ -2811,7 +2811,8 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
goto out;
if (!(io_end->inode->i_sb->s_flags & MS_ACTIVE)) {
printk("sb umounted, discard end_io request for inode %lu\n",
ext4_msg(io_end->inode->i_sb, KERN_INFO,
"sb umounted, discard end_io request for inode %lu",
io_end->inode->i_ino);
ext4_free_io_end(io_end);
goto out;
......@@ -2921,9 +2922,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
iocb->private = NULL;
EXT4_I(inode)->cur_aio_dio = NULL;
if (!is_sync_kiocb(iocb)) {
iocb->private = ext4_init_io_end(inode, GFP_NOFS);
if (!iocb->private)
ext4_io_end_t *io_end =
ext4_init_io_end(inode, GFP_NOFS);
if (!io_end)
return -ENOMEM;
io_end->flag |= EXT4_IO_END_DIRECT;
iocb->private = io_end;
/*
* we save the io structure for current async
* direct IO, so that later ext4_map_blocks()
......@@ -2940,7 +2944,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
ext4_get_block_write,
ext4_end_io_dio,
NULL,
DIO_LOCKING | DIO_SKIP_HOLES);
DIO_LOCKING);
if (iocb->private)
EXT4_I(inode)->cur_aio_dio = NULL;
/*
......@@ -3086,18 +3090,25 @@ static const struct address_space_operations ext4_da_aops = {
void ext4_set_aops(struct inode *inode)
{
if (ext4_should_order_data(inode) &&
test_opt(inode->i_sb, DELALLOC))
switch (ext4_inode_journal_mode(inode)) {
case EXT4_INODE_ORDERED_DATA_MODE:
if (test_opt(inode->i_sb, DELALLOC))
inode->i_mapping->a_ops = &ext4_da_aops;
else if (ext4_should_order_data(inode))
else
inode->i_mapping->a_ops = &ext4_ordered_aops;
else if (ext4_should_writeback_data(inode) &&
test_opt(inode->i_sb, DELALLOC))
break;
case EXT4_INODE_WRITEBACK_DATA_MODE:
if (test_opt(inode->i_sb, DELALLOC))
inode->i_mapping->a_ops = &ext4_da_aops;
else if (ext4_should_writeback_data(inode))
inode->i_mapping->a_ops = &ext4_writeback_aops;
else
inode->i_mapping->a_ops = &ext4_writeback_aops;
break;
case EXT4_INODE_JOURNAL_DATA_MODE:
inode->i_mapping->a_ops = &ext4_journalled_aops;
break;
default:
BUG();
}
}
......@@ -3329,16 +3340,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
{
struct inode *inode = file->f_path.dentry->d_inode;
if (!S_ISREG(inode->i_mode))
return -ENOTSUPP;
return -EOPNOTSUPP;
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
/* TODO: Add support for non extent hole punching */
return -ENOTSUPP;
return -EOPNOTSUPP;
}
if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1) {
/* TODO: Add support for bigalloc file systems */
return -ENOTSUPP;
return -EOPNOTSUPP;
}
return ext4_ext_punch_hole(file, offset, length);
......@@ -3924,10 +3935,8 @@ static int ext4_do_update_inode(handle_t *handle,
ext4_update_dynamic_rev(sb);
EXT4_SET_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
sb->s_dirt = 1;
ext4_handle_sync(handle);
err = ext4_handle_dirty_metadata(handle, NULL,
EXT4_SB(sb)->s_sbh);
err = ext4_handle_dirty_super(handle, sb);
}
}
raw_inode->i_generation = cpu_to_le32(inode->i_generation);
......@@ -4152,11 +4161,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid & ATTR_SIZE) {
if (attr->ia_size != i_size_read(inode)) {
if (attr->ia_size != i_size_read(inode))
truncate_setsize(inode, attr->ia_size);
ext4_truncate(inode);
} else if (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
ext4_truncate(inode);
}
if (!rc) {
......@@ -4314,7 +4321,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
{
int err = 0;
if (test_opt(inode->i_sb, I_VERSION))
if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
/* the do_update_inode consumes one bh->b_count */
......
......@@ -96,21 +96,23 @@ extern u8 mb_enable_debug;
struct ext4_free_data {
/* this links the free block information from group_info */
struct rb_node node;
/* MUST be the first member */
struct ext4_journal_cb_entry efd_jce;
/* ext4_free_data private data starts from here */
/* this links the free block information from ext4_sb_info */
struct list_head list;
/* this links the free block information from group_info */
struct rb_node efd_node;
/* group which free block extent belongs */
ext4_group_t group;
ext4_group_t efd_group;
/* free block extent */
ext4_grpblk_t start_cluster;
ext4_grpblk_t count;
ext4_grpblk_t efd_start_cluster;
ext4_grpblk_t efd_count;
/* transaction which freed this extent */
tid_t t_tid;
tid_t efd_tid;
};
struct ext4_prealloc_space {
......@@ -210,8 +212,6 @@ struct ext4_buddy {
__u16 bd_blkbits;
ext4_group_t bd_group;
};
#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
struct ext4_free_extent *fex)
......
......@@ -471,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode)
tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
S_IFREG, NULL, goal, owner);
if (IS_ERR(tmp_inode)) {
retval = PTR_ERR(inode);
retval = PTR_ERR(tmp_inode);
ext4_journal_stop(handle);
return retval;
}
......
......@@ -257,8 +257,8 @@ int ext4_multi_mount_protect(struct super_block *sb,
* If check_interval in MMP block is larger, use that instead of
* update_interval from the superblock.
*/
if (mmp->mmp_check_interval > mmp_check_interval)
mmp_check_interval = mmp->mmp_check_interval;
if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
seq = le32_to_cpu(mmp->mmp_seq);
if (seq == EXT4_MMP_SEQ_CLEAN)
......
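mmp_check_interval is a little-endian on-disk field (__le16), so the unconverted comparison only happened to work on little-endian hosts. The general pattern, as a short illustration (variable names invented):

	u16 interval = le16_to_cpu(mmp->mmp_check_interval);	/* convert once */
	if (interval > mmp_check_interval)	/* then compare in CPU byte order */
		mmp_check_interval = interval;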
......@@ -468,7 +468,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
fail:
if (*err == ERR_BAD_DX_DIR)
ext4_warning(dir->i_sb,
"Corrupt dir inode %ld, running e2fsck is "
"Corrupt dir inode %lu, running e2fsck is "
"recommended.", dir->i_ino);
return NULL;
}
......
......@@ -60,7 +60,6 @@ void ext4_ioend_wait(struct inode *inode)
static void put_io_page(struct ext4_io_page *io_page)
{
if (atomic_dec_and_test(&io_page->p_count)) {
end_page_writeback(io_page->p_page);
put_page(io_page->p_page);
kmem_cache_free(io_page_cachep, io_page);
}
......@@ -110,6 +109,8 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
if (io->iocb)
aio_complete(io->iocb, io->result, 0);
if (io->flag & EXT4_IO_END_DIRECT)
inode_dio_done(inode);
/* Wake up anyone waiting on unwritten extent conversion */
if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
wake_up_all(ext4_ioend_wq(io->inode));
......@@ -127,12 +128,18 @@ static void ext4_end_io_work(struct work_struct *work)
unsigned long flags;
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
if (io->flag & EXT4_IO_END_IN_FSYNC)
goto requeue;
if (list_empty(&io->list)) {
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
goto free;
}
if (!mutex_trylock(&inode->i_mutex)) {
bool was_queued;
requeue:
was_queued = !!(io->flag & EXT4_IO_END_QUEUED);
io->flag |= EXT4_IO_END_QUEUED;
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
/*
* Requeue the work instead of waiting so that the work
......@@ -145,9 +152,8 @@ static void ext4_end_io_work(struct work_struct *work)
* yield the cpu if it sees an end_io request that has already
* been requeued.
*/
if (io->flag & EXT4_IO_END_QUEUED)
if (was_queued)
yield();
io->flag |= EXT4_IO_END_QUEUED;
return;
}
list_del_init(&io->list);
......@@ -227,9 +233,9 @@ static void ext4_end_bio(struct bio *bio, int error)
} while (bh != head);
}
put_io_page(io_end->pages[i]);
if (atomic_read(&io_end->pages[i]->p_count) == 1)
end_page_writeback(io_end->pages[i]->p_page);
}
io_end->num_io_pages = 0;
inode = io_end->inode;
if (error) {
......@@ -421,6 +427,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
* PageWriteback bit from the page to prevent the system from
* wedging later on.
*/
if (atomic_read(&io_page->p_count) == 1)
end_page_writeback(page);
put_io_page(io_page);
return ret;
}
......@@ -1163,8 +1163,11 @@ static void ext4_update_super(struct super_block *sb,
do_div(reserved_blocks, 100);
ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks);
le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
flex_gd->count);
le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
flex_gd->count);
/*
* We need to protect s_groups_count against other CPUs seeing
......@@ -1465,6 +1468,7 @@ static int ext4_group_extend_no_check(struct super_block *sb,
}
ext4_blocks_count_set(es, o_blocks_count + add);
ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + add);
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add);
/* We add the blocks to the bitmap and set the group need init bit */
......@@ -1512,16 +1516,17 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
o_blocks_count = ext4_blocks_count(es);
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n",
ext4_msg(sb, KERN_DEBUG,
"extending last group from %llu to %llu blocks",
o_blocks_count, n_blocks_count);
if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
return 0;
if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
printk(KERN_ERR "EXT4-fs: filesystem on %s:"
" too large to resize to %llu blocks safely\n",
sb->s_id, n_blocks_count);
ext4_msg(sb, KERN_ERR,
"filesystem too large to resize to %llu blocks safely",
n_blocks_count);
if (sizeof(sector_t) < 8)
ext4_warning(sb, "CONFIG_LBDAF not enabled");
return -EINVAL;
......@@ -1582,7 +1587,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
ext4_fsblk_t o_blocks_count;
ext4_group_t o_group;
ext4_group_t n_group;
ext4_grpblk_t offset;
ext4_grpblk_t offset, add;
unsigned long n_desc_blocks;
unsigned long o_desc_blocks;
unsigned long desc_blocks;
......@@ -1591,8 +1596,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
o_blocks_count = ext4_blocks_count(es);
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu "
"upto %llu blocks\n", o_blocks_count, n_blocks_count);
ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu "
"to %llu blocks", o_blocks_count, n_blocks_count);
if (n_blocks_count < o_blocks_count) {
/* On-line shrinking not supported */
......@@ -1605,7 +1610,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
return 0;
ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset);
ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
EXT4_DESC_PER_BLOCK(sb);
......@@ -1634,10 +1639,12 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
}
brelse(bh);
if (offset != 0) {
/* extend the last group */
ext4_grpblk_t add;
add = EXT4_BLOCKS_PER_GROUP(sb) - offset;
if (n_group == o_group)
add = n_blocks_count - o_blocks_count;
else
add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1);
if (add > 0) {
err = ext4_group_extend_no_check(sb, o_blocks_count, add);
if (err)
goto out;
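A quick worked example of the corrected arithmetic (numbers invented): with 32768 blocks per group and o_blocks_count = 40000, the last existing block is 39999, so o_group = 1 and offset = 7231; when the target size lies in a later group, add = 32768 - (7231 + 1) = 25536 blocks first fills group 1 up to its boundary. The old code passed o_blocks_count itself to ext4_get_group_no_and_offset(), which on a filesystem ending exactly at a group boundary pointed one group past the end.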
......@@ -1674,7 +1681,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
iput(resize_inode);
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu "
"upto %llu blocks\n", o_blocks_count, n_blocks_count);
ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu "
"upto %llu blocks", o_blocks_count, n_blocks_count);
return err;
}
......@@ -82,8 +82,8 @@
printk("\n"); \
} while (0)
#else
# define ea_idebug(f...)
# define ea_bdebug(f...)
# define ea_idebug(inode, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
# define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
static void ext4_xattr_cache_insert(struct buffer_head *);
......@@ -158,13 +158,10 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
static inline int
ext4_xattr_check_block(struct buffer_head *bh)
{
int error;
if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
BHDR(bh)->h_blocks != cpu_to_le32(1))
return -EIO;
error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
return error;
return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
}
static inline int
......@@ -220,7 +217,8 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
error = -ENODATA;
if (!EXT4_I(inode)->i_file_acl)
goto cleanup;
ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl);
ea_idebug(inode, "reading block %llu",
(unsigned long long)EXT4_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
if (!bh)
goto cleanup;
......@@ -363,7 +361,8 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
error = 0;
if (!EXT4_I(inode)->i_file_acl)
goto cleanup;
ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl);
ea_idebug(inode, "reading block %llu",
(unsigned long long)EXT4_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
error = -EIO;
if (!bh)
......@@ -487,18 +486,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
unlock_buffer(bh);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
if (ce)
mb_cache_entry_release(ce);
unlock_buffer(bh);
error = ext4_handle_dirty_metadata(handle, inode, bh);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
dquot_free_block(inode, 1);
ea_bdebug(bh, "refcount now=%d; releasing",
le32_to_cpu(BHDR(bh)->h_refcount));
if (ce)
mb_cache_entry_release(ce);
}
unlock_buffer(bh);
out:
ext4_std_error(inode->i_sb, error);
return;
......@@ -834,7 +834,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
ea_idebug(inode, "creating block %d", block);
ea_idebug(inode, "creating block %llu",
(unsigned long long)block);
new_bh = sb_getblk(sb, block);
if (!new_bh) {
......
......@@ -88,14 +88,13 @@ static inline void __buffer_relink_io(struct journal_head *jh)
* whole transaction.
*
* Requires j_list_lock
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/
static int __try_to_free_cp_buf(struct journal_head *jh)
{
int ret = 0;
struct buffer_head *bh = jh2bh(jh);
if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
if (jh->b_transaction == NULL && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
/*
* Get our reference so that bh cannot be freed before
......@@ -104,11 +103,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
jbd_unlock_bh_state(bh);
BUFFER_TRACE(bh, "release");
__brelse(bh);
} else {
jbd_unlock_bh_state(bh);
}
return ret;
}
......@@ -179,21 +175,6 @@ void __jbd2_log_wait_for_space(journal_t *journal)
}
}
/*
* We were unable to perform jbd_trylock_bh_state() inside j_list_lock.
* The caller must restart a list walk. Wait for someone else to run
* jbd_unlock_bh_state().
*/
static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
__releases(journal->j_list_lock)
{
get_bh(bh);
spin_unlock(&journal->j_list_lock);
jbd_lock_bh_state(bh);
jbd_unlock_bh_state(bh);
put_bh(bh);
}
/*
* Clean up transaction's list of buffers submitted for io.
* We wait for any pending IO to complete and remove any clean
......@@ -222,15 +203,9 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
while (!released && transaction->t_checkpoint_io_list) {
jh = transaction->t_checkpoint_io_list;
bh = jh2bh(jh);
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
spin_lock(&journal->j_list_lock);
goto restart;
}
get_bh(bh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
......@@ -246,7 +221,6 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
* it has been written out and so we can drop it from the list
*/
released = __jbd2_journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
__brelse(bh);
}
......@@ -266,7 +240,6 @@ __flush_batch(journal_t *journal, int *batch_count)
for (i = 0; i < *batch_count; i++) {
struct buffer_head *bh = journal->j_chkpt_bhs[i];
clear_buffer_jwrite(bh);
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
}
......@@ -281,7 +254,6 @@ __flush_batch(journal_t *journal, int *batch_count)
* be written out.
*
* Called with j_list_lock held and drops it if 1 is returned
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/
static int __process_buffer(journal_t *journal, struct journal_head *jh,
int *batch_count, transaction_t *transaction)
......@@ -292,7 +264,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
if (buffer_locked(bh)) {
get_bh(bh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
......@@ -304,7 +275,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
transaction->t_chp_stats.cs_forced_to_close++;
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
if (unlikely(journal->j_flags & JBD2_UNMOUNT))
/*
* The journal thread is dead; so starting and
......@@ -323,11 +293,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;
get_bh(bh);
J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint");
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
__brelse(bh);
} else {
/*
......@@ -340,10 +308,8 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
BUFFER_TRACE(bh, "queue");
get_bh(bh);
J_ASSERT_BH(bh, !buffer_jwrite(bh));
set_buffer_jwrite(bh);
journal->j_chkpt_bhs[*batch_count] = bh;
__buffer_relink_io(jh);
jbd_unlock_bh_state(bh);
transaction->t_chp_stats.cs_written++;
(*batch_count)++;
if (*batch_count == JBD2_NR_BATCH) {
......@@ -407,15 +373,7 @@ int jbd2_log_do_checkpoint(journal_t *journal)
int retry = 0, err;
while (!retry && transaction->t_checkpoint_list) {
struct buffer_head *bh;
jh = transaction->t_checkpoint_list;
bh = jh2bh(jh);
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
retry = 1;
break;
}
retry = __process_buffer(journal, jh, &batch_count,
transaction);
if (retry < 0 && !result)
......@@ -478,79 +436,28 @@ int jbd2_log_do_checkpoint(journal_t *journal)
int jbd2_cleanup_journal_tail(journal_t *journal)
{
transaction_t * transaction;
tid_t first_tid;
unsigned long blocknr, freed;
unsigned long blocknr;
if (is_journal_aborted(journal))
return 1;
/* OK, work out the oldest transaction remaining in the log, and
* the log block it starts at.
*
* If the log is now empty, we need to work out which is the
* next transaction ID we will write, and where it will
* start. */
write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock);
transaction = journal->j_checkpoint_transactions;
if (transaction) {
first_tid = transaction->t_tid;
blocknr = transaction->t_log_start;
} else if ((transaction = journal->j_committing_transaction) != NULL) {
first_tid = transaction->t_tid;
blocknr = transaction->t_log_start;
} else if ((transaction = journal->j_running_transaction) != NULL) {
first_tid = transaction->t_tid;
blocknr = journal->j_head;
} else {
first_tid = journal->j_transaction_sequence;
blocknr = journal->j_head;
}
spin_unlock(&journal->j_list_lock);
J_ASSERT(blocknr != 0);
/* If the oldest pinned transaction is at the tail of the log
already then there's not much we can do right now. */
if (journal->j_tail_sequence == first_tid) {
write_unlock(&journal->j_state_lock);
if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
return 1;
}
/* OK, update the superblock to recover the freed space.
* Physical blocks come first: have we wrapped beyond the end of
* the log? */
freed = blocknr - journal->j_tail;
if (blocknr < journal->j_tail)
freed = freed + journal->j_last - journal->j_first;
trace_jbd2_cleanup_journal_tail(journal, first_tid, blocknr, freed);
jbd_debug(1,
"Cleaning journal tail from %d to %d (offset %lu), "
"freeing %lu\n",
journal->j_tail_sequence, first_tid, blocknr, freed);
journal->j_free += freed;
journal->j_tail_sequence = first_tid;
journal->j_tail = blocknr;
write_unlock(&journal->j_state_lock);
J_ASSERT(blocknr != 0);
/*
* If there is an external journal, we need to make sure that
* any data blocks that were recently written out --- perhaps
* by jbd2_log_do_checkpoint() --- are flushed out before we
* drop the transactions from the external journal. It's
* unlikely this will be necessary, especially with a
* appropriately sized journal, but we need this to guarantee
* correctness. Fortunately jbd2_cleanup_journal_tail()
* doesn't get called all that often.
* We need to make sure that any blocks that were recently written out
* --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before
* we drop the transactions from the journal. It's unlikely this will
* be necessary, especially with an appropriately sized journal, but we
* need this to guarantee correctness. Fortunately
* jbd2_cleanup_journal_tail() doesn't get called all that often.
*/
if ((journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
if (!(journal->j_flags & JBD2_ABORT))
jbd2_journal_update_superblock(journal, 1);
__jbd2_update_log_tail(journal, first_tid, blocknr);
return 0;
}
......@@ -582,8 +489,6 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
do {
jh = next_jh;
next_jh = jh->b_cpnext;
/* Use trylock because of the ranking */
if (jbd_trylock_bh_state(jh2bh(jh))) {
ret = __try_to_free_cp_buf(jh);
if (ret) {
freed++;
......@@ -592,7 +497,6 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
return freed;
}
}
}
/*
* This function only frees up some memory
* if possible so we dont have an obligation
......@@ -673,9 +577,7 @@ int __jbd2_journal_clean_checkpoint_list(journal_t *journal)
* The function can free jh and bh.
*
* This function is called with j_list_lock held.
* This function is called with jbd_lock_bh_state(jh2bh(jh))
*/
int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
{
struct transaction_chp_stats_s *stats;
......@@ -722,7 +624,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
transaction->t_tid, stats);
__jbd2_journal_drop_transaction(journal, transaction);
kfree(transaction);
jbd2_journal_free_transaction(transaction);
/* Just in case anybody was waiting for more transactions to be
checkpointed... */
......@@ -797,5 +699,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
trace_jbd2_drop_transaction(journal, transaction);
jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
}
......@@ -331,6 +331,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
struct buffer_head *cbh = NULL; /* For transactional checksums */
__u32 crc32_sum = ~0;
struct blk_plug plug;
/* Tail of the journal */
unsigned long first_block;
tid_t first_tid;
int update_tail;
/*
* First job: lock down the current transaction and wait for
......@@ -340,7 +344,18 @@ void jbd2_journal_commit_transaction(journal_t *journal)
/* Do we need to erase the effects of a prior jbd2_journal_flush? */
if (journal->j_flags & JBD2_FLUSHED) {
jbd_debug(3, "super block updated\n");
jbd2_journal_update_superblock(journal, 1);
mutex_lock(&journal->j_checkpoint_mutex);
/*
* We hold j_checkpoint_mutex so tail cannot change under us.
* We don't need any special data guarantees for writing sb
* since journal is empty and it is ok for write to be
* flushed only with transaction commit.
*/
jbd2_journal_update_sb_log_tail(journal,
journal->j_tail_sequence,
journal->j_tail,
WRITE_SYNC);
mutex_unlock(&journal->j_checkpoint_mutex);
} else {
jbd_debug(3, "superblock not updated\n");
}
......@@ -677,10 +692,30 @@ void jbd2_journal_commit_transaction(journal_t *journal)
err = 0;
}
/*
* Get current oldest transaction in the log before we issue flush
* to the filesystem device. After the flush we can be sure that
* blocks of all older transactions are checkpointed to persistent
* storage and we will be safe to update journal start in the
* superblock with the numbers we get here.
*/
update_tail =
jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
write_lock(&journal->j_state_lock);
if (update_tail) {
long freed = first_block - journal->j_tail;
if (first_block < journal->j_tail)
freed += journal->j_last - journal->j_first;
/* Update tail only if we free significant amount of space */
if (freed < journal->j_maxlen / 4)
update_tail = 0;
}
J_ASSERT(commit_transaction->t_state == T_COMMIT);
commit_transaction->t_state = T_COMMIT_DFLUSH;
write_unlock(&journal->j_state_lock);
/*
* If the journal is not located on the file system device,
* then we must flush the file system device before we issue
......@@ -831,6 +866,14 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (err)
jbd2_journal_abort(journal, err);
/*
* Now disk caches for filesystem device are flushed so we are safe to
* erase checkpointed transactions from the log by updating journal
* superblock.
*/
if (update_tail)
jbd2_update_log_tail(journal, first_tid, first_block);
/* End of a transaction! Finally, we can do checkpoint
processing: any buffers committed as a result of this
transaction can be removed from any checkpoint list it was on
......@@ -1048,7 +1091,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(1, "JBD2: commit %d complete, head %d\n",
journal->j_commit_sequence, journal->j_tail_sequence);
if (to_free)
kfree(commit_transaction);
jbd2_journal_free_transaction(commit_transaction);
wake_up(&journal->j_wait_done_commit);
}
......@@ -21,6 +21,7 @@
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/crc32.h>
#include <linux/blkdev.h>
#endif
/*
......@@ -265,7 +266,9 @@ int jbd2_journal_recover(journal_t *journal)
err2 = sync_blockdev(journal->j_fs_dev);
if (!err)
err = err2;
/* Make sure all replayed data is on permanent storage */
if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
return err;
}
......
......@@ -208,17 +208,13 @@ int __init jbd2_journal_init_revoke_caches(void)
J_ASSERT(!jbd2_revoke_record_cache);
J_ASSERT(!jbd2_revoke_table_cache);
jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
sizeof(struct jbd2_revoke_record_s),
0,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
NULL);
jbd2_revoke_record_cache = KMEM_CACHE(jbd2_revoke_record_s,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY);
if (!jbd2_revoke_record_cache)
goto record_cache_failure;
jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
sizeof(struct jbd2_revoke_table_s),
0, SLAB_TEMPORARY, NULL);
jbd2_revoke_table_cache = KMEM_CACHE(jbd2_revoke_table_s,
SLAB_TEMPORARY);
if (!jbd2_revoke_table_cache)
goto table_cache_failure;
return 0;
......
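KMEM_CACHE() derives the cache name, object size, and alignment from the struct type itself, so the call sites can no longer drift out of sync with the structure definition. Roughly, per <linux/slab.h> of this era (quoted from memory):

#define KMEM_CACHE(__struct, __flags)					\
	kmem_cache_create(#__struct, sizeof(struct __struct),		\
			  __alignof__(struct __struct), (__flags), NULL)

One visible side effect is that the caches are now named after the struct (e.g. "jbd2_revoke_record_s") rather than the old hand-written strings.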
......@@ -33,6 +33,35 @@
static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
static struct kmem_cache *transaction_cache;
int __init jbd2_journal_init_transaction_cache(void)
{
J_ASSERT(!transaction_cache);
transaction_cache = kmem_cache_create("jbd2_transaction_s",
sizeof(transaction_t),
0,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
NULL);
if (transaction_cache)
return 0;
return -ENOMEM;
}
void jbd2_journal_destroy_transaction_cache(void)
{
if (transaction_cache) {
kmem_cache_destroy(transaction_cache);
transaction_cache = NULL;
}
}
void jbd2_journal_free_transaction(transaction_t *transaction)
{
if (unlikely(ZERO_OR_NULL_PTR(transaction)))
return;
kmem_cache_free(transaction_cache, transaction);
}
/*
* jbd2_get_transaction: obtain a new transaction_t object.
*
......@@ -133,7 +162,8 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
alloc_transaction:
if (!journal->j_running_transaction) {
new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
new_transaction = kmem_cache_alloc(transaction_cache,
gfp_mask | __GFP_ZERO);
if (!new_transaction) {
/*
* If __GFP_FS is not present, then we may be
......@@ -162,7 +192,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
if (is_journal_aborted(journal) ||
(journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
read_unlock(&journal->j_state_lock);
kfree(new_transaction);
jbd2_journal_free_transaction(new_transaction);
return -EROFS;
}
......@@ -284,7 +314,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
read_unlock(&journal->j_state_lock);
lock_map_acquire(&handle->h_lockdep_map);
kfree(new_transaction);
jbd2_journal_free_transaction(new_transaction);
return 0;
}
......@@ -1549,9 +1579,9 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
* of these pointers, it could go bad. Generally the caller needs to re-read
* the pointer from the transaction_t.
*
* Called under j_list_lock. The journal may not be locked.
* Called under j_list_lock.
*/
void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
{
struct journal_head **list = NULL;
transaction_t *transaction;
......@@ -1646,11 +1676,9 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
spin_lock(&journal->j_list_lock);
if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
/* written-back checkpointed metadata buffer */
if (jh->b_jlist == BJ_None) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
__jbd2_journal_remove_checkpoint(jh);
}
}
spin_unlock(&journal->j_list_lock);
out:
return;
......@@ -1949,6 +1977,8 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
clear_buffer_mapped(bh);
clear_buffer_req(bh);
clear_buffer_new(bh);
clear_buffer_delay(bh);
clear_buffer_unwritten(bh);
bh->b_bdev = NULL;
return may_free;
}
......
......@@ -1872,19 +1872,6 @@ extern struct dentry *mount_pseudo(struct file_system_type *, char *,
const struct dentry_operations *dops,
unsigned long);
static inline void sb_mark_dirty(struct super_block *sb)
{
sb->s_dirt = 1;
}
static inline void sb_mark_clean(struct super_block *sb)
{
sb->s_dirt = 0;
}
static inline int sb_is_dirty(struct super_block *sb)
{
return sb->s_dirt;
}
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */
#define fops_get(fops) \
(((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
......
......@@ -971,6 +971,10 @@ extern void __journal_clean_data_list(transaction_t *transaction);
/* Log buffer allocation */
extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *);
int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
unsigned long *block);
void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
/* Commit management */
extern void jbd2_journal_commit_transaction(journal_t *);
......@@ -1020,6 +1024,11 @@ jbd2_journal_write_metadata_buffer(transaction_t *transaction,
/* Transaction locking */
extern void __wait_on_journal (journal_t *);
/* Transaction cache support */
extern void jbd2_journal_destroy_transaction_cache(void);
extern int jbd2_journal_init_transaction_cache(void);
extern void jbd2_journal_free_transaction(transaction_t *);
/*
* Journal locking.
*
......@@ -1082,7 +1091,8 @@ extern int jbd2_journal_destroy (journal_t *);
extern int jbd2_journal_recover (journal_t *journal);
extern int jbd2_journal_wipe (journal_t *, int);
extern int jbd2_journal_skip_recovery (journal_t *);
extern void jbd2_journal_update_superblock (journal_t *, int);
extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
unsigned long, int);
extern void __jbd2_journal_abort_hard (journal_t *);
extern void jbd2_journal_abort (journal_t *, int);
extern int jbd2_journal_errno (journal_t *);
......
......@@ -66,6 +66,8 @@ struct journal_head {
* transaction (if there is one). Only applies to buffers on a
* transaction's data or metadata journaling list.
* [j_list_lock] [jbd_lock_bh_state()]
* Either of these locks is enough for reading, both are needed for
* changes.
*/
transaction_t *b_transaction;
......
......@@ -81,6 +81,13 @@ DEFINE_EVENT(jbd2_commit, jbd2_commit_logging,
TP_ARGS(journal, commit_transaction)
);
DEFINE_EVENT(jbd2_commit, jbd2_drop_transaction,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
TP_ARGS(journal, commit_transaction)
);
TRACE_EVENT(jbd2_end_commit,
TP_PROTO(journal_t *journal, transaction_t *commit_transaction),
......@@ -200,7 +207,7 @@ TRACE_EVENT(jbd2_checkpoint_stats,
__entry->forced_to_close, __entry->written, __entry->dropped)
);
TRACE_EVENT(jbd2_cleanup_journal_tail,
TRACE_EVENT(jbd2_update_log_tail,
TP_PROTO(journal_t *journal, tid_t first_tid,
unsigned long block_nr, unsigned long freed),
......@@ -229,6 +236,26 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
__entry->block_nr, __entry->freed)
);
TRACE_EVENT(jbd2_write_superblock,
TP_PROTO(journal_t *journal, int write_op),
TP_ARGS(journal, write_op),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, write_op )
),
TP_fast_assign(
__entry->dev = journal->j_fs_dev->bd_dev;
__entry->write_op = write_op;
),
TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev),
MINOR(__entry->dev), __entry->write_op)
);
#endif /* _TRACE_JBD2_H */
/* This part must be outside protection */
......
......@@ -95,6 +95,8 @@ unsigned long vm_dirty_bytes;
*/
unsigned int dirty_writeback_interval = 5 * 100; /* centiseconds */
EXPORT_SYMBOL_GPL(dirty_writeback_interval);
/*
* The longest time for which data is allowed to remain dirty
*/
......