Commit faeb20ec authored by Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Performance improvements in SEEK_DATA and xattr scalability
  improvements, plus a lot of clean ups and bug fixes"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (38 commits)
  ext4: clean up error handling in the MMP support
  jbd2: do not fail journal because of frozen_buffer allocation failure
  ext4: use __GFP_NOFAIL in ext4_free_blocks()
  ext4: fix compile error while opening the macro DOUBLE_CHECK
  ext4: print ext4 mount option data_err=abort correctly
  ext4: fix NULL pointer dereference in ext4_mark_inode_dirty()
  ext4: drop unneeded BUFFER_TRACE in ext4_delete_inline_entry()
  ext4: fix misspellings in comments.
  jbd2: fix FS corruption possibility in jbd2_journal_destroy() on umount path
  ext4: more efficient SEEK_DATA implementation
  ext4: cleanup handling of bh->b_state in DAX mmap
  ext4: return hole from ext4_map_blocks()
  ext4: factor out determining of hole size
  ext4: fix setting of referenced bit in ext4_es_lookup_extent()
  ext4: remove i_ioend_count
  ext4: simplify io_end handling for AIO DIO
  ext4: move trans handling and completion deferal out of _ext4_get_block
  ext4: rename and split get blocks functions
  ext4: use i_mutex to serialize unaligned AIO DIO
  ext4: pack ioend structure better
  ...
parents 364e8dd9 03046886
@@ -61,6 +61,8 @@ struct ext2_block_alloc_info {
 #define rsv_start rsv_window._rsv_start
 #define rsv_end rsv_window._rsv_end
 
+struct mb_cache;
+
 /*
  * second extended-fs super-block data in memory
  */
@@ -111,6 +113,7 @@ struct ext2_sb_info {
 	 * of the mount options.
 	 */
 	spinlock_t s_lock;
+	struct mb_cache *s_mb_cache;
 };
 
 static inline spinlock_t *
......
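
The struct mb_cache forward declaration above is the whole interface ext2 needs at this layer: the superblock only stores an opaque pointer, so ext2.h never has to include the mbcache header. A minimal sketch of the same opaque-handle pattern (illustrative names, not from the patch):

    /* opaque type: its definition lives entirely in the mbcache code */
    struct mb_cache;

    struct example_sb_info {
        /* ... other per-mount state ... */
        struct mb_cache *s_mb_cache;  /* created at mount, destroyed at unmount */
    };
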
@@ -131,7 +131,10 @@ static void ext2_put_super (struct super_block * sb)
 	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 
-	ext2_xattr_put_super(sb);
+	if (sbi->s_mb_cache) {
+		ext2_xattr_destroy_cache(sbi->s_mb_cache);
+		sbi->s_mb_cache = NULL;
+	}
 	if (!(sb->s_flags & MS_RDONLY)) {
 		struct ext2_super_block *es = sbi->s_es;
@@ -1104,6 +1107,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 		ext2_msg(sb, KERN_ERR, "error: insufficient memory");
 		goto failed_mount3;
 	}
+
+#ifdef CONFIG_EXT2_FS_XATTR
+	sbi->s_mb_cache = ext2_xattr_create_cache();
+	if (!sbi->s_mb_cache) {
+		ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+		goto failed_mount3;
+	}
+#endif
 	/*
 	 * set up enough so that it can read an inode
 	 */
@@ -1149,6 +1160,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 			sb->s_id);
 	goto failed_mount;
 failed_mount3:
+	if (sbi->s_mb_cache)
+		ext2_xattr_destroy_cache(sbi->s_mb_cache);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -1555,20 +1568,17 @@ MODULE_ALIAS_FS("ext2");
 
 static int __init init_ext2_fs(void)
 {
-	int err = init_ext2_xattr();
-	if (err)
-		return err;
+	int err;
 
 	err = init_inodecache();
 	if (err)
-		goto out1;
+		return err;
 	err = register_filesystem(&ext2_fs_type);
 	if (err)
 		goto out;
 	return 0;
 out:
 	destroy_inodecache();
-out1:
-	exit_ext2_xattr();
 	return err;
 }
 
@@ -1576,7 +1586,6 @@ static void __exit exit_ext2_fs(void)
 {
 	unregister_filesystem(&ext2_fs_type);
 	destroy_inodecache();
-	exit_ext2_xattr();
 }
 
 MODULE_AUTHOR("Remy Card and others");
......
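
Taken together, these super.c hunks move the ext2 xattr block cache from module scope (init_ext2_xattr()/exit_ext2_xattr()) to mount scope. A condensed sketch of the resulting lifecycle, with the error labels simplified relative to the real code:

    static int example_fill_super(struct super_block *sb,
                                  struct ext2_sb_info *sbi)
    {
    #ifdef CONFIG_EXT2_FS_XATTR
        sbi->s_mb_cache = ext2_xattr_create_cache();
        if (!sbi->s_mb_cache)
            return -ENOMEM;        /* the real code jumps to failed_mount3 */
    #endif
        return 0;
    }

    static void example_put_super(struct ext2_sb_info *sbi)
    {
        if (sbi->s_mb_cache) {
            ext2_xattr_destroy_cache(sbi->s_mb_cache);
            sbi->s_mb_cache = NULL;
        }
    }

One consequence visible in init_ext2_fs() above: module init no longer has a failure point before init_inodecache(), so the out1/exit_ext2_xattr() unwind path disappears.
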
(This diff is collapsed and not shown here.)
@@ -53,6 +53,8 @@ struct ext2_xattr_entry {
 #define EXT2_XATTR_SIZE(size) \
 	(((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
 
+struct mb_cache;
+
 # ifdef CONFIG_EXT2_FS_XATTR
 
 extern const struct xattr_handler ext2_xattr_user_handler;
@@ -65,10 +67,9 @@ extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
 extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
 
 extern void ext2_xattr_delete_inode(struct inode *);
-extern void ext2_xattr_put_super(struct super_block *);
 
-extern int init_ext2_xattr(void);
-extern void exit_ext2_xattr(void);
+extern struct mb_cache *ext2_xattr_create_cache(void);
+extern void ext2_xattr_destroy_cache(struct mb_cache *cache);
 
 extern const struct xattr_handler *ext2_xattr_handlers[];
@@ -93,19 +94,7 @@ ext2_xattr_delete_inode(struct inode *inode)
 {
 }
 
-static inline void
-ext2_xattr_put_super(struct super_block *sb)
-{
-}
-
-static inline int
-init_ext2_xattr(void)
-{
-	return 0;
-}
-
-static inline void
-exit_ext2_xattr(void)
+static inline void ext2_xattr_destroy_cache(struct mb_cache *cache)
 {
 }
......
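
When CONFIG_EXT2_FS_XATTR is off, ext2_xattr_destroy_cache() is stubbed to an empty inline, which is what lets ext2_put_super() and the mount error path call it without #ifdef guards of their own; only the create side stays inside #ifdef because it actually allocates. The shape of the resulting header contract, roughly:

    #ifdef CONFIG_EXT2_FS_XATTR
    extern struct mb_cache *ext2_xattr_create_cache(void);
    extern void ext2_xattr_destroy_cache(struct mb_cache *cache);
    #else
    /* no-op stub so cleanup call sites need no #ifdef of their own */
    static inline void ext2_xattr_destroy_cache(struct mb_cache *cache)
    {
    }
    #endif
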
@@ -41,6 +41,18 @@
  * The fourth extended filesystem constants/structures
  */
 
+/*
+ * with AGGRESSIVE_CHECK allocator runs consistency checks over
+ * structures. these checks slow things down a lot
+ */
+#define AGGRESSIVE_CHECK__
+
+/*
+ * with DOUBLE_CHECK defined mballoc creates persistent in-core
+ * bitmaps, maintains and uses them to check for double allocations
+ */
+#define DOUBLE_CHECK__
+
 /*
  * Define EXT4FS_DEBUG to produce debug messages
  */
@@ -182,9 +194,9 @@ typedef struct ext4_io_end {
 	struct bio		*bio;		/* Linked list of completed
 						 * bios covering the extent */
 	unsigned int		flag;		/* unwritten or not */
+	atomic_t		count;		/* reference counter */
 	loff_t			offset;		/* offset in the file */
 	ssize_t			size;		/* size of the extent */
-	atomic_t		count;		/* reference counter */
 } ext4_io_end_t;
 
 struct ext4_io_submit {
@@ -1024,13 +1036,8 @@ struct ext4_inode_info {
 	 * transaction reserved
 	 */
 	struct list_head i_rsv_conversion_list;
-	/*
-	 * Completed IOs that need unwritten extents handling and don't have
-	 * transaction reserved
-	 */
-	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
-	atomic_t i_unwritten; /* Nr. of inflight conversions pending */
 	struct work_struct i_rsv_conversion_work;
+	atomic_t i_unwritten; /* Nr. of inflight conversions pending */
 
 	spinlock_t i_block_reservation_lock;
@@ -1513,16 +1520,6 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
 	}
 }
 
-static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
-{
-	return inode->i_private;
-}
-
-static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
-{
-	inode->i_private = io;
-}
-
 /*
  * Inode dynamic state flags
  */
@@ -2506,12 +2503,14 @@ extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
 int ext4_inode_is_fast_symlink(struct inode *inode);
 struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
 struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
-int ext4_get_block_write(struct inode *inode, sector_t iblock,
+int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
 			 struct buffer_head *bh_result, int create);
 int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
 			    struct buffer_head *bh_result, int create);
 int ext4_get_block(struct inode *inode, sector_t iblock,
 		   struct buffer_head *bh_result, int create);
+int ext4_dio_get_block(struct inode *inode, sector_t iblock,
+		       struct buffer_head *bh_result, int create);
 int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 			   struct buffer_head *bh, int create);
 int ext4_walk_page_buffers(handle_t *handle,
@@ -2559,6 +2558,9 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
 extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
 			      ext4_fsblk_t pblk, ext4_lblk_t len);
+extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
+				unsigned int map_len,
+				struct extent_status *result);
 
 /* indirect.c */
 extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -3285,10 +3287,7 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
 #define EXT4_WQ_HASH_SZ		37
 #define ext4_ioend_wq(v)   (&ext4__ioend_wq[((unsigned long)(v)) %\
 					    EXT4_WQ_HASH_SZ])
-#define ext4_aio_mutex(v)  (&ext4__aio_mutex[((unsigned long)(v)) %\
-					     EXT4_WQ_HASH_SZ])
 extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
 
 #define EXT4_RESIZING	0
 extern int ext4_resize_begin(struct super_block *sb);
......
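
All of the get-block variants declared above share the kernel's get_block_t shape: given an inode and a logical block, fill in a buffer_head describing the mapping, allocating only when create is non-zero. A hedged sketch of how a caller consumes that contract (a generic pattern, not code from this patch):

    static int example_block_lookup(struct inode *inode, sector_t iblock)
    {
        struct buffer_head bh;
        int err;

        bh.b_state = 0;
        bh.b_size = inode->i_sb->s_blocksize;   /* ask for at most one block */

        err = ext4_get_block(inode, iblock, &bh, 0);  /* create == 0: lookup */
        if (err < 0)
            return err;
        if (buffer_mapped(&bh))
            return 0;        /* bh.b_blocknr now holds the physical block */
        return -ENODATA;     /* a hole; illustrative choice of error */
    }

The split performed by this series runs along exactly this axis: ext4_dio_get_block for direct IO paths that handle transactions internally, ext4_get_block_unwritten for paths that want freshly allocated blocks left marked unwritten.
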
@@ -11,7 +11,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
  */
......
@@ -15,7 +15,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
  */
@@ -1736,6 +1736,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 	 */
 	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
 		return 0;
+	/*
+	 * The check for IO to unwritten extent is somewhat racy as we
+	 * increment i_unwritten / set EXT4_STATE_DIO_UNWRITTEN only after
+	 * dropping i_data_sem. But reserved blocks should save us in that
+	 * case.
+	 */
 	if (ext4_ext_is_unwritten(ex1) &&
 	    (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
 	     atomic_read(&EXT4_I(inode)->i_unwritten) ||
@@ -2293,59 +2299,69 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
 }
 
 /*
- * ext4_ext_put_gap_in_cache:
- * calculate boundaries of the gap that the requested block fits into
- * and cache this gap
+ * ext4_ext_determine_hole - determine hole around given block
+ * @inode:	inode we lookup in
+ * @path:	path in extent tree to @lblk
+ * @lblk:	pointer to logical block around which we want to determine hole
+ *
+ * Determine hole length (and start if easily possible) around given logical
+ * block. We don't try too hard to find the beginning of the hole but @path
+ * actually points to extent before @lblk, we provide it.
+ *
+ * The function returns the length of a hole starting at @lblk. We update @lblk
+ * to the beginning of the hole if we managed to find it.
  */
-static void
-ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
-				ext4_lblk_t block)
+static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
+					   struct ext4_ext_path *path,
+					   ext4_lblk_t *lblk)
 {
 	int depth = ext_depth(inode);
-	ext4_lblk_t len;
-	ext4_lblk_t lblock;
 	struct ext4_extent *ex;
-	struct extent_status es;
+	ext4_lblk_t len;
 
 	ex = path[depth].p_ext;
 	if (ex == NULL) {
 		/* there is no extent yet, so gap is [0;-] */
-		lblock = 0;
+		*lblk = 0;
 		len = EXT_MAX_BLOCKS;
-		ext_debug("cache gap(whole file):");
-	} else if (block < le32_to_cpu(ex->ee_block)) {
-		lblock = block;
-		len = le32_to_cpu(ex->ee_block) - block;
-		ext_debug("cache gap(before): %u [%u:%u]",
-				block,
-				le32_to_cpu(ex->ee_block),
-				ext4_ext_get_actual_len(ex));
-	} else if (block >= le32_to_cpu(ex->ee_block)
+	} else if (*lblk < le32_to_cpu(ex->ee_block)) {
+		len = le32_to_cpu(ex->ee_block) - *lblk;
+	} else if (*lblk >= le32_to_cpu(ex->ee_block)
 			+ ext4_ext_get_actual_len(ex)) {
 		ext4_lblk_t next;
-		lblock = le32_to_cpu(ex->ee_block)
-			+ ext4_ext_get_actual_len(ex);
 
+		*lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
 		next = ext4_ext_next_allocated_block(path);
-		ext_debug("cache gap(after): [%u:%u] %u",
-				le32_to_cpu(ex->ee_block),
-				ext4_ext_get_actual_len(ex),
-				block);
-		BUG_ON(next == lblock);
-		len = next - lblock;
+		BUG_ON(next == *lblk);
+		len = next - *lblk;
 	} else {
 		BUG();
 	}
+	return len;
+}
 
-	ext4_es_find_delayed_extent_range(inode, lblock, lblock + len - 1, &es);
+/*
+ * ext4_ext_put_gap_in_cache:
+ * calculate boundaries of the gap that the requested block fits into
+ * and cache this gap
+ */
+static void
+ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
+			  ext4_lblk_t hole_len)
+{
+	struct extent_status es;
+
+	ext4_es_find_delayed_extent_range(inode, hole_start,
+					  hole_start + hole_len - 1, &es);
 	if (es.es_len) {
 		/* There's delayed extent containing lblock? */
-		if (es.es_lblk <= lblock)
+		if (es.es_lblk <= hole_start)
 			return;
-		len = min(es.es_lblk - lblock, len);
+		hole_len = min(es.es_lblk - hole_start, hole_len);
 	}
-	ext_debug(" -> %u:%u\n", lblock, len);
-	ext4_es_insert_extent(inode, lblock, len, ~0, EXTENT_STATUS_HOLE);
+	ext_debug(" -> %u:%u\n", hole_start, hole_len);
+	ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
+			      EXTENT_STATUS_HOLE);
 }
 
 /*
@@ -3927,7 +3943,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 static int
 convert_initialized_extent(handle_t *handle, struct inode *inode,
 			   struct ext4_map_blocks *map,
-			   struct ext4_ext_path **ppath, int flags,
+			   struct ext4_ext_path **ppath,
 			   unsigned int allocated)
 {
 	struct ext4_ext_path *path = *ppath;
@@ -4007,7 +4023,6 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
 	struct ext4_ext_path *path = *ppath;
 	int ret = 0;
 	int err = 0;
-	ext4_io_end_t *io = ext4_inode_aio(inode);
 
 	ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical "
 		  "block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -4030,15 +4045,6 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
 					 flags | EXT4_GET_BLOCKS_CONVERT);
 		if (ret <= 0)
 			goto out;
-		/*
-		 * Flag the inode(non aio case) or end_io struct (aio case)
-		 * that this IO needs to conversion to written when IO is
-		 * completed
-		 */
-		if (io)
-			ext4_set_io_unwritten_flag(inode, io);
-		else
-			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
 		map->m_flags |= EXT4_MAP_UNWRITTEN;
 		goto out;
 	}
@@ -4283,9 +4289,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	unsigned int allocated = 0, offset = 0;
 	unsigned int allocated_clusters = 0;
 	struct ext4_allocation_request ar;
-	ext4_io_end_t *io = ext4_inode_aio(inode);
 	ext4_lblk_t cluster_offset;
-	int set_unwritten = 0;
 	bool map_from_cluster = false;
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
@@ -4347,7 +4351,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		    (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
 			allocated = convert_initialized_extent(
 						handle, inode, map, &path,
-						flags, allocated);
+						allocated);
 			goto out2;
 		} else if (!ext4_ext_is_unwritten(ex))
 			goto out;
@@ -4368,11 +4372,22 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	 * we couldn't try to create block if create flag is zero
 	 */
 	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+		ext4_lblk_t hole_start, hole_len;
+
+		hole_start = map->m_lblk;
+		hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
 		/*
 		 * put just found gap into cache to speed up
 		 * subsequent requests
 		 */
-		ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
+		ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
+
+		/* Update hole_len to reflect hole size after map->m_lblk */
+		if (hole_start != map->m_lblk)
+			hole_len -= map->m_lblk - hole_start;
+		map->m_pblk = 0;
+		map->m_len = min_t(unsigned int, map->m_len, hole_len);
+
 		goto out2;
 	}
@@ -4482,15 +4497,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){
 		ext4_ext_mark_unwritten(&newex);
 		map->m_flags |= EXT4_MAP_UNWRITTEN;
-		/*
-		 * io_end structure was created for every IO write to an
-		 * unwritten extent. To avoid unnecessary conversion,
-		 * here we flag the IO that really needs the conversion.
-		 * For non asycn direct IO case, flag the inode state
-		 * that we need to perform conversion when IO is done.
-		 */
-		if (flags & EXT4_GET_BLOCKS_PRE_IO)
-			set_unwritten = 1;
 	}
 
 	err = 0;
@@ -4501,14 +4507,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	err = ext4_ext_insert_extent(handle, inode, &path,
 				     &newex, flags);
 
-	if (!err && set_unwritten) {
-		if (io)
-			ext4_set_io_unwritten_flag(inode, io);
-		else
-			ext4_set_inode_state(inode,
-					     EXT4_STATE_DIO_UNWRITTEN);
-	}
-
 	if (err && free_on_err) {
 		int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
 			EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
......
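
The trimming arithmetic added to ext4_ext_map_blocks() deserves a worked example. Suppose extents cover blocks 0-99 and 200-299, and a lookup asks for map->m_lblk = 150 with map->m_len = 100. ext4_ext_determine_hole() moves hole_start back to 100 and returns hole_len = 100, so the whole gap 100-199 gets cached; the caller then shrinks the result to the part of the hole at and after the requested block:

    /* illustrative numbers: extents cover 0..99 and 200..299 */
    unsigned int m_lblk = 150, m_len = 100;         /* the lookup request */
    unsigned int hole_start = 100, hole_len = 100;  /* gap 100..199 found */

    if (hole_start != m_lblk)
        hole_len -= m_lblk - hole_start;    /* 50 blocks remain after 150 */
    if (m_len > hole_len)
        m_len = hole_len;                   /* report a 50-block hole */

Returning the hole size in map->m_len is what lets a SEEK_DATA caller skip a hole in one call instead of probing it block by block.
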
@@ -823,8 +823,8 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 		es->es_lblk = es1->es_lblk;
 		es->es_len = es1->es_len;
 		es->es_pblk = es1->es_pblk;
-		if (!ext4_es_is_referenced(es))
-			ext4_es_set_referenced(es);
+		if (!ext4_es_is_referenced(es1))
+			ext4_es_set_referenced(es1);
 		stats->es_stats_cache_hits++;
 	} else {
 		stats->es_stats_cache_misses++;
......
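
This one-line extents_status fix matters because es is the caller's stack copy while es1 is the node that actually lives in the status tree: setting the referenced bit on the copy never reached the structure the shrinker inspects. The bug pattern in miniature (hypothetical types, for illustration only):

    struct entry { unsigned long flags; };

    static void lookup(struct entry *out, struct entry *cached)
    {
        *out = *cached;       /* hand the caller a snapshot */
        out->flags |= 1;      /* bug: marks only the snapshot */
        cached->flags |= 1;   /* fix: mark the long-lived node itself */
    }
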
@@ -93,31 +93,29 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(iocb->ki_filp);
-	struct mutex *aio_mutex = NULL;
 	struct blk_plug plug;
 	int o_direct = iocb->ki_flags & IOCB_DIRECT;
+	int unaligned_aio = 0;
 	int overwrite = 0;
 	ssize_t ret;
 
+	inode_lock(inode);
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+
 	/*
-	 * Unaligned direct AIO must be serialized; see comment above
-	 * In the case of O_APPEND, assume that we must always serialize
+	 * Unaligned direct AIO must be serialized among each other as zeroing
+	 * of partial blocks of two competing unaligned AIOs can result in data
+	 * corruption.
 	 */
-	if (o_direct &&
-	    ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
+	if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
 	    !is_sync_kiocb(iocb) &&
-	    (iocb->ki_flags & IOCB_APPEND ||
-	     ext4_unaligned_aio(inode, from, iocb->ki_pos))) {
-		aio_mutex = ext4_aio_mutex(inode);
-		mutex_lock(aio_mutex);
+	    ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
+		unaligned_aio = 1;
 		ext4_unwritten_wait(inode);
 	}
 
-	inode_lock(inode);
-	ret = generic_write_checks(iocb, from);
-	if (ret <= 0)
-		goto out;
-
 	/*
 	 * If we have encountered a bitmap-format file, the size limit
 	 * is smaller than s_maxbytes, which is for extent-mapped files.
@@ -139,7 +137,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		blk_start_plug(&plug);
 
 		/* check whether we do a DIO overwrite or not */
-		if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
+		if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
 		    !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
 			struct ext4_map_blocks map;
 			unsigned int blkbits = inode->i_blkbits;
@@ -181,14 +179,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (o_direct)
 		blk_finish_plug(&plug);
 
-	if (aio_mutex)
-		mutex_unlock(aio_mutex);
 	return ret;
 
 out:
 	inode_unlock(inode);
-	if (aio_mutex)
-		mutex_unlock(aio_mutex);
 	return ret;
 }
 
@@ -417,7 +411,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
  */
 static int ext4_find_unwritten_pgoff(struct inode *inode,
 				     int whence,
-				     struct ext4_map_blocks *map,
+				     ext4_lblk_t end_blk,
 				     loff_t *offset)
 {
 	struct pagevec pvec;
@@ -432,7 +426,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 	blkbits = inode->i_sb->s_blocksize_bits;
 	startoff = *offset;
 	lastoff = startoff;
-	endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
+	endoff = (loff_t)end_blk << blkbits;
 
 	index = startoff >> PAGE_CACHE_SHIFT;
 	end = endoff >> PAGE_CACHE_SHIFT;
@@ -550,12 +544,11 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
 static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 {
 	struct inode *inode = file->f_mapping->host;
-	struct ext4_map_blocks map;
 	struct extent_status es;
 	ext4_lblk_t start, last, end;
 	loff_t dataoff, isize;
 	int blkbits;
-	int ret = 0;
+	int ret;
 
 	inode_lock(inode);
@@ -572,41 +565,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 	dataoff = offset;
 
 	do {
-		map.m_lblk = last;
-		map.m_len = end - last + 1;
-		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
-			if (last != start)
-				dataoff = (loff_t)last << blkbits;
-			break;
+		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+		if (ret <= 0) {
+			/* No extent found -> no data */
+			if (ret == 0)
+				ret = -ENXIO;
+			inode_unlock(inode);
+			return ret;
 		}
 
-		/*
-		 * If there is a delay extent at this offset,
-		 * it will be as a data.
-		 */
-		ext4_es_find_delayed_extent_range(inode, last, last, &es);
-		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
-			if (last != start)
-				dataoff = (loff_t)last << blkbits;
+		last = es.es_lblk;
+		if (last != start)
+			dataoff = (loff_t)last << blkbits;
+		if (!ext4_es_is_unwritten(&es))
 			break;
-		}
 
 		/*
 		 * If there is a unwritten extent at this offset,
 		 * it will be as a data or a hole according to page
 		 * cache that has data or not.
 		 */
-		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
-			int unwritten;
-			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
-							      &map, &dataoff);
-			if (unwritten)
-				break;
-		}
-
-		last++;
+		if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
					      es.es_lblk + es.es_len, &dataoff))
+			break;
+		last += es.es_len;
 		dataoff = (loff_t)last << blkbits;
+		cond_resched();
 	} while (last <= end);
 
 	inode_unlock(inode);
@@ -623,12 +607,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
 {
 	struct inode *inode = file->f_mapping->host;
-	struct ext4_map_blocks map;
 	struct extent_status es;
 	ext4_lblk_t start, last, end;
 	loff_t holeoff, isize;
 	int blkbits;
-	int ret = 0;
+	int ret;
 
 	inode_lock(inode);
@@ -645,44 +628,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
 	holeoff = offset;
 
 	do {
-		map.m_lblk = last;
-		map.m_len = end - last + 1;
-		ret = ext4_map_blocks(NULL, inode, &map, 0);
-		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
-			last += ret;
-			holeoff = (loff_t)last << blkbits;
-			continue;
+		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+		if (ret < 0) {
+			inode_unlock(inode);
+			return ret;
 		}
-
-		/*
-		 * If there is a delay extent at this offset,
-		 * we will skip this extent.
-		 */
-		ext4_es_find_delayed_extent_range(inode, last, last, &es);
-		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
-			last = es.es_lblk + es.es_len;
-			holeoff = (loff_t)last << blkbits;
-			continue;
+		/* Found a hole? */
+		if (ret == 0 || es.es_lblk > last) {
+			if (last != start)
+				holeoff = (loff_t)last << blkbits;
+			break;
 		}
-
 		/*
 		 * If there is a unwritten extent at this offset,
 		 * it will be as a data or a hole according to page
 		 * cache that has data or not.
 		 */
-		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
-			int unwritten;
-			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
-							      &map, &holeoff);
-			if (!unwritten) {
-				last += ret;
-				holeoff = (loff_t)last << blkbits;
-				continue;
-			}
-		}
+		if (ext4_es_is_unwritten(&es) &&
+		    ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
+					      last + es.es_len, &holeoff))
+			break;
 
-		/* find a hole */
-		break;
+		last += es.es_len;
+		holeoff = (loff_t)last << blkbits;
+		cond_resched();
 	} while (last <= end);
 
 	inode_unlock(inode);
......
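
The rewritten llseek helpers are where the SEEK_DATA performance work lands: the loops now walk whole extents via ext4_get_next_extent() instead of probing one block at a time through ext4_map_blocks(). A reduced sketch of the new control flow, with the unwritten/page-cache handling elided:

    static loff_t example_seek_data(struct inode *inode, ext4_lblk_t start,
                                    ext4_lblk_t end)
    {
        struct extent_status es;
        ext4_lblk_t last = start;
        int ret;

        do {
            ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
            if (ret <= 0)
                return ret ? ret : -ENXIO;  /* error, or no data past here */
            last = es.es_lblk;      /* jump the entire hole in one step */
            if (!ext4_es_is_unwritten(&es))
                break;              /* a written extent is data */
            /* unwritten: the page cache decides; otherwise keep scanning */
            last += es.es_len;
            cond_resched();
        } while (last <= end);
        return (loff_t)last << inode->i_sb->s_blocksize_bits;
    }

On a sparse file with a multi-gigabyte hole, the old loop issued one mapping call per block; this one issues one per extent or hole.
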
@@ -787,7 +787,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
 	sbi = EXT4_SB(sb);
 
 	/*
-	 * Initalize owners and quota early so that we don't have to account
+	 * Initialize owners and quota early so that we don't have to account
 	 * for quota initialization worst case in standard inode creating
 	 * transaction
 	 */
......
@@ -555,8 +555,23 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 		goto got_it;
 	}
 
-	/* Next simple case - plain lookup or failed read of indirect block */
-	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
+	/* Next simple case - plain lookup failed */
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+		unsigned epb = inode->i_sb->s_blocksize / sizeof(u32);
+		int i;
+
+		/* Count number blocks in a subtree under 'partial' */
+		count = 1;
+		for (i = 0; partial + i != chain + depth - 1; i++)
+			count *= epb;
+		/* Fill in size of a hole we found */
+		map->m_pblk = 0;
+		map->m_len = min_t(unsigned int, map->m_len, count);
+		goto cleanup;
+	}
+
+	/* Failed read of indirect block */
+	if (err == -EIO)
 		goto cleanup;
 
 	/*
@@ -693,21 +708,21 @@ ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		}
 		if (IS_DAX(inode))
 			ret = dax_do_io(iocb, inode, iter, offset,
-					ext4_get_block, NULL, 0);
+					ext4_dio_get_block, NULL, 0);
 		else
 			ret = __blockdev_direct_IO(iocb, inode,
 						   inode->i_sb->s_bdev, iter,
-						   offset, ext4_get_block, NULL,
-						   NULL, 0);
+						   offset, ext4_dio_get_block,
+						   NULL, NULL, 0);
 		inode_dio_end(inode);
 	} else {
 locked:
 		if (IS_DAX(inode))
 			ret = dax_do_io(iocb, inode, iter, offset,
-					ext4_get_block, NULL, DIO_LOCKING);
+					ext4_dio_get_block, NULL, DIO_LOCKING);
 		else
 			ret = blockdev_direct_IO(iocb, inode, iter, offset,
-						 ext4_get_block);
+						 ext4_dio_get_block);
 
 		if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
 			loff_t isize = i_size_read(inode);
......
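
The subtree computation added to ext4_ind_map_blocks() turns a failed indirect lookup into a hole length: each missing level of the tree accounts for epb (entries per block) times as many data blocks. As a worked check with an assumed 4 KiB block size (numbers illustrative, not from the patch):

    unsigned epb = 4096 / sizeof(u32);  /* 1024 block pointers per block */
    unsigned long count = 1;
    int i, levels_below = 2;   /* say the miss was in a double-indirect block */

    for (i = 0; i < levels_below; i++)
        count *= epb;          /* 1024 * 1024: a 1048576-block hole */

As in the extent case above, reporting the hole size in map->m_len is what makes SEEK_DATA cheap on indirect-mapped files too.
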
@@ -581,9 +581,10 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
 	if (ret)
 		goto out;
 
-	if (ext4_should_dioread_nolock(inode))
-		ret = __block_write_begin(page, from, to, ext4_get_block_write);
-	else
+	if (ext4_should_dioread_nolock(inode)) {
+		ret = __block_write_begin(page, from, to,
+					  ext4_get_block_unwritten);
+	} else
 		ret = __block_write_begin(page, from, to, ext4_get_block);
 
 	if (!ret && ext4_should_journal_data(inode)) {
@@ -1696,7 +1697,6 @@ int ext4_delete_inline_entry(handle_t *handle,
 	if (err)
 		goto out;
 
-	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
 	err = ext4_mark_inode_dirty(handle, dir);
 	if (unlikely(err))
 		goto out;
......
(This diff is collapsed and not shown here.)
@@ -11,7 +11,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * GNU General Public License for more details.
  *
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
  */
@@ -815,7 +815,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
  * for this page; do not hold this lock when calling this routine!
  */
 
-static int ext4_mb_init_cache(struct page *page, char *incore)
+static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
 {
 	ext4_group_t ngroups;
 	int blocksize;
@@ -848,7 +848,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 	/* allocate buffer_heads to read bitmaps */
 	if (groups_per_page > 1) {
 		i = sizeof(struct buffer_head *) * groups_per_page;
-		bh = kzalloc(i, GFP_NOFS);
+		bh = kzalloc(i, gfp);
 		if (bh == NULL) {
 			err = -ENOMEM;
 			goto out;
@@ -983,7 +983,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
  * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
  */
 static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
-		ext4_group_t group, struct ext4_buddy *e4b)
+		ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
 {
 	struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
 	int block, pnum, poff;
@@ -1002,7 +1002,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
 	block = group * 2;
 	pnum = block / blocks_per_page;
 	poff = block % blocks_per_page;
-	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	page = find_or_create_page(inode->i_mapping, pnum, gfp);
 	if (!page)
 		return -ENOMEM;
 	BUG_ON(page->mapping != inode->i_mapping);
@@ -1016,7 +1016,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
 
 	block++;
 	pnum = block / blocks_per_page;
-	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	page = find_or_create_page(inode->i_mapping, pnum, gfp);
 	if (!page)
 		return -ENOMEM;
 	BUG_ON(page->mapping != inode->i_mapping);
@@ -1042,7 +1042,7 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
  * calling this routine!
  */
 static noinline_for_stack
-int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
 {
 
 	struct ext4_group_info *this_grp;
@@ -1062,7 +1062,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	 * The call to ext4_mb_get_buddy_page_lock will mark the
 	 * page accessed.
 	 */
-	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
+	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
 	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
 		/*
 		 * somebody initialized the group
@@ -1072,7 +1072,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	}
 
 	page = e4b.bd_bitmap_page;
-	ret = ext4_mb_init_cache(page, NULL);
+	ret = ext4_mb_init_cache(page, NULL, gfp);
 	if (ret)
 		goto err;
 	if (!PageUptodate(page)) {
@@ -1091,7 +1091,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	}
 	/* init buddy cache */
 	page = e4b.bd_buddy_page;
-	ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
+	ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
 	if (ret)
 		goto err;
 	if (!PageUptodate(page)) {
@@ -1109,8 +1109,8 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
  * calling this routine!
  */
 static noinline_for_stack int
-ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-					struct ext4_buddy *e4b)
+ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
+		       struct ext4_buddy *e4b, gfp_t gfp)
 {
 	int blocks_per_page;
 	int block;
@@ -1140,7 +1140,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 		 * we need full data about the group
 		 * to make a good selection
 		 */
-		ret = ext4_mb_init_group(sb, group);
+		ret = ext4_mb_init_group(sb, group, gfp);
 		if (ret)
 			return ret;
 	}
@@ -1168,11 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 			 * wait for it to initialize.
 			 */
 			page_cache_release(page);
-			page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+			page = find_or_create_page(inode->i_mapping, pnum, gfp);
 			if (page) {
 				BUG_ON(page->mapping != inode->i_mapping);
 				if (!PageUptodate(page)) {
-					ret = ext4_mb_init_cache(page, NULL);
+					ret = ext4_mb_init_cache(page, NULL, gfp);
 					if (ret) {
 						unlock_page(page);
 						goto err;
@@ -1204,11 +1204,12 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	if (page == NULL || !PageUptodate(page)) {
 		if (page)
 			page_cache_release(page);
-		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, pnum, gfp);
 		if (page) {
 			BUG_ON(page->mapping != inode->i_mapping);
 			if (!PageUptodate(page)) {
-				ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+				ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+							 gfp);
 				if (ret) {
 					unlock_page(page);
 					goto err;
@@ -1247,6 +1248,12 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	return ret;
 }
 
+static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+			      struct ext4_buddy *e4b)
+{
+	return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
+}
+
 static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
 {
 	if (e4b->bd_bitmap_page)
@@ -2045,7 +2052,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 
 	/* We only do this if the grp has never been initialized */
 	if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-		int ret = ext4_mb_init_group(ac->ac_sb, group);
+		int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
 		if (ret)
 			return ret;
 	}
@@ -4694,16 +4701,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 			      inode, bh, block);
 	}
 
-	/*
-	 * We need to make sure we don't reuse the freed block until
-	 * after the transaction is committed, which we can do by
-	 * treating the block as metadata, below.  We make an
-	 * exception if the inode is to be written in writeback mode
-	 * since writeback mode has weak data consistency guarantees.
-	 */
-	if (!ext4_should_writeback_data(inode))
-		flags |= EXT4_FREE_BLOCKS_METADATA;
-
 	/*
 	 * If the extent to be freed does not begin on a cluster
 	 * boundary, we need to deal with partial clusters at the
@@ -4738,14 +4735,13 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 
 	if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
 		int i;
+		int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
 
 		for (i = 0; i < count; i++) {
 			cond_resched();
-			bh = sb_find_get_block(inode->i_sb, block + i);
-			if (!bh)
-				continue;
-			ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
-				    inode, bh, block + i);
+			if (is_metadata)
+				bh = sb_find_get_block(inode->i_sb, block + i);
+			ext4_forget(handle, is_metadata, inode, bh, block + i);
 		}
 	}
@@ -4815,16 +4811,23 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 #endif
 	trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
 
-	err = ext4_mb_load_buddy(sb, block_group, &e4b);
+	/* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
+	err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
+				     GFP_NOFS|__GFP_NOFAIL);
 	if (err)
 		goto error_return;
 
-	if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
+	/*
+	 * We need to make sure we don't reuse the freed block until after the
+	 * transaction is committed. We make an exception if the inode is to be
+	 * written in writeback mode since writeback mode has weak data
+	 * consistency guarantees.
+	 */
+	if (ext4_handle_valid(handle) &&
+	    ((flags & EXT4_FREE_BLOCKS_METADATA) ||
+	     !ext4_should_writeback_data(inode))) {
 		struct ext4_free_data *new_entry;
+
 		/*
-		 * blocks being freed are metadata. these blocks shouldn't
-		 * be used until this transaction is committed
-		 *
 		 * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
 		 * to fail.
 		 */
@@ -5217,7 +5220,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 		grp = ext4_get_group_info(sb, group);
 		/* We only do this if the grp has never been initialized */
 		if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
-			ret = ext4_mb_init_group(sb, group);
+			ret = ext4_mb_init_group(sb, group, GFP_NOFS);
 			if (ret)
 				break;
 		}
......
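
The gfp_t plumbing through mballoc exists for exactly one caller: ext4_free_blocks() is not allowed to fail, so it loads the buddy bitmap with __GFP_NOFAIL, while every other path keeps plain GFP_NOFS through the compatibility wrapper. The call-site contrast, both taken from the hunks above:

    /* ordinary paths: allocation may fail and the caller handles it */
    err = ext4_mb_load_buddy(sb, group, &e4b);          /* GFP_NOFS */

    /* the free path: retry the allocation forever rather than fail */
    err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
                                 GFP_NOFS | __GFP_NOFAIL);
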
@@ -22,18 +22,6 @@
 #include "ext4_jbd2.h"
 #include "ext4.h"
 
-/*
- * with AGGRESSIVE_CHECK allocator runs consistency checks over
- * structures. these checks slow things down a lot
- */
-#define AGGRESSIVE_CHECK__
-
-/*
- * with DOUBLE_CHECK defined mballoc creates persistent in-core
- * bitmaps, maintains and uses them to check for double allocations
- */
-#define DOUBLE_CHECK__
-
 /*
  */
 #ifdef CONFIG_EXT4_DEBUG
......
@@ -361,7 +361,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
 	 * blocks.
 	 *
 	 * While converting to extents we need not
-	 * update the orignal inode i_blocks for extent blocks
+	 * update the original inode i_blocks for extent blocks
 	 * via quota APIs. The quota update happened via tmp_inode already.
 	 */
 	spin_lock(&inode->i_lock);
......
@@ -91,21 +91,22 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
 	submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
 	wait_on_buffer(*bh);
 	if (!buffer_uptodate(*bh)) {
-		brelse(*bh);
-		*bh = NULL;
 		ret = -EIO;
 		goto warn_exit;
 	}
-
 	mmp = (struct mmp_struct *)((*bh)->b_data);
-	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
+	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
 		ret = -EFSCORRUPTED;
-	else if (!ext4_mmp_csum_verify(sb, mmp))
+		goto warn_exit;
+	}
+	if (!ext4_mmp_csum_verify(sb, mmp)) {
 		ret = -EFSBADCRC;
-	else
-		return 0;
-
+		goto warn_exit;
+	}
+	return 0;
 warn_exit:
+	brelse(*bh);
+	*bh = NULL;
 	ext4_warning(sb, "Error %d while reading MMP block %llu",
 		     ret, mmp_block);
 	return ret;
@@ -181,15 +182,13 @@ static int kmmpd(void *data)
 				     EXT4_FEATURE_INCOMPAT_MMP)) {
 			ext4_warning(sb, "kmmpd being stopped since MMP feature"
 				     " has been disabled.");
-			EXT4_SB(sb)->s_mmp_tsk = NULL;
-			goto failed;
+			goto exit_thread;
 		}
 
 		if (sb->s_flags & MS_RDONLY) {
 			ext4_warning(sb, "kmmpd being stopped since filesystem "
 				     "has been remounted as readonly.");
-			EXT4_SB(sb)->s_mmp_tsk = NULL;
-			goto failed;
+			goto exit_thread;
 		}
 
 		diff = jiffies - last_update_time;
@@ -211,9 +210,7 @@ static int kmmpd(void *data)
 			if (retval) {
 				ext4_error(sb, "error reading MMP data: %d",
 					   retval);
-
-				EXT4_SB(sb)->s_mmp_tsk = NULL;
-				goto failed;
+				goto exit_thread;
 			}
 
 			mmp_check = (struct mmp_struct *)(bh_check->b_data);
@@ -225,7 +222,9 @@ static int kmmpd(void *data)
 					     "The filesystem seems to have been"
 					     " multiply mounted.");
 				ext4_error(sb, "abort");
-				goto failed;
+				put_bh(bh_check);
+				retval = -EBUSY;
+				goto exit_thread;
 			}
 			put_bh(bh_check);
 		}
@@ -248,7 +247,8 @@ static int kmmpd(void *data)
 	retval = write_mmp_block(sb, bh);
 
-failed:
+exit_thread:
+	EXT4_SB(sb)->s_mmp_tsk = NULL;
 	kfree(data);
 	brelse(bh);
 	return retval;
......
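
The kmmpd() rework replaces four slightly different exit sequences with one exit_thread label that always clears s_mmp_tsk; previously the multiply-mounted abort path left a stale task pointer behind for the unmount path to trip over. The resulting shape is the classic single-exit pattern (the condition helper here is hypothetical):

    static int example_kmmpd(struct super_block *sb)
    {
        int retval = 0;

        for (;;) {
            if (must_stop(sb)) {        /* hypothetical check */
                retval = -EBUSY;
                goto exit_thread;
            }
            /* ... periodic MMP block update ... */
        }
    exit_thread:
        /* every exit path clears the task pointer exactly once */
        EXT4_SB(sb)->s_mmp_tsk = NULL;
        return retval;
    }
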
@@ -128,9 +128,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
 	BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
 	WARN_ON(io_end->handle);
 
-	if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
-		wake_up_all(ext4_ioend_wq(io_end->inode));
-
 	for (bio = io_end->bio; bio; bio = next_bio) {
 		next_bio = bio->bi_private;
 		ext4_finish_bio(bio);
@@ -265,7 +262,6 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
 {
 	ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
 	if (io) {
-		atomic_inc(&EXT4_I(inode)->i_ioend_count);
 		io->inode = inode;
 		INIT_LIST_HEAD(&io->list);
 		atomic_set(&io->count, 1);
......
@@ -55,7 +55,6 @@
 
 static struct ext4_lazy_init *ext4_li_info;
 static struct mutex ext4_li_mtx;
-static int ext4_mballoc_ready;
 static struct ratelimit_state ext4_mount_msg_ratelimit;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
@@ -844,7 +843,6 @@ static void ext4_put_super(struct super_block *sb)
 	ext4_release_system_zone(sb);
 	ext4_mb_release(sb);
 	ext4_ext_release(sb);
-	ext4_xattr_put_super(sb);
 
 	if (!(sb->s_flags & MS_RDONLY)) {
 		ext4_clear_feature_journal_needs_recovery(sb);
@@ -944,7 +942,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	spin_lock_init(&ei->i_completed_io_lock);
 	ei->i_sync_tid = 0;
 	ei->i_datasync_tid = 0;
-	atomic_set(&ei->i_ioend_count, 0);
 	atomic_set(&ei->i_unwritten, 0);
 	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
@@ -1425,9 +1422,9 @@ static const struct mount_opts {
 	{Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
 	{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
 	{Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
-	 MOPT_NO_EXT2 | MOPT_SET},
+	 MOPT_NO_EXT2},
 	{Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
-	 MOPT_NO_EXT2 | MOPT_CLEAR},
+	 MOPT_NO_EXT2},
 	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
 	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
 	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
@@ -1705,6 +1702,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
 		ext4_msg(sb, KERN_INFO, "dax option not supported");
 		return -1;
 #endif
+	} else if (token == Opt_data_err_abort) {
+		sbi->s_mount_opt |= m->mount_opt;
+	} else if (token == Opt_data_err_ignore) {
+		sbi->s_mount_opt &= ~m->mount_opt;
 	} else {
 		if (!args->from)
 			arg = 1;
@@ -1914,6 +1915,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
 		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
 	if (nodefs || sbi->s_max_dir_size_kb)
 		SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
+	if (test_opt(sb, DATA_ERR_ABORT))
+		SEQ_OPTS_PUTS("data_err=abort");
 
 	ext4_show_quota_options(seq, sb);
 	return 0;
@@ -3796,12 +3799,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
 
 no_journal:
-	if (ext4_mballoc_ready) {
-		sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
-		if (!sbi->s_mb_cache) {
-			ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
-			goto failed_mount_wq;
-		}
+	sbi->s_mb_cache = ext4_xattr_create_cache();
+	if (!sbi->s_mb_cache) {
+		ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+		goto failed_mount_wq;
 	}
 
 	if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
@@ -4027,6 +4028,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	if (EXT4_SB(sb)->rsv_conversion_wq)
 		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
 failed_mount_wq:
+	if (sbi->s_mb_cache) {
+		ext4_xattr_destroy_cache(sbi->s_mb_cache);
+		sbi->s_mb_cache = NULL;
+	}
 	if (sbi->s_journal) {
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
@@ -5321,7 +5326,6 @@ MODULE_ALIAS_FS("ext4");
 
 /* Shared across all ext4 file systems */
 wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
 
 static int __init ext4_init_fs(void)
 {
@@ -5334,10 +5338,8 @@ static int __init ext4_init_fs(void)
 	/* Build-time check for flags consistency */
 	ext4_check_flag_values();
 
-	for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
-		mutex_init(&ext4__aio_mutex[i]);
+	for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
 		init_waitqueue_head(&ext4__ioend_wq[i]);
}
err = ext4_init_es(); err = ext4_init_es();
if (err) if (err)
...@@ -5358,8 +5360,6 @@ static int __init ext4_init_fs(void) ...@@ -5358,8 +5360,6 @@ static int __init ext4_init_fs(void)
err = ext4_init_mballoc(); err = ext4_init_mballoc();
if (err) if (err)
goto out2; goto out2;
else
ext4_mballoc_ready = 1;
err = init_inodecache(); err = init_inodecache();
if (err) if (err)
goto out1; goto out1;
...@@ -5375,7 +5375,6 @@ static int __init ext4_init_fs(void) ...@@ -5375,7 +5375,6 @@ static int __init ext4_init_fs(void)
unregister_as_ext3(); unregister_as_ext3();
destroy_inodecache(); destroy_inodecache();
out1: out1:
ext4_mballoc_ready = 0;
ext4_exit_mballoc(); ext4_exit_mballoc();
out2: out2:
ext4_exit_sysfs(); ext4_exit_sysfs();
......
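The Opt_data_err_* hunks above stop relying on MOPT_SET/MOPT_CLEAR and branch on the token explicitly, so that _ext4_show_options() can later report data_err=abort from the same mount-option bit. A toy C model of that set/clear/print round trip; the mask value and names are made up for illustration:

#include <stdio.h>

#define MOUNT_DATA_ERR_ABORT 0x0400   /* illustrative bit, not ext4's value */

enum token { Opt_data_err_abort, Opt_data_err_ignore };

static unsigned long s_mount_opt;

static void handle_opt(enum token t)
{
    if (t == Opt_data_err_abort)
        s_mount_opt |= MOUNT_DATA_ERR_ABORT;
    else if (t == Opt_data_err_ignore)
        s_mount_opt &= ~MOUNT_DATA_ERR_ABORT;
}

static void show_options(void)
{
    if (s_mount_opt & MOUNT_DATA_ERR_ABORT)
        puts("data_err=abort");
}

int main(void)
{
    handle_opt(Opt_data_err_abort);
    show_options();                  /* prints data_err=abort */
    handle_opt(Opt_data_err_ignore);
    show_options();                  /* prints nothing */
    return 0;
}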
...@@ -545,30 +545,44 @@ static void ...@@ -545,30 +545,44 @@ static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode, ext4_xattr_release_block(handle_t *handle, struct inode *inode,
struct buffer_head *bh) struct buffer_head *bh)
{ {
struct mb_cache_entry *ce = NULL;
int error = 0;
struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
u32 hash, ref;
int error = 0;
ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
BUFFER_TRACE(bh, "get_write_access"); BUFFER_TRACE(bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bh); error = ext4_journal_get_write_access(handle, bh);
if (error) if (error)
goto out; goto out;
lock_buffer(bh); lock_buffer(bh);
if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { hash = le32_to_cpu(BHDR(bh)->h_hash);
ref = le32_to_cpu(BHDR(bh)->h_refcount);
if (ref == 1) {
ea_bdebug(bh, "refcount now=0; freeing"); ea_bdebug(bh, "refcount now=0; freeing");
if (ce) /*
mb_cache_entry_free(ce); * This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
get_bh(bh); get_bh(bh);
unlock_buffer(bh); unlock_buffer(bh);
ext4_free_blocks(handle, inode, bh, 0, 1, ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET); EXT4_FREE_BLOCKS_FORGET);
} else { } else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1); ref--;
if (ce) BHDR(bh)->h_refcount = cpu_to_le32(ref);
mb_cache_entry_release(ce); if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
struct mb_cache_entry *ce;
ce = mb_cache_entry_get(ext4_mb_cache, hash,
bh->b_blocknr);
if (ce) {
ce->e_reusable = 1;
mb_cache_entry_put(ext4_mb_cache, ce);
}
}
/* /*
* Beware of this ugliness: Releasing of xattr block references * Beware of this ugliness: Releasing of xattr block references
* from different inodes can race and so we have to protect * from different inodes can race and so we have to protect
...@@ -790,8 +804,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ...@@ -790,8 +804,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (i->value && i->value_len > sb->s_blocksize) if (i->value && i->value_len > sb->s_blocksize)
return -ENOSPC; return -ENOSPC;
if (s->base) { if (s->base) {
ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
bs->bh->b_blocknr);
BUFFER_TRACE(bs->bh, "get_write_access"); BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh); error = ext4_journal_get_write_access(handle, bs->bh);
if (error) if (error)
...@@ -799,10 +811,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ...@@ -799,10 +811,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
lock_buffer(bs->bh); lock_buffer(bs->bh);
if (header(s->base)->h_refcount == cpu_to_le32(1)) { if (header(s->base)->h_refcount == cpu_to_le32(1)) {
if (ce) { __u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);
mb_cache_entry_free(ce);
ce = NULL; /*
} * This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect modified
* block
*/
mb_cache_entry_delete_block(ext4_mb_cache, hash,
bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place"); ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s); error = ext4_xattr_set_entry(i, s);
if (!error) { if (!error) {
...@@ -826,10 +843,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ...@@ -826,10 +843,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
int offset = (char *)s->here - bs->bh->b_data; int offset = (char *)s->here - bs->bh->b_data;
unlock_buffer(bs->bh); unlock_buffer(bs->bh);
if (ce) {
mb_cache_entry_release(ce);
ce = NULL;
}
ea_bdebug(bs->bh, "cloning"); ea_bdebug(bs->bh, "cloning");
s->base = kmalloc(bs->bh->b_size, GFP_NOFS); s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
error = -ENOMEM; error = -ENOMEM;
...@@ -872,6 +885,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ...@@ -872,6 +885,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (new_bh == bs->bh) if (new_bh == bs->bh)
ea_bdebug(new_bh, "keeping"); ea_bdebug(new_bh, "keeping");
else { else {
u32 ref;
/* The old block is released after updating /* The old block is released after updating
the inode. */ the inode. */
error = dquot_alloc_block(inode, error = dquot_alloc_block(inode,
...@@ -884,9 +899,40 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ...@@ -884,9 +899,40 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (error) if (error)
goto cleanup_dquot; goto cleanup_dquot;
lock_buffer(new_bh); lock_buffer(new_bh);
le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); /*
* We have to be careful about races with
* freeing, rehashing or adding references to
* xattr block. Once we hold buffer lock xattr
* block's state is stable so we can check
* whether the block got freed / rehashed or
* not. Since we unhash mbcache entry under
* buffer lock when freeing / rehashing xattr
* block, checking whether entry is still
* hashed is reliable. Same rules hold for
* e_reusable handling.
*/
if (hlist_bl_unhashed(&ce->e_hash_list) ||
!ce->e_reusable) {
/*
* Undo everything and check mbcache
* again.
*/
unlock_buffer(new_bh);
dquot_free_block(inode,
EXT4_C2B(EXT4_SB(sb),
1));
brelse(new_bh);
mb_cache_entry_put(ext4_mb_cache, ce);
ce = NULL;
new_bh = NULL;
goto inserted;
}
ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
if (ref >= EXT4_XATTR_REFCOUNT_MAX)
ce->e_reusable = 0;
ea_bdebug(new_bh, "reusing; refcount now=%d", ea_bdebug(new_bh, "reusing; refcount now=%d",
le32_to_cpu(BHDR(new_bh)->h_refcount)); ref);
unlock_buffer(new_bh); unlock_buffer(new_bh);
error = ext4_handle_dirty_xattr_block(handle, error = ext4_handle_dirty_xattr_block(handle,
inode, inode,
...@@ -894,7 +940,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ...@@ -894,7 +940,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (error) if (error)
goto cleanup_dquot; goto cleanup_dquot;
} }
mb_cache_entry_release(ce); mb_cache_entry_touch(ext4_mb_cache, ce);
mb_cache_entry_put(ext4_mb_cache, ce);
ce = NULL; ce = NULL;
} else if (bs->bh && s->base == bs->bh->b_data) { } else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */ /* We were modifying this block in-place. */
...@@ -959,7 +1006,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ...@@ -959,7 +1006,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
cleanup: cleanup:
if (ce) if (ce)
mb_cache_entry_release(ce); mb_cache_entry_put(ext4_mb_cache, ce);
brelse(new_bh); brelse(new_bh);
if (!(bs->bh && s->base == bs->bh->b_data)) if (!(bs->bh && s->base == bs->bh->b_data))
kfree(s->base); kfree(s->base);
...@@ -1070,6 +1117,17 @@ static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, ...@@ -1070,6 +1117,17 @@ static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
return 0; return 0;
} }
static int ext4_xattr_value_same(struct ext4_xattr_search *s,
struct ext4_xattr_info *i)
{
void *value;
if (le32_to_cpu(s->here->e_value_size) != i->value_len)
return 0;
value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs);
return !memcmp(value, i->value, i->value_len);
}
/* /*
* ext4_xattr_set_handle() * ext4_xattr_set_handle()
* *
...@@ -1146,6 +1204,13 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, ...@@ -1146,6 +1204,13 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
else if (!bs.s.not_found) else if (!bs.s.not_found)
error = ext4_xattr_block_set(handle, inode, &i, &bs); error = ext4_xattr_block_set(handle, inode, &i, &bs);
} else { } else {
error = 0;
/* Xattr value did not change? Save us some work and bail out */
if (!is.s.not_found && ext4_xattr_value_same(&is.s, &i))
goto cleanup;
if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
goto cleanup;
error = ext4_xattr_ibody_set(handle, inode, &i, &is); error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (!error && !bs.s.not_found) { if (!error && !bs.s.not_found) {
i.value = NULL; i.value = NULL;
...@@ -1511,17 +1576,6 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) ...@@ -1511,17 +1576,6 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
brelse(bh); brelse(bh);
} }
/*
* ext4_xattr_put_super()
*
* This is called when a file system is unmounted.
*/
void
ext4_xattr_put_super(struct super_block *sb)
{
mb_cache_shrink(sb->s_bdev);
}
/* /*
* ext4_xattr_cache_insert() * ext4_xattr_cache_insert()
* *
...@@ -1533,26 +1587,19 @@ ext4_xattr_put_super(struct super_block *sb) ...@@ -1533,26 +1587,19 @@ ext4_xattr_put_super(struct super_block *sb)
static void static void
ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
{ {
__u32 hash = le32_to_cpu(BHDR(bh)->h_hash); struct ext4_xattr_header *header = BHDR(bh);
struct mb_cache_entry *ce; __u32 hash = le32_to_cpu(header->h_hash);
int reusable = le32_to_cpu(header->h_refcount) <
EXT4_XATTR_REFCOUNT_MAX;
int error; int error;
ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS); error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
if (!ce) { bh->b_blocknr, reusable);
ea_bdebug(bh, "out of memory");
return;
}
error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
if (error) { if (error) {
mb_cache_entry_free(ce); if (error == -EBUSY)
if (error == -EBUSY) {
ea_bdebug(bh, "already in cache"); ea_bdebug(bh, "already in cache");
error = 0; } else
}
} else {
ea_bdebug(bh, "inserting [%x]", (int)hash); ea_bdebug(bh, "inserting [%x]", (int)hash);
mb_cache_entry_release(ce);
}
} }
/* /*
...@@ -1614,33 +1661,20 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, ...@@ -1614,33 +1661,20 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
if (!header->h_hash) if (!header->h_hash)
return NULL; /* never share */ return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again: ce = mb_cache_entry_find_first(ext4_mb_cache, hash);
ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
hash);
while (ce) { while (ce) {
struct buffer_head *bh; struct buffer_head *bh;
if (IS_ERR(ce)) {
if (PTR_ERR(ce) == -EAGAIN)
goto again;
break;
}
bh = sb_bread(inode->i_sb, ce->e_block); bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) { if (!bh) {
EXT4_ERROR_INODE(inode, "block %lu read error", EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long) ce->e_block); (unsigned long) ce->e_block);
} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
EXT4_XATTR_REFCOUNT_MAX) {
ea_idebug(inode, "block %lu refcount %d>=%d",
(unsigned long) ce->e_block,
le32_to_cpu(BHDR(bh)->h_refcount),
EXT4_XATTR_REFCOUNT_MAX);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
*pce = ce; *pce = ce;
return bh; return bh;
} }
brelse(bh); brelse(bh);
ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); ce = mb_cache_entry_find_next(ext4_mb_cache, ce);
} }
return NULL; return NULL;
} }
...@@ -1716,9 +1750,9 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, ...@@ -1716,9 +1750,9 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
#define HASH_BUCKET_BITS 10 #define HASH_BUCKET_BITS 10
struct mb_cache * struct mb_cache *
ext4_xattr_create_cache(char *name) ext4_xattr_create_cache(void)
{ {
return mb_cache_create(name, HASH_BUCKET_BITS); return mb_cache_create(HASH_BUCKET_BITS);
} }
void ext4_xattr_destroy_cache(struct mb_cache *cache) void ext4_xattr_destroy_cache(struct mb_cache *cache)
......
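ext4_xattr_value_same() above is the heart of the "value unchanged, bail out early" optimization in ext4_xattr_set_handle(): a length check followed by a memcmp against the value stored in the block. A standalone sketch of the same comparison; the struct layout here is a simplification, not the on-disk ext4 format:

#include <stdio.h>
#include <string.h>

/* simplified stand-ins for ext4's search/info structures */
struct xattr_stored  { size_t value_size; const void *value; };
struct xattr_request { size_t value_len;  const void *value; };

static int xattr_value_same(const struct xattr_stored *s,
                            const struct xattr_request *i)
{
    if (s->value_size != i->value_len)
        return 0;
    return !memcmp(s->value, i->value, i->value_len);
}

int main(void)
{
    struct xattr_stored  s    = { 5, "hello" };
    struct xattr_request same = { 5, "hello" }, diff = { 5, "world" };

    printf("%d %d\n", xattr_value_same(&s, &same),
                      xattr_value_same(&s, &diff));  /* 1 0 */
    return 0;
}

When the values match, the set path skips the journal write entirely, which is where the xattr scalability win mentioned in the pull request comes from.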
...@@ -108,7 +108,6 @@ extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_ ...@@ -108,7 +108,6 @@ extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
extern void ext4_xattr_delete_inode(handle_t *, struct inode *); extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle); struct ext4_inode *raw_inode, handle_t *handle);
...@@ -124,7 +123,7 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, ...@@ -124,7 +123,7 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i, struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is); struct ext4_xattr_ibody_find *is);
extern struct mb_cache *ext4_xattr_create_cache(char *name); extern struct mb_cache *ext4_xattr_create_cache(void);
extern void ext4_xattr_destroy_cache(struct mb_cache *); extern void ext4_xattr_destroy_cache(struct mb_cache *);
#ifdef CONFIG_EXT4_FS_SECURITY #ifdef CONFIG_EXT4_FS_SECURITY
......
...@@ -131,14 +131,12 @@ static int journal_submit_commit_record(journal_t *journal, ...@@ -131,14 +131,12 @@ static int journal_submit_commit_record(journal_t *journal,
if (is_journal_aborted(journal)) if (is_journal_aborted(journal))
return 0; return 0;
bh = jbd2_journal_get_descriptor_buffer(journal); bh = jbd2_journal_get_descriptor_buffer(commit_transaction,
JBD2_COMMIT_BLOCK);
if (!bh) if (!bh)
return 1; return 1;
tmp = (struct commit_header *)bh->b_data; tmp = (struct commit_header *)bh->b_data;
tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
tmp->h_commit_sec = cpu_to_be64(now.tv_sec); tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec); tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
...@@ -222,7 +220,7 @@ static int journal_submit_data_buffers(journal_t *journal, ...@@ -222,7 +220,7 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
mapping = jinode->i_vfs_inode->i_mapping; mapping = jinode->i_vfs_inode->i_mapping;
set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
/* /*
* submit the inode data buffers. We use writepage * submit the inode data buffers. We use writepage
...@@ -236,8 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal, ...@@ -236,8 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal,
ret = err; ret = err;
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction); J_ASSERT(jinode->i_transaction == commit_transaction);
clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); jinode->i_flags &= ~JI_COMMIT_RUNNING;
smp_mb__after_atomic(); smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
} }
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
...@@ -258,7 +256,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, ...@@ -258,7 +256,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */ /* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
if (err) { if (err) {
...@@ -274,8 +272,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal, ...@@ -274,8 +272,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
ret = err; ret = err;
} }
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); jinode->i_flags &= ~JI_COMMIT_RUNNING;
smp_mb__after_atomic(); smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
} }
...@@ -319,22 +317,6 @@ static void write_tag_block(journal_t *j, journal_block_tag_t *tag, ...@@ -319,22 +317,6 @@ static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
} }
static void jbd2_descr_block_csum_set(journal_t *j,
struct buffer_head *bh)
{
struct jbd2_journal_block_tail *tail;
__u32 csum;
if (!jbd2_journal_has_csum_v2or3(j))
return;
tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
sizeof(struct jbd2_journal_block_tail));
tail->t_checksum = 0;
csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
tail->t_checksum = cpu_to_be32(csum);
}
static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
struct buffer_head *bh, __u32 sequence) struct buffer_head *bh, __u32 sequence)
{ {
...@@ -379,7 +361,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -379,7 +361,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
ktime_t start_time; ktime_t start_time;
u64 commit_time; u64 commit_time;
char *tagp = NULL; char *tagp = NULL;
journal_header_t *header;
journal_block_tag_t *tag = NULL; journal_block_tag_t *tag = NULL;
int space_left = 0; int space_left = 0;
int first_tag = 0; int first_tag = 0;
...@@ -554,8 +535,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -554,8 +535,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd2_journal_abort(journal, err); jbd2_journal_abort(journal, err);
blk_start_plug(&plug); blk_start_plug(&plug);
jbd2_journal_write_revoke_records(journal, commit_transaction, jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
&log_bufs, WRITE_SYNC);
jbd_debug(3, "JBD2: commit phase 2b\n"); jbd_debug(3, "JBD2: commit phase 2b\n");
...@@ -616,7 +596,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -616,7 +596,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(4, "JBD2: get descriptor\n"); jbd_debug(4, "JBD2: get descriptor\n");
descriptor = jbd2_journal_get_descriptor_buffer(journal); descriptor = jbd2_journal_get_descriptor_buffer(
commit_transaction,
JBD2_DESCRIPTOR_BLOCK);
if (!descriptor) { if (!descriptor) {
jbd2_journal_abort(journal, -EIO); jbd2_journal_abort(journal, -EIO);
continue; continue;
...@@ -625,11 +607,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -625,11 +607,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(4, "JBD2: got buffer %llu (%p)\n", jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
(unsigned long long)descriptor->b_blocknr, (unsigned long long)descriptor->b_blocknr,
descriptor->b_data); descriptor->b_data);
header = (journal_header_t *)descriptor->b_data;
header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK);
header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
tagp = &descriptor->b_data[sizeof(journal_header_t)]; tagp = &descriptor->b_data[sizeof(journal_header_t)];
space_left = descriptor->b_size - space_left = descriptor->b_size -
sizeof(journal_header_t); sizeof(journal_header_t);
...@@ -721,7 +698,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -721,7 +698,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG); tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
jbd2_descr_block_csum_set(journal, descriptor); jbd2_descriptor_block_csum_set(journal, descriptor);
start_journal_io: start_journal_io:
for (i = 0; i < bufs; i++) { for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i]; struct buffer_head *bh = wbuf[i];
......
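The JI_COMMIT_RUNNING hunks above can drop the atomic set_bit/clear_bit because every update of i_flags now happens under j_list_lock; only the wakeup still needs a barrier. A rough userspace analogue of "flag guarded by a lock, waiter blocks until it clears", using a pthread mutex and condition variable in place of the kernel's bit-waitqueue:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  commit_done = PTHREAD_COND_INITIALIZER;
static bool commit_running;

static void *commit_thread(void *arg)
{
    pthread_mutex_lock(&list_lock);
    commit_running = true;          /* plain store: we hold the lock */
    pthread_mutex_unlock(&list_lock);

    usleep(10000);                  /* "write out inode data buffers" */

    pthread_mutex_lock(&list_lock);
    commit_running = false;
    pthread_cond_broadcast(&commit_done);  /* kernel: smp_mb(); wake_up_bit() */
    pthread_mutex_unlock(&list_lock);
    return NULL;
}

int main(void)
{
    pthread_t t;
    pthread_create(&t, NULL, commit_thread, NULL);

    /* like jbd2_journal_release_jbd_inode(): wait while commit uses inode;
     * if the flag is not (yet) set, we simply do not wait */
    pthread_mutex_lock(&list_lock);
    while (commit_running)
        pthread_cond_wait(&commit_done, &list_lock);
    pthread_mutex_unlock(&list_lock);

    pthread_join(t, NULL);
    puts("inode released");
    return 0;
}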
...@@ -805,10 +805,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, ...@@ -805,10 +805,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
* But we don't bother doing that, so there will be coherency problems with * But we don't bother doing that, so there will be coherency problems with
* mmaps of blockdevs which hold live JBD-controlled filesystems. * mmaps of blockdevs which hold live JBD-controlled filesystems.
*/ */
struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) struct buffer_head *
jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
{ {
journal_t *journal = transaction->t_journal;
struct buffer_head *bh; struct buffer_head *bh;
unsigned long long blocknr; unsigned long long blocknr;
journal_header_t *header;
int err; int err;
err = jbd2_journal_next_log_block(journal, &blocknr); err = jbd2_journal_next_log_block(journal, &blocknr);
...@@ -821,12 +824,31 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) ...@@ -821,12 +824,31 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
return NULL; return NULL;
lock_buffer(bh); lock_buffer(bh);
memset(bh->b_data, 0, journal->j_blocksize); memset(bh->b_data, 0, journal->j_blocksize);
header = (journal_header_t *)bh->b_data;
header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
header->h_blocktype = cpu_to_be32(type);
header->h_sequence = cpu_to_be32(transaction->t_tid);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
unlock_buffer(bh); unlock_buffer(bh);
BUFFER_TRACE(bh, "return this buffer"); BUFFER_TRACE(bh, "return this buffer");
return bh; return bh;
} }
void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh)
{
struct jbd2_journal_block_tail *tail;
__u32 csum;
if (!jbd2_journal_has_csum_v2or3(j))
return;
tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
sizeof(struct jbd2_journal_block_tail));
tail->t_checksum = 0;
csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
tail->t_checksum = cpu_to_be32(csum);
}
/* /*
* Return tid of the oldest transaction in the journal and block in the journal * Return tid of the oldest transaction in the journal and block in the journal
* where the transaction starts. * where the transaction starts.
...@@ -1408,11 +1430,12 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, ...@@ -1408,11 +1430,12 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
/** /**
* jbd2_mark_journal_empty() - Mark on disk journal as empty. * jbd2_mark_journal_empty() - Mark on disk journal as empty.
* @journal: The journal to update. * @journal: The journal to update.
* @write_op: With which operation should we write the journal sb
* *
* Update a journal's dynamic superblock fields to show that journal is empty. * Update a journal's dynamic superblock fields to show that journal is empty.
* Write updated superblock to disk waiting for IO to complete. * Write updated superblock to disk waiting for IO to complete.
*/ */
static void jbd2_mark_journal_empty(journal_t *journal) static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
{ {
journal_superblock_t *sb = journal->j_superblock; journal_superblock_t *sb = journal->j_superblock;
...@@ -1430,7 +1453,7 @@ static void jbd2_mark_journal_empty(journal_t *journal) ...@@ -1430,7 +1453,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
sb->s_start = cpu_to_be32(0); sb->s_start = cpu_to_be32(0);
read_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
jbd2_write_superblock(journal, WRITE_FUA); jbd2_write_superblock(journal, write_op);
/* Log is no longer empty */ /* Log is no longer empty */
write_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
...@@ -1716,7 +1739,13 @@ int jbd2_journal_destroy(journal_t *journal) ...@@ -1716,7 +1739,13 @@ int jbd2_journal_destroy(journal_t *journal)
if (journal->j_sb_buffer) { if (journal->j_sb_buffer) {
if (!is_journal_aborted(journal)) { if (!is_journal_aborted(journal)) {
mutex_lock(&journal->j_checkpoint_mutex); mutex_lock(&journal->j_checkpoint_mutex);
jbd2_mark_journal_empty(journal);
write_lock(&journal->j_state_lock);
journal->j_tail_sequence =
++journal->j_transaction_sequence;
write_unlock(&journal->j_state_lock);
jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA);
mutex_unlock(&journal->j_checkpoint_mutex); mutex_unlock(&journal->j_checkpoint_mutex);
} else } else
err = -EIO; err = -EIO;
...@@ -1975,7 +2004,7 @@ int jbd2_journal_flush(journal_t *journal) ...@@ -1975,7 +2004,7 @@ int jbd2_journal_flush(journal_t *journal)
* the magic code for a fully-recovered superblock. Any future * the magic code for a fully-recovered superblock. Any future
* commits of data to the journal will restore the current * commits of data to the journal will restore the current
* s_start value. */ * s_start value. */
jbd2_mark_journal_empty(journal); jbd2_mark_journal_empty(journal, WRITE_FUA);
mutex_unlock(&journal->j_checkpoint_mutex); mutex_unlock(&journal->j_checkpoint_mutex);
write_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
J_ASSERT(!journal->j_running_transaction); J_ASSERT(!journal->j_running_transaction);
...@@ -2021,7 +2050,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) ...@@ -2021,7 +2050,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
if (write) { if (write) {
/* Lock to make assertions happy... */ /* Lock to make assertions happy... */
mutex_lock(&journal->j_checkpoint_mutex); mutex_lock(&journal->j_checkpoint_mutex);
jbd2_mark_journal_empty(journal); jbd2_mark_journal_empty(journal, WRITE_FUA);
mutex_unlock(&journal->j_checkpoint_mutex); mutex_unlock(&journal->j_checkpoint_mutex);
} }
...@@ -2565,7 +2594,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal, ...@@ -2565,7 +2594,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
restart: restart:
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
/* Is commit writing out inode - we have to wait */ /* Is commit writing out inode - we have to wait */
if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) { if (jinode->i_flags & JI_COMMIT_RUNNING) {
wait_queue_head_t *wq; wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
......
...@@ -174,8 +174,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, ...@@ -174,8 +174,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
return 0; return 0;
} }
static int jbd2_descr_block_csum_verify(journal_t *j, static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
void *buf)
{ {
struct jbd2_journal_block_tail *tail; struct jbd2_journal_block_tail *tail;
__be32 provided; __be32 provided;
...@@ -522,8 +521,8 @@ static int do_one_pass(journal_t *journal, ...@@ -522,8 +521,8 @@ static int do_one_pass(journal_t *journal,
descr_csum_size = descr_csum_size =
sizeof(struct jbd2_journal_block_tail); sizeof(struct jbd2_journal_block_tail);
if (descr_csum_size > 0 && if (descr_csum_size > 0 &&
!jbd2_descr_block_csum_verify(journal, !jbd2_descriptor_block_csum_verify(journal,
bh->b_data)) { bh->b_data)) {
printk(KERN_ERR "JBD2: Invalid checksum " printk(KERN_ERR "JBD2: Invalid checksum "
"recovering block %lu in log\n", "recovering block %lu in log\n",
next_log_block); next_log_block);
...@@ -811,26 +810,6 @@ static int do_one_pass(journal_t *journal, ...@@ -811,26 +810,6 @@ static int do_one_pass(journal_t *journal,
return err; return err;
} }
static int jbd2_revoke_block_csum_verify(journal_t *j,
void *buf)
{
struct jbd2_journal_revoke_tail *tail;
__be32 provided;
__u32 calculated;
if (!jbd2_journal_has_csum_v2or3(j))
return 1;
tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
sizeof(struct jbd2_journal_revoke_tail));
provided = tail->r_checksum;
tail->r_checksum = 0;
calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
tail->r_checksum = provided;
return provided == cpu_to_be32(calculated);
}
/* Scan a revoke record, marking all blocks mentioned as revoked. */ /* Scan a revoke record, marking all blocks mentioned as revoked. */
static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
...@@ -846,11 +825,11 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, ...@@ -846,11 +825,11 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
offset = sizeof(jbd2_journal_revoke_header_t); offset = sizeof(jbd2_journal_revoke_header_t);
rcount = be32_to_cpu(header->r_count); rcount = be32_to_cpu(header->r_count);
if (!jbd2_revoke_block_csum_verify(journal, header)) if (!jbd2_descriptor_block_csum_verify(journal, header))
return -EFSBADCRC; return -EFSBADCRC;
if (jbd2_journal_has_csum_v2or3(journal)) if (jbd2_journal_has_csum_v2or3(journal))
csum_size = sizeof(struct jbd2_journal_revoke_tail); csum_size = sizeof(struct jbd2_journal_block_tail);
if (rcount > journal->j_blocksize - csum_size) if (rcount > journal->j_blocksize - csum_size)
return -EINVAL; return -EINVAL;
max = rcount; max = rcount;
......
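The recovery-side change above folds the revoke-specific verifier into the shared jbd2_descriptor_block_csum_verify(), which mirrors jbd2_descriptor_block_csum_set() earlier in this diff: the tail checksum field is zeroed (or saved and restored) while the checksum is computed over the whole block. A userspace sketch of that save/zero/compute/restore dance, using zlib's crc32 as a stand-in for the kernel's crc32c and ignoring endianness conversion:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <zlib.h>

#define BLOCKSIZE 4096

struct block_tail { uint32_t t_checksum; };   /* lives at the end of the block */

static uint32_t block_csum(const unsigned char *buf)
{
    return crc32(0L, buf, BLOCKSIZE);
}

static void csum_set(unsigned char *buf)
{
    struct block_tail *tail =
        (struct block_tail *)(buf + BLOCKSIZE - sizeof(*tail));
    tail->t_checksum = 0;                /* checksum is computed over a zeroed field */
    tail->t_checksum = block_csum(buf);
}

static int csum_verify(unsigned char *buf)
{
    struct block_tail *tail =
        (struct block_tail *)(buf + BLOCKSIZE - sizeof(*tail));
    uint32_t provided = tail->t_checksum;
    uint32_t calculated;

    tail->t_checksum = 0;                /* recompute exactly as the writer did */
    calculated = block_csum(buf);
    tail->t_checksum = provided;         /* restore before returning */
    return provided == calculated;
}

int main(void)
{
    unsigned char block[BLOCKSIZE] = "descriptor payload";
    csum_set(block);
    printf("intact: %d\n", csum_verify(block));   /* 1 */
    block[0] ^= 1;
    printf("corrupt: %d\n", csum_verify(block));  /* 0 */
    return 0;
}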
...@@ -122,11 +122,11 @@ struct jbd2_revoke_table_s ...@@ -122,11 +122,11 @@ struct jbd2_revoke_table_s
#ifdef __KERNEL__ #ifdef __KERNEL__
static void write_one_revoke_record(journal_t *, transaction_t *, static void write_one_revoke_record(transaction_t *,
struct list_head *, struct list_head *,
struct buffer_head **, int *, struct buffer_head **, int *,
struct jbd2_revoke_record_s *, int); struct jbd2_revoke_record_s *);
static void flush_descriptor(journal_t *, struct buffer_head *, int, int); static void flush_descriptor(journal_t *, struct buffer_head *, int);
#endif #endif
/* Utility functions to maintain the revoke table */ /* Utility functions to maintain the revoke table */
...@@ -519,11 +519,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) ...@@ -519,11 +519,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
* Write revoke records to the journal for all entries in the current * Write revoke records to the journal for all entries in the current
* revoke hash, deleting the entries as we go. * revoke hash, deleting the entries as we go.
*/ */
void jbd2_journal_write_revoke_records(journal_t *journal, void jbd2_journal_write_revoke_records(transaction_t *transaction,
transaction_t *transaction, struct list_head *log_bufs)
struct list_head *log_bufs,
int write_op)
{ {
journal_t *journal = transaction->t_journal;
struct buffer_head *descriptor; struct buffer_head *descriptor;
struct jbd2_revoke_record_s *record; struct jbd2_revoke_record_s *record;
struct jbd2_revoke_table_s *revoke; struct jbd2_revoke_table_s *revoke;
...@@ -544,16 +543,15 @@ void jbd2_journal_write_revoke_records(journal_t *journal, ...@@ -544,16 +543,15 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
while (!list_empty(hash_list)) { while (!list_empty(hash_list)) {
record = (struct jbd2_revoke_record_s *) record = (struct jbd2_revoke_record_s *)
hash_list->next; hash_list->next;
write_one_revoke_record(journal, transaction, log_bufs, write_one_revoke_record(transaction, log_bufs,
&descriptor, &offset, &descriptor, &offset, record);
record, write_op);
count++; count++;
list_del(&record->hash); list_del(&record->hash);
kmem_cache_free(jbd2_revoke_record_cache, record); kmem_cache_free(jbd2_revoke_record_cache, record);
} }
} }
if (descriptor) if (descriptor)
flush_descriptor(journal, descriptor, offset, write_op); flush_descriptor(journal, descriptor, offset);
jbd_debug(1, "Wrote %d revoke records\n", count); jbd_debug(1, "Wrote %d revoke records\n", count);
} }
...@@ -562,18 +560,16 @@ void jbd2_journal_write_revoke_records(journal_t *journal, ...@@ -562,18 +560,16 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
* block if the old one is full or if we have not already created one. * block if the old one is full or if we have not already created one.
*/ */
static void write_one_revoke_record(journal_t *journal, static void write_one_revoke_record(transaction_t *transaction,
transaction_t *transaction,
struct list_head *log_bufs, struct list_head *log_bufs,
struct buffer_head **descriptorp, struct buffer_head **descriptorp,
int *offsetp, int *offsetp,
struct jbd2_revoke_record_s *record, struct jbd2_revoke_record_s *record)
int write_op)
{ {
journal_t *journal = transaction->t_journal;
int csum_size = 0; int csum_size = 0;
struct buffer_head *descriptor; struct buffer_head *descriptor;
int sz, offset; int sz, offset;
journal_header_t *header;
/* If we are already aborting, this all becomes a noop. We /* If we are already aborting, this all becomes a noop. We
still need to go round the loop in still need to go round the loop in
...@@ -587,7 +583,7 @@ static void write_one_revoke_record(journal_t *journal, ...@@ -587,7 +583,7 @@ static void write_one_revoke_record(journal_t *journal,
/* Do we need to leave space at the end for a checksum? */ /* Do we need to leave space at the end for a checksum? */
if (jbd2_journal_has_csum_v2or3(journal)) if (jbd2_journal_has_csum_v2or3(journal))
csum_size = sizeof(struct jbd2_journal_revoke_tail); csum_size = sizeof(struct jbd2_journal_block_tail);
if (jbd2_has_feature_64bit(journal)) if (jbd2_has_feature_64bit(journal))
sz = 8; sz = 8;
...@@ -597,19 +593,16 @@ static void write_one_revoke_record(journal_t *journal, ...@@ -597,19 +593,16 @@ static void write_one_revoke_record(journal_t *journal,
/* Make sure we have a descriptor with space left for the record */ /* Make sure we have a descriptor with space left for the record */
if (descriptor) { if (descriptor) {
if (offset + sz > journal->j_blocksize - csum_size) { if (offset + sz > journal->j_blocksize - csum_size) {
flush_descriptor(journal, descriptor, offset, write_op); flush_descriptor(journal, descriptor, offset);
descriptor = NULL; descriptor = NULL;
} }
} }
if (!descriptor) { if (!descriptor) {
descriptor = jbd2_journal_get_descriptor_buffer(journal); descriptor = jbd2_journal_get_descriptor_buffer(transaction,
JBD2_REVOKE_BLOCK);
if (!descriptor) if (!descriptor)
return; return;
header = (journal_header_t *)descriptor->b_data;
header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK);
header->h_sequence = cpu_to_be32(transaction->t_tid);
/* Record it so that we can wait for IO completion later */ /* Record it so that we can wait for IO completion later */
BUFFER_TRACE(descriptor, "file in log_bufs"); BUFFER_TRACE(descriptor, "file in log_bufs");
...@@ -630,21 +623,6 @@ static void write_one_revoke_record(journal_t *journal, ...@@ -630,21 +623,6 @@ static void write_one_revoke_record(journal_t *journal,
*offsetp = offset; *offsetp = offset;
} }
static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
{
struct jbd2_journal_revoke_tail *tail;
__u32 csum;
if (!jbd2_journal_has_csum_v2or3(j))
return;
tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -
sizeof(struct jbd2_journal_revoke_tail));
tail->r_checksum = 0;
csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
tail->r_checksum = cpu_to_be32(csum);
}
/* /*
* Flush a revoke descriptor out to the journal. If we are aborting, * Flush a revoke descriptor out to the journal. If we are aborting,
* this is a noop; otherwise we are generating a buffer which needs to * this is a noop; otherwise we are generating a buffer which needs to
...@@ -654,7 +632,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) ...@@ -654,7 +632,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
static void flush_descriptor(journal_t *journal, static void flush_descriptor(journal_t *journal,
struct buffer_head *descriptor, struct buffer_head *descriptor,
int offset, int write_op) int offset)
{ {
jbd2_journal_revoke_header_t *header; jbd2_journal_revoke_header_t *header;
...@@ -665,12 +643,12 @@ static void flush_descriptor(journal_t *journal, ...@@ -665,12 +643,12 @@ static void flush_descriptor(journal_t *journal,
header = (jbd2_journal_revoke_header_t *)descriptor->b_data; header = (jbd2_journal_revoke_header_t *)descriptor->b_data;
header->r_count = cpu_to_be32(offset); header->r_count = cpu_to_be32(offset);
jbd2_revoke_csum_set(journal, descriptor); jbd2_descriptor_block_csum_set(journal, descriptor);
set_buffer_jwrite(descriptor); set_buffer_jwrite(descriptor);
BUFFER_TRACE(descriptor, "write"); BUFFER_TRACE(descriptor, "write");
set_buffer_dirty(descriptor); set_buffer_dirty(descriptor);
write_dirty_buffer(descriptor, write_op); write_dirty_buffer(descriptor, WRITE_SYNC);
} }
#endif #endif
......
...@@ -966,14 +966,8 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -966,14 +966,8 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
if (!frozen_buffer) { if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer"); JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
if (!frozen_buffer) { GFP_NOFS | __GFP_NOFAIL);
printk(KERN_ERR "%s: OOM for frozen_buffer\n",
__func__);
JBUFFER_TRACE(jh, "oom!");
error = -ENOMEM;
goto out;
}
goto repeat; goto repeat;
} }
jh->b_frozen_data = frozen_buffer; jh->b_frozen_data = frozen_buffer;
...@@ -1226,15 +1220,9 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) ...@@ -1226,15 +1220,9 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
goto out; goto out;
repeat: repeat:
if (!jh->b_committed_data) { if (!jh->b_committed_data)
committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); committed_data = jbd2_alloc(jh2bh(jh)->b_size,
if (!committed_data) { GFP_NOFS|__GFP_NOFAIL);
printk(KERN_ERR "%s: No memory for committed data\n",
__func__);
err = -ENOMEM;
goto out;
}
}
jbd_lock_bh_state(bh); jbd_lock_bh_state(bh);
if (!jh->b_committed_data) { if (!jh->b_committed_data) {
......
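The transaction.c hunks above drop the ENOMEM unwinding entirely by allocating with __GFP_NOFAIL, under which the page allocator retries internally instead of returning NULL. A rough userspace analogue of "an allocation that never reports failure"; the kernel's __GFP_NOFAIL semantics are of course richer than a plain retry loop:

#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* toy stand-in for jbd2_alloc(size, GFP_NOFS | __GFP_NOFAIL):
 * keep retrying until the allocation succeeds, so callers need
 * no error path at all */
static void *alloc_nofail(size_t size)
{
    void *p;

    while (!(p = malloc(size)))
        usleep(1000);   /* back off and retry, as the page allocator would */
    return p;
}

int main(void)
{
    char *frozen_buffer = alloc_nofail(4096);
    memset(frozen_buffer, 0, 4096);
    free(frozen_buffer);
    return 0;
}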
...@@ -200,7 +200,7 @@ typedef struct journal_block_tag_s ...@@ -200,7 +200,7 @@ typedef struct journal_block_tag_s
__be32 t_blocknr_high; /* most-significant high 32bits. */ __be32 t_blocknr_high; /* most-significant high 32bits. */
} journal_block_tag_t; } journal_block_tag_t;
/* Tail of descriptor block, for checksumming */ /* Tail of descriptor or revoke block, for checksumming */
struct jbd2_journal_block_tail { struct jbd2_journal_block_tail {
__be32 t_checksum; /* crc32c(uuid+descr_block) */ __be32 t_checksum; /* crc32c(uuid+descr_block) */
}; };
...@@ -215,11 +215,6 @@ typedef struct jbd2_journal_revoke_header_s ...@@ -215,11 +215,6 @@ typedef struct jbd2_journal_revoke_header_s
__be32 r_count; /* Count of bytes used in the block */ __be32 r_count; /* Count of bytes used in the block */
} jbd2_journal_revoke_header_t; } jbd2_journal_revoke_header_t;
/* Tail of revoke block, for checksumming */
struct jbd2_journal_revoke_tail {
__be32 r_checksum; /* crc32c(uuid+revoke_block) */
};
/* Definitions for the journal tag flags word: */ /* Definitions for the journal tag flags word: */
#define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ #define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */
#define JBD2_FLAG_SAME_UUID 2 /* block has same uuid as previous */ #define JBD2_FLAG_SAME_UUID 2 /* block has same uuid as previous */
...@@ -1137,7 +1132,8 @@ static inline void jbd2_unfile_log_bh(struct buffer_head *bh) ...@@ -1137,7 +1132,8 @@ static inline void jbd2_unfile_log_bh(struct buffer_head *bh)
} }
/* Log buffer allocation */ /* Log buffer allocation */
struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal); struct buffer_head *jbd2_journal_get_descriptor_buffer(transaction_t *, int);
void jbd2_descriptor_block_csum_set(journal_t *, struct buffer_head *);
int jbd2_journal_next_log_block(journal_t *, unsigned long long *); int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
unsigned long *block); unsigned long *block);
...@@ -1327,10 +1323,8 @@ extern int jbd2_journal_init_revoke_caches(void); ...@@ -1327,10 +1323,8 @@ extern int jbd2_journal_init_revoke_caches(void);
extern void jbd2_journal_destroy_revoke(journal_t *); extern void jbd2_journal_destroy_revoke(journal_t *);
extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *);
extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *);
extern void jbd2_journal_write_revoke_records(journal_t *journal, extern void jbd2_journal_write_revoke_records(transaction_t *transaction,
transaction_t *transaction, struct list_head *log_bufs);
struct list_head *log_bufs,
int write_op);
/* Recovery revoke support */ /* Recovery revoke support */
extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
......
/* #ifndef _LINUX_MBCACHE_H
File: linux/mbcache.h #define _LINUX_MBCACHE_H
(C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> #include <linux/hash.h>
*/ #include <linux/list_bl.h>
struct mb_cache_entry { #include <linux/list.h>
struct list_head e_lru_list; #include <linux/atomic.h>
struct mb_cache *e_cache; #include <linux/fs.h>
unsigned short e_used;
unsigned short e_queued;
atomic_t e_refcnt;
struct block_device *e_bdev;
sector_t e_block;
struct hlist_bl_node e_block_list;
struct {
struct hlist_bl_node o_list;
unsigned int o_key;
} e_index;
struct hlist_bl_head *e_block_hash_p;
struct hlist_bl_head *e_index_hash_p;
};
struct mb_cache { struct mb_cache;
struct list_head c_cache_list;
const char *c_name;
atomic_t c_entry_count;
int c_max_entries;
int c_bucket_bits;
struct kmem_cache *c_entry_cache;
struct hlist_bl_head *c_block_hash;
struct hlist_bl_head *c_index_hash;
};
/* Functions on caches */ struct mb_cache_entry {
/* List of entries in cache - protected by cache->c_list_lock */
struct list_head e_list;
/* Hash table list - protected by hash chain bitlock */
struct hlist_bl_node e_hash_list;
atomic_t e_refcnt;
/* Key in hash - stable during lifetime of the entry */
u32 e_key;
u32 e_referenced:1;
u32 e_reusable:1;
/* Block number of hashed block - stable during lifetime of the entry */
sector_t e_block;
};
struct mb_cache *mb_cache_create(const char *, int); struct mb_cache *mb_cache_create(int bucket_bits);
void mb_cache_shrink(struct block_device *); void mb_cache_destroy(struct mb_cache *cache);
void mb_cache_destroy(struct mb_cache *);
/* Functions on cache entries */ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
sector_t block, bool reusable);
void __mb_cache_entry_free(struct mb_cache_entry *entry);
static inline int mb_cache_entry_put(struct mb_cache *cache,
struct mb_cache_entry *entry)
{
if (!atomic_dec_and_test(&entry->e_refcnt))
return 0;
__mb_cache_entry_free(entry);
return 1;
}
struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *, gfp_t); void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
int mb_cache_entry_insert(struct mb_cache_entry *, struct block_device *, sector_t block);
sector_t, unsigned int); struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
void mb_cache_entry_release(struct mb_cache_entry *); sector_t block);
void mb_cache_entry_free(struct mb_cache_entry *);
struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *,
struct block_device *,
sector_t);
struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
struct block_device *, u32 key);
unsigned int); struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, struct mb_cache_entry *entry);
struct block_device *, void mb_cache_entry_touch(struct mb_cache *cache,
unsigned int); struct mb_cache_entry *entry);
#endif /* _LINUX_MBCACHE_H */
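The rewritten mbcache.h above is the visible edge of the xattr-scalability work: entries are now keyed by a 32-bit hash instead of a block device pointer, carry a block number plus e_referenced/e_reusable bits, and are reference counted through mb_cache_entry_put(). A compact single-threaded userspace model of how a filesystem might drive this API shape when sharing xattr blocks; all names here are toy stand-ins, and the real cache additionally uses per-chain bitlocks, an LRU list, and a shrinker:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* toy refcounted cache entry, same shape as struct mb_cache_entry */
struct entry {
    struct entry *next;   /* hash chain */
    uint32_t key;         /* e_key: hash of the xattr block contents */
    uint64_t block;       /* e_block */
    int refcnt;
    bool reusable;        /* e_reusable: refcount still below the sharing limit */
};

#define BUCKETS 64
static struct entry *table[BUCKETS];

static struct entry *entry_create(uint32_t key, uint64_t block, bool reusable)
{
    struct entry *e = calloc(1, sizeof(*e));
    if (!e)
        exit(1);
    e->key = key; e->block = block; e->reusable = reusable;
    e->refcnt = 1;                        /* the cache's own reference */
    e->next = table[key % BUCKETS];
    table[key % BUCKETS] = e;
    return e;
}

static struct entry *entry_find_first(uint32_t key)
{
    struct entry *e;
    for (e = table[key % BUCKETS]; e; e = e->next)
        if (e->key == key && e->reusable) {
            e->refcnt++;                  /* caller now holds a reference */
            return e;
        }
    return NULL;
}

static void entry_put(struct entry *e)
{
    if (--e->refcnt == 0)
        free(e);                          /* unhashing omitted in this toy */
}

int main(void)
{
    uint32_t h = 0xcafe;                  /* hash of some xattr block */

    entry_create(h, 1234, true);          /* inserted after the first write */

    struct entry *e = entry_find_first(h);/* second inode with the same xattrs */
    if (e) {
        printf("sharing xattr block %llu\n",
               (unsigned long long)e->block);
        entry_put(e);
    }
    return 0;
}

In ext4's actual use, ext4_xattr_cache_find() walks find_first/find_next, and ext4_xattr_release_block() deletes or re-marks the entry under the buffer lock, which is what makes the race checks in ext4_xattr_block_set() above reliable.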