Commit a5adcfca authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "A large number of bug fixes and cleanups.

  One new feature to allow users to more easily find the jbd2 journal
  thread for a particular ext4 file system"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits)
  jbd2: jbd2_get_transaction does not need to return a value
  jbd2: fix invalid descriptor block checksum
  ext4: fix bigalloc cluster freeing when hole punching under load
  ext4: add sysfs attr /sys/fs/ext4/<disk>/journal_task
  ext4: Change debugging support help prefix from EXT4 to Ext4
  ext4: fix compile error when using BUFFER_TRACE
  jbd2: fix compile warning when using JBUFFER_TRACE
  ext4: fix some error pointer dereferences
  ext4: annotate more implicit fall throughs
  ext4: annotate implicit fall throughs
  ext4: don't update s_rev_level if not required
  jbd2: fold jbd2_superblock_csum_{verify,set} into their callers
  jbd2: fix race when writing superblock
  ext4: fix crash during online resizing
  ext4: disallow files with EXT4_JOURNAL_DATA_FL from EXT4_IOC_SWAP_BOOT
  ext4: add mask of ext4 flags to swap
  ext4: update quota information while swapping boot loader inode
  ext4: cleanup pagecache before swap i_data
  ext4: fix check of inode in swap_inode_boot_loader
  ext4: unlock unused_pages timely when doing writeback
  ...
parents 2b0a80b0 0df6f469
...@@ -109,3 +109,10 @@ Description: ...@@ -109,3 +109,10 @@ Description:
write operation (since a 4k random write might turn write operation (since a 4k random write might turn
into a much larger write due to the zeroout into a much larger write due to the zeroout
operation). operation).
What: /sys/fs/ext4/<disk>/journal_task
Date: February 2019
Contact: "Theodore Ts'o" <tytso@mit.edu>
Description:
This file is read-only and shows the pid of the journal thread in
the current pid-namespace, or 0 if the task is unreachable. If the
file system has no journal, it shows "<none>".
...@@ -97,7 +97,7 @@ config EXT4_FS_SECURITY ...@@ -97,7 +97,7 @@ config EXT4_FS_SECURITY
extended attributes for file security labels, say N. extended attributes for file security labels, say N.
config EXT4_DEBUG config EXT4_DEBUG
bool "EXT4 debugging support" bool "Ext4 debugging support"
depends on EXT4_FS depends on EXT4_FS
help help
Enables run-time debugging support for the ext4 filesystem. Enables run-time debugging support for the ext4 filesystem.
......
...@@ -425,6 +425,9 @@ struct flex_groups { ...@@ -425,6 +425,9 @@ struct flex_groups {
/* Flags that are appropriate for non-directories/regular files. */ /* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL) #define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
/* The only flags that should be swapped */
#define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL)
/* Mask out flags that are inappropriate for the given type of inode. */ /* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
{ {
...@@ -1661,6 +1664,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) ...@@ -1661,6 +1664,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */ #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
#define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000 #define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000
extern void ext4_update_dynamic_rev(struct super_block *sb);
#define EXT4_FEATURE_COMPAT_FUNCS(name, flagname) \ #define EXT4_FEATURE_COMPAT_FUNCS(name, flagname) \
static inline bool ext4_has_feature_##name(struct super_block *sb) \ static inline bool ext4_has_feature_##name(struct super_block *sb) \
{ \ { \
...@@ -1669,6 +1674,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \ ...@@ -1669,6 +1674,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \
} \ } \
static inline void ext4_set_feature_##name(struct super_block *sb) \ static inline void ext4_set_feature_##name(struct super_block *sb) \
{ \ { \
ext4_update_dynamic_rev(sb); \
EXT4_SB(sb)->s_es->s_feature_compat |= \ EXT4_SB(sb)->s_es->s_feature_compat |= \
cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname); \ cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname); \
} \ } \
...@@ -1686,6 +1692,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \ ...@@ -1686,6 +1692,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \
} \ } \
static inline void ext4_set_feature_##name(struct super_block *sb) \ static inline void ext4_set_feature_##name(struct super_block *sb) \
{ \ { \
ext4_update_dynamic_rev(sb); \
EXT4_SB(sb)->s_es->s_feature_ro_compat |= \ EXT4_SB(sb)->s_es->s_feature_ro_compat |= \
cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname); \ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname); \
} \ } \
...@@ -1703,6 +1710,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \ ...@@ -1703,6 +1710,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \
} \ } \
static inline void ext4_set_feature_##name(struct super_block *sb) \ static inline void ext4_set_feature_##name(struct super_block *sb) \
{ \ { \
ext4_update_dynamic_rev(sb); \
EXT4_SB(sb)->s_es->s_feature_incompat |= \ EXT4_SB(sb)->s_es->s_feature_incompat |= \
cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname); \ cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname); \
} \ } \
...@@ -2666,7 +2674,6 @@ do { \ ...@@ -2666,7 +2674,6 @@ do { \
#endif #endif
extern void ext4_update_dynamic_rev(struct super_block *sb);
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
__u32 compat); __u32 compat);
extern int ext4_update_rocompat_feature(handle_t *handle, extern int ext4_update_rocompat_feature(handle_t *handle,
......
...@@ -2956,14 +2956,17 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, ...@@ -2956,14 +2956,17 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
if (err < 0) if (err < 0)
goto out; goto out;
} else if (sbi->s_cluster_ratio > 1 && end >= ex_end) { } else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
partial.state == initial) {
/* /*
* If there's an extent to the right its first cluster * If we're punching, there's an extent to the right.
* contains the immediate right boundary of the * If the partial cluster hasn't been set, set it to
* truncated/punched region. Set partial_cluster to * that extent's first cluster and its state to nofree
* its negative value so it won't be freed if shared * so it won't be freed should it contain blocks to be
* with the current extent. The end < ee_block case * removed. If it's already set (tofree/nofree), we're
* is handled in ext4_ext_rm_leaf(). * retrying and keep the original partial cluster info
* so a cluster marked tofree as a result of earlier
* extent removal is not lost.
*/ */
lblk = ex_end + 1; lblk = ex_end + 1;
err = ext4_ext_search_right(inode, path, &lblk, &pblk, err = ext4_ext_search_right(inode, path, &lblk, &pblk,
...@@ -4048,18 +4051,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, ...@@ -4048,18 +4051,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
} else } else
allocated = ret; allocated = ret;
map->m_flags |= EXT4_MAP_NEW; map->m_flags |= EXT4_MAP_NEW;
/* if (allocated > map->m_len)
* if we allocated more blocks than requested
* we need to make sure we unmap the extra block
* allocated. The actual needed block will get
* unmapped later when we find the buffer_head marked
* new.
*/
if (allocated > map->m_len) {
clean_bdev_aliases(inode->i_sb->s_bdev, newblock + map->m_len,
allocated - map->m_len);
allocated = map->m_len; allocated = map->m_len;
}
map->m_len = allocated; map->m_len = allocated;
map_out: map_out:
......
...@@ -231,6 +231,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) ...@@ -231,6 +231,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
break; break;
case DX_HASH_HALF_MD4_UNSIGNED: case DX_HASH_HALF_MD4_UNSIGNED:
str2hashbuf = str2hashbuf_unsigned; str2hashbuf = str2hashbuf_unsigned;
/* fall through */
case DX_HASH_HALF_MD4: case DX_HASH_HALF_MD4:
p = name; p = name;
while (len > 0) { while (len > 0) {
...@@ -244,6 +245,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) ...@@ -244,6 +245,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
break; break;
case DX_HASH_TEA_UNSIGNED: case DX_HASH_TEA_UNSIGNED:
str2hashbuf = str2hashbuf_unsigned; str2hashbuf = str2hashbuf_unsigned;
/* fall through */
case DX_HASH_TEA: case DX_HASH_TEA:
p = name; p = name;
while (len > 0) { while (len > 0) {
......
...@@ -1183,18 +1183,21 @@ void ext4_ind_truncate(handle_t *handle, struct inode *inode) ...@@ -1183,18 +1183,21 @@ void ext4_ind_truncate(handle_t *handle, struct inode *inode)
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
i_data[EXT4_IND_BLOCK] = 0; i_data[EXT4_IND_BLOCK] = 0;
} }
/* fall through */
case EXT4_IND_BLOCK: case EXT4_IND_BLOCK:
nr = i_data[EXT4_DIND_BLOCK]; nr = i_data[EXT4_DIND_BLOCK];
if (nr) { if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
i_data[EXT4_DIND_BLOCK] = 0; i_data[EXT4_DIND_BLOCK] = 0;
} }
/* fall through */
case EXT4_DIND_BLOCK: case EXT4_DIND_BLOCK:
nr = i_data[EXT4_TIND_BLOCK]; nr = i_data[EXT4_TIND_BLOCK];
if (nr) { if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
i_data[EXT4_TIND_BLOCK] = 0; i_data[EXT4_TIND_BLOCK] = 0;
} }
/* fall through */
case EXT4_TIND_BLOCK: case EXT4_TIND_BLOCK:
; ;
} }
...@@ -1433,6 +1436,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, ...@@ -1433,6 +1436,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
i_data[EXT4_IND_BLOCK] = 0; i_data[EXT4_IND_BLOCK] = 0;
} }
/* fall through */
case EXT4_IND_BLOCK: case EXT4_IND_BLOCK:
if (++n >= n2) if (++n >= n2)
return 0; return 0;
...@@ -1441,6 +1445,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, ...@@ -1441,6 +1445,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
i_data[EXT4_DIND_BLOCK] = 0; i_data[EXT4_DIND_BLOCK] = 0;
} }
/* fall through */
case EXT4_DIND_BLOCK: case EXT4_DIND_BLOCK:
if (++n >= n2) if (++n >= n2)
return 0; return 0;
...@@ -1449,6 +1454,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, ...@@ -1449,6 +1454,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
i_data[EXT4_TIND_BLOCK] = 0; i_data[EXT4_TIND_BLOCK] = 0;
} }
/* fall through */
case EXT4_TIND_BLOCK: case EXT4_TIND_BLOCK:
; ;
} }
......
...@@ -391,7 +391,7 @@ void ext4_da_update_reserve_space(struct inode *inode, ...@@ -391,7 +391,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
* inode's preallocations. * inode's preallocations.
*/ */
if ((ei->i_reserved_data_blocks == 0) && if ((ei->i_reserved_data_blocks == 0) &&
(atomic_read(&inode->i_writecount) == 0)) !inode_is_open_for_write(inode))
ext4_discard_preallocations(inode); ext4_discard_preallocations(inode);
} }
...@@ -678,8 +678,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -678,8 +678,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
if (flags & EXT4_GET_BLOCKS_ZERO && if (flags & EXT4_GET_BLOCKS_ZERO &&
map->m_flags & EXT4_MAP_MAPPED && map->m_flags & EXT4_MAP_MAPPED &&
map->m_flags & EXT4_MAP_NEW) { map->m_flags & EXT4_MAP_NEW) {
clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
map->m_len);
ret = ext4_issue_zeroout(inode, map->m_lblk, ret = ext4_issue_zeroout(inode, map->m_lblk,
map->m_pblk, map->m_len); map->m_pblk, map->m_len);
if (ret) { if (ret) {
...@@ -1194,7 +1192,6 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, ...@@ -1194,7 +1192,6 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
if (err) if (err)
break; break;
if (buffer_new(bh)) { if (buffer_new(bh)) {
clean_bdev_bh_alias(bh);
if (PageUptodate(page)) { if (PageUptodate(page)) {
clear_buffer_new(bh); clear_buffer_new(bh);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
...@@ -2489,10 +2486,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) ...@@ -2489,10 +2486,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
} }
BUG_ON(map->m_len == 0); BUG_ON(map->m_len == 0);
if (map->m_flags & EXT4_MAP_NEW) {
clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
map->m_len);
}
return 0; return 0;
} }
...@@ -2835,12 +2828,12 @@ static int ext4_writepages(struct address_space *mapping, ...@@ -2835,12 +2828,12 @@ static int ext4_writepages(struct address_space *mapping,
goto unplug; goto unplug;
} }
ret = mpage_prepare_extent_to_map(&mpd); ret = mpage_prepare_extent_to_map(&mpd);
/* Unlock pages we didn't use */
mpage_release_unused_pages(&mpd, false);
/* Submit prepared bio */ /* Submit prepared bio */
ext4_io_submit(&mpd.io_submit); ext4_io_submit(&mpd.io_submit);
ext4_put_io_end_defer(mpd.io_submit.io_end); ext4_put_io_end_defer(mpd.io_submit.io_end);
mpd.io_submit.io_end = NULL; mpd.io_submit.io_end = NULL;
/* Unlock pages we didn't use */
mpage_release_unused_pages(&mpd, false);
if (ret < 0) if (ret < 0)
goto unplug; goto unplug;
...@@ -2908,10 +2901,11 @@ static int ext4_writepages(struct address_space *mapping, ...@@ -2908,10 +2901,11 @@ static int ext4_writepages(struct address_space *mapping,
handle = NULL; handle = NULL;
mpd.do_map = 0; mpd.do_map = 0;
} }
/* Submit prepared bio */
ext4_io_submit(&mpd.io_submit);
/* Unlock pages we didn't use */ /* Unlock pages we didn't use */
mpage_release_unused_pages(&mpd, give_up_on_write); mpage_release_unused_pages(&mpd, give_up_on_write);
/* Submit prepared bio */
ext4_io_submit(&mpd.io_submit);
/* /*
* Drop our io_end reference we got from init. We have * Drop our io_end reference we got from init. We have
* to be careful and use deferred io_end finishing if * to be careful and use deferred io_end finishing if
...@@ -5349,7 +5343,6 @@ static int ext4_do_update_inode(handle_t *handle, ...@@ -5349,7 +5343,6 @@ static int ext4_do_update_inode(handle_t *handle,
err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
if (err) if (err)
goto out_brelse; goto out_brelse;
ext4_update_dynamic_rev(sb);
ext4_set_feature_large_file(sb); ext4_set_feature_large_file(sb);
ext4_handle_sync(handle); ext4_handle_sync(handle);
err = ext4_handle_dirty_super(handle, sb); err = ext4_handle_dirty_super(handle, sb);
...@@ -6000,7 +5993,7 @@ int ext4_expand_extra_isize(struct inode *inode, ...@@ -6000,7 +5993,7 @@ int ext4_expand_extra_isize(struct inode *inode,
ext4_write_lock_xattr(inode, &no_expand); ext4_write_lock_xattr(inode, &no_expand);
BUFFER_TRACE(iloc.bh, "get_write_access"); BUFFER_TRACE(iloc->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, iloc->bh); error = ext4_journal_get_write_access(handle, iloc->bh);
if (error) { if (error) {
brelse(iloc->bh); brelse(iloc->bh);
......
...@@ -63,18 +63,20 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) ...@@ -63,18 +63,20 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
loff_t isize; loff_t isize;
struct ext4_inode_info *ei1; struct ext4_inode_info *ei1;
struct ext4_inode_info *ei2; struct ext4_inode_info *ei2;
unsigned long tmp;
ei1 = EXT4_I(inode1); ei1 = EXT4_I(inode1);
ei2 = EXT4_I(inode2); ei2 = EXT4_I(inode2);
swap(inode1->i_version, inode2->i_version); swap(inode1->i_version, inode2->i_version);
swap(inode1->i_blocks, inode2->i_blocks);
swap(inode1->i_bytes, inode2->i_bytes);
swap(inode1->i_atime, inode2->i_atime); swap(inode1->i_atime, inode2->i_atime);
swap(inode1->i_mtime, inode2->i_mtime); swap(inode1->i_mtime, inode2->i_mtime);
memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
swap(ei1->i_flags, ei2->i_flags); tmp = ei1->i_flags & EXT4_FL_SHOULD_SWAP;
ei1->i_flags = (ei2->i_flags & EXT4_FL_SHOULD_SWAP) |
(ei1->i_flags & ~EXT4_FL_SHOULD_SWAP);
ei2->i_flags = tmp | (ei2->i_flags & ~EXT4_FL_SHOULD_SWAP);
swap(ei1->i_disksize, ei2->i_disksize); swap(ei1->i_disksize, ei2->i_disksize);
ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
...@@ -115,28 +117,42 @@ static long swap_inode_boot_loader(struct super_block *sb, ...@@ -115,28 +117,42 @@ static long swap_inode_boot_loader(struct super_block *sb,
int err; int err;
struct inode *inode_bl; struct inode *inode_bl;
struct ext4_inode_info *ei_bl; struct ext4_inode_info *ei_bl;
qsize_t size, size_bl, diff;
if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode) || blkcnt_t blocks;
IS_SWAPFILE(inode) || IS_ENCRYPTED(inode) || unsigned short bytes;
ext4_has_inline_data(inode))
return -EINVAL;
if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) ||
!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
return -EPERM;
inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL); inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(inode_bl)) if (IS_ERR(inode_bl))
return PTR_ERR(inode_bl); return PTR_ERR(inode_bl);
ei_bl = EXT4_I(inode_bl); ei_bl = EXT4_I(inode_bl);
filemap_flush(inode->i_mapping);
filemap_flush(inode_bl->i_mapping);
/* Protect orig inodes against a truncate and make sure, /* Protect orig inodes against a truncate and make sure,
* that only 1 swap_inode_boot_loader is running. */ * that only 1 swap_inode_boot_loader is running. */
lock_two_nondirectories(inode, inode_bl); lock_two_nondirectories(inode, inode_bl);
if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode) ||
IS_SWAPFILE(inode) || IS_ENCRYPTED(inode) ||
(EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) ||
ext4_has_inline_data(inode)) {
err = -EINVAL;
goto journal_err_out;
}
if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) ||
!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) {
err = -EPERM;
goto journal_err_out;
}
down_write(&EXT4_I(inode)->i_mmap_sem);
err = filemap_write_and_wait(inode->i_mapping);
if (err)
goto err_out;
err = filemap_write_and_wait(inode_bl->i_mapping);
if (err)
goto err_out;
/* Wait for all existing dio workers */ /* Wait for all existing dio workers */
inode_dio_wait(inode); inode_dio_wait(inode);
inode_dio_wait(inode_bl); inode_dio_wait(inode_bl);
...@@ -147,7 +163,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ...@@ -147,7 +163,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2); handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
err = -EINVAL; err = -EINVAL;
goto journal_err_out; goto err_out;
} }
/* Protect extent tree against block allocations via delalloc */ /* Protect extent tree against block allocations via delalloc */
...@@ -170,6 +186,13 @@ static long swap_inode_boot_loader(struct super_block *sb, ...@@ -170,6 +186,13 @@ static long swap_inode_boot_loader(struct super_block *sb,
memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data)); memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data));
} }
err = dquot_initialize(inode);
if (err)
goto err_out1;
size = (qsize_t)(inode->i_blocks) * (1 << 9) + inode->i_bytes;
size_bl = (qsize_t)(inode_bl->i_blocks) * (1 << 9) + inode_bl->i_bytes;
diff = size - size_bl;
swap_inode_data(inode, inode_bl); swap_inode_data(inode, inode_bl);
inode->i_ctime = inode_bl->i_ctime = current_time(inode); inode->i_ctime = inode_bl->i_ctime = current_time(inode);
...@@ -183,27 +206,51 @@ static long swap_inode_boot_loader(struct super_block *sb, ...@@ -183,27 +206,51 @@ static long swap_inode_boot_loader(struct super_block *sb,
err = ext4_mark_inode_dirty(handle, inode); err = ext4_mark_inode_dirty(handle, inode);
if (err < 0) { if (err < 0) {
/* No need to update quota information. */
ext4_warning(inode->i_sb, ext4_warning(inode->i_sb,
"couldn't mark inode #%lu dirty (err %d)", "couldn't mark inode #%lu dirty (err %d)",
inode->i_ino, err); inode->i_ino, err);
/* Revert all changes: */ /* Revert all changes: */
swap_inode_data(inode, inode_bl); swap_inode_data(inode, inode_bl);
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
} else { goto err_out1;
err = ext4_mark_inode_dirty(handle, inode_bl); }
if (err < 0) {
ext4_warning(inode_bl->i_sb, blocks = inode_bl->i_blocks;
"couldn't mark inode #%lu dirty (err %d)", bytes = inode_bl->i_bytes;
inode_bl->i_ino, err); inode_bl->i_blocks = inode->i_blocks;
/* Revert all changes: */ inode_bl->i_bytes = inode->i_bytes;
swap_inode_data(inode, inode_bl); err = ext4_mark_inode_dirty(handle, inode_bl);
ext4_mark_inode_dirty(handle, inode); if (err < 0) {
ext4_mark_inode_dirty(handle, inode_bl); /* No need to update quota information. */
} ext4_warning(inode_bl->i_sb,
"couldn't mark inode #%lu dirty (err %d)",
inode_bl->i_ino, err);
goto revert;
}
/* Bootloader inode should not be counted into quota information. */
if (diff > 0)
dquot_free_space(inode, diff);
else
err = dquot_alloc_space(inode, -1 * diff);
if (err < 0) {
revert:
/* Revert all changes: */
inode_bl->i_blocks = blocks;
inode_bl->i_bytes = bytes;
swap_inode_data(inode, inode_bl);
ext4_mark_inode_dirty(handle, inode);
ext4_mark_inode_dirty(handle, inode_bl);
} }
err_out1:
ext4_journal_stop(handle); ext4_journal_stop(handle);
ext4_double_up_write_data_sem(inode, inode_bl); ext4_double_up_write_data_sem(inode, inode_bl);
err_out:
up_write(&EXT4_I(inode)->i_mmap_sem);
journal_err_out: journal_err_out:
unlock_two_nondirectories(inode, inode_bl); unlock_two_nondirectories(inode, inode_bl);
iput(inode_bl); iput(inode_bl);
......
...@@ -4176,9 +4176,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) ...@@ -4176,9 +4176,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
>> bsbits; >> bsbits;
if ((size == isize) && if ((size == isize) && !ext4_fs_is_busy(sbi) &&
!ext4_fs_is_busy(sbi) && !inode_is_open_for_write(ac->ac_inode)) {
(atomic_read(&ac->ac_inode->i_writecount) == 0)) {
ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
return; return;
} }
...@@ -4258,7 +4257,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ...@@ -4258,7 +4257,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
(unsigned) ar->goal, ac->ac_flags, ac->ac_2order, (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
(unsigned) ar->lleft, (unsigned) ar->pleft, (unsigned) ar->lleft, (unsigned) ar->pleft,
(unsigned) ar->lright, (unsigned) ar->pright, (unsigned) ar->lright, (unsigned) ar->pright,
atomic_read(&ar->inode->i_writecount) ? "" : "non-"); inode_is_open_for_write(ar->inode) ? "" : "non-");
return 0; return 0;
} }
......
...@@ -468,10 +468,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, ...@@ -468,10 +468,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
ext4_io_submit(io); ext4_io_submit(io);
continue; continue;
} }
if (buffer_new(bh)) { if (buffer_new(bh))
clear_buffer_new(bh); clear_buffer_new(bh);
clean_bdev_bh_alias(bh);
}
set_buffer_async_write(bh); set_buffer_async_write(bh);
nr_to_submit++; nr_to_submit++;
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
......
...@@ -1960,7 +1960,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) ...@@ -1960,7 +1960,8 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
le16_to_cpu(es->s_reserved_gdt_blocks); le16_to_cpu(es->s_reserved_gdt_blocks);
n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
n_blocks_count = (ext4_fsblk_t)n_group * n_blocks_count = (ext4_fsblk_t)n_group *
EXT4_BLOCKS_PER_GROUP(sb); EXT4_BLOCKS_PER_GROUP(sb) +
le32_to_cpu(es->s_first_data_block);
n_group--; /* set to last group number */ n_group--; /* set to last group number */
} }
......
...@@ -2249,7 +2249,6 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, ...@@ -2249,7 +2249,6 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1); le16_add_cpu(&es->s_mnt_count, 1);
ext4_update_tstamp(es, s_mtime); ext4_update_tstamp(es, s_mtime);
ext4_update_dynamic_rev(sb);
if (sbi->s_journal) if (sbi->s_journal)
ext4_set_feature_journal_needs_recovery(sb); ext4_set_feature_journal_needs_recovery(sb);
......
...@@ -30,6 +30,7 @@ typedef enum { ...@@ -30,6 +30,7 @@ typedef enum {
attr_feature, attr_feature,
attr_pointer_ui, attr_pointer_ui,
attr_pointer_atomic, attr_pointer_atomic,
attr_journal_task,
} attr_id_t; } attr_id_t;
typedef enum { typedef enum {
...@@ -125,6 +126,14 @@ static ssize_t trigger_test_error(struct ext4_sb_info *sbi, ...@@ -125,6 +126,14 @@ static ssize_t trigger_test_error(struct ext4_sb_info *sbi,
return count; return count;
} }
/*
 * Format the contents of the journal_task sysfs attribute into buf.
 *
 * When sbi has a journal, the output is the value task_pid_vnr() returns
 * for the journal's j_task, followed by a newline.  Without a journal the
 * output is the literal string "<none>".  Returns whatever snprintf()
 * returns for the formatted text.
 */
static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
{
	if (sbi->s_journal)
		return snprintf(buf, PAGE_SIZE, "%d\n",
				task_pid_vnr(sbi->s_journal->j_task));

	/* No journal attached to this superblock info. */
	return snprintf(buf, PAGE_SIZE, "<none>\n");
}
#define EXT4_ATTR(_name,_mode,_id) \ #define EXT4_ATTR(_name,_mode,_id) \
static struct ext4_attr ext4_attr_##_name = { \ static struct ext4_attr ext4_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \ .attr = {.name = __stringify(_name), .mode = _mode }, \
...@@ -188,6 +197,7 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); ...@@ -188,6 +197,7 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
EXT4_ATTR(first_error_time, 0444, first_error_time); EXT4_ATTR(first_error_time, 0444, first_error_time);
EXT4_ATTR(last_error_time, 0444, last_error_time); EXT4_ATTR(last_error_time, 0444, last_error_time);
EXT4_ATTR(journal_task, 0444, journal_task);
static unsigned int old_bump_val = 128; static unsigned int old_bump_val = 128;
EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
...@@ -217,6 +227,7 @@ static struct attribute *ext4_attrs[] = { ...@@ -217,6 +227,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(errors_count), ATTR_LIST(errors_count),
ATTR_LIST(first_error_time), ATTR_LIST(first_error_time),
ATTR_LIST(last_error_time), ATTR_LIST(last_error_time),
ATTR_LIST(journal_task),
NULL, NULL,
}; };
...@@ -304,6 +315,8 @@ static ssize_t ext4_attr_show(struct kobject *kobj, ...@@ -304,6 +315,8 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
return print_tstamp(buf, sbi->s_es, s_first_error_time); return print_tstamp(buf, sbi->s_es, s_first_error_time);
case attr_last_error_time: case attr_last_error_time:
return print_tstamp(buf, sbi->s_es, s_last_error_time); return print_tstamp(buf, sbi->s_es, s_last_error_time);
case attr_journal_task:
return journal_task_show(sbi, buf);
} }
return 0; return 0;
......
...@@ -829,6 +829,7 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage) ...@@ -829,6 +829,7 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bh)) { if (IS_ERR(bh)) {
ret = PTR_ERR(bh); ret = PTR_ERR(bh);
bh = NULL;
goto out; goto out;
} }
...@@ -2903,6 +2904,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, ...@@ -2903,6 +2904,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
if (error == -EIO) if (error == -EIO)
EXT4_ERROR_INODE(inode, "block %llu read error", EXT4_ERROR_INODE(inode, "block %llu read error",
EXT4_I(inode)->i_file_acl); EXT4_I(inode)->i_file_acl);
bh = NULL;
goto cleanup; goto cleanup;
} }
error = ext4_xattr_check_block(inode, bh); error = ext4_xattr_check_block(inode, bh);
...@@ -3059,6 +3061,7 @@ ext4_xattr_block_cache_find(struct inode *inode, ...@@ -3059,6 +3061,7 @@ ext4_xattr_block_cache_find(struct inode *inode,
if (IS_ERR(bh)) { if (IS_ERR(bh)) {
if (PTR_ERR(bh) == -ENOMEM) if (PTR_ERR(bh) == -ENOMEM)
return NULL; return NULL;
bh = NULL;
EXT4_ERROR_INODE(inode, "block %lu read error", EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long)ce->e_value); (unsigned long)ce->e_value);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
......
...@@ -113,7 +113,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) ...@@ -113,7 +113,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
nblocks = jbd2_space_needed(journal); nblocks = jbd2_space_needed(journal);
while (jbd2_log_space_left(journal) < nblocks) { while (jbd2_log_space_left(journal) < nblocks) {
write_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
mutex_lock(&journal->j_checkpoint_mutex); mutex_lock_io(&journal->j_checkpoint_mutex);
/* /*
* Test again, another process may have checkpointed while we * Test again, another process may have checkpointed while we
...@@ -276,9 +276,22 @@ int jbd2_log_do_checkpoint(journal_t *journal) ...@@ -276,9 +276,22 @@ int jbd2_log_do_checkpoint(journal_t *journal)
"JBD2: %s: Waiting for Godot: block %llu\n", "JBD2: %s: Waiting for Godot: block %llu\n",
journal->j_devname, (unsigned long long) bh->b_blocknr); journal->j_devname, (unsigned long long) bh->b_blocknr);
if (batch_count)
__flush_batch(journal, &batch_count);
jbd2_log_start_commit(journal, tid); jbd2_log_start_commit(journal, tid);
/*
* jbd2_journal_commit_transaction() may want
* to take the checkpoint_mutex if JBD2_FLUSHED
* is set, jbd2_update_log_tail() called by
* jbd2_journal_commit_transaction() may also take
* checkpoint_mutex. So we need to temporarily
* drop it.
*/
mutex_unlock(&journal->j_checkpoint_mutex);
jbd2_log_wait_commit(journal, tid); jbd2_log_wait_commit(journal, tid);
goto retry; mutex_lock_io(&journal->j_checkpoint_mutex);
spin_lock(&journal->j_list_lock);
goto restart;
} }
if (!buffer_dirty(bh)) { if (!buffer_dirty(bh)) {
if (unlikely(buffer_write_io_error(bh)) && !result) if (unlikely(buffer_write_io_error(bh)) && !result)
......
...@@ -694,9 +694,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -694,9 +694,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
the last tag we set up. */ the last tag we set up. */
tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG); tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
jbd2_descriptor_block_csum_set(journal, descriptor);
start_journal_io: start_journal_io:
if (descriptor)
jbd2_descriptor_block_csum_set(journal,
descriptor);
for (i = 0; i < bufs; i++) { for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i]; struct buffer_head *bh = wbuf[i];
/* /*
......
...@@ -142,22 +142,6 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) ...@@ -142,22 +142,6 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
return cpu_to_be32(csum); return cpu_to_be32(csum);
} }
static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
{
if (!jbd2_journal_has_csum_v2or3(j))
return 1;
return sb->s_checksum == jbd2_superblock_csum(j, sb);
}
static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
{
if (!jbd2_journal_has_csum_v2or3(j))
return;
sb->s_checksum = jbd2_superblock_csum(j, sb);
}
/* /*
* Helper function used to manage commit timeouts * Helper function used to manage commit timeouts
*/ */
...@@ -1356,6 +1340,10 @@ static int journal_reset(journal_t *journal) ...@@ -1356,6 +1340,10 @@ static int journal_reset(journal_t *journal)
return jbd2_journal_start_thread(journal); return jbd2_journal_start_thread(journal);
} }
/*
* This function expects that the caller will have locked the journal
* buffer head, and will return with it unlocked
*/
static int jbd2_write_superblock(journal_t *journal, int write_flags) static int jbd2_write_superblock(journal_t *journal, int write_flags)
{ {
struct buffer_head *bh = journal->j_sb_buffer; struct buffer_head *bh = journal->j_sb_buffer;
...@@ -1365,7 +1353,6 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags) ...@@ -1365,7 +1353,6 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
trace_jbd2_write_superblock(journal, write_flags); trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER)) if (!(journal->j_flags & JBD2_BARRIER))
write_flags &= ~(REQ_FUA | REQ_PREFLUSH); write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
lock_buffer(bh);
if (buffer_write_io_error(bh)) { if (buffer_write_io_error(bh)) {
/* /*
* Oh, dear. A previous attempt to write the journal * Oh, dear. A previous attempt to write the journal
...@@ -1381,7 +1368,8 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags) ...@@ -1381,7 +1368,8 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
clear_buffer_write_io_error(bh); clear_buffer_write_io_error(bh);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
} }
jbd2_superblock_csum_set(journal, sb); if (jbd2_journal_has_csum_v2or3(journal))
sb->s_checksum = jbd2_superblock_csum(journal, sb);
get_bh(bh); get_bh(bh);
bh->b_end_io = end_buffer_write_sync; bh->b_end_io = end_buffer_write_sync;
ret = submit_bh(REQ_OP_WRITE, write_flags, bh); ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
...@@ -1424,6 +1412,7 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, ...@@ -1424,6 +1412,7 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
tail_block, tail_tid); tail_block, tail_tid);
lock_buffer(journal->j_sb_buffer);
sb->s_sequence = cpu_to_be32(tail_tid); sb->s_sequence = cpu_to_be32(tail_tid);
sb->s_start = cpu_to_be32(tail_block); sb->s_start = cpu_to_be32(tail_block);
...@@ -1454,18 +1443,17 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op) ...@@ -1454,18 +1443,17 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
journal_superblock_t *sb = journal->j_superblock; journal_superblock_t *sb = journal->j_superblock;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
read_lock(&journal->j_state_lock); lock_buffer(journal->j_sb_buffer);
/* Is it already empty? */ if (sb->s_start == 0) { /* Is it already empty? */
if (sb->s_start == 0) { unlock_buffer(journal->j_sb_buffer);
read_unlock(&journal->j_state_lock);
return; return;
} }
jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
journal->j_tail_sequence); journal->j_tail_sequence);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
sb->s_start = cpu_to_be32(0); sb->s_start = cpu_to_be32(0);
read_unlock(&journal->j_state_lock);
jbd2_write_superblock(journal, write_op); jbd2_write_superblock(journal, write_op);
...@@ -1488,9 +1476,8 @@ void jbd2_journal_update_sb_errno(journal_t *journal) ...@@ -1488,9 +1476,8 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
journal_superblock_t *sb = journal->j_superblock; journal_superblock_t *sb = journal->j_superblock;
int errcode; int errcode;
read_lock(&journal->j_state_lock); lock_buffer(journal->j_sb_buffer);
errcode = journal->j_errno; errcode = journal->j_errno;
read_unlock(&journal->j_state_lock);
if (errcode == -ESHUTDOWN) if (errcode == -ESHUTDOWN)
errcode = 0; errcode = 0;
jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode); jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
...@@ -1595,17 +1582,18 @@ static int journal_get_superblock(journal_t *journal) ...@@ -1595,17 +1582,18 @@ static int journal_get_superblock(journal_t *journal)
} }
} }
/* Check superblock checksum */ if (jbd2_journal_has_csum_v2or3(journal)) {
if (!jbd2_superblock_csum_verify(journal, sb)) { /* Check superblock checksum */
printk(KERN_ERR "JBD2: journal checksum error\n"); if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
err = -EFSBADCRC; printk(KERN_ERR "JBD2: journal checksum error\n");
goto out; err = -EFSBADCRC;
} goto out;
}
/* Precompute checksum seed for all metadata */ /* Precompute checksum seed for all metadata */
if (jbd2_journal_has_csum_v2or3(journal))
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
sizeof(sb->s_uuid)); sizeof(sb->s_uuid));
}
set_buffer_verified(bh); set_buffer_verified(bh);
...@@ -1894,28 +1882,27 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, ...@@ -1894,28 +1882,27 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
sb = journal->j_superblock; sb = journal->j_superblock;
/* Load the checksum driver if necessary */
if ((journal->j_chksum_driver == NULL) &&
INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
journal->j_chksum_driver = NULL;
return 0;
}
/* Precompute checksum seed for all metadata */
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
sizeof(sb->s_uuid));
}
lock_buffer(journal->j_sb_buffer);
/* If enabling v3 checksums, update superblock */ /* If enabling v3 checksums, update superblock */
if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
sb->s_checksum_type = JBD2_CRC32C_CHKSUM; sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
sb->s_feature_compat &= sb->s_feature_compat &=
~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
/* Load the checksum driver */
if (journal->j_chksum_driver == NULL) {
journal->j_chksum_driver = crypto_alloc_shash("crc32c",
0, 0);
if (IS_ERR(journal->j_chksum_driver)) {
printk(KERN_ERR "JBD2: Cannot load crc32c "
"driver.\n");
journal->j_chksum_driver = NULL;
return 0;
}
/* Precompute checksum seed for all metadata */
journal->j_csum_seed = jbd2_chksum(journal, ~0,
sb->s_uuid,
sizeof(sb->s_uuid));
}
} }
/* If enabling v1 checksums, downgrade superblock */ /* If enabling v1 checksums, downgrade superblock */
...@@ -1927,6 +1914,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, ...@@ -1927,6 +1914,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
sb->s_feature_compat |= cpu_to_be32(compat); sb->s_feature_compat |= cpu_to_be32(compat);
sb->s_feature_ro_compat |= cpu_to_be32(ro); sb->s_feature_ro_compat |= cpu_to_be32(ro);
sb->s_feature_incompat |= cpu_to_be32(incompat); sb->s_feature_incompat |= cpu_to_be32(incompat);
unlock_buffer(journal->j_sb_buffer);
return 1; return 1;
#undef COMPAT_FEATURE_ON #undef COMPAT_FEATURE_ON
...@@ -2067,7 +2055,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) ...@@ -2067,7 +2055,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
err = jbd2_journal_skip_recovery(journal); err = jbd2_journal_skip_recovery(journal);
if (write) { if (write) {
/* Lock to make assertions happy... */ /* Lock to make assertions happy... */
mutex_lock(&journal->j_checkpoint_mutex); mutex_lock_io(&journal->j_checkpoint_mutex);
jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
mutex_unlock(&journal->j_checkpoint_mutex); mutex_unlock(&journal->j_checkpoint_mutex);
} }
......
...@@ -63,7 +63,7 @@ void jbd2_journal_free_transaction(transaction_t *transaction) ...@@ -63,7 +63,7 @@ void jbd2_journal_free_transaction(transaction_t *transaction)
/* /*
* jbd2_get_transaction: obtain a new transaction_t object. * jbd2_get_transaction: obtain a new transaction_t object.
* *
* Simply allocate and initialise a new transaction. Create it in * Simply initialise a new transaction. Initialize it in
* RUNNING state and add it to the current journal (which should not * RUNNING state and add it to the current journal (which should not
* have an existing running transaction: we only make a new transaction * have an existing running transaction: we only make a new transaction
* once we have started to commit the old one). * once we have started to commit the old one).
...@@ -75,8 +75,8 @@ void jbd2_journal_free_transaction(transaction_t *transaction) ...@@ -75,8 +75,8 @@ void jbd2_journal_free_transaction(transaction_t *transaction)
* *
*/ */
static transaction_t * static void jbd2_get_transaction(journal_t *journal,
jbd2_get_transaction(journal_t *journal, transaction_t *transaction) transaction_t *transaction)
{ {
transaction->t_journal = journal; transaction->t_journal = journal;
transaction->t_state = T_RUNNING; transaction->t_state = T_RUNNING;
...@@ -100,8 +100,6 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) ...@@ -100,8 +100,6 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
transaction->t_max_wait = 0; transaction->t_max_wait = 0;
transaction->t_start = jiffies; transaction->t_start = jiffies;
transaction->t_requested = 0; transaction->t_requested = 0;
return transaction;
} }
/* /*
...@@ -1252,11 +1250,12 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) ...@@ -1252,11 +1250,12 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
struct journal_head *jh; struct journal_head *jh;
char *committed_data = NULL; char *committed_data = NULL;
JBUFFER_TRACE(jh, "entry");
if (jbd2_write_access_granted(handle, bh, true)) if (jbd2_write_access_granted(handle, bh, true))
return 0; return 0;
jh = jbd2_journal_add_journal_head(bh); jh = jbd2_journal_add_journal_head(bh);
JBUFFER_TRACE(jh, "entry");
/* /*
* Do this first --- it can drop the journal lock, so we want to * Do this first --- it can drop the journal lock, so we want to
* make sure that obtaining the committed_data is done * make sure that obtaining the committed_data is done
...@@ -1367,15 +1366,17 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) ...@@ -1367,15 +1366,17 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (is_handle_aborted(handle)) if (is_handle_aborted(handle))
return -EROFS; return -EROFS;
if (!buffer_jbd(bh)) { if (!buffer_jbd(bh))
ret = -EUCLEAN; return -EUCLEAN;
goto out;
}
/* /*
* We don't grab jh reference here since the buffer must be part * We don't grab jh reference here since the buffer must be part
* of the running transaction. * of the running transaction.
*/ */
jh = bh2jh(bh); jh = bh2jh(bh);
jbd_debug(5, "journal_head %p\n", jh);
JBUFFER_TRACE(jh, "entry");
/* /*
* This and the following assertions are unreliable since we may see jh * This and the following assertions are unreliable since we may see jh
* in inconsistent state unless we grab bh_state lock. But this is * in inconsistent state unless we grab bh_state lock. But this is
...@@ -1409,9 +1410,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) ...@@ -1409,9 +1410,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
} }
journal = transaction->t_journal; journal = transaction->t_journal;
jbd_debug(5, "journal_head %p\n", jh);
JBUFFER_TRACE(jh, "entry");
jbd_lock_bh_state(bh); jbd_lock_bh_state(bh);
if (jh->b_modified == 0) { if (jh->b_modified == 0) {
...@@ -1597,9 +1595,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) ...@@ -1597,9 +1595,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
__jbd2_journal_unfile_buffer(jh); __jbd2_journal_unfile_buffer(jh);
if (!buffer_jbd(bh)) { if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); goto not_jbd;
__bforget(bh);
goto drop;
} }
} }
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
...@@ -1609,14 +1605,21 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) ...@@ -1609,14 +1605,21 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
/* However, if the buffer is still owned by a prior /* However, if the buffer is still owned by a prior
* (committing) transaction, we can't drop it yet... */ * (committing) transaction, we can't drop it yet... */
JBUFFER_TRACE(jh, "belongs to older transaction"); JBUFFER_TRACE(jh, "belongs to older transaction");
/* ... but we CAN drop it from the new transaction if we /* ... but we CAN drop it from the new transaction through
* have also modified it since the original commit. */ * marking the buffer as freed and set j_next_transaction to
* the new transaction, so that not only the commit code
* knows it should clear dirty bits when it is done with the
* buffer, but also the buffer can be checkpointed only
* after the new transaction commits. */
if (jh->b_next_transaction) { set_buffer_freed(bh);
J_ASSERT(jh->b_next_transaction == transaction);
if (!jh->b_next_transaction) {
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
jh->b_next_transaction = NULL; jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
} else {
J_ASSERT(jh->b_next_transaction == transaction);
/* /*
* only drop a reference if this transaction modified * only drop a reference if this transaction modified
...@@ -1625,9 +1628,40 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) ...@@ -1625,9 +1628,40 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
if (was_modified) if (was_modified)
drop_reserve = 1; drop_reserve = 1;
} }
} else {
/*
* Finally, if the buffer does not belong to any
* transaction, we can just drop it now if it has no
* checkpoint.
*/
spin_lock(&journal->j_list_lock);
if (!jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "belongs to none transaction");
spin_unlock(&journal->j_list_lock);
goto not_jbd;
}
/*
* Otherwise, if the buffer has been written to disk,
* it is safe to remove the checkpoint and drop it.
*/
if (!buffer_dirty(bh)) {
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
goto not_jbd;
}
/*
* The buffer is still not written to disk, we should
* attach this buffer to current transaction so that the
* buffer can be checkpointed only after the current
* transaction commits.
*/
clear_buffer_dirty(bh);
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
spin_unlock(&journal->j_list_lock);
} }
not_jbd:
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
__brelse(bh); __brelse(bh);
drop: drop:
...@@ -1636,6 +1670,11 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) ...@@ -1636,6 +1670,11 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
handle->h_buffer_credits++; handle->h_buffer_credits++;
} }
return err; return err;
not_jbd:
jbd_unlock_bh_state(bh);
__bforget(bh);
goto drop;
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment