Commit d723b99e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Improvements to ext4's block allocator performance for very large file
  systems, especially when the file system or its files are highly
  fragmented. There is a new mount option, prefetch_block_bitmaps, which
  will pull in the block bitmaps and set up the in-memory buddy bitmaps
  when the file system is initially mounted.

  Beyond that, a lot of bug fixes and cleanups. In particular, a number
  of changes to make ext4 more robust in the face of write errors or
  file system corruptions"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (46 commits)
  ext4: limit the length of per-inode prealloc list
  ext4: reorganize if statement of ext4_mb_release_context()
  ext4: add mb_debug logging when there are lost chunks
  ext4: Fix comment typo "the the".
  jbd2: clean up checksum verification in do_one_pass()
  ext4: change to use fallthrough macro
  ext4: remove unused parameter of ext4_generic_delete_entry function
  mballoc: replace seq_printf with seq_puts
  ext4: optimize the implementation of ext4_mb_good_group()
  ext4: delete invalid comments near ext4_mb_check_limits()
  ext4: fix typos in ext4_mb_regular_allocator() comment
  ext4: fix checking of directory entry validity for inline directories
  fs: prevent BUG_ON in submit_bh_wbc()
  ext4: correctly restore system zone info when remount fails
  ext4: handle add_system_zone() failure in ext4_setup_system_zone()
  ext4: fold ext4_data_block_valid_rcu() into the caller
  ext4: check journal inode extents more carefully
  ext4: don't allow overlapping system zones
  ext4: handle error of ext4_setup_system_zone() on remount
  ext4: delete the invalid BUGON in ext4_mb_load_buddy_gfp()
  ...
parents 5e0b17b0 27bc446e
......@@ -489,6 +489,9 @@ Files in /sys/fs/ext4/<devname>:
multiple of this tuning parameter if the stripe size is not set in the
ext4 superblock
mb_max_inode_prealloc
The maximum length of per-inode ext4_prealloc_space list.
mb_max_to_scan
The maximum number of extents the multiblock allocator will search to
find the best extent.
......@@ -529,21 +532,21 @@ Files in /sys/fs/ext4/<devname>:
Ioctls
======
There is some Ext4 specific functionality which can be accessed by applications
through the system call interfaces. The list of all Ext4 specific ioctls are
shown in the table below.
Ext4 implements various ioctls which can be used by applications to access
ext4-specific functionality. An incomplete list of these ioctls is shown in the
table below. This list includes truly ext4-specific ioctls (``EXT4_IOC_*``) as
well as ioctls that may have been ext4-specific originally but are now supported
by some other filesystem(s) too (``FS_IOC_*``).
Table of Ext4 specific ioctls
Table of Ext4 ioctls
EXT4_IOC_GETFLAGS
FS_IOC_GETFLAGS
Get additional attributes associated with inode. The ioctl argument is
an integer bitfield, with bit values described in ext4.h. This ioctl is
an alias for FS_IOC_GETFLAGS.
an integer bitfield, with bit values described in ext4.h.
EXT4_IOC_SETFLAGS
FS_IOC_SETFLAGS
Set additional attributes associated with inode. The ioctl argument is
an integer bitfield, with bit values described in ext4.h. This ioctl is
an alias for FS_IOC_SETFLAGS.
an integer bitfield, with bit values described in ext4.h.
EXT4_IOC_GETVERSION, EXT4_IOC_GETVERSION_OLD
Get the inode i_generation number stored for each inode. The
......
......@@ -39,6 +39,6 @@ entry.
Other References
----------------
Also see http://www.nongnu.org/ext2-doc/ for quite a collection of
Also see https://www.nongnu.org/ext2-doc/ for quite a collection of
information about ext2/3. Here's another old reference:
http://wiki.osdev.org/Ext2
......@@ -3157,6 +3157,15 @@ int __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
WARN_ON(atomic_read(&bh->b_count) < 1);
lock_buffer(bh);
if (test_clear_buffer_dirty(bh)) {
/*
* The bh should be mapped, but it might not be if the
* device was hot-removed. Not much we can do but fail the I/O.
*/
if (!buffer_mapped(bh)) {
unlock_buffer(bh);
return -EIO;
}
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
......
......@@ -110,7 +110,7 @@ config EXT4_KUNIT_TESTS
This builds the ext4 KUnit tests.
KUnit tests run during boot and output the results to the debug log
in TAP format (http://testanything.org/). Only useful for kernel devs
in TAP format (https://testanything.org/). Only useful for kernel devs
running KUnit test harness and are not for inclusion into a production
build.
......
......@@ -413,7 +413,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
* Return buffer_head on success or an ERR_PTR in case of failure.
*/
struct buffer_head *
ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
bool ignore_locked)
{
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
......@@ -441,6 +442,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
return ERR_PTR(-ENOMEM);
}
if (ignore_locked && buffer_locked(bh)) {
/* buffer under IO already, return if called for prefetching */
put_bh(bh);
return NULL;
}
if (bitmap_uptodate(bh))
goto verify;
......@@ -487,10 +494,11 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
* submit the buffer_head for reading
*/
set_buffer_new(bh);
trace_ext4_read_block_bitmap_load(sb, block_group);
trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked);
bh->b_end_io = ext4_end_bitmap_read;
get_bh(bh);
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh);
submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO |
(ignore_locked ? REQ_RAHEAD : 0), bh);
return bh;
verify:
err = ext4_validate_block_bitmap(sb, desc, block_group, bh);
......@@ -534,7 +542,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
struct buffer_head *bh;
int err;
bh = ext4_read_block_bitmap_nowait(sb, block_group);
bh = ext4_read_block_bitmap_nowait(sb, block_group, false);
if (IS_ERR(bh))
return bh;
err = ext4_wait_block_bitmap(sb, block_group, bh);
......
......@@ -24,6 +24,7 @@ struct ext4_system_zone {
struct rb_node node;
ext4_fsblk_t start_blk;
unsigned int count;
u32 ino;
};
static struct kmem_cache *ext4_system_zone_cachep;
......@@ -45,7 +46,8 @@ void ext4_exit_system_zone(void)
static inline int can_merge(struct ext4_system_zone *entry1,
struct ext4_system_zone *entry2)
{
if ((entry1->start_blk + entry1->count) == entry2->start_blk)
if ((entry1->start_blk + entry1->count) == entry2->start_blk &&
entry1->ino == entry2->ino)
return 1;
return 0;
}
......@@ -66,9 +68,9 @@ static void release_system_zone(struct ext4_system_blocks *system_blks)
*/
static int add_system_zone(struct ext4_system_blocks *system_blks,
ext4_fsblk_t start_blk,
unsigned int count)
unsigned int count, u32 ino)
{
struct ext4_system_zone *new_entry = NULL, *entry;
struct ext4_system_zone *new_entry, *entry;
struct rb_node **n = &system_blks->root.rb_node, *node;
struct rb_node *parent = NULL, *new_node = NULL;
......@@ -79,30 +81,21 @@ static int add_system_zone(struct ext4_system_blocks *system_blks,
n = &(*n)->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = &(*n)->rb_right;
else {
if (start_blk + count > (entry->start_blk +
entry->count))
entry->count = (start_blk + count -
entry->start_blk);
new_node = *n;
new_entry = rb_entry(new_node, struct ext4_system_zone,
node);
break;
}
else /* Unexpected overlap of system zones. */
return -EFSCORRUPTED;
}
if (!new_entry) {
new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
GFP_KERNEL);
if (!new_entry)
return -ENOMEM;
new_entry->start_blk = start_blk;
new_entry->count = count;
new_entry->ino = ino;
new_node = &new_entry->node;
rb_link_node(new_node, parent, n);
rb_insert_color(new_node, &system_blks->root);
}
/* Can we merge to the left? */
node = rb_prev(new_node);
......@@ -151,40 +144,6 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
printk(KERN_CONT "\n");
}
/*
 * Validate the block region (start_blk, start_blk+count).
 *
 * Returns 0 if the region starts at or before the first data block,
 * wraps around, or extends past the filesystem's block count.  Otherwise
 * the region is looked up in the @system_blks rb-tree: returns 0 if any
 * part of it overlaps a recorded filesystem metadata zone, and 1 if it
 * does not (or if @system_blks is NULL, i.e. there is no tree to check).
 */
static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi,
				     struct ext4_system_blocks *system_blks,
				     ext4_fsblk_t start_blk,
				     unsigned int count)
{
	struct ext4_system_zone *zone;
	struct rb_node *cur;

	/* Basic sanity: must lie strictly inside the data area. */
	if (start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block))
		return 0;
	/* Reject a range whose end wraps around. */
	if (start_blk + count < start_blk)
		return 0;
	if (start_blk + count > ext4_blocks_count(sbi->s_es))
		return 0;

	/* No system zone tree: nothing more to check against. */
	if (!system_blks)
		return 1;

	/* Binary search for a zone that intersects the range. */
	for (cur = system_blks->root.rb_node; cur != NULL; ) {
		zone = rb_entry(cur, struct ext4_system_zone, node);
		if (start_blk + count - 1 < zone->start_blk) {
			/* Range ends before this zone starts. */
			cur = cur->rb_left;
			continue;
		}
		if (start_blk >= (zone->start_blk + zone->count)) {
			/* Range starts after this zone ends. */
			cur = cur->rb_right;
			continue;
		}
		/* Neither strictly left nor strictly right: overlap. */
		return 0;
	}
	return 1;
}
static int ext4_protect_reserved_inode(struct super_block *sb,
struct ext4_system_blocks *system_blks,
u32 ino)
......@@ -214,19 +173,18 @@ static int ext4_protect_reserved_inode(struct super_block *sb,
if (n == 0) {
i++;
} else {
if (!ext4_data_block_valid_rcu(sbi, system_blks,
map.m_pblk, n)) {
err = -EFSCORRUPTED;
__ext4_error(sb, __func__, __LINE__, -err,
map.m_pblk, "blocks %llu-%llu "
"from inode %u overlap system zone",
err = add_system_zone(system_blks, map.m_pblk, n, ino);
if (err < 0) {
if (err == -EFSCORRUPTED) {
__ext4_error(sb, __func__, __LINE__,
-err, map.m_pblk,
"blocks %llu-%llu from inode %u overlap system zone",
map.m_pblk,
map.m_pblk + map.m_len - 1, ino);
break;
map.m_pblk + map.m_len - 1,
ino);
}
err = add_system_zone(system_blks, map.m_pblk, n);
if (err < 0)
break;
}
i += n;
}
}
......@@ -262,14 +220,6 @@ int ext4_setup_system_zone(struct super_block *sb)
int flex_size = ext4_flex_bg_size(sbi);
int ret;
if (!test_opt(sb, BLOCK_VALIDITY)) {
if (sbi->system_blks)
ext4_release_system_zone(sb);
return 0;
}
if (sbi->system_blks)
return 0;
system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
if (!system_blks)
return -ENOMEM;
......@@ -277,22 +227,25 @@ int ext4_setup_system_zone(struct super_block *sb)
for (i=0; i < ngroups; i++) {
cond_resched();
if (ext4_bg_has_super(sb, i) &&
((i < 5) || ((i % flex_size) == 0)))
add_system_zone(system_blks,
((i < 5) || ((i % flex_size) == 0))) {
ret = add_system_zone(system_blks,
ext4_group_first_block_no(sb, i),
ext4_bg_num_gdb(sb, i) + 1);
ext4_bg_num_gdb(sb, i) + 1, 0);
if (ret)
goto err;
}
gdp = ext4_get_group_desc(sb, i, NULL);
ret = add_system_zone(system_blks,
ext4_block_bitmap(sb, gdp), 1);
ext4_block_bitmap(sb, gdp), 1, 0);
if (ret)
goto err;
ret = add_system_zone(system_blks,
ext4_inode_bitmap(sb, gdp), 1);
ext4_inode_bitmap(sb, gdp), 1, 0);
if (ret)
goto err;
ret = add_system_zone(system_blks,
ext4_inode_table(sb, gdp),
sbi->s_itb_per_group);
sbi->s_itb_per_group, 0);
if (ret)
goto err;
}
......@@ -341,11 +294,24 @@ void ext4_release_system_zone(struct super_block *sb)
call_rcu(&system_blks->rcu, ext4_destroy_system_zone);
}
int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
/*
* Returns 1 if the passed-in block region (start_blk,
* start_blk+count) is valid; 0 if some part of the block region
* overlaps with some other filesystem metadata blocks.
*/
int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk,
unsigned int count)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_system_blocks *system_blks;
int ret;
struct ext4_system_zone *entry;
struct rb_node *n;
int ret = 1;
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
(start_blk + count < start_blk) ||
(start_blk + count > ext4_blocks_count(sbi->s_es)))
return 0;
/*
* Lock the system zone to prevent it being released concurrently
......@@ -354,8 +320,22 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
*/
rcu_read_lock();
system_blks = rcu_dereference(sbi->system_blks);
ret = ext4_data_block_valid_rcu(sbi, system_blks, start_blk,
count);
if (system_blks == NULL)
goto out_rcu;
n = system_blks->root.rb_node;
while (n) {
entry = rb_entry(n, struct ext4_system_zone, node);
if (start_blk + count - 1 < entry->start_blk)
n = n->rb_left;
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
else {
ret = (entry->ino == inode->i_ino);
break;
}
}
out_rcu:
rcu_read_unlock();
return ret;
}
......@@ -374,8 +354,7 @@ int ext4_check_blockref(const char *function, unsigned int line,
while (bref < p+max) {
blk = le32_to_cpu(*bref++);
if (blk &&
unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
blk, 1))) {
unlikely(!ext4_inode_block_valid(inode, blk, 1))) {
ext4_error_inode(inode, function, line, blk,
"invalid block");
return -EFSCORRUPTED;
......
......@@ -434,10 +434,36 @@ struct flex_groups {
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x725BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x624BC0FF /* User modifiable flags */
/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */
/* User modifiable flags */
#define EXT4_FL_USER_MODIFIABLE (EXT4_SECRM_FL | \
EXT4_UNRM_FL | \
EXT4_COMPR_FL | \
EXT4_SYNC_FL | \
EXT4_IMMUTABLE_FL | \
EXT4_APPEND_FL | \
EXT4_NODUMP_FL | \
EXT4_NOATIME_FL | \
EXT4_JOURNAL_DATA_FL | \
EXT4_NOTAIL_FL | \
EXT4_DIRSYNC_FL | \
EXT4_TOPDIR_FL | \
EXT4_EXTENTS_FL | \
0x00400000 /* EXT4_EOFBLOCKS_FL */ | \
EXT4_DAX_FL | \
EXT4_PROJINHERIT_FL | \
EXT4_CASEFOLD_FL)
/* User visible flags */
#define EXT4_FL_USER_VISIBLE (EXT4_FL_USER_MODIFIABLE | \
EXT4_DIRTY_FL | \
EXT4_COMPRBLK_FL | \
EXT4_NOCOMPR_FL | \
EXT4_ENCRYPT_FL | \
EXT4_INDEX_FL | \
EXT4_VERITY_FL | \
EXT4_INLINE_DATA_FL)
/* Flags we can manipulate through FS_IOC_FSSETXATTR */
#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
EXT4_IMMUTABLE_FL | \
EXT4_APPEND_FL | \
......@@ -669,8 +695,6 @@ enum {
/*
* ioctl commands
*/
#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
......@@ -687,17 +711,11 @@ enum {
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
#define EXT4_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
#define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
#define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
/* ioctl codes 19--39 are reserved for fscrypt */
#define EXT4_IOC_CLEAR_ES_CACHE _IO('f', 40)
#define EXT4_IOC_GETSTATE _IOW('f', 41, __u32)
#define EXT4_IOC_GET_ES_CACHE _IOWR('f', 42, struct fiemap)
#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
#define EXT4_IOC_SHUTDOWN _IOR ('X', 125, __u32)
/*
......@@ -722,8 +740,6 @@ enum {
/*
* ioctl commands in 32 bit emulation
*/
#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS
#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS
#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
......@@ -1054,6 +1070,7 @@ struct ext4_inode_info {
struct timespec64 i_crtime;
/* mballoc */
atomic_t i_prealloc_active;
struct list_head i_prealloc_list;
spinlock_t i_prealloc_lock;
......@@ -1172,6 +1189,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */
#define EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS 0x4000000
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
......@@ -1501,10 +1519,13 @@ struct ext4_sb_info {
unsigned int s_mb_stats;
unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc;
unsigned int s_mb_max_inode_prealloc;
unsigned int s_max_dir_size_kb;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
unsigned int s_mb_prefetch;
unsigned int s_mb_prefetch_limit;
/* stats for buddy allocator */
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
......@@ -1572,6 +1593,8 @@ struct ext4_sb_info {
struct ratelimit_state s_err_ratelimit_state;
struct ratelimit_state s_warning_ratelimit_state;
struct ratelimit_state s_msg_ratelimit_state;
atomic_t s_warning_count;
atomic_t s_msg_count;
/* Encryption context for '-o test_dummy_encryption' */
struct fscrypt_dummy_context s_dummy_enc_ctx;
......@@ -1585,6 +1608,9 @@ struct ext4_sb_info {
#ifdef CONFIG_EXT4_DEBUG
unsigned long s_simulate_fail;
#endif
/* Record the errseq of the backing block device */
errseq_t s_bdev_wb_err;
spinlock_t s_bdev_wb_lock;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
......@@ -2313,9 +2339,15 @@ struct ext4_lazy_init {
struct mutex li_list_mtx;
};
/*
 * Mode of a lazy-init request; stored in ext4_li_request.lr_mode.
 * NOTE(review): the per-value meanings below are inferred from the
 * names only -- confirm against the code that consumes lr_mode.
 */
enum ext4_li_mode {
/* presumably: request prefetches block bitmaps */
EXT4_LI_MODE_PREFETCH_BBITMAP,
/* presumably: request performs lazy inode table initialization */
EXT4_LI_MODE_ITABLE,
};
struct ext4_li_request {
struct super_block *lr_super;
struct ext4_sb_info *lr_sbi;
enum ext4_li_mode lr_mode;
ext4_group_t lr_first_not_zeroed;
ext4_group_t lr_next_group;
struct list_head lr_request;
unsigned long lr_next_sched;
......@@ -2446,7 +2478,8 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
extern struct buffer_head *ext4_read_block_bitmap_nowait(struct super_block *sb,
ext4_group_t block_group);
ext4_group_t block_group,
bool ignore_locked);
extern int ext4_wait_block_bitmap(struct super_block *sb,
ext4_group_t block_group,
struct buffer_head *bh);
......@@ -2651,9 +2684,15 @@ extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *);
extern void ext4_discard_preallocations(struct inode *, unsigned int);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
ext4_group_t group,
unsigned int nr, int *cnt);
extern void ext4_mb_prefetch_fini(struct super_block *sb, ext4_group_t group,
unsigned int nr);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
unsigned long count, int flags);
......@@ -2765,8 +2804,7 @@ extern int ext4_search_dir(struct buffer_head *bh,
struct ext4_filename *fname,
unsigned int offset,
struct ext4_dir_entry_2 **res_dir);
extern int ext4_generic_delete_entry(handle_t *handle,
struct inode *dir,
extern int ext4_generic_delete_entry(struct inode *dir,
struct ext4_dir_entry_2 *de_del,
struct buffer_head *bh,
void *entry_buf,
......@@ -2924,12 +2962,6 @@ do { \
#endif
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
__u32 compat);
extern int ext4_update_rocompat_feature(handle_t *handle,
struct super_block *sb, __u32 rocompat);
extern int ext4_update_incompat_feature(handle_t *handle,
struct super_block *sb, __u32 incompat);
extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
struct ext4_group_desc *bg);
extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
......@@ -3145,6 +3177,7 @@ struct ext4_group_info {
(1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT)
#define EXT4_GROUP_INFO_IBITMAP_CORRUPT \
(1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
#define EXT4_GROUP_INFO_BBITMAP_READ_BIT 4
#define EXT4_MB_GRP_NEED_INIT(grp) \
(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
......@@ -3159,6 +3192,8 @@ struct ext4_group_info {
(set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_CLEAR_TRIMMED(grp) \
(clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_TEST_AND_SET_READ(grp) \
(test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_READ_BIT, &((grp)->bb_state)))
#define EXT4_MAX_CONTENTION 8
#define EXT4_CONTENTION_THRESHOLD 2
......@@ -3363,7 +3398,7 @@ extern void ext4_release_system_zone(struct super_block *sb);
extern int ext4_setup_system_zone(struct super_block *sb);
extern int __init ext4_init_system_zone(void);
extern void ext4_exit_system_zone(void);
extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
extern int ext4_inode_block_valid(struct inode *inode,
ext4_fsblk_t start_blk,
unsigned int count);
extern int ext4_check_blockref(const char *, unsigned int,
......
......@@ -195,6 +195,28 @@ static void ext4_journal_abort_handle(const char *caller, unsigned int line,
jbd2_journal_abort_handle(handle);
}
/*
 * Raise an ext4 error if the backing block device's page-cache mapping has
 * recorded a write-back error since the value saved in sbi->s_bdev_wb_err.
 *
 * A write error flag on the block device means that an asynchronous
 * background write of metadata buffers may have failed.  In that case we
 * could read stale data back from disk and write it out again, which may
 * lead to on-disk filesystem inconsistency.
 */
static void ext4_check_bdev_write_error(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping;
	int wb_errno;

	/* Unlocked probe first; only take the lock when something changed. */
	if (!errseq_check(&bdev_mapping->wb_err,
			  READ_ONCE(sbi->s_bdev_wb_err)))
		return;

	/* Re-check and advance the saved sequence under s_bdev_wb_lock. */
	spin_lock(&sbi->s_bdev_wb_lock);
	wb_errno = errseq_check_and_advance(&bdev_mapping->wb_err,
					    &sbi->s_bdev_wb_err);
	spin_unlock(&sbi->s_bdev_wb_lock);

	if (!wb_errno)
		return;

	ext4_error_err(sb, -wb_errno,
		       "Error while async write back metadata");
}
int __ext4_journal_get_write_access(const char *where, unsigned int line,
handle_t *handle, struct buffer_head *bh)
{
......@@ -202,6 +224,9 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
might_sleep();
if (bh->b_bdev->bd_super)
ext4_check_bdev_write_error(bh->b_bdev->bd_super);
if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_write_access(handle, bh);
if (err)
......
......@@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
* i_mutex. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
ext4_discard_preallocations(inode);
ext4_discard_preallocations(inode, 0);
up_write(&EXT4_I(inode)->i_data_sem);
*dropped = 1;
return 0;
......@@ -340,7 +340,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
*/
if (lblock + len <= lblock)
return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
return ext4_inode_block_valid(inode, block, len);
}
static int ext4_valid_extent_idx(struct inode *inode,
......@@ -348,7 +348,7 @@ static int ext4_valid_extent_idx(struct inode *inode,
{
ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
return ext4_inode_block_valid(inode, block, 1);
}
static int ext4_valid_extent_entries(struct inode *inode,
......@@ -507,14 +507,10 @@ __read_extent_tree_block(const char *function, unsigned int line,
}
if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
return bh;
if (!ext4_has_feature_journal(inode->i_sb) ||
(inode->i_ino !=
le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) {
err = __ext4_ext_check(function, line, inode,
ext_block_hdr(bh), depth, pblk);
if (err)
goto errout;
}
set_buffer_verified(bh);
/*
* If this is a leaf block, cache all of its entries
......@@ -693,11 +689,9 @@ void ext4_ext_drop_refs(struct ext4_ext_path *path)
return;
depth = path->p_depth;
for (i = 0; i <= depth; i++, path++) {
if (path->p_bh) {
brelse(path->p_bh);
path->p_bh = NULL;
}
}
}
/*
......@@ -1915,7 +1909,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
/*
* ext4_ext_insert_extent:
* tries to merge requsted extent into the existing extent or
* tries to merge requested extent into the existing extent or
* inserts requested extent as new one into the tree,
* creating new leaf in the no-space case.
*/
......@@ -3125,7 +3119,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
*
*
* Splits extent [a, b] into two extents [a, @split) and [@split, b], states
* of which are deterimined by split_flag.
* of which are determined by split_flag.
*
* There are two cases:
* a> the extent is split into two extents.
......@@ -3650,7 +3644,7 @@ static int ext4_split_convert_extents(handle_t *handle,
eof_block = map->m_lblk + map->m_len;
/*
* It is safe to convert extent to initialized via explicit
* zeroout only if extent is fully insde i_size or new_size.
* zeroout only if extent is fully inside i_size or new_size.
*/
depth = ext_depth(inode);
ex = path[depth].p_ext;
......@@ -4272,7 +4266,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free().
*/
ext4_discard_preallocations(inode);
ext4_discard_preallocations(inode, 0);
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
ext4_free_blocks(handle, inode, NULL, newblock,
......@@ -4495,7 +4489,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
/*
* Round up offset. This is not fallocate, we neet to zero out
* Round up offset. This is not fallocate, we need to zero out
* blocks, so convert interior block aligned part of the range to
* unwritten and possibly manually zero out unaligned parts of the
* range.
......@@ -5299,7 +5293,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
}
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode);
ext4_discard_preallocations(inode, 0);
ret = ext4_es_remove_extent(inode, punch_start,
EXT_MAX_BLOCKS - punch_start);
......@@ -5313,7 +5307,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop;
}
ext4_discard_preallocations(inode);
ext4_discard_preallocations(inode, 0);
ret = ext4_ext_shift_extents(inode, handle, punch_stop,
punch_stop - punch_start, SHIFT_LEFT);
......@@ -5445,7 +5439,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
goto out_stop;
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode);
ext4_discard_preallocations(inode, 0);
path = ext4_find_extent(inode, offset_lblk, NULL, 0);
if (IS_ERR(path)) {
......@@ -5579,7 +5573,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
}
ex1 = path1[path1->p_depth].p_ext;
ex2 = path2[path2->p_depth].p_ext;
/* Do we have somthing to swap ? */
/* Do we have something to swap ? */
if (unlikely(!ex2 || !ex1))
goto finish;
......
......@@ -145,10 +145,9 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
/* if we are the last writer on the inode, drop the block reservation */
if ((filp->f_mode & FMODE_WRITE) &&
(atomic_read(&inode->i_writecount) == 1) &&
!EXT4_I(inode)->i_reserved_data_blocks)
{
!EXT4_I(inode)->i_reserved_data_blocks) {
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode);
ext4_discard_preallocations(inode, 0);
up_write(&EXT4_I(inode)->i_data_sem);
}
if (is_dx(inode) && filp->private_data)
......@@ -428,6 +427,10 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
*/
if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
!ext4_overwrite_io(inode, offset, count))) {
if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN;
goto out;
}
inode_unlock_shared(inode);
*ilock_shared = false;
inode_lock(inode);
......@@ -812,7 +815,7 @@ static int ext4_sample_last_mounted(struct super_block *sb,
return err;
}
static int ext4_file_open(struct inode * inode, struct file * filp)
static int ext4_file_open(struct inode *inode, struct file *filp)
{
int ret;
......
......@@ -233,7 +233,7 @@ static int __ext4fs_dirhash(const char *name, int len,
break;
case DX_HASH_HALF_MD4_UNSIGNED:
str2hashbuf = str2hashbuf_unsigned;
/* fall through */
fallthrough;
case DX_HASH_HALF_MD4:
p = name;
while (len > 0) {
......@@ -247,7 +247,7 @@ static int __ext4fs_dirhash(const char *name, int len,
break;
case DX_HASH_TEA_UNSIGNED:
str2hashbuf = str2hashbuf_unsigned;
/* fall through */
fallthrough;
case DX_HASH_TEA:
p = name;
while (len > 0) {
......
......@@ -696,7 +696,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
* i_mutex. So we can safely drop the i_data_sem here.
*/
BUG_ON(EXT4_JOURNAL(inode) == NULL);
ext4_discard_preallocations(inode);
ext4_discard_preallocations(inode, 0);
up_write(&EXT4_I(inode)->i_data_sem);
*dropped = 1;
return 0;
......@@ -858,8 +858,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
else if (ext4_should_journal_data(inode))
flags |= EXT4_FREE_BLOCKS_FORGET;
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
count)) {
if (!ext4_inode_block_valid(inode, block_to_free, count)) {
EXT4_ERROR_INODE(inode, "attempt to clear invalid "
"blocks %llu len %lu",
(unsigned long long) block_to_free, count);
......@@ -1004,8 +1003,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
if (!nr)
continue; /* A hole */
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
nr, 1)) {
if (!ext4_inode_block_valid(inode, nr, 1)) {
EXT4_ERROR_INODE(inode,
"invalid indirect mapped "
"block %lu (level %d)",
......@@ -1182,21 +1180,21 @@ void ext4_ind_truncate(handle_t *handle, struct inode *inode)
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
i_data[EXT4_IND_BLOCK] = 0;
}
/* fall through */
fallthrough;
case EXT4_IND_BLOCK:
nr = i_data[EXT4_DIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
i_data[EXT4_DIND_BLOCK] = 0;
}
/* fall through */
fallthrough;
case EXT4_DIND_BLOCK:
nr = i_data[EXT4_TIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
i_data[EXT4_TIND_BLOCK] = 0;
}
/* fall through */
fallthrough;
case EXT4_TIND_BLOCK:
;
}
......@@ -1436,7 +1434,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
i_data[EXT4_IND_BLOCK] = 0;
}
/* fall through */
fallthrough;
case EXT4_IND_BLOCK:
if (++n >= n2)
break;
......@@ -1445,7 +1443,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
i_data[EXT4_DIND_BLOCK] = 0;
}
/* fall through */
fallthrough;
case EXT4_DIND_BLOCK:
if (++n >= n2)
break;
......@@ -1454,7 +1452,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode,
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
i_data[EXT4_TIND_BLOCK] = 0;
}
/* fall through */
fallthrough;
case EXT4_TIND_BLOCK:
;
}
......
......@@ -276,7 +276,7 @@ static int ext4_create_inline_data(handle_t *handle,
len = 0;
}
/* Insert the the xttr entry. */
/* Insert the xattr entry. */
i.value = value;
i.value_len = len;
......@@ -1706,7 +1706,7 @@ int ext4_delete_inline_entry(handle_t *handle,
if (err)
goto out;
err = ext4_generic_delete_entry(handle, dir, de_del, bh,
err = ext4_generic_delete_entry(dir, de_del, bh,
inline_start, inline_size, 0);
if (err)
goto out;
......
......@@ -383,7 +383,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
*/
if ((ei->i_reserved_data_blocks == 0) &&
!inode_is_open_for_write(inode))
ext4_discard_preallocations(inode);
ext4_discard_preallocations(inode, 0);
}
static int __check_block_validity(struct inode *inode, const char *func,
......@@ -394,8 +394,7 @@ static int __check_block_validity(struct inode *inode, const char *func,
(inode->i_ino ==
le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
return 0;
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
map->m_len)) {
if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) {
ext4_error_inode(inode, func, line, map->m_pblk,
"lblock %lu mapped to illegal pblock %llu "
"(length %d)", (unsigned long) map->m_lblk,
......@@ -3288,7 +3287,7 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
if (PageChecked(page))
return 0;
if (journal)
return jbd2_journal_try_to_free_buffers(journal, page, wait);
return jbd2_journal_try_to_free_buffers(journal, page);
else
return try_to_free_buffers(page);
}
......@@ -4056,7 +4055,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
if (stop_block > first_block) {
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode);
ext4_discard_preallocations(inode, 0);
ret = ext4_es_remove_extent(inode, first_block,
stop_block - first_block);
......@@ -4163,7 +4162,7 @@ int ext4_truncate(struct inode *inode)
trace_ext4_truncate_enter(inode);
if (!ext4_can_truncate(inode))
return 0;
goto out_trace;
if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
......@@ -4172,16 +4171,14 @@ int ext4_truncate(struct inode *inode)
int has_inline = 1;
err = ext4_inline_data_truncate(inode, &has_inline);
if (err)
return err;
if (has_inline)
return 0;
if (err || has_inline)
goto out_trace;
}
/* If we zero-out tail of the page, we have to create jinode for jbd2 */
if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
if (ext4_inode_attach_jinode(inode) < 0)
return 0;
goto out_trace;
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
......@@ -4190,8 +4187,10 @@ int ext4_truncate(struct inode *inode)
credits = ext4_blocks_for_truncate(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
if (IS_ERR(handle))
return PTR_ERR(handle);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
goto out_trace;
}
if (inode->i_size & (inode->i_sb->s_blocksize - 1))
ext4_block_truncate_page(handle, mapping, inode->i_size);
......@@ -4211,7 +4210,7 @@ int ext4_truncate(struct inode *inode)
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode);
ext4_discard_preallocations(inode, 0);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
err = ext4_ext_truncate(handle, inode);
......@@ -4242,6 +4241,7 @@ int ext4_truncate(struct inode *inode)
err = err2;
ext4_journal_stop(handle);
out_trace:
trace_ext4_truncate_exit(inode);
return err;
}
......@@ -4760,7 +4760,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
ret = 0;
if (ei->i_file_acl &&
!ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
!ext4_inode_block_valid(inode, ei->i_file_acl, 1)) {
ext4_error_inode(inode, function, line, 0,
"iget: bad extended attribute block %llu",
ei->i_file_acl);
......
......@@ -202,7 +202,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
reset_inode_seed(inode);
reset_inode_seed(inode_bl);
ext4_discard_preallocations(inode);
ext4_discard_preallocations(inode, 0);
err = ext4_mark_inode_dirty(handle, inode);
if (err < 0) {
......@@ -819,12 +819,12 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
switch (cmd) {
case FS_IOC_GETFSMAP:
return ext4_ioc_getfsmap(sb, (void __user *)arg);
case EXT4_IOC_GETFLAGS:
case FS_IOC_GETFLAGS:
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
if (S_ISREG(inode->i_mode))
flags &= ~EXT4_PROJINHERIT_FL;
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
case FS_IOC_SETFLAGS: {
int err;
if (!inode_owner_or_capable(inode))
......@@ -1129,12 +1129,12 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case EXT4_IOC_PRECACHE_EXTENTS:
return ext4_ext_precache(inode);
case EXT4_IOC_SET_ENCRYPTION_POLICY:
case FS_IOC_SET_ENCRYPTION_POLICY:
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
case EXT4_IOC_GET_ENCRYPTION_PWSALT: {
case FS_IOC_GET_ENCRYPTION_PWSALT: {
#ifdef CONFIG_FS_ENCRYPTION
int err, err2;
struct ext4_sb_info *sbi = EXT4_SB(sb);
......@@ -1174,7 +1174,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return -EOPNOTSUPP;
#endif
}
case EXT4_IOC_GET_ENCRYPTION_POLICY:
case FS_IOC_GET_ENCRYPTION_POLICY:
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
......@@ -1236,7 +1236,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case EXT4_IOC_GET_ES_CACHE:
return ext4_ioctl_get_es_cache(filp, arg);
case EXT4_IOC_FSGETXATTR:
case FS_IOC_FSGETXATTR:
{
struct fsxattr fa;
......@@ -1247,7 +1247,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return -EFAULT;
return 0;
}
case EXT4_IOC_FSSETXATTR:
case FS_IOC_FSSETXATTR:
{
struct fsxattr fa, old_fa;
int err;
......@@ -1313,11 +1313,11 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
/* These are just misnamed, they actually get/put from/to user an int */
switch (cmd) {
case EXT4_IOC32_GETFLAGS:
cmd = EXT4_IOC_GETFLAGS;
case FS_IOC32_GETFLAGS:
cmd = FS_IOC_GETFLAGS;
break;
case EXT4_IOC32_SETFLAGS:
cmd = EXT4_IOC_SETFLAGS;
case FS_IOC32_SETFLAGS:
cmd = FS_IOC_SETFLAGS;
break;
case EXT4_IOC32_GETVERSION:
cmd = EXT4_IOC_GETVERSION;
......@@ -1361,9 +1361,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_RESIZE_FS:
case FITRIM:
case EXT4_IOC_PRECACHE_EXTENTS:
case EXT4_IOC_SET_ENCRYPTION_POLICY:
case EXT4_IOC_GET_ENCRYPTION_PWSALT:
case EXT4_IOC_GET_ENCRYPTION_POLICY:
case FS_IOC_SET_ENCRYPTION_POLICY:
case FS_IOC_GET_ENCRYPTION_PWSALT:
case FS_IOC_GET_ENCRYPTION_POLICY:
case FS_IOC_GET_ENCRYPTION_POLICY_EX:
case FS_IOC_ADD_ENCRYPTION_KEY:
case FS_IOC_REMOVE_ENCRYPTION_KEY:
......@@ -1377,8 +1377,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_CLEAR_ES_CACHE:
case EXT4_IOC_GETSTATE:
case EXT4_IOC_GET_ES_CACHE:
case EXT4_IOC_FSGETXATTR:
case EXT4_IOC_FSSETXATTR:
case FS_IOC_FSGETXATTR:
case FS_IOC_FSSETXATTR:
break;
default:
return -ENOIOCTLCMD;
......
This diff is collapsed.
......@@ -73,6 +73,10 @@
*/
#define MB_DEFAULT_GROUP_PREALLOC 512
/*
 * Default maximum length of a per-inode preallocation list.
 * NOTE(review): presumably the initial value of the mb_max_inode_prealloc
 * tunable (s_mb_max_inode_prealloc); the code that consumes this constant
 * is not visible here -- confirm in mballoc.c.
 */
#define MB_DEFAULT_MAX_INODE_PREALLOC 512
struct ext4_free_data {
/* this links the free block information from sb_info */
......
......@@ -686,8 +686,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
out:
if (*moved_len) {
ext4_discard_preallocations(orig_inode);
ext4_discard_preallocations(donor_inode);
ext4_discard_preallocations(orig_inode, 0);
ext4_discard_preallocations(donor_inode, 0);
}
ext4_ext_drop_refs(path);
......
......@@ -1396,8 +1396,8 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
ext4_match(dir, fname, de)) {
/* found a match - just to be sure, do
* a full check */
if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
bh->b_size, offset))
if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
buf_size, offset))
return -1;
*res_dir = de;
return 1;
......@@ -1858,7 +1858,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
blocksize, hinfo, map);
map -= count;
dx_sort_map(map, count);
/* Split the existing block in the middle, size-wise */
/* Ensure that neither split block is over half full */
size = 0;
move = 0;
for (i = count-1; i >= 0; i--) {
......@@ -1868,8 +1868,18 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
size += map[i].size;
move++;
}
/* map index at which we will split */
/*
* map index at which we will split
*
* If the sum of active entries didn't exceed half the block size, just
* split it in half by count; each resulting block will have at least
* half the space free.
*/
if (i > 0)
split = count - move;
else
split = count/2;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
......@@ -2455,8 +2465,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
* ext4_generic_delete_entry deletes a directory entry by merging it
* with the previous entry
*/
int ext4_generic_delete_entry(handle_t *handle,
struct inode *dir,
int ext4_generic_delete_entry(struct inode *dir,
struct ext4_dir_entry_2 *de_del,
struct buffer_head *bh,
void *entry_buf,
......@@ -2472,7 +2481,7 @@ int ext4_generic_delete_entry(handle_t *handle,
de = (struct ext4_dir_entry_2 *)entry_buf;
while (i < buf_size - csum_size) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
bh->b_data, bh->b_size, i))
entry_buf, buf_size, i))
return -EFSCORRUPTED;
if (de == de_del) {
if (pde)
......@@ -2517,8 +2526,7 @@ static int ext4_delete_entry(handle_t *handle,
if (unlikely(err))
goto out;
err = ext4_generic_delete_entry(handle, dir, de_del,
bh, bh->b_data,
err = ext4_generic_delete_entry(dir, de_del, bh, bh->b_data,
dir->i_sb->s_blocksize, csum_size);
if (err)
goto out;
......@@ -3193,30 +3201,33 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
* in separate transaction */
retval = dquot_initialize(dir);
if (retval)
return retval;
goto out_trace;
retval = dquot_initialize(d_inode(dentry));
if (retval)
return retval;
goto out_trace;
retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
if (IS_ERR(bh))
return PTR_ERR(bh);
if (!bh)
goto end_unlink;
if (IS_ERR(bh)) {
retval = PTR_ERR(bh);
goto out_trace;
}
if (!bh) {
retval = -ENOENT;
goto out_trace;
}
inode = d_inode(dentry);
if (le32_to_cpu(de->inode) != inode->i_ino) {
retval = -EFSCORRUPTED;
if (le32_to_cpu(de->inode) != inode->i_ino)
goto end_unlink;
goto out_bh;
}
handle = ext4_journal_start(dir, EXT4_HT_DIR,
EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);
handle = NULL;
goto end_unlink;
goto out_bh;
}
if (IS_DIRSYNC(dir))
......@@ -3224,12 +3235,12 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto end_unlink;
goto out_handle;
dir->i_ctime = dir->i_mtime = current_time(dir);
ext4_update_dx_flag(dir);
retval = ext4_mark_inode_dirty(handle, dir);
if (retval)
goto end_unlink;
goto out_handle;
if (inode->i_nlink == 0)
ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
dentry->d_name.len, dentry->d_name.name);
......@@ -3251,10 +3262,11 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
d_invalidate(dentry);
#endif
end_unlink:
brelse(bh);
if (handle)
out_handle:
ext4_journal_stop(handle);
out_bh:
brelse(bh);
out_trace:
trace_ext4_unlink_exit(dentry, retval);
return retval;
}
......
......@@ -140,7 +140,7 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
return;
}
ctx->cur_step++;
/* fall-through */
fallthrough;
case STEP_VERITY:
if (ctx->enabled_steps & (1 << STEP_VERITY)) {
INIT_WORK(&ctx->work, verity_work);
......@@ -148,7 +148,7 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
return;
}
ctx->cur_step++;
/* fall-through */
fallthrough;
default:
__read_end_io(ctx->bio);
}
......
This diff is collapsed.
......@@ -189,6 +189,9 @@ static struct ext4_attr ext4_attr_##_name = { \
#define EXT4_RW_ATTR_SBI_UL(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0644, pointer_ul, ext4_sb_info, _elname)
/*
 * Declare the ext4 attribute _name with mode 0444 by forwarding to
 * EXT4_ATTR_OFFSET with the pointer_atomic attribute id, the ext4_sb_info
 * structure and its member _elname.
 */
#define EXT4_RO_ATTR_SBI_ATOMIC(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0444, pointer_atomic, ext4_sb_info, _elname)
#define EXT4_ATTR_PTR(_name,_mode,_id,_ptr) \
static struct ext4_attr ext4_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
......@@ -215,6 +218,7 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(mb_max_inode_prealloc, s_mb_max_inode_prealloc);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
......@@ -226,6 +230,8 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
#ifdef CONFIG_EXT4_DEBUG
EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail);
#endif
EXT4_RO_ATTR_SBI_ATOMIC(warning_count, s_warning_count);
EXT4_RO_ATTR_SBI_ATOMIC(msg_count, s_msg_count);
EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
EXT4_RO_ATTR_ES_U8(first_error_errcode, s_first_error_errcode);
EXT4_RO_ATTR_ES_U8(last_error_errcode, s_last_error_errcode);
......@@ -240,6 +246,8 @@ EXT4_RO_ATTR_ES_STRING(last_error_func, s_last_error_func, 32);
EXT4_ATTR(first_error_time, 0444, first_error_time);
EXT4_ATTR(last_error_time, 0444, last_error_time);
EXT4_ATTR(journal_task, 0444, journal_task);
EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch);
EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit);
static unsigned int old_bump_val = 128;
EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
......@@ -257,6 +265,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(mb_order2_req),
ATTR_LIST(mb_stream_req),
ATTR_LIST(mb_group_prealloc),
ATTR_LIST(mb_max_inode_prealloc),
ATTR_LIST(max_writeback_mb_bump),
ATTR_LIST(extent_max_zeroout_kb),
ATTR_LIST(trigger_fs_error),
......@@ -267,6 +276,8 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(msg_ratelimit_interval_ms),
ATTR_LIST(msg_ratelimit_burst),
ATTR_LIST(errors_count),
ATTR_LIST(warning_count),
ATTR_LIST(msg_count),
ATTR_LIST(first_error_ino),
ATTR_LIST(last_error_ino),
ATTR_LIST(first_error_block),
......@@ -283,6 +294,8 @@ static struct attribute *ext4_attrs[] = {
#ifdef CONFIG_EXT4_DEBUG
ATTR_LIST(simulate_fail),
#endif
ATTR_LIST(mb_prefetch),
ATTR_LIST(mb_prefetch_limit),
NULL,
};
ATTRIBUTE_GROUPS(ext4);
......
......@@ -1356,7 +1356,6 @@ static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
block = 0;
while (wsize < bufsize) {
if (bh != NULL)
brelse(bh);
csize = (bufsize - wsize) > blocksize ? blocksize :
bufsize - wsize;
......
......@@ -1285,7 +1285,7 @@ journal_t *jbd2_journal_init_inode(struct inode *inode)
* superblock as being NULL to prevent the journal destroy from writing
* back a bogus superblock.
*/
static void journal_fail_superblock (journal_t *journal)
static void journal_fail_superblock(journal_t *journal)
{
struct buffer_head *bh = journal->j_sb_buffer;
brelse(bh);
......@@ -1367,8 +1367,10 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
int ret;
/* Buffer got discarded which means block device got invalidated */
if (!buffer_mapped(bh))
if (!buffer_mapped(bh)) {
unlock_buffer(bh);
return -EIO;
}
trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
......@@ -1815,7 +1817,7 @@ int jbd2_journal_destroy(journal_t *journal)
/**
*int jbd2_journal_check_used_features () - Check if features specified are used.
*int jbd2_journal_check_used_features() - Check if features specified are used.
* @journal: Journal to check.
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
......@@ -1825,7 +1827,7 @@ int jbd2_journal_destroy(journal_t *journal)
* features. Return true (non-zero) if it does.
**/
int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
unsigned long ro, unsigned long incompat)
{
journal_superblock_t *sb;
......@@ -1860,7 +1862,7 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
* all of a given set of features on this journal. Return true
* (non-zero) if it can. */
int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat,
int jbd2_journal_check_available_features(journal_t *journal, unsigned long compat,
unsigned long ro, unsigned long incompat)
{
if (!compat && !ro && !incompat)
......@@ -1882,7 +1884,7 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com
}
/**
* int jbd2_journal_set_features () - Mark a given journal feature in the superblock
* int jbd2_journal_set_features() - Mark a given journal feature in the superblock
* @journal: Journal to act on.
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
......@@ -1893,7 +1895,7 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com
*
*/
int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
unsigned long ro, unsigned long incompat)
{
#define INCOMPAT_FEATURE_ON(f) \
......
......@@ -690,14 +690,11 @@ static int do_one_pass(journal_t *journal,
* number. */
if (pass == PASS_SCAN &&
jbd2_has_feature_checksum(journal)) {
int chksum_err, chksum_seen;
struct commit_header *cbh =
(struct commit_header *)bh->b_data;
unsigned found_chksum =
be32_to_cpu(cbh->h_chksum[0]);
chksum_err = chksum_seen = 0;
if (info->end_transaction) {
journal->j_failed_commit =
info->end_transaction;
......@@ -705,42 +702,23 @@ static int do_one_pass(journal_t *journal,
break;
}
if (crc32_sum == found_chksum &&
cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
/* Neither checksum match nor unused? */
if (!((crc32_sum == found_chksum &&
cbh->h_chksum_type ==
JBD2_CRC32_CHKSUM &&
cbh->h_chksum_size ==
JBD2_CRC32_CHKSUM_SIZE)
chksum_seen = 1;
else if (!(cbh->h_chksum_type == 0 &&
JBD2_CRC32_CHKSUM_SIZE) ||
(cbh->h_chksum_type == 0 &&
cbh->h_chksum_size == 0 &&
found_chksum == 0 &&
!chksum_seen))
/*
* If fs is mounted using an old kernel and then
* kernel with journal_chksum is used then we
* get a situation where the journal flag has
* checksum flag set but checksums are not
* present i.e chksum = 0, in the individual
* commit blocks.
* Hence to avoid checksum failures, in this
* situation, this extra check is added.
*/
chksum_err = 1;
found_chksum == 0)))
goto chksum_error;
if (chksum_err) {
info->end_transaction = next_commit_ID;
if (!jbd2_has_feature_async_commit(journal)) {
journal->j_failed_commit =
next_commit_ID;
brelse(bh);
break;
}
}
crc32_sum = ~0;
}
if (pass == PASS_SCAN &&
!jbd2_commit_block_csum_verify(journal,
bh->b_data)) {
chksum_error:
info->end_transaction = next_commit_ID;
if (!jbd2_has_feature_async_commit(journal)) {
......
......@@ -2026,6 +2026,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
*/
static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
J_ASSERT_JH(jh, jh->b_transaction != NULL);
J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
}
......@@ -2078,10 +2081,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
* int jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
* @gfp_mask: we use the mask to detect how hard should we try to release
* buffers. If __GFP_DIRECT_RECLAIM and __GFP_FS is set, we wait for commit
* code to release the buffers.
*
*
* For all the buffers on this page,
* if they are fully written out ordered data, move them onto BUF_CLEAN
......@@ -2112,11 +2111,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
*
* Return 0 on failure, 1 on success
*/
int jbd2_journal_try_to_free_buffers(journal_t *journal,
struct page *page, gfp_t gfp_mask)
int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page)
{
struct buffer_head *head;
struct buffer_head *bh;
bool has_write_io_error = false;
int ret = 0;
J_ASSERT(PageLocked(page));
......@@ -2141,11 +2140,26 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
jbd2_journal_put_journal_head(jh);
if (buffer_jbd(bh))
goto busy;
/*
 * If we free a metadata buffer whose write-out has failed, the jbd2
 * checkpoint procedure will not detect the failure, which may lead to
 * filesystem inconsistency after the journal tail is cleaned up.
 */
if (buffer_write_io_error(bh)) {
pr_err("JBD2: Error while async write back metadata bh %llu.",
(unsigned long long)bh->b_blocknr);
has_write_io_error = true;
}
} while ((bh = bh->b_this_page) != head);
ret = try_to_free_buffers(page);
busy:
if (has_write_io_error)
jbd2_journal_abort(journal, -EIO);
return ret;
}
......@@ -2572,6 +2586,13 @@ bool __jbd2_journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh);
__jbd2_journal_temp_unlink_buffer(jh);
/*
* b_transaction must be set, otherwise the new b_transaction won't
* be holding jh reference
*/
J_ASSERT_JH(jh, jh->b_transaction != NULL);
/*
* We set b_transaction here because b_next_transaction will inherit
* our jh reference and thus __jbd2_journal_file_buffer() must not
......
......@@ -1381,7 +1381,7 @@ extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *);
extern int jbd2_journal_forget (handle_t *, struct buffer_head *);
extern int jbd2_journal_invalidatepage(journal_t *,
struct page *, unsigned int, unsigned int);
extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
extern int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page);
extern int jbd2_journal_stop(handle_t *);
extern int jbd2_journal_flush (journal_t *);
extern void jbd2_journal_lock_updates (journal_t *);
......
......@@ -746,24 +746,29 @@ TRACE_EVENT(ext4_mb_release_group_pa,
);
TRACE_EVENT(ext4_discard_preallocations,
TP_PROTO(struct inode *inode),
TP_PROTO(struct inode *inode, unsigned int len, unsigned int needed),
TP_ARGS(inode),
TP_ARGS(inode, len, needed),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( unsigned int, len )
__field( unsigned int, needed )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->len = len;
__entry->needed = needed;
),
TP_printk("dev %d,%d ino %lu",
TP_printk("dev %d,%d ino %lu len: %u needed %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino)
(unsigned long) __entry->ino, __entry->len,
__entry->needed)
);
TRACE_EVENT(ext4_mb_discard_preallocations,
......@@ -1312,18 +1317,34 @@ DEFINE_EVENT(ext4__bitmap_load, ext4_mb_buddy_bitmap_load,
TP_ARGS(sb, group)
);
DEFINE_EVENT(ext4__bitmap_load, ext4_read_block_bitmap_load,
DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap,
TP_PROTO(struct super_block *sb, unsigned long group),
TP_ARGS(sb, group)
);
DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap,
TRACE_EVENT(ext4_read_block_bitmap_load,
TP_PROTO(struct super_block *sb, unsigned long group, bool prefetch),
TP_PROTO(struct super_block *sb, unsigned long group),
TP_ARGS(sb, group, prefetch),
TP_ARGS(sb, group)
TP_STRUCT__entry(
__field( dev_t, dev )
__field( __u32, group )
__field( bool, prefetch )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->group = group;
__entry->prefetch = prefetch;
),
TP_printk("dev %d,%d group %u prefetch %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->group, __entry->prefetch)
);
TRACE_EVENT(ext4_direct_IO_enter,
......@@ -2726,6 +2747,50 @@ TRACE_EVENT(ext4_error,
__entry->function, __entry->line)
);
/*
 * Tracepoint ext4_prefetch_bitmaps.
 *
 * Arguments: the super block @sb, the group numbers @group and @next, and
 * the counter @prefetch_ios.
 *
 * Recorded fields: dev (taken from sb->s_dev), group, next, and ios (the
 * value of @prefetch_ios); the three numeric fields are stored as __u32.
 *
 * Output format: "dev MAJOR,MINOR group G next N ios I".
 */
TRACE_EVENT(ext4_prefetch_bitmaps,
TP_PROTO(struct super_block *sb, ext4_group_t group,
ext4_group_t next, unsigned int prefetch_ios),
TP_ARGS(sb, group, next, prefetch_ios),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( __u32, group )
__field( __u32, next )
__field( __u32, ios )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->group = group;
__entry->next = next;
__entry->ios = prefetch_ios;
),
TP_printk("dev %d,%d group %u next %u ios %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->group, __entry->next, __entry->ios)
);
/*
 * Tracepoint ext4_lazy_itable_init.
 *
 * Arguments: the super block @sb and the group number @group.
 *
 * Recorded fields: dev (taken from sb->s_dev) and group (stored as __u32).
 *
 * Output format: "dev MAJOR,MINOR group G".
 */
TRACE_EVENT(ext4_lazy_itable_init,
TP_PROTO(struct super_block *sb, ext4_group_t group),
TP_ARGS(sb, group),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( __u32, group )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->group = group;
),
TP_printk("dev %d,%d group %u",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->group)
);
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment