Commit 056f8c43 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Lots of cleanups and bug fixes this cycle, primarily in the block
  allocation, extent management, fast commit, and journalling"

* tag 'ext4_for_linus-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (93 commits)
  ext4: convert EXT4_B2C(sbi->s_stripe) users to EXT4_NUM_B2C
  ext4: check stripe size compatibility on remount as well
  ext4: fix i_data_sem unlock order in ext4_ind_migrate()
  ext4: remove the special buffer dirty handling in do_journal_get_write_access
  ext4: fix a potential assertion failure due to improperly dirtied buffer
  ext4: hoist ext4_block_write_begin and replace the __block_write_begin
  ext4: persist the new uptodate buffers in ext4_journalled_zero_new_buffers
  ext4: dax: keep orphan list before truncate overflow allocated blocks
  ext4: fix error message when rejecting the default hash
  ext4: save unnecessary indentation in ext4_ext_create_new_leaf()
  ext4: make some fast commit functions reuse extents path
  ext4: refactor ext4_swap_extents() to reuse extents path
  ext4: get rid of ppath in convert_initialized_extent()
  ext4: get rid of ppath in ext4_ext_handle_unwritten_extents()
  ext4: get rid of ppath in ext4_ext_convert_to_initialized()
  ext4: get rid of ppath in ext4_convert_unwritten_extents_endio()
  ext4: get rid of ppath in ext4_split_convert_extents()
  ext4: get rid of ppath in ext4_split_extent()
  ext4: get rid of ppath in ext4_force_split_extent_at()
  ext4: get rid of ppath in ext4_split_extent_at()
  ...
parents 171754c3 ff2beee2
......@@ -212,16 +212,6 @@ When mounting an ext4 filesystem, the following option are accepted:
that ext4's inode table readahead algorithm will pre-read into the
buffer cache. The default value is 32 blocks.
nouser_xattr
Disables Extended User Attributes. See the attr(5) manual page for
more information about extended attributes.
noacl
This option disables POSIX Access Control List support. If ACL support
is enabled in the kernel configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL
is enabled by default on mount. See the acl(5) manual page for more
information about acl.
bsddf (*)
Make 'df' act like BSD.
......
......@@ -18,15 +18,17 @@ unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
int ext4_inode_bitmap_csum_verify(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz)
struct buffer_head *bh)
{
__u32 hi;
__u32 provided, calculated;
struct ext4_sb_info *sbi = EXT4_SB(sb);
int sz;
if (!ext4_has_metadata_csum(sb))
return 1;
sz = EXT4_INODES_PER_GROUP(sb) >> 3;
provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
calculated = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END) {
......@@ -40,14 +42,16 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb,
void ext4_inode_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz)
struct buffer_head *bh)
{
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(sb);
int sz;
if (!ext4_has_metadata_csum(sb))
return;
sz = EXT4_INODES_PER_GROUP(sb) >> 3;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
gdp->bg_inode_bitmap_csum_lo = cpu_to_le16(csum & 0xFFFF);
if (sbi->s_desc_size >= EXT4_BG_INODE_BITMAP_CSUM_HI_END)
......
......@@ -280,12 +280,20 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct fscrypt_str de_name =
FSTR_INIT(de->name,
de->name_len);
u32 hash;
u32 minor_hash;
if (IS_CASEFOLDED(inode)) {
hash = EXT4_DIRENT_HASH(de);
minor_hash = EXT4_DIRENT_MINOR_HASH(de);
} else {
hash = 0;
minor_hash = 0;
}
/* Directory is encrypted */
err = fscrypt_fname_disk_to_usr(inode,
EXT4_DIRENT_HASH(de),
EXT4_DIRENT_MINOR_HASH(de),
&de_name, &fstr);
hash, minor_hash, &de_name, &fstr);
de_name = fstr;
fstr.len = save_len;
if (err)
......
......@@ -1058,6 +1058,7 @@ struct ext4_inode_info {
/* Number of ongoing updates on this inode */
atomic_t i_fc_updates;
atomic_t i_unwritten; /* Nr. of inflight conversions pending */
/* Fast commit wait queue for this inode */
wait_queue_head_t i_fc_wait;
......@@ -1106,6 +1107,10 @@ struct ext4_inode_info {
/* mballoc */
atomic_t i_prealloc_active;
/* allocation reservation info for delalloc */
/* In case of bigalloc, this refer to clusters rather than blocks */
unsigned int i_reserved_data_blocks;
struct rb_root i_prealloc_node;
rwlock_t i_prealloc_lock;
......@@ -1122,10 +1127,6 @@ struct ext4_inode_info {
/* ialloc */
ext4_group_t i_last_alloc_group;
/* allocation reservation info for delalloc */
/* In case of bigalloc, this refer to clusters rather than blocks */
unsigned int i_reserved_data_blocks;
/* pending cluster reservations for bigalloc file systems */
struct ext4_pending_tree i_pending_tree;
......@@ -1149,7 +1150,6 @@ struct ext4_inode_info {
*/
struct list_head i_rsv_conversion_list;
struct work_struct i_rsv_conversion_work;
atomic_t i_unwritten; /* Nr. of inflight conversions pending */
spinlock_t i_block_reservation_lock;
......@@ -2338,9 +2338,9 @@ struct ext4_dir_entry_2 {
((struct ext4_dir_entry_hash *) \
(((void *)(entry)) + \
((8 + (entry)->name_len + EXT4_DIR_ROUND) & ~EXT4_DIR_ROUND)))
#define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(de)->hash)
#define EXT4_DIRENT_HASH(entry) le32_to_cpu(EXT4_DIRENT_HASHES(entry)->hash)
#define EXT4_DIRENT_MINOR_HASH(entry) \
le32_to_cpu(EXT4_DIRENT_HASHES(de)->minor_hash)
le32_to_cpu(EXT4_DIRENT_HASHES(entry)->minor_hash)
static inline bool ext4_hash_in_dirent(const struct inode *inode)
{
......@@ -2462,6 +2462,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
#define DX_HASH_HALF_MD4_UNSIGNED 4
#define DX_HASH_TEA_UNSIGNED 5
#define DX_HASH_SIPHASH 6
#define DX_HASH_LAST DX_HASH_SIPHASH
static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc,
const void *address, unsigned int length)
......@@ -2695,10 +2696,10 @@ struct mmpd_data {
extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
void ext4_inode_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz);
struct buffer_head *bh);
int ext4_inode_bitmap_csum_verify(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh, int sz);
struct buffer_head *bh);
void ext4_block_bitmap_csum_set(struct super_block *sb,
struct ext4_group_desc *gdp,
struct buffer_head *bh);
......@@ -3712,11 +3713,12 @@ extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
int num,
struct ext4_ext_path *path);
extern int ext4_ext_insert_extent(handle_t *, struct inode *,
struct ext4_ext_path **,
struct ext4_extent *, int);
extern struct ext4_ext_path *ext4_ext_insert_extent(
handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *newext, int gb_flags);
extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path **,
struct ext4_ext_path *,
int flags);
extern void ext4_free_ext_path(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
......@@ -3853,6 +3855,9 @@ static inline int ext4_buffer_uptodate(struct buffer_head *bh)
return buffer_uptodate(bh);
}
extern int ext4_block_write_begin(handle_t *handle, struct folio *folio,
loff_t pos, unsigned len,
get_block_t *get_block);
#endif /* __KERNEL__ */
#define EFSBADCRC EBADMSG /* Bad CRC detected */
......
This diff is collapsed.
This diff is collapsed.
......@@ -42,6 +42,10 @@ enum {
#define ES_SHIFT (sizeof(ext4_fsblk_t)*8 - ES_FLAGS)
#define ES_MASK (~((ext4_fsblk_t)0) << ES_SHIFT)
/*
* Besides EXTENT_STATUS_REFERENCED, all these extent type masks
* are exclusive, only one type can be set at a time.
*/
#define EXTENT_STATUS_WRITTEN (1 << ES_WRITTEN_B)
#define EXTENT_STATUS_UNWRITTEN (1 << ES_UNWRITTEN_B)
#define EXTENT_STATUS_DELAYED (1 << ES_DELAYED_B)
......@@ -51,7 +55,9 @@ enum {
#define ES_TYPE_MASK ((ext4_fsblk_t)(EXTENT_STATUS_WRITTEN | \
EXTENT_STATUS_UNWRITTEN | \
EXTENT_STATUS_DELAYED | \
EXTENT_STATUS_HOLE) << ES_SHIFT)
EXTENT_STATUS_HOLE))
#define ES_TYPE_VALID(type) ((type) && !((type) & ((type) - 1)))
struct ext4_sb_info;
struct ext4_extent;
......@@ -129,7 +135,7 @@ extern void ext4_es_init_tree(struct ext4_es_tree *tree);
extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned int status);
unsigned int status, int flags);
extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned int status);
......@@ -156,7 +162,7 @@ static inline unsigned int ext4_es_status(struct extent_status *es)
static inline unsigned int ext4_es_type(struct extent_status *es)
{
return (es->es_pblk & ES_TYPE_MASK) >> ES_SHIFT;
return (es->es_pblk >> ES_SHIFT) & ES_TYPE_MASK;
}
static inline int ext4_es_is_written(struct extent_status *es)
......@@ -184,11 +190,6 @@ static inline int ext4_es_is_mapped(struct extent_status *es)
return (ext4_es_is_written(es) || ext4_es_is_unwritten(es));
}
static inline int ext4_es_is_delonly(struct extent_status *es)
{
return (ext4_es_is_delayed(es) && !ext4_es_is_unwritten(es));
}
static inline void ext4_es_set_referenced(struct extent_status *es)
{
es->es_pblk |= ((ext4_fsblk_t)EXTENT_STATUS_REFERENCED) << ES_SHIFT;
......@@ -224,17 +225,12 @@ static inline void ext4_es_store_pblock(struct extent_status *es,
es->es_pblk = block;
}
static inline void ext4_es_store_status(struct extent_status *es,
unsigned int status)
{
es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
(es->es_pblk & ~ES_MASK);
}
static inline void ext4_es_store_pblock_status(struct extent_status *es,
ext4_fsblk_t pb,
unsigned int status)
{
WARN_ON_ONCE(!ES_TYPE_VALID(status & ES_TYPE_MASK));
es->es_pblk = (((ext4_fsblk_t)status << ES_SHIFT) & ES_MASK) |
(pb & ~ES_MASK);
}
......@@ -252,8 +248,6 @@ extern bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk);
extern void ext4_es_insert_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, bool lclu_allocated,
bool end_allocated);
extern unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len);
extern void ext4_clear_inode_es(struct inode *inode);
#endif /* _EXT4_EXTENTS_STATUS_H */
......@@ -339,22 +339,29 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
tid_t tid;
bool has_transaction = true;
bool is_ineligible;
if (ext4_fc_disabled(sb))
return;
ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
if (handle && !IS_ERR(handle))
tid = handle->h_transaction->t_tid;
else {
read_lock(&sbi->s_journal->j_state_lock);
tid = sbi->s_journal->j_running_transaction ?
sbi->s_journal->j_running_transaction->t_tid : 0;
if (sbi->s_journal->j_running_transaction)
tid = sbi->s_journal->j_running_transaction->t_tid;
else
has_transaction = false;
read_unlock(&sbi->s_journal->j_state_lock);
}
spin_lock(&sbi->s_fc_lock);
if (tid_gt(tid, sbi->s_fc_ineligible_tid))
is_ineligible = ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
if (has_transaction &&
(!is_ineligible ||
(is_ineligible && tid_gt(tid, sbi->s_fc_ineligible_tid))))
sbi->s_fc_ineligible_tid = tid;
ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
spin_unlock(&sbi->s_fc_lock);
WARN_ON(reason >= EXT4_FC_REASON_MAX);
sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
......@@ -1288,8 +1295,21 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
list_del_init(&iter->i_fc_list);
ext4_clear_inode_state(&iter->vfs_inode,
EXT4_STATE_FC_COMMITTING);
if (tid_geq(tid, iter->i_sync_tid))
if (tid_geq(tid, iter->i_sync_tid)) {
ext4_fc_reset_inode(&iter->vfs_inode);
} else if (full) {
/*
* We are called after a full commit, inode has been
* modified while the commit was running. Re-enqueue
* the inode into STAGING, which will then be splice
* back into MAIN. This cannot happen during
* fastcommit because the journal is locked all the
* time in that case (and tid doesn't increase so
* tid check above isn't reliable).
*/
list_add_tail(&EXT4_I(&iter->vfs_inode)->i_fc_list,
&sbi->s_fc_q[FC_Q_STAGING]);
}
/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
smp_mb();
#if (BITS_PER_LONG < 64)
......@@ -1772,7 +1792,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
if (ret == 0) {
/* Range is not mapped */
path = ext4_find_extent(inode, cur, NULL, 0);
path = ext4_find_extent(inode, cur, path, 0);
if (IS_ERR(path))
goto out;
memset(&newex, 0, sizeof(newex));
......@@ -1783,11 +1803,10 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
if (ext4_ext_is_unwritten(ex))
ext4_ext_mark_unwritten(&newex);
down_write(&EXT4_I(inode)->i_data_sem);
ret = ext4_ext_insert_extent(
NULL, inode, &path, &newex, 0);
path = ext4_ext_insert_extent(NULL, inode,
path, &newex, 0);
up_write((&EXT4_I(inode)->i_data_sem));
ext4_free_ext_path(path);
if (ret)
if (IS_ERR(path))
goto out;
goto next;
}
......@@ -1836,6 +1855,7 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
sb->s_blocksize_bits);
out:
ext4_free_ext_path(path);
iput(inode);
return 0;
}
......@@ -1936,12 +1956,13 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
break;
if (ret > 0) {
path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
path = ext4_find_extent(inode, map.m_lblk, path, 0);
if (!IS_ERR(path)) {
for (j = 0; j < path->p_depth; j++)
ext4_mb_mark_bb(inode->i_sb,
path[j].p_block, 1, true);
ext4_free_ext_path(path);
} else {
path = NULL;
}
cur += ret;
ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
......@@ -1952,6 +1973,8 @@ static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
}
iput(inode);
}
ext4_free_ext_path(path);
}
/*
......
......@@ -306,7 +306,7 @@ static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
}
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
ssize_t count)
ssize_t written, ssize_t count)
{
handle_t *handle;
......@@ -315,7 +315,7 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
if (IS_ERR(handle))
return PTR_ERR(handle);
if (ext4_update_inode_size(inode, offset + count)) {
if (ext4_update_inode_size(inode, offset + written)) {
int ret = ext4_mark_inode_dirty(handle, inode);
if (unlikely(ret)) {
ext4_journal_stop(handle);
......@@ -323,21 +323,21 @@ static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
}
}
if (inode->i_nlink)
if ((written == count) && inode->i_nlink)
ext4_orphan_del(handle, inode);
ext4_journal_stop(handle);
return count;
return written;
}
/*
* Clean up the inode after DIO or DAX extending write has completed and the
* inode size has been updated using ext4_handle_inode_extension().
*/
static void ext4_inode_extension_cleanup(struct inode *inode, ssize_t count)
static void ext4_inode_extension_cleanup(struct inode *inode, bool need_trunc)
{
lockdep_assert_held_write(&inode->i_rwsem);
if (count < 0) {
if (need_trunc) {
ext4_truncate_failed_write(inode);
/*
* If the truncate operation failed early, then the inode may
......@@ -393,7 +393,7 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
if (pos + size <= READ_ONCE(EXT4_I(inode)->i_disksize) &&
pos + size <= i_size_read(inode))
return size;
return ext4_handle_inode_extension(inode, pos, size);
return ext4_handle_inode_extension(inode, pos, size, size);
}
static const struct iomap_dio_ops ext4_dio_write_ops = {
......@@ -586,7 +586,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
* writeback of delalloc blocks.
*/
WARN_ON_ONCE(ret == -EIOCBQUEUED);
ext4_inode_extension_cleanup(inode, ret);
ext4_inode_extension_cleanup(inode, ret < 0);
}
out:
......@@ -669,8 +669,8 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
if (extend) {
ret = ext4_handle_inode_extension(inode, offset, ret);
ext4_inode_extension_cleanup(inode, ret);
ret = ext4_handle_inode_extension(inode, offset, ret, count);
ext4_inode_extension_cleanup(inode, ret < (ssize_t)count);
}
out:
inode_unlock(inode);
......
......@@ -87,10 +87,10 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
return 0;
grp = ext4_get_group_info(sb, block_group);
if (buffer_verified(bh))
return 0;
grp = ext4_get_group_info(sb, block_group);
if (!grp || EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
return -EFSCORRUPTED;
......@@ -98,8 +98,7 @@ static int ext4_validate_inode_bitmap(struct super_block *sb,
if (buffer_verified(bh))
goto verified;
blk = ext4_inode_bitmap(sb, desc);
if (!ext4_inode_bitmap_csum_verify(sb, desc, bh,
EXT4_INODES_PER_GROUP(sb) / 8) ||
if (!ext4_inode_bitmap_csum_verify(sb, desc, bh) ||
ext4_simulate_fail(sb, EXT4_SIM_IBITMAP_CRC)) {
ext4_unlock_group(sb, block_group);
ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
......@@ -327,8 +326,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
if (percpu_counter_initialized(&sbi->s_dirs_counter))
percpu_counter_dec(&sbi->s_dirs_counter);
}
ext4_inode_bitmap_csum_set(sb, gdp, bitmap_bh,
EXT4_INODES_PER_GROUP(sb) / 8);
ext4_inode_bitmap_csum_set(sb, gdp, bitmap_bh);
ext4_group_desc_csum_set(sb, block_group, gdp);
ext4_unlock_group(sb, block_group);
......@@ -514,6 +512,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
if (min_inodes < 1)
min_inodes = 1;
min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;
if (min_clusters < 0)
min_clusters = 0;
/*
* Start looking in the flex group where we last allocated an
......@@ -755,10 +755,10 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
struct ext4_group_desc *gdp;
ext4_group_t group;
int bit;
int err = -EFSCORRUPTED;
int err;
if (ino < EXT4_FIRST_INO(sb) || ino > max_ino)
goto out;
return -EFSCORRUPTED;
group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
......@@ -772,7 +772,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
}
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
if (!gdp || !group_desc_bh) {
if (!gdp) {
err = -EINVAL;
goto out;
}
......@@ -851,8 +851,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1);
if (ext4_has_group_desc_csum(sb)) {
ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh,
EXT4_INODES_PER_GROUP(sb) / 8);
ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh);
ext4_group_desc_csum_set(sb, group, gdp);
}
......@@ -860,6 +859,7 @@ int ext4_mark_inode_used(struct super_block *sb, int ino)
err = ext4_handle_dirty_metadata(NULL, NULL, group_desc_bh);
sync_dirty_buffer(group_desc_bh);
out:
brelse(inode_bitmap_bh);
return err;
}
......@@ -1053,14 +1053,14 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
brelse(inode_bitmap_bh);
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
/* Skip groups with suspicious inode tables */
if (((!(sbi->s_mount_state & EXT4_FC_REPLAY))
&& EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) ||
IS_ERR(inode_bitmap_bh)) {
if (IS_ERR(inode_bitmap_bh)) {
inode_bitmap_bh = NULL;
goto next_group;
}
if (!(sbi->s_mount_state & EXT4_FC_REPLAY) &&
EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
goto next_group;
repeat_in_this_group:
ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
if (!ret2)
goto next_group;
......@@ -1110,8 +1110,6 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
if (!ret2)
goto got; /* we grabbed the inode! */
if (ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
next_group:
if (++group == ngroups)
group = 0;
......@@ -1224,8 +1222,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
}
}
if (ext4_has_group_desc_csum(sb)) {
ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh,
EXT4_INODES_PER_GROUP(sb) / 8);
ext4_inode_bitmap_csum_set(sb, gdp, inode_bitmap_bh);
ext4_group_desc_csum_set(sb, group, gdp);
}
ext4_unlock_group(sb, group);
......
......@@ -652,13 +652,6 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
ext4_update_inode_fsync_trans(handle, inode, 1);
count = ar.len;
/*
* Update reserved blocks/metadata blocks after successful block
* allocation which had been deferred till now.
*/
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
ext4_da_update_reserve_space(inode, count, 1);
got_it:
map->m_flags |= EXT4_MAP_MAPPED;
map->m_pblk = le32_to_cpu(chain[depth-1].key);
......
......@@ -601,10 +601,11 @@ static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
goto out;
if (ext4_should_dioread_nolock(inode)) {
ret = __block_write_begin(folio, from, to,
ext4_get_block_unwritten);
ret = ext4_block_write_begin(handle, folio, from, to,
ext4_get_block_unwritten);
} else
ret = __block_write_begin(folio, from, to, ext4_get_block);
ret = ext4_block_write_begin(handle, folio, from, to,
ext4_get_block);
if (!ret && ext4_should_journal_data(inode)) {
ret = ext4_walk_page_buffers(handle, inode,
......@@ -856,8 +857,8 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
goto out;
}
ret = __block_write_begin(folio, 0, inline_size,
ext4_da_get_block_prep);
ret = ext4_block_write_begin(NULL, folio, 0, inline_size,
ext4_da_get_block_prep);
if (ret) {
up_read(&EXT4_I(inode)->xattr_sem);
folio_unlock(folio);
......@@ -1665,24 +1666,36 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
struct ext4_dir_entry_2 **res_dir,
int *has_inline_data)
{
struct ext4_xattr_ibody_find is = {
.s = { .not_found = -ENODATA, },
};
struct ext4_xattr_info i = {
.name_index = EXT4_XATTR_INDEX_SYSTEM,
.name = EXT4_XATTR_SYSTEM_DATA,
};
int ret;
struct ext4_iloc iloc;
void *inline_start;
int inline_size;
if (ext4_get_inode_loc(dir, &iloc))
return NULL;
ret = ext4_get_inode_loc(dir, &is.iloc);
if (ret)
return ERR_PTR(ret);
down_read(&EXT4_I(dir)->xattr_sem);
ret = ext4_xattr_ibody_find(dir, &i, &is);
if (ret)
goto out;
if (!ext4_has_inline_data(dir)) {
*has_inline_data = 0;
goto out;
}
inline_start = (void *)ext4_raw_inode(&iloc)->i_block +
inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block +
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size,
dir, fname, 0, res_dir);
if (ret == 1)
goto out_find;
......@@ -1692,20 +1705,23 @@ struct buffer_head *ext4_find_inline_entry(struct inode *dir,
if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE)
goto out;
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc);
inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE;
ret = ext4_search_dir(iloc.bh, inline_start, inline_size,
ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size,
dir, fname, 0, res_dir);
if (ret == 1)
goto out_find;
out:
brelse(iloc.bh);
iloc.bh = NULL;
brelse(is.iloc.bh);
if (ret < 0)
is.iloc.bh = ERR_PTR(ret);
else
is.iloc.bh = NULL;
out_find:
up_read(&EXT4_I(dir)->xattr_sem);
return iloc.bh;
return is.iloc.bh;
}
int ext4_delete_inline_entry(handle_t *handle,
......
This diff is collapsed.
......@@ -2356,7 +2356,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
ex.fe_logical = 0xDEADFA11; /* debug value */
if (max >= ac->ac_g_ex.fe_len &&
ac->ac_g_ex.fe_len == EXT4_B2C(sbi, sbi->s_stripe)) {
ac->ac_g_ex.fe_len == EXT4_NUM_B2C(sbi, sbi->s_stripe)) {
ext4_fsblk_t start;
start = ext4_grp_offs_to_block(ac->ac_sb, &ex);
......@@ -2553,7 +2553,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
do_div(a, sbi->s_stripe);
i = (a * sbi->s_stripe) - first_group_block;
stripe = EXT4_B2C(sbi, sbi->s_stripe);
stripe = EXT4_NUM_B2C(sbi, sbi->s_stripe);
i = EXT4_B2C(sbi, i);
while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
if (!mb_test_bit(i, bitmap)) {
......@@ -2928,9 +2928,11 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
if (cr == CR_POWER2_ALIGNED)
ext4_mb_simple_scan_group(ac, &e4b);
else {
bool is_stripe_aligned = sbi->s_stripe &&
bool is_stripe_aligned =
(sbi->s_stripe >=
sbi->s_cluster_ratio) &&
!(ac->ac_g_ex.fe_len %
EXT4_B2C(sbi, sbi->s_stripe));
EXT4_NUM_B2C(sbi, sbi->s_stripe));
if ((cr == CR_GOAL_LEN_FAST ||
cr == CR_BEST_AVAIL_LEN) &&
......@@ -3075,8 +3077,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
seq_puts(seq, " ]");
if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info))
seq_puts(seq, " Block bitmap corrupted!");
seq_puts(seq, "\n");
seq_putc(seq, '\n');
return 0;
}
......@@ -3707,7 +3708,7 @@ int ext4_mb_init(struct super_block *sb)
*/
if (sbi->s_stripe > 1) {
sbi->s_mb_group_prealloc = roundup(
sbi->s_mb_group_prealloc, EXT4_B2C(sbi, sbi->s_stripe));
sbi->s_mb_group_prealloc, EXT4_NUM_B2C(sbi, sbi->s_stripe));
}
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
......@@ -3887,11 +3888,8 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
/*
* Clear the trimmed flag for the group so that the next
* ext4_trim_fs can trim it.
* If the volume is mounted with -o discard, online discard
* is supported and the free blocks will be trimmed online.
*/
if (!test_opt(sb, DISCARD))
EXT4_MB_GRP_CLEAR_TRIMMED(db);
EXT4_MB_GRP_CLEAR_TRIMMED(db);
if (!db->bb_free_root.rb_node) {
/* No more items in the per group rb tree
......@@ -6515,8 +6513,9 @@ static void ext4_mb_clear_bb(handle_t *handle, struct inode *inode,
" group:%u block:%d count:%lu failed"
" with %d", block_group, bit, count,
err);
} else
EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
}
EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
ext4_lock_group(sb, block_group);
mb_free_blocks(inode, &e4b, bit, count_clusters);
......
......@@ -37,7 +37,6 @@ static int finish_range(handle_t *handle, struct inode *inode,
path = ext4_find_extent(inode, lb->first_block, NULL, 0);
if (IS_ERR(path)) {
retval = PTR_ERR(path);
path = NULL;
goto err_out;
}
......@@ -53,7 +52,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
retval = ext4_datasem_ensure_credits(handle, inode, needed, needed, 0);
if (retval < 0)
goto err_out;
retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
path = ext4_ext_insert_extent(handle, inode, path, &newext, 0);
if (IS_ERR(path))
retval = PTR_ERR(path);
err_out:
up_write((&EXT4_I(inode)->i_data_sem));
ext4_free_ext_path(path);
......@@ -663,8 +664,8 @@ int ext4_ind_migrate(struct inode *inode)
if (unlikely(ret2 && !ret))
ret = ret2;
errout:
ext4_journal_stop(handle);
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle);
out_unlock:
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
return ret;
......
......@@ -17,27 +17,23 @@
* get_ext_path() - Find an extent path for designated logical block number.
* @inode: inode to be searched
* @lblock: logical block number to find an extent path
* @ppath: pointer to an extent path pointer (for output)
* @path: pointer to an extent path
*
* ext4_find_extent wrapper. Return 0 on success, or a negative error value
* on failure.
* ext4_find_extent wrapper. Return an extent path pointer on success,
* or an error pointer on failure.
*/
static inline int
static inline struct ext4_ext_path *
get_ext_path(struct inode *inode, ext4_lblk_t lblock,
struct ext4_ext_path **ppath)
struct ext4_ext_path *path)
{
struct ext4_ext_path *path;
path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
path = ext4_find_extent(inode, lblock, path, EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
return path;
if (path[ext_depth(inode)].p_ext == NULL) {
ext4_free_ext_path(path);
*ppath = NULL;
return -ENODATA;
return ERR_PTR(-ENODATA);
}
*ppath = path;
return 0;
return path;
}
/**
......@@ -95,9 +91,11 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
int ret = 0;
ext4_lblk_t last = from + count;
while (from < last) {
*err = get_ext_path(inode, from, &path);
if (*err)
goto out;
path = get_ext_path(inode, from, path);
if (IS_ERR(path)) {
*err = PTR_ERR(path);
return ret;
}
ext = path[ext_depth(inode)].p_ext;
if (unwritten != ext4_ext_is_unwritten(ext))
goto out;
......@@ -166,15 +164,16 @@ mext_folio_double_lock(struct inode *inode1, struct inode *inode2,
return 0;
}
/* Force page buffers uptodate w/o dropping page's lock */
static int
mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
/* Force folio buffers uptodate w/o dropping folio's lock */
static int mext_page_mkuptodate(struct folio *folio, size_t from, size_t to)
{
struct inode *inode = folio->mapping->host;
sector_t block;
struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
struct buffer_head *bh, *head;
unsigned int blocksize, block_start, block_end;
int i, err, nr = 0, partial = 0;
int nr = 0;
bool partial = false;
BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio));
......@@ -186,19 +185,21 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
if (!head)
head = create_empty_buffers(folio, blocksize, 0);
block = (sector_t)folio->index << (PAGE_SHIFT - inode->i_blkbits);
for (bh = head, block_start = 0; bh != head || !block_start;
block++, block_start = block_end, bh = bh->b_this_page) {
block = folio_pos(folio) >> inode->i_blkbits;
block_end = 0;
bh = head;
do {
block_start = block_end;
block_end = block_start + blocksize;
if (block_end <= from || block_start >= to) {
if (!buffer_uptodate(bh))
partial = 1;
partial = true;
continue;
}
if (buffer_uptodate(bh))
continue;
if (!buffer_mapped(bh)) {
err = ext4_get_block(inode, block, bh, 0);
int err = ext4_get_block(inode, block, bh, 0);
if (err)
return err;
if (!buffer_mapped(bh)) {
......@@ -207,21 +208,30 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
continue;
}
}
BUG_ON(nr >= MAX_BUF_PER_PAGE);
arr[nr++] = bh;
}
lock_buffer(bh);
if (buffer_uptodate(bh)) {
unlock_buffer(bh);
continue;
}
ext4_read_bh_nowait(bh, 0, NULL);
nr++;
} while (block++, (bh = bh->b_this_page) != head);
/* No io required */
if (!nr)
goto out;
for (i = 0; i < nr; i++) {
bh = arr[i];
if (!bh_uptodate_or_lock(bh)) {
err = ext4_read_bh(bh, 0, NULL);
if (err)
return err;
}
}
bh = head;
do {
if (bh_offset(bh) + blocksize <= from)
continue;
if (bh_offset(bh) > to)
break;
wait_on_buffer(bh);
if (buffer_uptodate(bh))
continue;
return -EIO;
} while ((bh = bh->b_this_page) != head);
out:
if (!partial)
folio_mark_uptodate(folio);
......@@ -624,9 +634,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
int offset_in_page;
int unwritten, cur_len;
ret = get_ext_path(orig_inode, o_start, &path);
if (ret)
path = get_ext_path(orig_inode, o_start, path);
if (IS_ERR(path)) {
ret = PTR_ERR(path);
goto out;
}
ex = path[path->p_depth].p_ext;
cur_blk = le32_to_cpu(ex->ee_block);
cur_len = ext4_ext_get_actual_len(ex);
......
......@@ -1482,7 +1482,7 @@ static bool ext4_match(struct inode *parent,
}
/*
* Returns 0 if not found, -1 on failure, and 1 on success
* Returns 0 if not found, -EFSCORRUPTED on failure, and 1 on success
*/
int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
struct inode *dir, struct ext4_filename *fname,
......@@ -1503,7 +1503,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
* a full check */
if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
buf_size, offset))
return -1;
return -EFSCORRUPTED;
*res_dir = de;
return 1;
}
......@@ -1511,7 +1511,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
de_len = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
if (de_len <= 0)
return -1;
return -EFSCORRUPTED;
offset += de_len;
de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
}
......@@ -1574,7 +1574,7 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
&has_inline_data);
if (inlined)
*inlined = has_inline_data;
if (has_inline_data)
if (has_inline_data || IS_ERR(ret))
goto cleanup_and_exit;
}
......@@ -1663,8 +1663,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
goto cleanup_and_exit;
} else {
brelse(bh);
if (i < 0)
if (i < 0) {
ret = ERR_PTR(i);
goto cleanup_and_exit;
}
}
next:
if (++block >= nblocks)
......@@ -1758,7 +1760,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
if (retval == 1)
goto success;
brelse(bh);
if (retval == -1) {
if (retval < 0) {
bh = ERR_PTR(ERR_BAD_DX_DIR);
goto errout;
}
......@@ -1999,7 +2001,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
split = count/2;
hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash;
continued = split > 0 ? hash2 == map[split - 1].hash : 0;
dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
(unsigned long)dx_get_block(frame->at),
hash2, split, count-split));
......
......@@ -221,7 +221,7 @@ int ext4_mpage_readpages(struct inode *inode,
sector_t block_in_file;
sector_t last_block;
sector_t last_block_in_file;
sector_t blocks[MAX_BUF_PER_PAGE];
sector_t first_block;
unsigned page_block;
struct block_device *bdev = inode->i_sb->s_bdev;
int length;
......@@ -263,6 +263,7 @@ int ext4_mpage_readpages(struct inode *inode,
unsigned map_offset = block_in_file - map.m_lblk;
unsigned last = map.m_len - map_offset;
first_block = map.m_pblk + map_offset;
for (relative_block = 0; ; relative_block++) {
if (relative_block == last) {
/* needed? */
......@@ -271,8 +272,6 @@ int ext4_mpage_readpages(struct inode *inode,
}
if (page_block == blocks_per_page)
break;
blocks[page_block] = map.m_pblk + map_offset +
relative_block;
page_block++;
block_in_file++;
}
......@@ -307,7 +306,9 @@ int ext4_mpage_readpages(struct inode *inode,
goto confused; /* hole -> non-hole */
/* Contiguous blocks? */
if (page_block && blocks[page_block-1] != map.m_pblk-1)
if (!page_block)
first_block = map.m_pblk;
else if (first_block + page_block != map.m_pblk)
goto confused;
for (relative_block = 0; ; relative_block++) {
if (relative_block == map.m_len) {
......@@ -316,7 +317,6 @@ int ext4_mpage_readpages(struct inode *inode,
break;
} else if (page_block == blocks_per_page)
break;
blocks[page_block] = map.m_pblk+relative_block;
page_block++;
block_in_file++;
}
......@@ -339,7 +339,7 @@ int ext4_mpage_readpages(struct inode *inode,
* This folio will go to BIO. Do we need to send this
* BIO off first?
*/
if (bio && (last_block_in_bio != blocks[0] - 1 ||
if (bio && (last_block_in_bio != first_block - 1 ||
!fscrypt_mergeable_bio(bio, inode, next_block))) {
submit_and_realloc:
submit_bio(bio);
......@@ -355,7 +355,7 @@ int ext4_mpage_readpages(struct inode *inode,
fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
GFP_KERNEL);
ext4_set_bio_post_read_ctx(bio, inode, folio->index);
bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
bio->bi_iter.bi_sector = first_block << (blkbits - 9);
bio->bi_end_io = mpage_end_io;
if (rac)
bio->bi_opf |= REQ_RAHEAD;
......@@ -371,7 +371,7 @@ int ext4_mpage_readpages(struct inode *inode,
submit_bio(bio);
bio = NULL;
} else
last_block_in_bio = blocks[blocks_per_page - 1];
last_block_in_bio = first_block + blocks_per_page - 1;
continue;
confused:
if (bio) {
......
......@@ -1319,8 +1319,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
if (!bh)
return -EIO;
ext4_inode_bitmap_csum_set(sb, gdp, bh,
EXT4_INODES_PER_GROUP(sb) / 8);
ext4_inode_bitmap_csum_set(sb, gdp, bh);
brelse(bh);
bh = ext4_get_bitmap(sb, group_data->block_bitmap);
......
......@@ -735,11 +735,12 @@ static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/*
* Make sure updated value of ->s_mount_flags will be visible before
* ->s_flags update
* EXT4_FLAGS_SHUTDOWN was set which stops all filesystem
* modifications. We don't set SB_RDONLY because that requires
* sb->s_umount semaphore and setting it without proper remount
* procedure is confusing code such as freeze_super() leading to
* deadlocks and other problems.
*/
smp_wmb();
sb->s_flags |= SB_RDONLY;
}
static void update_super_work(struct work_struct *work)
......@@ -3045,7 +3046,7 @@ int ext4_seq_options_show(struct seq_file *seq, void *offset)
seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
rc = _ext4_show_options(seq, sb, 1);
seq_puts(seq, "\n");
seq_putc(seq, '\n');
return rc;
}
......@@ -5087,16 +5088,27 @@ static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
return ret;
}
static void ext4_hash_info_init(struct super_block *sb)
static int ext4_hash_info_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
unsigned int i;
sbi->s_def_hash_version = es->s_def_hash_version;
if (sbi->s_def_hash_version > DX_HASH_LAST) {
ext4_msg(sb, KERN_ERR,
"Invalid default hash set in the superblock");
return -EINVAL;
} else if (sbi->s_def_hash_version == DX_HASH_SIPHASH) {
ext4_msg(sb, KERN_ERR,
"SIPHASH is not a valid default hash value");
return -EINVAL;
}
for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
if (ext4_has_feature_dir_index(sb)) {
i = le32_to_cpu(es->s_flags);
if (i & EXT2_FLAGS_UNSIGNED_HASH)
......@@ -5114,6 +5126,7 @@ static void ext4_hash_info_init(struct super_block *sb)
#endif
}
}
return 0;
}
static int ext4_block_group_meta_init(struct super_block *sb, int silent)
......@@ -5165,6 +5178,18 @@ static int ext4_block_group_meta_init(struct super_block *sb, int silent)
return 0;
}
/*
* It's hard to get stripe aligned blocks if stripe is not aligned with
* cluster, just disable stripe and alert user to simplify code and avoid
* stripe aligned allocation which will rarely succeed.
*/
static bool ext4_is_stripe_incompatible(struct super_block *sb, unsigned long stripe)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
return (stripe > 0 && sbi->s_cluster_ratio > 1 &&
stripe % sbi->s_cluster_ratio != 0);
}
static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
{
struct ext4_super_block *es = NULL;
......@@ -5249,7 +5274,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (err)
goto failed_mount;
ext4_hash_info_init(sb);
err = ext4_hash_info_init(sb);
if (err)
goto failed_mount;
err = ext4_handle_clustersize(sb);
if (err)
......@@ -5272,13 +5299,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
goto failed_mount3;
sbi->s_stripe = ext4_get_stripe_size(sbi);
/*
* It's hard to get stripe aligned blocks if stripe is not aligned with
* cluster, just disable stripe and alert user to simpfy code and avoid
* stripe aligned allocation which will rarely successes.
*/
if (sbi->s_stripe > 0 && sbi->s_cluster_ratio > 1 &&
sbi->s_stripe % sbi->s_cluster_ratio != 0) {
if (ext4_is_stripe_incompatible(sb, sbi->s_stripe)) {
ext4_msg(sb, KERN_WARNING,
"stripe (%lu) is not aligned with cluster size (%u), "
"stripe is disabled",
......@@ -5313,6 +5334,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
spin_lock_init(&sbi->s_bdev_wb_lock);
ext4_fast_commit_init(sb);
sb->s_root = NULL;
......@@ -5534,7 +5557,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
* Save the original bdev mapping's wb_err value which could be
* used to detect the metadata async write error.
*/
spin_lock_init(&sbi->s_bdev_wb_lock);
errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err,
&sbi->s_bdev_wb_err);
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
......@@ -5614,8 +5636,8 @@ failed_mount8: __maybe_unused
failed_mount3:
/* flush s_sb_upd_work before sbi destroy */
flush_work(&sbi->s_sb_upd_work);
del_timer_sync(&sbi->s_err_report);
ext4_stop_mmpd(sbi);
del_timer_sync(&sbi->s_err_report);
ext4_group_desc_free(sbi);
failed_mount:
if (sbi->s_chksum_driver)
......@@ -6441,6 +6463,15 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
}
if ((ctx->spec & EXT4_SPEC_s_stripe) &&
ext4_is_stripe_incompatible(sb, ctx->s_stripe)) {
ext4_msg(sb, KERN_WARNING,
"stripe (%lu) is not aligned with cluster size (%u), "
"stripe is disabled",
ctx->s_stripe, sbi->s_cluster_ratio);
ctx->s_stripe = 0;
}
/*
* Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
* two calls to ext4_should_dioread_nolock() to return inconsistent
......
......@@ -458,7 +458,7 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE);
ext4_xattr_inode_set_ref(inode, 1);
} else {
inode_lock(inode);
inode_lock_nested(inode, I_MUTEX_XATTR);
inode->i_flags |= S_NOQUOTA;
inode_unlock(inode);
}
......@@ -1039,7 +1039,7 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
s64 ref_count;
int ret;
inode_lock(ea_inode);
inode_lock_nested(ea_inode, I_MUTEX_XATTR);
ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
if (ret)
......@@ -2879,33 +2879,31 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
if (*ea_inode_array == NULL) {
/*
* Start with 15 inodes, so it fits into a power-of-two size.
* If *ea_inode_array is NULL, this is essentially offsetof()
*/
(*ea_inode_array) =
kmalloc(offsetof(struct ext4_xattr_inode_array,
inodes[EIA_MASK]),
GFP_NOFS);
(*ea_inode_array) = kmalloc(
struct_size(*ea_inode_array, inodes, EIA_MASK),
GFP_NOFS);
if (*ea_inode_array == NULL)
return -ENOMEM;
(*ea_inode_array)->count = 0;
} else if (((*ea_inode_array)->count & EIA_MASK) == EIA_MASK) {
/* expand the array once all 15 + n * 16 slots are full */
struct ext4_xattr_inode_array *new_array = NULL;
int count = (*ea_inode_array)->count;
/* if new_array is NULL, this is essentially offsetof() */
new_array = kmalloc(
offsetof(struct ext4_xattr_inode_array,
inodes[count + EIA_INCR]),
GFP_NOFS);
struct_size(*ea_inode_array, inodes,
(*ea_inode_array)->count + EIA_INCR),
GFP_NOFS);
if (new_array == NULL)
return -ENOMEM;
memcpy(new_array, *ea_inode_array,
offsetof(struct ext4_xattr_inode_array, inodes[count]));
struct_size(*ea_inode_array, inodes,
(*ea_inode_array)->count));
kfree(*ea_inode_array);
*ea_inode_array = new_array;
}
(*ea_inode_array)->inodes[(*ea_inode_array)->count++] = inode;
(*ea_inode_array)->count++;
(*ea_inode_array)->inodes[(*ea_inode_array)->count - 1] = inode;
return 0;
}
......@@ -3036,8 +3034,6 @@ void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
*
* Create a new entry in the extended attribute block cache, and insert
* it unless such an entry is already in the cache.
*
* Returns 0, or a negative error number on failure.
*/
static void
ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
......@@ -3065,8 +3061,7 @@ ext4_xattr_block_cache_insert(struct mb_cache *ea_block_cache,
*
* Compare two extended attribute blocks for equality.
*
* Returns 0 if the blocks are equal, 1 if they differ, and
* a negative error number on errors.
* Returns 0 if the blocks are equal, 1 if they differ.
*/
static int
ext4_xattr_cmp(struct ext4_xattr_header *header1,
......
......@@ -32,8 +32,7 @@ struct ext4_xattr_header {
__le32 h_refcount; /* reference count */
__le32 h_blocks; /* number of disk blocks used */
__le32 h_hash; /* hash value of all attributes */
__le32 h_checksum; /* crc32c(uuid+id+xattrblock) */
/* id = inum if refcount=1, blknum otherwise */
__le32 h_checksum; /* crc32c(uuid+blknum+xattrblock) */
__u32 h_reserved[3]; /* zero right now */
};
......@@ -130,8 +129,8 @@ struct ext4_xattr_ibody_find {
};
struct ext4_xattr_inode_array {
unsigned int count; /* # of used items in the array */
struct inode *inodes[];
unsigned int count;
struct inode *inodes[] __counted_by(count);
};
extern const struct xattr_handler ext4_xattr_user_handler;
......
......@@ -79,17 +79,23 @@ __releases(&journal->j_state_lock)
if (space_left < nblocks) {
int chkpt = journal->j_checkpoint_transactions != NULL;
tid_t tid = 0;
bool has_transaction = false;
if (journal->j_committing_transaction)
if (journal->j_committing_transaction) {
tid = journal->j_committing_transaction->t_tid;
has_transaction = true;
}
spin_unlock(&journal->j_list_lock);
write_unlock(&journal->j_state_lock);
if (chkpt) {
jbd2_log_do_checkpoint(journal);
} else if (jbd2_cleanup_journal_tail(journal) == 0) {
/* We were able to recover space; yay! */
} else if (jbd2_cleanup_journal_tail(journal) <= 0) {
/*
* We were able to recover space or the
* journal was aborted due to an error.
*/
;
} else if (tid) {
} else if (has_transaction) {
/*
* jbd2_journal_commit_transaction() may want
* to take the checkpoint_mutex if JBD2_FLUSHED
......@@ -407,6 +413,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
tid_t tid = 0;
unsigned long nr_freed = 0;
unsigned long freed;
bool first_set = false;
again:
spin_lock(&journal->j_list_lock);
......@@ -426,8 +433,10 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
else
transaction = journal->j_checkpoint_transactions;
if (!first_tid)
if (!first_set) {
first_tid = transaction->t_tid;
first_set = true;
}
last_transaction = journal->j_checkpoint_transactions->t_cpprev;
next_transaction = transaction;
last_tid = last_transaction->t_tid;
......@@ -457,7 +466,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
spin_unlock(&journal->j_list_lock);
cond_resched();
if (*nr_to_scan && next_tid)
if (*nr_to_scan && journal->j_shrink_transaction)
goto again;
out:
trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
......
......@@ -281,6 +281,16 @@ static void journal_kill_thread(journal_t *journal)
write_unlock(&journal->j_state_lock);
}
static inline bool jbd2_data_needs_escaping(char *data)
{
return *((__be32 *)data) == cpu_to_be32(JBD2_MAGIC_NUMBER);
}
static inline void jbd2_data_do_escape(char *data)
{
*((unsigned int *)data) = 0;
}
/*
* jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
*
......@@ -318,9 +328,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
struct buffer_head **bh_out,
sector_t blocknr)
{
int done_copy_out = 0;
int do_escape = 0;
char *mapped_data;
struct buffer_head *new_bh;
struct folio *new_folio;
unsigned int new_offset;
......@@ -349,37 +357,33 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
* we use that version of the data for the commit.
*/
if (jh_in->b_frozen_data) {
done_copy_out = 1;
new_folio = virt_to_folio(jh_in->b_frozen_data);
new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
do_escape = jbd2_data_needs_escaping(jh_in->b_frozen_data);
if (do_escape)
jbd2_data_do_escape(jh_in->b_frozen_data);
} else {
char *tmp;
char *mapped_data;
new_folio = bh_in->b_folio;
new_offset = offset_in_folio(new_folio, bh_in->b_data);
}
mapped_data = kmap_local_folio(new_folio, new_offset);
/*
* Fire data frozen trigger if data already wasn't frozen. Do this
* before checking for escaping, as the trigger may modify the magic
* offset. If a copy-out happens afterwards, it will have the correct
* data in the buffer.
*/
if (!done_copy_out)
mapped_data = kmap_local_folio(new_folio, new_offset);
/*
* Fire data frozen trigger if data already wasn't frozen. Do
* this before checking for escaping, as the trigger may modify
* the magic offset. If a copy-out happens afterwards, it will
* have the correct data in the buffer.
*/
jbd2_buffer_frozen_trigger(jh_in, mapped_data,
jh_in->b_triggers);
/*
* Check for escaping
*/
if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER))
do_escape = 1;
kunmap_local(mapped_data);
/*
* Do we need to do a data copy?
*/
if (do_escape && !done_copy_out) {
char *tmp;
do_escape = jbd2_data_needs_escaping(mapped_data);
kunmap_local(mapped_data);
/*
* Do we need to do a data copy?
*/
if (!do_escape)
goto escape_done;
spin_unlock(&jh_in->b_state_lock);
tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
......@@ -406,18 +410,10 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
copy_done:
new_folio = virt_to_folio(jh_in->b_frozen_data);
new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data);
done_copy_out = 1;
jbd2_data_do_escape(jh_in->b_frozen_data);
}
/*
* Did we need to do an escaping? Now we've done all the
* copying, we can finally do so.
* b_frozen_data is from jbd2_alloc() which always provides an
* address from the direct kernels mapping.
*/
if (do_escape)
*((unsigned int *)jh_in->b_frozen_data) = 0;
escape_done:
folio_set_bh(new_bh, new_folio, new_offset);
new_bh->b_size = bh_in->b_size;
new_bh->b_bdev = journal->j_dev;
......@@ -710,7 +706,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
return -EINVAL;
write_lock(&journal->j_state_lock);
if (tid <= journal->j_commit_sequence) {
if (tid_geq(journal->j_commit_sequence, tid)) {
write_unlock(&journal->j_state_lock);
return -EALREADY;
}
......@@ -740,9 +736,9 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit);
*/
static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
{
jbd2_journal_unlock_updates(journal);
if (journal->j_fc_cleanup_callback)
journal->j_fc_cleanup_callback(journal, 0, tid);
jbd2_journal_unlock_updates(journal);
write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
if (fallback)
......@@ -841,17 +837,12 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
*bh_out = NULL;
if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
fc_off = journal->j_fc_off;
blocknr = journal->j_fc_first + fc_off;
journal->j_fc_off++;
} else {
ret = -EINVAL;
}
if (ret)
return ret;
if (journal->j_fc_off + journal->j_fc_first >= journal->j_fc_last)
return -EINVAL;
fc_off = journal->j_fc_off;
blocknr = journal->j_fc_first + fc_off;
journal->j_fc_off++;
ret = jbd2_journal_bmap(journal, blocknr, &pblock);
if (ret)
return ret;
......@@ -860,7 +851,6 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
if (!bh)
return -ENOMEM;
journal->j_fc_wbuf[fc_off] = bh;
*bh_out = bh;
......@@ -903,7 +893,7 @@ int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
}
EXPORT_SYMBOL(jbd2_fc_wait_bufs);
int jbd2_fc_release_bufs(journal_t *journal)
void jbd2_fc_release_bufs(journal_t *journal)
{
struct buffer_head *bh;
int i, j_fc_off;
......@@ -917,8 +907,6 @@ int jbd2_fc_release_bufs(journal_t *journal)
put_bh(bh);
journal->j_fc_wbuf[i] = NULL;
}
return 0;
}
EXPORT_SYMBOL(jbd2_fc_release_bufs);
......@@ -1944,7 +1932,7 @@ static void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags)
if (had_fast_commit)
jbd2_set_feature_fast_commit(journal);
/* Log is no longer empty */
/* Log is empty */
write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_FLUSHED;
write_unlock(&journal->j_state_lock);
......@@ -2866,8 +2854,7 @@ static struct journal_head *journal_alloc_journal_head(void)
ret = kmem_cache_zalloc(jbd2_journal_head_cache,
GFP_NOFS | __GFP_NOFAIL);
}
if (ret)
spin_lock_init(&ret->b_state_lock);
spin_lock_init(&ret->b_state_lock);
return ret;
}
......
......@@ -1086,7 +1086,7 @@ struct journal_s
int j_revoke_records_per_block;
/**
* @j_transaction_overhead:
* @j_transaction_overhead_buffers:
*
* Number of blocks each transaction needs for its own bookkeeping
*/
......@@ -1675,7 +1675,7 @@ int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
int jbd2_fc_release_bufs(journal_t *journal);
void jbd2_fc_release_bufs(journal_t *journal);
/*
* is_journal_abort
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment