Commit 7991c92f authored by Linus Torvalds

Merge tag 'ext4_for_linus-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:

 - more folio conversion patches

 - add support for FS_IOC_GETFSSYSFSPATH (a userspace sketch follows this list)

 - mballoc cleanups and add more kunit tests

 - sysfs cleanups and bug fixes

 - miscellaneous bug fixes and cleanups

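The FS_IOC_GETFSSYSFSPATH ioctl mentioned above is new VFS plumbing that reports a filesystem's /sys/fs/ suffix (for ext4, something like "ext4/sda1") for any fd on that filesystem. A minimal userspace sketch, assuming the 6.10-era uapi definitions of struct fs_sysfs_path and the ioctl number (guarded so it still compiles against older headers):

```c
/*
 * Hedged sketch: query FS_IOC_GETFSSYSFSPATH on an fd and print the
 * /sys/fs/ path. struct fs_sysfs_path and the ioctl come from the
 * 6.10 uapi headers; the #ifdef fallback is ours.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
#ifdef FS_IOC_GETFSSYSFSPATH
	struct fs_sysfs_path p;
	int fd = open(argc > 1 ? argv[1] : ".", O_RDONLY);

	if (fd < 0 || ioctl(fd, FS_IOC_GETFSSYSFSPATH, &p) != 0) {
		perror("FS_IOC_GETFSSYSFSPATH");
		return 1;
	}
	/* p.name holds p.len valid bytes; don't assume NUL termination */
	printf("/sys/fs/%.*s\n", p.len, (const char *)p.name);
	close(fd);
	return 0;
#else
	fprintf(stderr, "headers lack FS_IOC_GETFSSYSFSPATH\n");
	return 1;
#endif
}
```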
* tag 'ext4_for_linus-6.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits)
  ext4: fix error pointer dereference in ext4_mb_load_buddy_gfp()
  jbd2: add prefix 'jbd2' for 'shrink_type'
  jbd2: use shrink_type type instead of bool type for __jbd2_journal_clean_checkpoint_list()
  ext4: fix uninitialized ratelimit_state->lock access in __ext4_fill_super()
  ext4: remove calls to set/clear the folio error flag
  ext4: propagate errors from ext4_sb_bread() in ext4_xattr_block_cache_find()
  ext4: fix mb_cache_entry's e_refcnt leak in ext4_xattr_block_cache_find()
  jbd2: remove redundant assignment to variable err
  ext4: remove the redundant folio_wait_stable()
  ext4: fix potential uninitialized variable
  ext4: convert ac_buddy_page to ac_buddy_folio
  ext4: convert ac_bitmap_page to ac_bitmap_folio
  ext4: convert ext4_mb_init_cache() to take a folio
  ext4: convert bd_buddy_page to bd_buddy_folio
  ext4: convert bd_bitmap_page to bd_bitmap_folio
  ext4: open coding repeated check in next_linear_group
  ext4: use correct criteria name instead of stale integer number in comment
  ext4: call ext4_mb_mark_free_simple to free continuous bits in found chunk
  ext4: add test_mb_mark_used_cost to estimate cost of mb_mark_used
  ext4: keep "prefetch_grp" and "nr" consistent
  ...
parents 61ea647e c6a6c969
......@@ -68,11 +68,6 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
static inline int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
/* usually, the umask is applied by posix_acl_create(), but if
ext4 ACL support is disabled at compile time, we need to do
it here, because posix_acl_create() will never be called */
inode->i_mode &= ~current_umask();
return 0;
}
#endif /* CONFIG_EXT4_FS_POSIX_ACL */
......
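The deleted stub lines above applied the process umask by hand whenever ext4 is built without ACL support. As a worked illustration of what the removed expression, inode->i_mode &= ~current_umask(), computes (plain userspace arithmetic, not ext4 code):

```c
/* Worked umask example: clearing umask bits from a requested mode.
 * The values are illustrative; current_umask() supplies umask_bits
 * in the kernel. */
#include <stdio.h>

int main(void)
{
	unsigned requested = 0666;   /* mode asked for at create time */
	unsigned umask_bits = 0022;  /* typical process umask         */

	printf("%04o & ~%04o = %04o\n",
	       requested, umask_bits, requested & ~umask_bits);
	return 0;                    /* prints 0666 & ~0022 = 0644    */
}
```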
......@@ -213,11 +213,14 @@ enum criteria {
#define EXT4_MB_USE_RESERVED 0x2000
/* Do strict check for free blocks while retrying block allocation */
#define EXT4_MB_STRICT_CHECK 0x4000
/* Large fragment size list lookup succeeded at least once for cr = 0 */
/* Large fragment size list lookup succeeded at least once for
* CR_POWER2_ALIGNED */
#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
/* Avg fragment size rb tree lookup succeeded at least once for
* CR_GOAL_LEN_FAST */
#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1.5 */
/* Avg fragment size rb tree lookup succeeded at least once for
* CR_BEST_AVAIL_LEN */
#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000
struct ext4_allocation_request {
......
......@@ -3402,9 +3402,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_extent *ex, *abut_ex;
ext4_lblk_t ee_block, eof_block;
unsigned int ee_len, depth, map_len = map->m_len;
int allocated = 0, max_zeroout = 0;
int err = 0;
int split_flag = EXT4_EXT_DATA_VALID2;
int allocated = 0;
unsigned int max_zeroout = 0;
ext_debug(inode, "logical block %llu, max_blocks %u\n",
(unsigned long long)map->m_lblk, map_len);
......
......@@ -844,8 +844,7 @@ static int ext4_sample_last_mounted(struct super_block *sb,
if (err)
goto out_journal;
lock_buffer(sbi->s_sbh);
strncpy(sbi->s_es->s_last_mounted, cp,
sizeof(sbi->s_es->s_last_mounted));
strtomem_pad(sbi->s_es->s_last_mounted, cp, 0);
ext4_superblock_csum_set(sb);
unlock_buffer(sbi->s_sbh);
ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
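s_last_mounted is a fixed-size superblock field with no NUL-termination guarantee, which is exactly the destination shape strncpy handles badly and strtomem_pad() was added for: copy what fits, pad the rest with the given byte. A userspace approximation of the semantics (a sketch; the real helper is a macro in include/linux/string.h that infers sizeof(dest) itself):

```c
/* Approximation of strtomem_pad(dest, src, pad) for a fixed-size,
 * not necessarily NUL-terminated destination buffer. */
#include <stdio.h>
#include <string.h>

static void strtomem_pad_demo(char *dest, size_t dest_len,
			      const char *src, char pad)
{
	size_t n = strnlen(src, dest_len);   /* copy at most dest_len */

	memcpy(dest, src, n);
	memset(dest + n, pad, dest_len - n); /* pad the remainder */
}

int main(void)
{
	char last_mounted[8];

	strtomem_pad_demo(last_mounted, sizeof(last_mounted), "/mnt", 0);
	printf("%.*s\n", (int)sizeof(last_mounted), last_mounted);
	return 0;
}
```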
......@@ -885,7 +884,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
return ret;
}
filp->f_mode |= FMODE_NOWAIT;
filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
return dquot_file_open(inode, filp);
}
......
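Setting FMODE_CAN_ODIRECT at open time tells the VFS that this file supports O_DIRECT, which lets the generic open path accept O_DIRECT without probing for an aops->direct_IO method; that is what makes the noop_direct_IO entries removable from the address_space operations further down. A paraphrase of the gate as we understand it (a sketch, not the verbatim VFS code):

```c
/* Paraphrased O_DIRECT gate: once ->open() sets FMODE_CAN_ODIRECT,
 * this check passes and no aops->direct_IO stub is needed. */
static int may_open_direct(struct file *filp)
{
	if ((filp->f_flags & O_DIRECT) &&
	    !(filp->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;
	return 0;
}
```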
......@@ -1865,7 +1865,7 @@ static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio)
len = folio_size(folio);
if (folio_pos(folio) + len > size &&
!ext4_verity_in_progress(mpd->inode))
len = size & ~PAGE_MASK;
len = size & (len - 1);
err = ext4_bio_write_folio(&mpd->io_submit, folio, len);
if (!err)
mpd->wbc->nr_to_write--;
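This hunk and the next replace the EOF clamp with len = size & (len - 1). Because len starts as folio_size(folio), a power of two, and folios are naturally aligned in the file, the expression is the file size modulo the folio size: the number of valid bytes in the folio that straddles EOF. The old size & ~PAGE_MASK was only right for single-page folios. A worked check in plain C:

```c
/* len = size & (len - 1) for power-of-two len is size % len:
 * the valid byte count of the folio containing EOF. */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long long size = 10000; /* hypothetical i_size        */
	unsigned long len = 4096;        /* folio_size(), power of two */
	unsigned long valid = size & (len - 1);

	assert(valid == size % len);     /* 10000 % 4096 == 1808 */
	printf("valid bytes in EOF folio: %lu\n", valid);
	return 0;
}
```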
......@@ -2334,7 +2334,7 @@ static int mpage_journal_page_buffers(handle_t *handle,
if (folio_pos(folio) + len > size &&
!ext4_verity_in_progress(inode))
len = size - folio_pos(folio);
len = size & (len - 1);
return ext4_journal_folio_buffers(handle, folio, len);
}
......@@ -2887,9 +2887,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
if (IS_ERR(folio))
return PTR_ERR(folio);
/* In case writeback began while the folio was unlocked */
folio_wait_stable(folio);
#ifdef CONFIG_FS_ENCRYPTION
ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep);
#else
......@@ -3530,7 +3527,6 @@ static const struct address_space_operations ext4_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_invalidate_folio,
.release_folio = ext4_release_folio,
.direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
......@@ -3547,7 +3543,6 @@ static const struct address_space_operations ext4_journalled_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_journalled_invalidate_folio,
.release_folio = ext4_release_folio,
.direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio_norefs,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
......@@ -3564,7 +3559,6 @@ static const struct address_space_operations ext4_da_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_invalidate_folio,
.release_folio = ext4_release_folio,
.direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
......@@ -3573,7 +3567,6 @@ static const struct address_space_operations ext4_da_aops = {
static const struct address_space_operations ext4_dax_aops = {
.writepages = ext4_dax_writepages,
.direct_IO = noop_direct_IO,
.dirty_folio = noop_dirty_folio,
.bmap = ext4_bmap,
.swap_activate = ext4_iomap_swap_activate,
......
......@@ -1150,9 +1150,8 @@ static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label
*/
BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
memset(label, 0, sizeof(label));
lock_buffer(sbi->s_sbh);
strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX);
strscpy_pad(label, sbi->s_es->s_volume_name);
unlock_buffer(sbi->s_sbh);
if (copy_to_user(user_label, label, sizeof(label)))
......
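Unlike s_last_mounted above, the label buffer copied back to userspace must be NUL-terminated and fully initialized, so strscpy_pad() fits here: bounded copy, guaranteed termination, zero-filled tail, with the two-argument form inferring the size from the array type. A userspace approximation of those semantics (sketch only):

```c
/* Approximation of strscpy_pad(dest, src): always NUL-terminates,
 * zero-pads, and reports truncation like the kernel helper. */
#include <errno.h>
#include <stdio.h>
#include <string.h>

static long strscpy_pad_demo(char *dest, size_t size, const char *src)
{
	size_t n = strnlen(src, size);
	long ret = (long)n;

	if (n == size) {            /* source didn't fit */
		n = size - 1;
		ret = -E2BIG;       /* kernel's truncation result */
	}
	memcpy(dest, src, n);
	memset(dest + n, 0, size - n);
	return ret;
}

int main(void)
{
	char label[16];

	strscpy_pad_demo(label, sizeof(label), "scratch-volume-1");
	printf("%s\n", label);      /* truncated to 15 chars + NUL */
	return 0;
}
```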
......@@ -30,7 +30,31 @@ struct mbt_ext4_super_block {
#define MBT_CTX(_sb) (&MBT_SB(_sb)->mbt_ctx)
#define MBT_GRP_CTX(_sb, _group) (&MBT_CTX(_sb)->grp_ctx[_group])
static struct inode *mbt_alloc_inode(struct super_block *sb)
{
struct ext4_inode_info *ei;
ei = kmalloc(sizeof(struct ext4_inode_info), GFP_KERNEL);
if (!ei)
return NULL;
INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem);
inode_init_once(&ei->vfs_inode);
ext4_fc_init_inode(&ei->vfs_inode);
return &ei->vfs_inode;
}
static void mbt_free_inode(struct inode *inode)
{
kfree(EXT4_I(inode));
}
static const struct super_operations mbt_sops = {
.alloc_inode = mbt_alloc_inode,
.free_inode = mbt_free_inode,
};
static void mbt_kill_sb(struct super_block *sb)
......@@ -859,6 +883,56 @@ static void test_mb_free_blocks(struct kunit *test)
ext4_mb_unload_buddy(&e4b);
}
#define COUNT_FOR_ESTIMATE 100000
static void test_mb_mark_used_cost(struct kunit *test)
{
struct ext4_buddy e4b;
struct super_block *sb = (struct super_block *)test->priv;
struct ext4_free_extent ex;
int ret;
struct test_range ranges[TEST_RANGE_COUNT];
int i, j;
unsigned long start, end, all = 0;
/* buddy cache assumes that each page contains at least one block */
if (sb->s_blocksize > PAGE_SIZE)
kunit_skip(test, "blocksize exceeds pagesize");
ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
ex.fe_group = TEST_GOAL_GROUP;
for (j = 0; j < COUNT_FOR_ESTIMATE; j++) {
mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
start = jiffies;
for (i = 0; i < TEST_RANGE_COUNT; i++) {
if (ranges[i].len == 0)
continue;
ex.fe_start = ranges[i].start;
ex.fe_len = ranges[i].len;
ext4_lock_group(sb, TEST_GOAL_GROUP);
mb_mark_used(&e4b, &ex);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
}
end = jiffies;
all += (end - start);
for (i = 0; i < TEST_RANGE_COUNT; i++) {
if (ranges[i].len == 0)
continue;
ext4_lock_group(sb, TEST_GOAL_GROUP);
mb_free_blocks(NULL, &e4b, ranges[i].start,
ranges[i].len);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
}
}
kunit_info(test, "costed jiffies %lu\n", all);
ext4_mb_unload_buddy(&e4b);
}
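The cost test reports raw jiffies, which are HZ-dependent, so numbers from kernels built with different CONFIG_HZ aren't directly comparable. If one wanted wall-clock units, a hypothetical variant of the final report (our addition, not part of the patch) could use the standard jiffies_to_msecs() helper:

```c
/* Hypothetical alternative report line: convert the HZ-dependent
 * jiffies total to milliseconds via <linux/jiffies.h>. */
kunit_info(test, "cost %lu jiffies (%u ms)\n",
	   all, jiffies_to_msecs(all));
```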
static const struct mbt_ext4_block_layout mbt_test_layouts[] = {
{
.blocksize_bits = 10,
......@@ -901,6 +975,8 @@ static struct kunit_case mbt_test_cases[] = {
KUNIT_CASE_PARAM(test_mb_mark_used, mbt_layouts_gen_params),
KUNIT_CASE_PARAM(test_mb_free_blocks, mbt_layouts_gen_params),
KUNIT_CASE_PARAM(test_mark_diskspace_used, mbt_layouts_gen_params),
KUNIT_CASE_PARAM_ATTR(test_mb_mark_used_cost, mbt_layouts_gen_params,
{ .speed = KUNIT_SPEED_SLOW }),
{}
};
......
......@@ -187,14 +187,14 @@ struct ext4_allocation_context {
struct ext4_free_extent ac_f_ex;
/*
* goal len can change in CR1.5, so save the original len. This is
* used while adjusting the PA window and for accounting.
* goal len can change in CR_BEST_AVAIL_LEN, so save the original len.
* This is used while adjusting the PA window and for accounting.
*/
ext4_grpblk_t ac_orig_goal_len;
__u32 ac_flags; /* allocation hints */
__u32 ac_groups_linear_remaining;
__u16 ac_groups_scanned;
__u16 ac_groups_linear_remaining;
__u16 ac_found;
__u16 ac_cX_found[EXT4_MB_NUM_CRS];
__u16 ac_tail;
......@@ -204,8 +204,8 @@ struct ext4_allocation_context {
__u8 ac_2order; /* if request is to allocate 2^N blocks and
* N > 0, the field stores N, otherwise 0 */
__u8 ac_op; /* operation, for history only */
struct page *ac_bitmap_page;
struct page *ac_buddy_page;
struct folio *ac_bitmap_folio;
struct folio *ac_buddy_folio;
struct ext4_prealloc_space *ac_pa;
struct ext4_locality_group *ac_lg;
};
......@@ -215,9 +215,9 @@ struct ext4_allocation_context {
#define AC_STATUS_BREAK 3
struct ext4_buddy {
struct page *bd_buddy_page;
struct folio *bd_buddy_folio;
void *bd_buddy;
struct page *bd_bitmap_page;
struct folio *bd_bitmap_folio;
void *bd_bitmap;
struct ext4_group_info *bd_info;
struct super_block *bd_sb;
......
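With bd_bitmap_page/bd_buddy_page (and the ac_* counterparts above) converted to folios, call sites switch from page accessors to their folio equivalents. An illustrative sketch of the accessor change, using the field names from the struct above (the body is ours, not a line from the patch):

```c
/* Sketch: reaching the buddy bitmap data after the conversion.
 * Before: return page_address(e4b->bd_buddy_page);
 * After, with the reference dropped later via folio_put(): */
static void *mb_buddy_data(struct ext4_buddy *e4b)
{
	return folio_address(e4b->bd_buddy_folio);
}
```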
......@@ -199,10 +199,8 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
continue;
if (!buffer_mapped(bh)) {
err = ext4_get_block(inode, block, bh, 0);
if (err) {
folio_set_error(folio);
if (err)
return err;
}
if (!buffer_mapped(bh)) {
folio_zero_range(folio, block_start, blocksize);
set_buffer_uptodate(bh);
......
......@@ -2897,7 +2897,7 @@ static int ext4_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
inode = ext4_new_inode_start_handle(idmap, dir, mode,
NULL, 0, NULL,
EXT4_HT_DIR,
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
EXT4_MAXQUOTAS_TRANS_BLOCKS(dir->i_sb) +
4 + EXT4_XATTR_TRANS_BLOCKS);
handle = ext4_journal_current_handle();
err = PTR_ERR(inode);
......
......@@ -117,7 +117,6 @@ static void ext4_finish_bio(struct bio *bio)
if (bio->bi_status) {
int err = blk_status_to_errno(bio->bi_status);
folio_set_error(folio);
mapping_set_error(folio->mapping, err);
}
bh = head = folio_buffers(folio);
......@@ -441,8 +440,6 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio,
BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio));
folio_clear_error(folio);
/*
* Comments copied from block_write_full_folio:
*
......
......@@ -289,7 +289,6 @@ int ext4_mpage_readpages(struct inode *inode,
if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
set_error_page:
folio_set_error(folio);
folio_zero_segment(folio, 0,
folio_size(folio));
folio_unlock(folio);
......
......@@ -2074,8 +2074,7 @@ static int unnote_qf_name(struct fs_context *fc, int qtype)
{
struct ext4_fs_context *ctx = fc->fs_private;
if (ctx->s_qf_names[qtype])
kfree(ctx->s_qf_names[qtype]);
kfree(ctx->s_qf_names[qtype]);
ctx->s_qf_names[qtype] = NULL;
ctx->qname_spec |= 1 << qtype;
......@@ -2480,8 +2479,7 @@ static int parse_options(struct fs_context *fc, char *options)
param.size = v_len;
ret = ext4_parse_param(fc, &param);
if (param.string)
kfree(param.string);
kfree(param.string);
if (ret < 0)
return ret;
}
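Both hunks drop an if (ptr) guard in front of kfree(). Like free(NULL) in userspace, kfree(NULL) is defined to do nothing, so the check is redundant (it's the pattern coccinelle's ifnullfree script flags). Illustrated with the userspace twin:

```c
/* free(NULL) is a defined no-op, and kfree(NULL) gives the same
 * guarantee in the kernel, so the guard removed here was dead code. */
#include <stdlib.h>

int main(void)
{
	char *p = NULL;

	/* redundant form removed by this patch:
	 *     if (p)
	 *             free(p);
	 */
	free(p);        /* legal; does nothing */
	return 0;
}
```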
......@@ -5338,6 +5336,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
super_set_uuid(sb, es->s_uuid, sizeof(es->s_uuid));
super_set_sysfs_name_bdev(sb);
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
......@@ -5547,19 +5546,15 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (err)
goto failed_mount6;
err = ext4_register_sysfs(sb);
if (err)
goto failed_mount7;
err = ext4_init_orphan_info(sb);
if (err)
goto failed_mount8;
goto failed_mount7;
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount. */
if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
err = ext4_enable_quotas(sb);
if (err)
goto failed_mount9;
goto failed_mount8;
}
#endif /* CONFIG_QUOTA */
......@@ -5585,7 +5580,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
ext4_msg(sb, KERN_INFO, "recovery complete");
err = ext4_mark_recovery_complete(sb, es);
if (err)
goto failed_mount10;
goto failed_mount9;
}
if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
......@@ -5602,15 +5597,17 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
atomic_set(&sbi->s_warning_count, 0);
atomic_set(&sbi->s_msg_count, 0);
/* Register sysfs after all initializations are complete. */
err = ext4_register_sysfs(sb);
if (err)
goto failed_mount9;
return 0;
failed_mount10:
failed_mount9:
ext4_quotas_off(sb, EXT4_MAXQUOTAS);
failed_mount9: __maybe_unused
failed_mount8: __maybe_unused
ext4_release_orphan_info(sb);
failed_mount8:
ext4_unregister_sysfs(sb);
kobject_put(&sbi->s_kobj);
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
......@@ -6126,8 +6123,8 @@ static void ext4_update_super(struct super_block *sb)
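Registering sysfs last means the attributes only become visible once everything they can touch is initialized (this is the fix for the uninitialized ratelimit_state->lock access in the shortlog), and because cleanup labels unwind in reverse initialization order, every failed_mountN label after the move renumbers. The underlying idiom, as a self-contained sketch with hypothetical init_x/teardown_x helpers:

```c
/* Goto-unwind idiom behind the failed_mountN renumbering: each
 * failure jumps to the label that tears down everything set up so
 * far, in reverse order. The init and teardown helpers here are
 * hypothetical stand-ins. */
static int init_a(void) { return 0; }
static int init_b(void) { return 0; }
static int init_c(void) { return 0; }
static void teardown_a(void) { }
static void teardown_b(void) { }

static int setup(void)
{
	int err;

	err = init_a();
	if (err)
		return err;
	err = init_b();
	if (err)
		goto out_a;
	err = init_c();   /* registered last: exposed only when ready */
	if (err)
		goto out_b;
	return 0;

out_b:
	teardown_b();
out_a:
	teardown_a();
	return err;
}

int main(void) { return setup(); }
```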
__ext4_update_tstamp(&es->s_first_error_time,
&es->s_first_error_time_hi,
sbi->s_first_error_time);
strncpy(es->s_first_error_func, sbi->s_first_error_func,
sizeof(es->s_first_error_func));
strtomem_pad(es->s_first_error_func,
sbi->s_first_error_func, 0);
es->s_first_error_line =
cpu_to_le32(sbi->s_first_error_line);
es->s_first_error_ino =
......@@ -6140,8 +6137,7 @@ static void ext4_update_super(struct super_block *sb)
__ext4_update_tstamp(&es->s_last_error_time,
&es->s_last_error_time_hi,
sbi->s_last_error_time);
strncpy(es->s_last_error_func, sbi->s_last_error_func,
sizeof(es->s_last_error_func));
strtomem_pad(es->s_last_error_func, sbi->s_last_error_func, 0);
es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
......
......@@ -29,7 +29,10 @@ typedef enum {
attr_trigger_test_error,
attr_first_error_time,
attr_last_error_time,
attr_clusters_in_group,
attr_mb_order,
attr_feature,
attr_pointer_pi,
attr_pointer_ui,
attr_pointer_ul,
attr_pointer_u64,
......@@ -104,7 +107,7 @@ static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi,
int ret;
ret = kstrtoull(skip_spaces(buf), 0, &val);
if (ret || val >= clusters)
if (ret || val >= clusters || (s64)val < 0)
return -EINVAL;
atomic64_set(&sbi->s_resv_clusters, val);
......@@ -178,6 +181,9 @@ static struct ext4_attr ext4_attr_##_name = { \
#define EXT4_RO_ATTR_ES_STRING(_name,_elname,_size) \
EXT4_ATTR_STRING(_name, 0444, _size, ext4_super_block, _elname)
#define EXT4_RW_ATTR_SBI_PI(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0644, pointer_pi, ext4_sb_info, _elname)
#define EXT4_RW_ATTR_SBI_UI(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0644, pointer_ui, ext4_sb_info, _elname)
......@@ -207,23 +213,25 @@ EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
ext4_sb_info, s_inode_readahead_blks);
EXT4_ATTR_OFFSET(mb_group_prealloc, 0644, clusters_in_group,
ext4_sb_info, s_mb_group_prealloc);
EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
ext4_sb_info, s_mb_best_avail_max_trim_order);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_UI(mb_best_avail_max_trim_order, s_mb_best_avail_max_trim_order);
EXT4_RW_ATTR_SBI_PI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_PI(err_ratelimit_burst, s_err_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_PI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_PI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
EXT4_RW_ATTR_SBI_PI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
EXT4_RW_ATTR_SBI_PI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
#ifdef CONFIG_EXT4_DEBUG
EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail);
#endif
......@@ -366,13 +374,45 @@ static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi)
#define print_tstamp(buf, es, tstamp) \
__print_tstamp(buf, (es)->tstamp, (es)->tstamp ## _hi)
static ssize_t ext4_generic_attr_show(struct ext4_attr *a,
struct ext4_sb_info *sbi, char *buf)
{
void *ptr = calc_ptr(a, sbi);
if (!ptr)
return 0;
switch (a->attr_id) {
case attr_inode_readahead:
case attr_clusters_in_group:
case attr_mb_order:
case attr_pointer_pi:
case attr_pointer_ui:
if (a->attr_ptr == ptr_ext4_super_block_offset)
return sysfs_emit(buf, "%u\n", le32_to_cpup(ptr));
return sysfs_emit(buf, "%u\n", *((unsigned int *) ptr));
case attr_pointer_ul:
return sysfs_emit(buf, "%lu\n", *((unsigned long *) ptr));
case attr_pointer_u8:
return sysfs_emit(buf, "%u\n", *((unsigned char *) ptr));
case attr_pointer_u64:
if (a->attr_ptr == ptr_ext4_super_block_offset)
return sysfs_emit(buf, "%llu\n", le64_to_cpup(ptr));
return sysfs_emit(buf, "%llu\n", *((unsigned long long *) ptr));
case attr_pointer_string:
return sysfs_emit(buf, "%.*s\n", a->attr_size, (char *) ptr);
case attr_pointer_atomic:
return sysfs_emit(buf, "%d\n", atomic_read((atomic_t *) ptr));
}
return 0;
}
static ssize_t ext4_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
s_kobj);
struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
void *ptr = calc_ptr(a, sbi);
switch (a->attr_id) {
case attr_delayed_allocation_blocks:
......@@ -391,45 +431,6 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
return sysfs_emit(buf, "%llu\n",
(unsigned long long)
percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit));
case attr_inode_readahead:
case attr_pointer_ui:
if (!ptr)
return 0;
if (a->attr_ptr == ptr_ext4_super_block_offset)
return sysfs_emit(buf, "%u\n",
le32_to_cpup(ptr));
else
return sysfs_emit(buf, "%u\n",
*((unsigned int *) ptr));
case attr_pointer_ul:
if (!ptr)
return 0;
return sysfs_emit(buf, "%lu\n",
*((unsigned long *) ptr));
case attr_pointer_u8:
if (!ptr)
return 0;
return sysfs_emit(buf, "%u\n",
*((unsigned char *) ptr));
case attr_pointer_u64:
if (!ptr)
return 0;
if (a->attr_ptr == ptr_ext4_super_block_offset)
return sysfs_emit(buf, "%llu\n",
le64_to_cpup(ptr));
else
return sysfs_emit(buf, "%llu\n",
*((unsigned long long *) ptr));
case attr_pointer_string:
if (!ptr)
return 0;
return sysfs_emit(buf, "%.*s\n", a->attr_size,
(char *) ptr);
case attr_pointer_atomic:
if (!ptr)
return 0;
return sysfs_emit(buf, "%d\n",
atomic_read((atomic_t *) ptr));
case attr_feature:
return sysfs_emit(buf, "supported\n");
case attr_first_error_time:
......@@ -438,29 +439,34 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
return print_tstamp(buf, sbi->s_es, s_last_error_time);
case attr_journal_task:
return journal_task_show(sbi, buf);
default:
return ext4_generic_attr_show(a, sbi, buf);
}
return 0;
}
static ssize_t ext4_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t len)
static ssize_t ext4_generic_attr_store(struct ext4_attr *a,
struct ext4_sb_info *sbi,
const char *buf, size_t len)
{
struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
s_kobj);
struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
void *ptr = calc_ptr(a, sbi);
unsigned long t;
int ret;
unsigned int t;
unsigned long lt;
void *ptr = calc_ptr(a, sbi);
if (!ptr)
return 0;
switch (a->attr_id) {
case attr_reserved_clusters:
return reserved_clusters_store(sbi, buf, len);
case attr_pointer_pi:
ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if ((int)t < 0)
return -EINVAL;
*((unsigned int *) ptr) = t;
return len;
case attr_pointer_ui:
if (!ptr)
return 0;
ret = kstrtoul(skip_spaces(buf), 0, &t);
ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if (a->attr_ptr == ptr_ext4_super_block_offset)
......@@ -468,20 +474,50 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
else
*((unsigned int *) ptr) = t;
return len;
case attr_mb_order:
ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if (t > 64)
return -EINVAL;
*((unsigned int *) ptr) = t;
return len;
case attr_clusters_in_group:
ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if (t > sbi->s_clusters_per_group)
return -EINVAL;
*((unsigned int *) ptr) = t;
return len;
case attr_pointer_ul:
if (!ptr)
return 0;
ret = kstrtoul(skip_spaces(buf), 0, &t);
ret = kstrtoul(skip_spaces(buf), 0, &lt);
if (ret)
return ret;
*((unsigned long *) ptr) = t;
*((unsigned long *) ptr) = lt;
return len;
}
return 0;
}
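The new attr_pointer_pi ("positive int") class parses with kstrtouint() and then rejects any value whose sign bit would be set when stored, because the ratelimit interval/burst fields it now covers are signed ints that must never go negative. A userspace analogue of that store path:

```c
/* Userspace analogue of the attr_pointer_pi store: parse as
 * unsigned int, then reject values that would read back negative
 * through a signed int (mirrors the kernel's "(int)t < 0" test). */
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_positive_int(const char *buf, unsigned int *out)
{
	char *end;
	unsigned long t;

	errno = 0;
	t = strtoul(buf, &end, 0);
	if (errno || end == buf || t > UINT_MAX)
		return -EINVAL;
	if ((int)(unsigned int)t < 0)
		return -EINVAL;
	*out = (unsigned int)t;
	return 0;
}

int main(void)
{
	unsigned int v;

	printf("%d\n", parse_positive_int("4096", &v));       /* 0   */
	printf("%d\n", parse_positive_int("2200000000", &v)); /* -22 */
	return 0;
}
```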
static ssize_t ext4_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t len)
{
struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
s_kobj);
struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
switch (a->attr_id) {
case attr_reserved_clusters:
return reserved_clusters_store(sbi, buf, len);
case attr_inode_readahead:
return inode_readahead_blks_store(sbi, buf, len);
case attr_trigger_test_error:
return trigger_test_error(sbi, buf, len);
default:
return ext4_generic_attr_store(a, sbi, buf, len);
}
return 0;
}
static void ext4_sb_release(struct kobject *kobj)
......
......@@ -1619,6 +1619,7 @@ static struct inode *ext4_xattr_inode_lookup_create(handle_t *handle,
static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
struct ext4_xattr_search *s,
handle_t *handle, struct inode *inode,
struct inode *new_ea_inode,
bool is_block)
{
struct ext4_xattr_entry *last, *next;
......@@ -1626,7 +1627,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
size_t min_offs = s->end - s->base, name_len = strlen(i->name);
int in_inode = i->in_inode;
struct inode *old_ea_inode = NULL;
struct inode *new_ea_inode = NULL;
size_t old_size, new_size;
int ret;
......@@ -1711,38 +1711,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
old_ea_inode = NULL;
goto out;
}
}
if (i->value && in_inode) {
WARN_ON_ONCE(!i->value_len);
new_ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
i->value, i->value_len);
if (IS_ERR(new_ea_inode)) {
ret = PTR_ERR(new_ea_inode);
new_ea_inode = NULL;
goto out;
}
}
if (old_ea_inode) {
/* We are ready to release ref count on the old_ea_inode. */
ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
if (ret) {
/* Release newly required ref count on new_ea_inode. */
if (new_ea_inode) {
int err;
err = ext4_xattr_inode_dec_ref(handle,
new_ea_inode);
if (err)
ext4_warning_inode(new_ea_inode,
"dec ref new_ea_inode err=%d",
err);
ext4_xattr_inode_free_quota(inode, new_ea_inode,
i->value_len);
}
if (ret)
goto out;
}
ext4_xattr_inode_free_quota(inode, old_ea_inode,
le32_to_cpu(here->e_value_size));
......@@ -1866,7 +1839,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
ret = 0;
out:
iput(old_ea_inode);
iput(new_ea_inode);
return ret;
}
......@@ -1929,9 +1901,21 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
size_t old_ea_inode_quota = 0;
unsigned int ea_ino;
#define header(x) ((struct ext4_xattr_header *)(x))
/* If we need EA inode, prepare it before locking the buffer */
if (i->value && i->in_inode) {
WARN_ON_ONCE(!i->value_len);
ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
i->value, i->value_len);
if (IS_ERR(ea_inode)) {
error = PTR_ERR(ea_inode);
ea_inode = NULL;
goto cleanup;
}
}
if (s->base) {
int offset = (char *)s->here - bs->bh->b_data;
......@@ -1940,6 +1924,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
EXT4_JTR_NONE);
if (error)
goto cleanup;
lock_buffer(bs->bh);
if (header(s->base)->h_refcount == cpu_to_le32(1)) {
......@@ -1966,7 +1951,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
}
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
ea_inode, true /* is_block */);
ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
if (error == -EFSCORRUPTED)
......@@ -2034,33 +2019,22 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
s->end = s->base + sb->s_blocksize;
}
error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
true /* is_block */);
if (error == -EFSCORRUPTED)
goto bad_block;
if (error)
goto cleanup;
if (i->value && s->here->e_value_inum) {
/*
* A ref count on ea_inode has been taken as part of the call to
* ext4_xattr_set_entry() above. We would like to drop this
* extra ref but we have to wait until the xattr block is
* initialized and has its own ref count on the ea_inode.
*/
ea_ino = le32_to_cpu(s->here->e_value_inum);
error = ext4_xattr_inode_iget(inode, ea_ino,
le32_to_cpu(s->here->e_hash),
&ea_inode);
if (error) {
ea_inode = NULL;
inserted:
if (!IS_LAST_ENTRY(s->first)) {
new_bh = ext4_xattr_block_cache_find(inode, header(s->base), &ce);
if (IS_ERR(new_bh)) {
error = PTR_ERR(new_bh);
new_bh = NULL;
goto cleanup;
}
}
inserted:
if (!IS_LAST_ENTRY(s->first)) {
new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
&ce);
if (new_bh) {
/* We found an identical block in the cache. */
if (new_bh == bs->bh)
......@@ -2158,6 +2132,17 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
ENTRY(header(s->base)+1));
if (error)
goto getblk_failed;
if (ea_inode) {
/* Drop the extra ref on ea_inode. */
error = ext4_xattr_inode_dec_ref(handle,
ea_inode);
if (error)
ext4_warning_inode(ea_inode,
"dec ref error=%d",
error);
iput(ea_inode);
ea_inode = NULL;
}
lock_buffer(new_bh);
error = ext4_journal_get_create_access(handle, sb,
......@@ -2198,17 +2183,16 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
cleanup:
if (ea_inode) {
int error2;
error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
if (error2)
ext4_warning_inode(ea_inode, "dec ref error=%d",
error2);
if (error) {
int error2;
/* If there was an error, revert the quota charge. */
if (error)
error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
if (error2)
ext4_warning_inode(ea_inode, "dec ref error=%d",
error2);
ext4_xattr_inode_free_quota(inode, ea_inode,
i_size_read(ea_inode));
}
iput(ea_inode);
}
if (ce)
......@@ -2266,14 +2250,38 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
{
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_search *s = &is->s;
struct inode *ea_inode = NULL;
int error;
if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
return -ENOSPC;
error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
if (error)
/* If we need EA inode, prepare it before locking the buffer */
if (i->value && i->in_inode) {
WARN_ON_ONCE(!i->value_len);
ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
i->value, i->value_len);
if (IS_ERR(ea_inode))
return PTR_ERR(ea_inode);
}
error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
false /* is_block */);
if (error) {
if (ea_inode) {
int error2;
error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
if (error2)
ext4_warning_inode(ea_inode, "dec ref error=%d",
error2);
ext4_xattr_inode_free_quota(inode, ea_inode,
i_size_read(ea_inode));
iput(ea_inode);
}
return error;
}
header = IHDR(inode, ext4_raw_inode(&is->iloc));
if (!IS_LAST_ENTRY(s->first)) {
header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
......@@ -2282,6 +2290,7 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
header->h_magic = cpu_to_le32(0);
ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
}
iput(ea_inode);
return 0;
}
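Both ext4_xattr_block_set() and ext4_xattr_ibody_set() now create the EA inode up front, via ext4_xattr_inode_lookup_create(), and pass it into ext4_xattr_set_entry() instead of creating it from inside that function. Our reading of the resulting shape (a sketch condensing the hunks above, not verbatim patch lines):

```c
/* Sketch of the reordering: do the fallible, sleeping EA-inode
 * creation before taking the buffer lock, then hand the result to
 * ext4_xattr_set_entry(). */
if (i->value && i->in_inode) {
	ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
						  i->value, i->value_len);
	if (IS_ERR(ea_inode))
		return PTR_ERR(ea_inode);
}
lock_buffer(bs->bh);
error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
			     true /* is_block */);
unlock_buffer(bs->bh);
```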
......@@ -3090,8 +3099,8 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
*
* Find an identical extended attribute block.
*
* Returns a pointer to the block found, or NULL if such a block was
* not found or an error occurred.
* Returns a pointer to the block found, or NULL if such a block was not
found, or an error pointer if an error occurred while reading the EA block.
*/
static struct buffer_head *
ext4_xattr_block_cache_find(struct inode *inode,
......@@ -3113,11 +3122,11 @@ ext4_xattr_block_cache_find(struct inode *inode,
bh = ext4_sb_bread(inode->i_sb, ce->e_value, REQ_PRIO);
if (IS_ERR(bh)) {
if (PTR_ERR(bh) == -ENOMEM)
return NULL;
bh = NULL;
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long)ce->e_value);
if (PTR_ERR(bh) != -ENOMEM)
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long)ce->e_value);
mb_cache_entry_put(ea_block_cache, ce);
return bh;
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
*pce = ce;
return bh;
......
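The fix above makes ext4_xattr_block_cache_find() propagate the ERR_PTR from ext4_sb_bread() rather than flattening it to NULL, so callers can tell "no identical block cached" (NULL) apart from "read failed" (error pointer). The kernel convention encodes a small negative errno in the pointer value itself; a userspace approximation:

```c
/* Userspace approximation of the kernel ERR_PTR/IS_ERR convention:
 * one return value carries NULL ("not found"), a valid pointer,
 * or an encoded negative errno. */
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long error) { return (void *)error; }
static long PTR_ERR(const void *ptr) { return (long)ptr; }
static int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *cache_find(int simulate_io_error)
{
	if (simulate_io_error)
		return ERR_PTR(-EIO);  /* propagated, not hidden as NULL */
	return NULL;                   /* genuinely not cached */
}

int main(void)
{
	void *bh = cache_find(1);

	if (IS_ERR(bh))
		printf("read error: %ld\n", PTR_ERR(bh));  /* -5 */
	else if (!bh)
		printf("no identical block cached\n");
	return 0;
}
```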
......@@ -337,8 +337,6 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* Checkpoint list management */
enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
/*
* journal_shrink_one_cp_list
*
......@@ -350,7 +348,7 @@ enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
* Called with j_list_lock held.
*/
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
enum shrink_type type,
enum jbd2_shrink_type type,
bool *released)
{
struct journal_head *last_jh;
......@@ -367,12 +365,12 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
jh = next_jh;
next_jh = jh->b_cpnext;
if (type == SHRINK_DESTROY) {
if (type == JBD2_SHRINK_DESTROY) {
ret = __jbd2_journal_remove_checkpoint(jh);
} else {
ret = jbd2_journal_try_remove_checkpoint(jh);
if (ret < 0) {
if (type == SHRINK_BUSY_SKIP)
if (type == JBD2_SHRINK_BUSY_SKIP)
continue;
break;
}
......@@ -439,7 +437,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
tid = transaction->t_tid;
freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
SHRINK_BUSY_SKIP, &released);
JBD2_SHRINK_BUSY_SKIP, &released);
nr_freed += freed;
(*nr_to_scan) -= min(*nr_to_scan, freed);
if (*nr_to_scan == 0)
......@@ -472,21 +470,25 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
* journal_clean_checkpoint_list
*
* Find all the written-back checkpoint buffers in the journal and release them.
* If 'destroy' is set, release all buffers unconditionally.
* If 'type' is JBD2_SHRINK_DESTROY, release all buffers unconditionally. If
'type' is JBD2_SHRINK_BUSY_STOP, stop releasing buffers as soon as a busy
buffer is encountered. To avoid wasting CPU cycles scanning the buffer list
in some cases, don't pass JBD2_SHRINK_BUSY_SKIP as 'type' to this function.
*
* Called with j_list_lock held.
*/
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
void __jbd2_journal_clean_checkpoint_list(journal_t *journal,
enum jbd2_shrink_type type)
{
transaction_t *transaction, *last_transaction, *next_transaction;
enum shrink_type type;
bool released;
WARN_ON_ONCE(type == JBD2_SHRINK_BUSY_SKIP);
transaction = journal->j_checkpoint_transactions;
if (!transaction)
return;
type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
last_transaction = transaction->t_cpprev;
next_transaction = transaction;
do {
......@@ -527,7 +529,7 @@ void jbd2_journal_destroy_checkpoint(journal_t *journal)
spin_unlock(&journal->j_list_lock);
break;
}
__jbd2_journal_clean_checkpoint_list(journal, true);
__jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_DESTROY);
spin_unlock(&journal->j_list_lock);
cond_resched();
}
......
......@@ -501,7 +501,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* frees some memory
*/
spin_lock(&journal->j_list_lock);
__jbd2_journal_clean_checkpoint_list(journal, false);
__jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_BUSY_STOP);
spin_unlock(&journal->j_list_lock);
jbd2_debug(3, "JBD2: commit phase 1\n");
......@@ -571,7 +571,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
J_ASSERT(commit_transaction->t_nr_buffers <=
atomic_read(&commit_transaction->t_outstanding_credits));
err = 0;
bufs = 0;
descriptor = NULL;
while (commit_transaction->t_buffers) {
......
......@@ -1434,7 +1434,9 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
extern void jbd2_journal_commit_transaction(journal_t *);
/* Checkpoint list management */
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
enum jbd2_shrink_type {JBD2_SHRINK_DESTROY, JBD2_SHRINK_BUSY_STOP, JBD2_SHRINK_BUSY_SKIP};
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, enum jbd2_shrink_type type);
unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan);
int __jbd2_journal_remove_checkpoint(struct journal_head *);
int jbd2_journal_try_remove_checkpoint(struct journal_head *jh);
......
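The three jbd2_shrink_type values differ only in how the scan reacts to a busy checkpoint buffer, which the hunks above encode across two branches. A hedged restatement as a tiny decision helper (illustrative, not kernel code):

```c
/* How journal_shrink_one_cp_list() treats a buffer under each type,
 * restated as one helper: 1 = remove jh, 0 = skip it, -1 = stop
 * scanning. */
enum jbd2_shrink_type { JBD2_SHRINK_DESTROY, JBD2_SHRINK_BUSY_STOP,
			JBD2_SHRINK_BUSY_SKIP };

static int shrink_decision(enum jbd2_shrink_type type, int buffer_busy)
{
	if (type == JBD2_SHRINK_DESTROY)
		return 1;               /* remove unconditionally */
	if (!buffer_busy)
		return 1;               /* written back: safe to remove */
	return type == JBD2_SHRINK_BUSY_SKIP ? 0 : -1;
}
```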