Commit 73a3fcda authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'f2fs-for-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "In this cycle, we've mainly investigated the zoned block device
  support along with patches such as correcting write pointers between
  f2fs and storage, adding asynchronous zone reset flow, and managing
  the number of open zones.

  Beyond that, f2fs adds another mount option, "errors=x", to specify
  how to handle unexpected behavior detected at runtime.

  Enhancements:
   - support 'errors=remount-ro|continue|panic' mount option
   - enforce some inode flag policies
   - allow .tmp compression given extensions
   - add some ioctls to manage the f2fs compression
   - improve looped node chain flow
   - avoid issuing small-sized discard commands during checkpoint
   - implement an asynchronous zone reset

  Bug fixes:
   - fix deadlock in xattr and inode page lock
   - fix and add sanity check in some error paths
   - fix to avoid NULL pointer dereference f2fs_write_end_io() along
     with put_super
   - set proper flags to quota files
   - fix potential deadlock due to unpaired node_write lock use
   - fix over-estimating free section during FG GC
   - fix the wrong condition to determine atomic context

  As usual, there are also a number of patches with code refactoring and
  minor clean-ups"

* tag 'f2fs-for-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (46 commits)
  f2fs: fix to do sanity check on direct node in truncate_dnode()
  f2fs: only set release for file that has compressed data
  f2fs: fix compile warning in f2fs_destroy_node_manager()
  f2fs: fix error path handling in truncate_dnode()
  f2fs: fix deadlock in i_xattr_sem and inode page lock
  f2fs: remove unneeded page uptodate check/set
  f2fs: update mtime and ctime in move file range method
  f2fs: compress tmp files given extension
  f2fs: refactor struct f2fs_attr macro
  f2fs: convert to use sbi directly
  f2fs: remove redundant assignment to variable err
  f2fs: do not issue small discard commands during checkpoint
  f2fs: check zone write pointer points to the end of zone
  f2fs: add f2fs_ioc_get_compress_blocks
  f2fs: cleanup MIN_INLINE_XATTR_SIZE
  f2fs: add helper to check compression level
  f2fs: set FMODE_CAN_ODIRECT instead of a dummy direct_IO method
  f2fs: do more sanity check on inode
  f2fs: compress: fix to check validity of i_compress_flag field
  f2fs: add sanity compress level check for compressed file
  ...
parents bb8e7e9f a6ec8378
......@@ -351,6 +351,22 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
data block update frequency of the extent per inode, in
order to provide better temperature hints for data block
allocation.
errors=%s Specify f2fs behavior on critical errors. This supports three modes:
"panic", "continue" and "remount-ro", which respectively trigger a
panic immediately, continue without doing anything, and remount
the partition in read-only mode. The default is "continue"
mode.
====================== =============== =============== ========
mode continue remount-ro panic
====================== =============== =============== ========
access ops normal normal N/A
syscall errors -EIO -EROFS N/A
mount option rw ro N/A
pending dir write keep keep N/A
pending non-dir write drop keep N/A
pending node write drop keep N/A
pending meta write keep keep N/A
====================== =============== =============== ========
======================== ============================================================
Debugfs Entries
......
......@@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
unsigned char reason)
{
f2fs_build_fault_attr(sbi, 0, 0);
set_ckpt_flags(sbi, CP_ERROR_FLAG);
if (!end_io) {
if (!end_io)
f2fs_flush_merged_writes(sbi);
f2fs_handle_stop(sbi, reason);
}
f2fs_handle_critical_error(sbi, reason, end_io);
}
/*
......
......@@ -55,6 +55,7 @@ struct f2fs_compress_ops {
int (*init_decompress_ctx)(struct decompress_io_ctx *dic);
void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic);
int (*decompress_pages)(struct decompress_io_ctx *dic);
bool (*is_level_valid)(int level);
};
static unsigned int offset_in_cluster(struct compress_ctx *cc, pgoff_t index)
......@@ -308,17 +309,25 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic)
return 0;
}
/*
 * Tell whether @lvl is an acceptable LZ4 compression level.
 *
 * Level 0 is always accepted. When CONFIG_F2FS_FS_LZ4HC is enabled, any
 * level inside [LZ4HC_MIN_CLEVEL, LZ4HC_MAX_CLEVEL] is accepted as well;
 * without it, every non-zero level is rejected.
 */
static bool lz4_is_level_valid(int lvl)
{
	if (lvl == 0)
		return true;
#ifdef CONFIG_F2FS_FS_LZ4HC
	if (lvl < LZ4HC_MIN_CLEVEL || lvl > LZ4HC_MAX_CLEVEL)
		return false;
	return true;
#else
	return false;
#endif
}
static const struct f2fs_compress_ops f2fs_lz4_ops = {
.init_compress_ctx = lz4_init_compress_ctx,
.destroy_compress_ctx = lz4_destroy_compress_ctx,
.compress_pages = lz4_compress_pages,
.decompress_pages = lz4_decompress_pages,
.is_level_valid = lz4_is_level_valid,
};
#endif
#ifdef CONFIG_F2FS_FS_ZSTD
#define F2FS_ZSTD_DEFAULT_CLEVEL 1
static int zstd_init_compress_ctx(struct compress_ctx *cc)
{
zstd_parameters params;
......@@ -327,6 +336,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
unsigned int workspace_size;
unsigned char level = F2FS_I(cc->inode)->i_compress_level;
/* Need to remain this for backward compatibility */
if (!level)
level = F2FS_ZSTD_DEFAULT_CLEVEL;
......@@ -477,6 +487,11 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic)
return 0;
}
/*
 * Tell whether @lvl lies inside the inclusive range reported by
 * zstd_min_clevel() and zstd_max_clevel().
 */
static bool zstd_is_level_valid(int lvl)
{
	/* Below the minimum: reject without consulting the upper bound. */
	if (lvl < zstd_min_clevel())
		return false;

	return lvl <= zstd_max_clevel();
}
static const struct f2fs_compress_ops f2fs_zstd_ops = {
.init_compress_ctx = zstd_init_compress_ctx,
.destroy_compress_ctx = zstd_destroy_compress_ctx,
......@@ -484,6 +499,7 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = {
.init_decompress_ctx = zstd_init_decompress_ctx,
.destroy_decompress_ctx = zstd_destroy_decompress_ctx,
.decompress_pages = zstd_decompress_pages,
.is_level_valid = zstd_is_level_valid,
};
#endif
......@@ -542,6 +558,16 @@ bool f2fs_is_compress_backend_ready(struct inode *inode)
return f2fs_cops[F2FS_I(inode)->i_compress_algorithm];
}
/*
 * Check compression level @lvl against the backend registered for @alg.
 *
 * If the backend supplies an is_level_valid() hook, its verdict is returned;
 * otherwise only level 0 is considered valid.
 *
 * NOTE(review): @alg indexes f2fs_cops[] directly and the resulting pointer
 * is dereferenced without a NULL check - confirm that callers have already
 * validated @alg and that the backend is present.
 */
bool f2fs_is_compress_level_valid(int alg, int lvl)
{
	const struct f2fs_compress_ops *cops = f2fs_cops[alg];

	return cops->is_level_valid ? cops->is_level_valid(lvl) : !lvl;
}
static mempool_t *compress_page_pool;
static int num_compress_pages = 512;
module_param(num_compress_pages, uint, 0444);
......@@ -743,8 +769,8 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
ret = -EFSCORRUPTED;
/* Avoid f2fs_commit_super in irq context */
if (in_task)
f2fs_save_errors(sbi, ERROR_FAIL_DECOMPRESSION);
if (!in_task)
f2fs_handle_error_async(sbi, ERROR_FAIL_DECOMPRESSION);
else
f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION);
goto out_release;
......@@ -1215,6 +1241,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
unsigned int last_index = cc->cluster_size - 1;
loff_t psize;
int i, err;
bool quota_inode = IS_NOQUOTA(inode);
/* we should bypass data pages to proceed the kworker jobs */
if (unlikely(f2fs_cp_error(sbi))) {
......@@ -1222,7 +1249,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
goto out_free;
}
if (IS_NOQUOTA(inode)) {
if (quota_inode) {
/*
* We need to wait for node_write to avoid block allocation during
* checkpoint. This can only happen to quota writes which can cause
......@@ -1344,7 +1371,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
f2fs_put_dnode(&dn);
if (IS_NOQUOTA(inode))
if (quota_inode)
f2fs_up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);
......@@ -1370,7 +1397,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
out_put_dnode:
f2fs_put_dnode(&dn);
out_unlock_op:
if (IS_NOQUOTA(inode))
if (quota_inode)
f2fs_up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);
......
......@@ -383,6 +383,17 @@ static void f2fs_write_end_io(struct bio *bio)
bio_put(bio);
}
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * End-io handler that f2fs_submit_page_write() installs on a bio when the
 * written block is the last block of a zone (see is_end_zone_blkaddr()).
 * It undoes the bi_private redirection done at submit time, signals the
 * zone_wait completion and then runs the regular write end-io path.
 */
static void f2fs_zone_write_end_io(struct bio *bio)
{
/* At submit time bi_private was pointed at the owning f2fs_bio_info. */
struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
/* Restore the saved bi_private before f2fs_write_end_io() sees the bio. */
bio->bi_private = io->bi_private;
/* Release the waiter blocked in wait_for_completion_io(&io->zone_wait). */
complete(&io->zone_wait);
f2fs_write_end_io(bio);
}
#endif
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
block_t blk_addr, sector_t *sector)
{
......@@ -639,6 +650,11 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
#ifdef CONFIG_BLK_DEV_ZONED
init_completion(&sbi->write_io[i][j].zone_wait);
sbi->write_io[i][j].zone_pending_bio = NULL;
sbi->write_io[i][j].bi_private = NULL;
#endif
}
}
......@@ -965,6 +981,26 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
return 0;
}
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Tell whether @blkaddr is the last block of a sequential zone on a
 * host-managed zoned device.
 *
 * On a multi-device filesystem the address is first mapped to its device
 * and rebased to that device's start block; an address outside the device
 * range is logged and treated as "not an end-of-zone block".
 */
static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int dev_idx = 0;

	if (f2fs_is_multi_device(sbi)) {
		dev_idx = f2fs_target_device_index(sbi, blkaddr);
		if (!(blkaddr >= FDEV(dev_idx).start_blk &&
		      blkaddr <= FDEV(dev_idx).end_blk)) {
			f2fs_err(sbi, "Invalid block %x", blkaddr);
			return false;
		}
		/* Work with a device-relative block address from here on. */
		blkaddr -= FDEV(dev_idx).start_blk;
	}

	if (bdev_zoned_model(FDEV(dev_idx).bdev) != BLK_ZONED_HM)
		return false;

	if (!f2fs_blkz_is_seq(sbi, dev_idx, blkaddr))
		return false;

	/* Last block of a zone: offset within the zone is blocks_per_blkz - 1. */
	return blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1;
}
#endif
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
......@@ -975,6 +1011,16 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
f2fs_bug_on(sbi, is_read_io(fio->op));
f2fs_down_write(&io->io_rwsem);
#ifdef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
wait_for_completion_io(&io->zone_wait);
bio_put(io->zone_pending_bio);
io->zone_pending_bio = NULL;
io->bi_private = NULL;
}
#endif
next:
if (fio->in_list) {
spin_lock(&io->io_lock);
......@@ -1038,6 +1084,18 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
if (fio->in_list)
goto next;
out:
#ifdef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
bio_get(io->bio);
reinit_completion(&io->zone_wait);
io->bi_private = io->bio->bi_private;
io->bio->bi_private = io;
io->bio->bi_end_io = f2fs_zone_write_end_io;
io->zone_pending_bio = io->bio;
__submit_merged_bio(io);
}
#endif
if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
!f2fs_is_checkpoint_ready(sbi))
__submit_merged_bio(io);
......@@ -2173,7 +2231,6 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
F2FS_BLKSIZE);
*last_block_in_bio = block_nr;
goto out;
out:
*bio_ret = bio;
return ret;
......@@ -2775,6 +2832,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
unsigned offset = 0;
bool need_balance_fs = false;
bool quota_inode = IS_NOQUOTA(inode);
int err = 0;
struct f2fs_io_info fio = {
.sbi = sbi,
......@@ -2807,6 +2865,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
if (S_ISDIR(inode->i_mode) &&
!is_sbi_flag_set(sbi, SBI_IS_CLOSE))
goto redirty_out;
/* keep data pages in remount-ro mode */
if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
goto redirty_out;
goto out;
}
......@@ -2832,19 +2894,19 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
goto out;
/* Dentry/quota blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
if (S_ISDIR(inode->i_mode) || quota_inode) {
/*
* We need to wait for node_write to avoid block allocation during
* checkpoint. This can only happen to quota writes which can cause
* the below discard race condition.
*/
if (IS_NOQUOTA(inode))
if (quota_inode)
f2fs_down_read(&sbi->node_write);
fio.need_lock = LOCK_DONE;
err = f2fs_do_write_data_page(&fio);
if (IS_NOQUOTA(inode))
if (quota_inode)
f2fs_up_read(&sbi->node_write);
goto done;
......@@ -4067,7 +4129,6 @@ const struct address_space_operations f2fs_dblock_aops = {
.migrate_folio = filemap_migrate_folio,
.invalidate_folio = f2fs_invalidate_folio,
.release_folio = f2fs_release_folio,
.direct_IO = noop_direct_IO,
.bmap = f2fs_bmap,
.swap_activate = f2fs_swap_activate,
.swap_deactivate = f2fs_swap_deactivate,
......
......@@ -775,8 +775,15 @@ int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname,
{
int err = -EAGAIN;
if (f2fs_has_inline_dentry(dir))
if (f2fs_has_inline_dentry(dir)) {
/*
* Should get i_xattr_sem to keep the lock order:
* i_xattr_sem -> inode_page lock used by f2fs_setxattr.
*/
f2fs_down_read(&F2FS_I(dir)->i_xattr_sem);
err = f2fs_add_inline_entry(dir, fname, inode, ino, mode);
f2fs_up_read(&F2FS_I(dir)->i_xattr_sem);
}
if (err == -EAGAIN)
err = f2fs_add_regular_entry(dir, fname, inode, ino, mode);
......
......@@ -80,34 +80,34 @@ extern const char *f2fs_fault_name[FAULT_MAX];
/*
* For mount options
*/
#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002
#define F2FS_MOUNT_DISCARD 0x00000004
#define F2FS_MOUNT_NOHEAP 0x00000008
#define F2FS_MOUNT_XATTR_USER 0x00000010
#define F2FS_MOUNT_POSIX_ACL 0x00000020
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040
#define F2FS_MOUNT_INLINE_XATTR 0x00000080
#define F2FS_MOUNT_INLINE_DATA 0x00000100
#define F2FS_MOUNT_INLINE_DENTRY 0x00000200
#define F2FS_MOUNT_FLUSH_MERGE 0x00000400
#define F2FS_MOUNT_NOBARRIER 0x00000800
#define F2FS_MOUNT_FASTBOOT 0x00001000
#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00002000
#define F2FS_MOUNT_DATA_FLUSH 0x00008000
#define F2FS_MOUNT_FAULT_INJECTION 0x00010000
#define F2FS_MOUNT_USRQUOTA 0x00080000
#define F2FS_MOUNT_GRPQUOTA 0x00100000
#define F2FS_MOUNT_PRJQUOTA 0x00200000
#define F2FS_MOUNT_QUOTA 0x00400000
#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000
#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000
#define F2FS_MOUNT_NORECOVERY 0x04000000
#define F2FS_MOUNT_ATGC 0x08000000
#define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000
#define F2FS_MOUNT_GC_MERGE 0x20000000
#define F2FS_MOUNT_COMPRESS_CACHE 0x40000000
#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x80000000
#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000001
#define F2FS_MOUNT_DISCARD 0x00000002
#define F2FS_MOUNT_NOHEAP 0x00000004
#define F2FS_MOUNT_XATTR_USER 0x00000008
#define F2FS_MOUNT_POSIX_ACL 0x00000010
#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000020
#define F2FS_MOUNT_INLINE_XATTR 0x00000040
#define F2FS_MOUNT_INLINE_DATA 0x00000080
#define F2FS_MOUNT_INLINE_DENTRY 0x00000100
#define F2FS_MOUNT_FLUSH_MERGE 0x00000200
#define F2FS_MOUNT_NOBARRIER 0x00000400
#define F2FS_MOUNT_FASTBOOT 0x00000800
#define F2FS_MOUNT_READ_EXTENT_CACHE 0x00001000
#define F2FS_MOUNT_DATA_FLUSH 0x00002000
#define F2FS_MOUNT_FAULT_INJECTION 0x00004000
#define F2FS_MOUNT_USRQUOTA 0x00008000
#define F2FS_MOUNT_GRPQUOTA 0x00010000
#define F2FS_MOUNT_PRJQUOTA 0x00020000
#define F2FS_MOUNT_QUOTA 0x00040000
#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00080000
#define F2FS_MOUNT_RESERVE_ROOT 0x00100000
#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x00200000
#define F2FS_MOUNT_NORECOVERY 0x00400000
#define F2FS_MOUNT_ATGC 0x00800000
#define F2FS_MOUNT_MERGE_CHECKPOINT 0x01000000
#define F2FS_MOUNT_GC_MERGE 0x02000000
#define F2FS_MOUNT_COMPRESS_CACHE 0x04000000
#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x08000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
......@@ -162,6 +162,7 @@ struct f2fs_mount_info {
int fs_mode; /* fs mode: LFS or ADAPTIVE */
int bggc_mode; /* bggc mode: off, on or sync */
int memory_mode; /* memory mode */
int errors; /* errors parameter */
int discard_unit; /*
* discard command's offset/size should
* be aligned to this unit: block,
......@@ -185,21 +186,21 @@ struct f2fs_mount_info {
unsigned char noextensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */
};
#define F2FS_FEATURE_ENCRYPT 0x0001
#define F2FS_FEATURE_BLKZONED 0x0002
#define F2FS_FEATURE_ATOMIC_WRITE 0x0004
#define F2FS_FEATURE_EXTRA_ATTR 0x0008
#define F2FS_FEATURE_PRJQUOTA 0x0010
#define F2FS_FEATURE_INODE_CHKSUM 0x0020
#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040
#define F2FS_FEATURE_QUOTA_INO 0x0080
#define F2FS_FEATURE_INODE_CRTIME 0x0100
#define F2FS_FEATURE_LOST_FOUND 0x0200
#define F2FS_FEATURE_VERITY 0x0400
#define F2FS_FEATURE_SB_CHKSUM 0x0800
#define F2FS_FEATURE_CASEFOLD 0x1000
#define F2FS_FEATURE_COMPRESSION 0x2000
#define F2FS_FEATURE_RO 0x4000
#define F2FS_FEATURE_ENCRYPT 0x00000001
#define F2FS_FEATURE_BLKZONED 0x00000002
#define F2FS_FEATURE_ATOMIC_WRITE 0x00000004
#define F2FS_FEATURE_EXTRA_ATTR 0x00000008
#define F2FS_FEATURE_PRJQUOTA 0x00000010
#define F2FS_FEATURE_INODE_CHKSUM 0x00000020
#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x00000040
#define F2FS_FEATURE_QUOTA_INO 0x00000080
#define F2FS_FEATURE_INODE_CRTIME 0x00000100
#define F2FS_FEATURE_LOST_FOUND 0x00000200
#define F2FS_FEATURE_VERITY 0x00000400
#define F2FS_FEATURE_SB_CHKSUM 0x00000800
#define F2FS_FEATURE_CASEFOLD 0x00001000
#define F2FS_FEATURE_COMPRESSION 0x00002000
#define F2FS_FEATURE_RO 0x00004000
#define __F2FS_HAS_FEATURE(raw_super, mask) \
((raw_super->feature & cpu_to_le32(mask)) != 0)
......@@ -1175,6 +1176,7 @@ enum iostat_type {
/* other */
FS_DISCARD_IO, /* discard */
FS_FLUSH_IO, /* flush */
FS_ZONE_RESET_IO, /* zone reset */
NR_IO_TYPE,
};
......@@ -1217,6 +1219,11 @@ struct f2fs_bio_info {
struct bio *bio; /* bios to merge */
sector_t last_block_in_bio; /* last block number */
struct f2fs_io_info fio; /* store buffered io info. */
#ifdef CONFIG_BLK_DEV_ZONED
struct completion zone_wait; /* condition value for the previous open zone to close */
struct bio *zone_pending_bio; /* pending bio for the previous zone */
void *bi_private; /* previous bi_private for pending bio */
#endif
struct f2fs_rwsem io_rwsem; /* blocking op for bio */
spinlock_t io_lock; /* serialize DATA/NODE IOs */
struct list_head io_list; /* track fios */
......@@ -1370,6 +1377,12 @@ enum {
MEMORY_MODE_LOW, /* memory mode for low memry devices */
};
/*
 * Error-handling modes stored in f2fs_mount_info.errors (the "errors="
 * mount parameter).
 */
enum errors_option {
MOUNT_ERRORS_READONLY, /* remount fs ro on errors */
MOUNT_ERRORS_CONTINUE, /* continue on errors */
MOUNT_ERRORS_PANIC, /* panic on errors */
};
static inline int f2fs_test_bit(unsigned int nr, char *addr);
static inline void f2fs_set_bit(unsigned int nr, char *addr);
static inline void f2fs_clear_bit(unsigned int nr, char *addr);
......@@ -1427,6 +1440,8 @@ struct compress_data {
#define F2FS_COMPRESSED_PAGE_MAGIC 0xF5F2C000
#define F2FS_ZSTD_DEFAULT_CLEVEL 1
#define COMPRESS_LEVEL_OFFSET 8
/* compress context */
......@@ -1721,8 +1736,14 @@ struct f2fs_sb_info {
struct workqueue_struct *post_read_wq; /* post read workqueue */
unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
spinlock_t error_lock; /* protect errors array */
/*
* If we are in irq context, let's update error information into
* on-disk superblock in the work.
*/
struct work_struct s_error_work;
unsigned char errors[MAX_F2FS_ERRORS]; /* error flags */
unsigned char stop_reason[MAX_STOP_REASON]; /* stop reason */
spinlock_t error_lock; /* protect errors/stop_reason array */
bool error_dirty; /* errors of sb is dirty */
struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
......@@ -2941,6 +2962,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */
#define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL)
/* Flags that should be inherited by new inodes from their parent. */
#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \
F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
......@@ -3394,6 +3417,8 @@ static inline int get_inline_xattr_addrs(struct inode *inode)
((is_inode_flag_set(i, FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
#define F2FS_MIN_EXTRA_ATTR_SIZE (sizeof(__le32))
#define F2FS_TOTAL_EXTRA_ATTR_SIZE \
(offsetof(struct f2fs_inode, i_extra_end) - \
offsetof(struct f2fs_inode, i_extra_isize)) \
......@@ -3432,7 +3457,6 @@ static inline bool __is_valid_data_blkaddr(block_t blkaddr)
* file.c
*/
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate(struct inode *inode);
......@@ -3541,9 +3565,11 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
int f2fs_quota_sync(struct super_block *sb, int type);
loff_t max_file_blocks(struct inode *inode);
void f2fs_quota_off_umount(struct super_block *sb);
void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
bool irq_context);
void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
void f2fs_handle_error_async(struct f2fs_sb_info *sbi, unsigned char error);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi);
......@@ -3815,7 +3841,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
int f2fs_resize_fs(struct file *filp, __u64 block_count);
int __init f2fs_create_garbage_collection_cache(void);
void f2fs_destroy_garbage_collection_cache(void);
/* victim selection function for cleaning and SSR */
......@@ -4213,6 +4239,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock);
void f2fs_compress_write_end_io(struct bio *bio, struct page *page);
bool f2fs_is_compress_backend_ready(struct inode *inode);
bool f2fs_is_compress_level_valid(int alg, int lvl);
int __init f2fs_init_compress_mempool(void);
void f2fs_destroy_compress_mempool(void);
void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task);
......@@ -4277,6 +4304,7 @@ static inline bool f2fs_is_compress_backend_ready(struct inode *inode)
/* not support compression */
return false;
}
static inline bool f2fs_is_compress_level_valid(int alg, int lvl) { return false; }
static inline struct page *f2fs_compress_control_page(struct page *page)
{
WARN_ON_ONCE(1);
......
......@@ -149,8 +149,6 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
zero_user_segment(page, offset, PAGE_SIZE);
}
set_page_dirty(page);
if (!PageUptodate(page))
SetPageUptodate(page);
f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
f2fs_update_time(sbi, REQ_TIME);
......@@ -546,7 +544,8 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
if (err)
return err;
filp->f_mode |= FMODE_NOWAIT;
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
filp->f_mode |= FMODE_CAN_ODIRECT;
return dquot_file_open(inode, filp);
}
......@@ -627,11 +626,6 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->ofs_in_node, nr_free);
}
void f2fs_truncate_data_blocks(struct dnode_of_data *dn)
{
f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
}
static int truncate_partial_data_page(struct inode *inode, u64 from,
bool cache_only)
{
......@@ -2225,7 +2219,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
ret = 0;
f2fs_stop_checkpoint(sbi, false,
STOP_CP_REASON_SHUTDOWN);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
trace_f2fs_shutdown(sbi, in, ret);
}
return ret;
......@@ -2238,7 +2231,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
thaw_bdev(sb->s_bdev);
break;
case F2FS_GOING_DOWN_METASYNC:
......@@ -2247,16 +2239,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_METAFLUSH:
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NEED_FSCK:
set_sbi_flag(sbi, SBI_NEED_FSCK);
......@@ -2593,6 +2582,11 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
inode_lock(inode);
if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
err = -EINVAL;
goto unlock_out;
}
/* if in-place-update policy is enabled, don't waste time here */
set_inode_flag(inode, FI_OPU_WRITE);
if (f2fs_should_update_inplace(inode, NULL)) {
......@@ -2717,6 +2711,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
clear_inode_flag(inode, FI_SKIP_WRITES);
out:
clear_inode_flag(inode, FI_OPU_WRITE);
unlock_out:
inode_unlock(inode);
if (!err)
range->len = (u64)total << PAGE_SHIFT;
......@@ -2876,6 +2871,17 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
out_src:
f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
if (ret)
goto out_unlock;
src->i_mtime = src->i_ctime = current_time(src);
f2fs_mark_inode_dirty_sync(src, false);
if (src != dst) {
dst->i_mtime = dst->i_ctime = current_time(dst);
f2fs_mark_inode_dirty_sync(dst, false);
}
f2fs_update_time(sbi, REQ_TIME);
out_unlock:
if (src != dst)
inode_unlock(dst);
......@@ -3278,7 +3284,7 @@ static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
sizeof(block_count)))
return -EFAULT;
return f2fs_resize_fs(sbi, block_count);
return f2fs_resize_fs(filp, block_count);
}
static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
......@@ -3375,18 +3381,29 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
return err;
}
static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg)
static int f2fs_get_compress_blocks(struct inode *inode, __u64 *blocks)
{
struct inode *inode = file_inode(filp);
__u64 blocks;
if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
return -EOPNOTSUPP;
if (!f2fs_compressed_file(inode))
return -EINVAL;
blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
*blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
return 0;
}
/*
 * ioctl wrapper around f2fs_get_compress_blocks(): query the block count for
 * the file's inode and copy it to the user pointer carried in @arg.
 *
 * Returns the helper's negative error code if the query fails, otherwise
 * the result of put_user().
 */
static int f2fs_ioc_get_compress_blocks(struct file *filp, unsigned long arg)
{
	__u64 nr_blocks;
	int err = f2fs_get_compress_blocks(file_inode(filp), &nr_blocks);

	if (err < 0)
		return err;

	return put_user(nr_blocks, (u64 __user *)arg);
}
......@@ -3455,7 +3472,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
int ret;
int writecount;
if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
if (!f2fs_sb_has_compression(sbi))
return -EOPNOTSUPP;
if (!f2fs_compressed_file(inode))
......@@ -3468,7 +3485,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
if (ret)
return ret;
f2fs_balance_fs(F2FS_I_SB(inode), true);
f2fs_balance_fs(sbi, true);
inode_lock(inode);
......@@ -3488,13 +3505,15 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
if (ret)
goto out;
if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
ret = -EPERM;
goto out;
}
set_inode_flag(inode, FI_COMPRESS_RELEASED);
inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode, true);
if (!atomic_read(&F2FS_I(inode)->i_compr_blocks))
goto out;
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
......@@ -3625,7 +3644,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
unsigned int reserved_blocks = 0;
int ret;
if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
if (!f2fs_sb_has_compression(sbi))
return -EOPNOTSUPP;
if (!f2fs_compressed_file(inode))
......@@ -3641,7 +3660,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
goto out;
f2fs_balance_fs(F2FS_I_SB(inode), true);
f2fs_balance_fs(sbi, true);
inode_lock(inode);
......@@ -4035,7 +4054,7 @@ static int f2fs_ioc_decompress_file(struct file *filp)
if (!f2fs_compressed_file(inode))
return -EINVAL;
f2fs_balance_fs(F2FS_I_SB(inode), true);
f2fs_balance_fs(sbi, true);
file_start_write(filp);
inode_lock(inode);
......@@ -4110,7 +4129,7 @@ static int f2fs_ioc_compress_file(struct file *filp)
if (!f2fs_compressed_file(inode))
return -EINVAL;
f2fs_balance_fs(F2FS_I_SB(inode), true);
f2fs_balance_fs(sbi, true);
file_start_write(filp);
inode_lock(inode);
......@@ -4238,7 +4257,7 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case FS_IOC_SETFSLABEL:
return f2fs_ioc_setfslabel(filp, arg);
case F2FS_IOC_GET_COMPRESS_BLOCKS:
return f2fs_get_compress_blocks(filp, arg);
return f2fs_ioc_get_compress_blocks(filp, arg);
case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
return f2fs_release_compress_blocks(filp, arg);
case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
......
......@@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
if (gc_th->gc_wake)
gc_th->gc_wake = false;
if (try_to_freeze()) {
if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
stat_other_skip_bggc_count(sbi);
continue;
}
......@@ -1797,7 +1797,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
{
int gc_type = gc_control->init_gc_type;
unsigned int segno = gc_control->victim_segno;
int sec_freed = 0, seg_freed = 0, total_freed = 0;
int sec_freed = 0, seg_freed = 0, total_freed = 0, total_sec_freed = 0;
int ret = 0;
struct cp_control cpc;
struct gc_inode_list gc_list = {
......@@ -1842,6 +1842,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
/* Reset due to checkpoint */
sec_freed = 0;
}
}
......@@ -1866,15 +1868,17 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
gc_control->should_migrate_blocks);
total_freed += seg_freed;
if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) {
sec_freed++;
total_sec_freed++;
}
if (gc_type == FG_GC) {
sbi->cur_victim_sec = NULL_SEGNO;
if (has_enough_free_secs(sbi, sec_freed, 0)) {
if (!gc_control->no_bg_gc &&
sec_freed < gc_control->nr_free_secs)
total_sec_freed < gc_control->nr_free_secs)
goto go_gc_more;
goto stop;
}
......@@ -1901,6 +1905,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
/* Reset due to checkpoint */
sec_freed = 0;
}
go_gc_more:
segno = NULL_SEGNO;
......@@ -1913,7 +1919,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
if (gc_type == FG_GC)
f2fs_unpin_all_sections(sbi, true);
trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed,
trace_f2fs_gc_end(sbi->sb, ret, total_freed, total_sec_freed,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
......@@ -1927,7 +1933,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
put_gc_inode(&gc_list);
if (gc_control->err_gc_skipped && !ret)
ret = sec_freed ? 0 : -EAGAIN;
ret = total_sec_freed ? 0 : -EAGAIN;
return ret;
}
......@@ -2099,8 +2105,9 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
}
}
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
int f2fs_resize_fs(struct file *filp, __u64 block_count)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
__u64 old_block_count, shrunk_blocks;
struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
unsigned int secs;
......@@ -2138,12 +2145,18 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
return -EINVAL;
}
err = mnt_want_write_file(filp);
if (err)
return err;
shrunk_blocks = old_block_count - block_count;
secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
/* stop other GC */
if (!f2fs_down_write_trylock(&sbi->gc_lock))
return -EAGAIN;
if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
err = -EAGAIN;
goto out_drop_write;
}
/* stop CP to protect MAIN_SEC in free_segment_range */
f2fs_lock_op(sbi);
......@@ -2163,10 +2176,20 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
out_unlock:
f2fs_unlock_op(sbi);
f2fs_up_write(&sbi->gc_lock);
out_drop_write:
mnt_drop_write_file(filp);
if (err)
return err;
freeze_super(sbi->sb);
err = freeze_super(sbi->sb);
if (err)
return err;
if (f2fs_readonly(sbi->sb)) {
thaw_super(sbi->sb);
return -EROFS;
}
f2fs_down_write(&sbi->gc_lock);
f2fs_down_write(&sbi->cp_global_sem);
......
......@@ -10,6 +10,8 @@
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/sched/mm.h>
#include <linux/lz4.h>
#include <linux/zstd.h>
#include "f2fs.h"
#include "node.h"
......@@ -202,6 +204,80 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}
/*
 * Sanity-check the compression fields of an on-disk inode.
 *
 * @inode: in-memory inode; supplies the superblock info, i_ino and i_blocks
 * @ri:    raw on-disk inode whose compression fields are validated
 *
 * The checks run in this order: compress algorithm id, i_compr_blocks
 * against i_blocks, log cluster size, and finally the compress level taken
 * from the bits of i_compress_flag above COMPRESS_LEVEL_OFFSET.  The level
 * is only validated for an algorithm whose CONFIG_F2FS_FS_* option is set.
 *
 * Return: true if every check passes.  Otherwise a warning is logged,
 * SBI_NEED_FSCK is set on the superblock info and false is returned.
 */
static bool sanity_check_compress_inode(struct inode *inode,
			struct f2fs_inode *ri)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned char clevel;
	bool level_ok = true;

	if (ri->i_compress_algorithm >= COMPRESS_MAX) {
		f2fs_warn(sbi,
			"%s: inode (ino=%lx) has unsupported compress algorithm: %u, run fsck to fix",
			__func__, inode->i_ino, ri->i_compress_algorithm);
		goto corrupted;
	}

	if (le64_to_cpu(ri->i_compr_blocks) >
			SECTOR_TO_BLOCK(inode->i_blocks)) {
		f2fs_warn(sbi,
			"%s: inode (ino=%lx) has inconsistent i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix",
			__func__, inode->i_ino, le64_to_cpu(ri->i_compr_blocks),
			SECTOR_TO_BLOCK(inode->i_blocks));
		goto corrupted;
	}

	if (!(ri->i_log_cluster_size >= MIN_COMPRESS_LOG_SIZE &&
	      ri->i_log_cluster_size <= MAX_COMPRESS_LOG_SIZE)) {
		f2fs_warn(sbi,
			"%s: inode (ino=%lx) has unsupported log cluster size: %u, run fsck to fix",
			__func__, inode->i_ino, ri->i_log_cluster_size);
		goto corrupted;
	}

	clevel = le16_to_cpu(ri->i_compress_flag) >> COMPRESS_LEVEL_OFFSET;

	switch (ri->i_compress_algorithm) {
	case COMPRESS_LZO:
#ifdef CONFIG_F2FS_FS_LZO
		/* only a zero level is accepted */
		level_ok = !clevel;
#endif
		break;
	case COMPRESS_LZORLE:
#ifdef CONFIG_F2FS_FS_LZORLE
		/* only a zero level is accepted */
		level_ok = !clevel;
#endif
		break;
	case COMPRESS_LZ4:
#if defined(CONFIG_F2FS_FS_LZ4) && defined(CONFIG_F2FS_FS_LZ4HC)
		/*
		 * Zero is always accepted; a non-zero level must lie within
		 * the LZ4HC range.
		 */
		level_ok = !clevel ||
			(clevel >= LZ4HC_MIN_CLEVEL &&
			 clevel <= LZ4HC_MAX_CLEVEL);
#elif defined(CONFIG_F2FS_FS_LZ4)
		/* without LZ4HC only a zero level is accepted */
		level_ok = !clevel;
#endif
		break;
	case COMPRESS_ZSTD:
#ifdef CONFIG_F2FS_FS_ZSTD
		level_ok = clevel >= zstd_min_clevel() &&
			   clevel <= zstd_max_clevel();
#endif
		break;
	default:
		level_ok = false;
		break;
	}

	if (level_ok)
		return true;

	f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported compress level: %u, run fsck to fix",
		__func__, inode->i_ino, clevel);
corrupted:
	set_sbi_flag(sbi, SBI_NEED_FSCK);
	return false;
}
static bool sanity_check_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
......@@ -225,41 +301,77 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
if (f2fs_sb_has_flexible_inline_xattr(sbi)
&& !f2fs_has_extra_attr(inode)) {
if (f2fs_has_extra_attr(inode)) {
if (!f2fs_sb_has_extra_attr(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off",
__func__, inode->i_ino);
return false;
}
if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
fi->i_extra_isize < F2FS_MIN_EXTRA_ATTR_SIZE ||
fi->i_extra_isize % sizeof(__le32)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu",
__func__, inode->i_ino, fi->i_extra_isize,
F2FS_TOTAL_EXTRA_ATTR_SIZE);
return false;
}
if (f2fs_sb_has_flexible_inline_xattr(sbi) &&
f2fs_has_inline_xattr(inode) &&
(!fi->i_inline_xattr_size ||
fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu",
__func__, inode->i_ino, fi->i_inline_xattr_size,
MAX_INLINE_XATTR_SIZE);
return false;
}
if (f2fs_sb_has_compression(sbi) &&
fi->i_flags & F2FS_COMPR_FL &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
i_compress_flag)) {
if (!sanity_check_compress_inode(inode, ri))
return false;
}
} else if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.",
__func__, inode->i_ino);
return false;
}
if (f2fs_has_extra_attr(inode) &&
!f2fs_sb_has_extra_attr(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off",
__func__, inode->i_ino);
return false;
}
if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
fi->i_extra_isize % sizeof(__le32)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu",
__func__, inode->i_ino, fi->i_extra_isize,
F2FS_TOTAL_EXTRA_ATTR_SIZE);
return false;
}
if (f2fs_has_extra_attr(inode) &&
f2fs_sb_has_flexible_inline_xattr(sbi) &&
f2fs_has_inline_xattr(inode) &&
(!fi->i_inline_xattr_size ||
fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu",
__func__, inode->i_ino, fi->i_inline_xattr_size,
MAX_INLINE_XATTR_SIZE);
return false;
if (!f2fs_sb_has_extra_attr(sbi)) {
if (f2fs_sb_has_project_quota(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
__func__, inode->i_ino, F2FS_FEATURE_PRJQUOTA);
return false;
}
if (f2fs_sb_has_inode_chksum(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
__func__, inode->i_ino, F2FS_FEATURE_INODE_CHKSUM);
return false;
}
if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
__func__, inode->i_ino, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR);
return false;
}
if (f2fs_sb_has_inode_crtime(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
__func__, inode->i_ino, F2FS_FEATURE_INODE_CRTIME);
return false;
}
if (f2fs_sb_has_compression(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.",
__func__, inode->i_ino, F2FS_FEATURE_COMPRESSION);
return false;
}
}
if (f2fs_sanity_check_inline_data(inode)) {
......@@ -283,39 +395,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) &&
fi->i_flags & F2FS_COMPR_FL &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
i_log_cluster_size)) {
if (ri->i_compress_algorithm >= COMPRESS_MAX) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
"compress algorithm: %u, run fsck to fix",
__func__, inode->i_ino,
ri->i_compress_algorithm);
return false;
}
if (le64_to_cpu(ri->i_compr_blocks) >
SECTOR_TO_BLOCK(inode->i_blocks)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has inconsistent "
"i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix",
__func__, inode->i_ino,
le64_to_cpu(ri->i_compr_blocks),
SECTOR_TO_BLOCK(inode->i_blocks));
return false;
}
if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
"log cluster size: %u, run fsck to fix",
__func__, inode->i_ino,
ri->i_log_cluster_size);
return false;
}
}
return true;
}
......@@ -442,7 +521,7 @@ static int do_read_inode(struct inode *inode)
if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) &&
(fi->i_flags & F2FS_COMPR_FL)) {
if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
i_log_cluster_size)) {
i_compress_flag)) {
unsigned short compress_flag;
atomic_set(&fi->i_compr_blocks,
......@@ -680,7 +759,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
if (f2fs_sb_has_compression(F2FS_I_SB(inode)) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_log_cluster_size)) {
i_compress_flag)) {
unsigned short compress_flag;
ri->i_compr_blocks =
......
......@@ -80,6 +80,7 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
seq_puts(seq, "[OTHER]\n");
IOSTAT_INFO_SHOW("fs discard", FS_DISCARD_IO);
IOSTAT_INFO_SHOW("fs flush", FS_FLUSH_IO);
IOSTAT_INFO_SHOW("fs zone reset", FS_ZONE_RESET_IO);
return 0;
}
......
......@@ -23,7 +23,7 @@
#include <trace/events/f2fs.h>
static inline bool is_extension_exist(const unsigned char *s, const char *sub,
bool tmp_ext)
bool tmp_ext, bool tmp_dot)
{
size_t slen = strlen(s);
size_t sublen = strlen(sub);
......@@ -49,13 +49,27 @@ static inline bool is_extension_exist(const unsigned char *s, const char *sub,
for (i = 1; i < slen - sublen; i++) {
if (s[i] != '.')
continue;
if (!strncasecmp(s + i + 1, sub, sublen))
return true;
if (!strncasecmp(s + i + 1, sub, sublen)) {
if (!tmp_dot)
return true;
if (i == slen - sublen - 1 || s[i + 1 + sublen] == '.')
return true;
}
}
return false;
}
/*
 * Match file name @s against extension @sub for hot/cold classification.
 *
 * Thin wrapper around is_extension_exist() with tmp_ext enabled and tmp_dot
 * disabled, so a ".<sub>" match found inside the name is accepted regardless
 * of which character follows it.
 */
static inline bool is_temperature_extension(const unsigned char *s, const char *sub)
{
	const bool tmp_ext = true;
	const bool tmp_dot = false;

	return is_extension_exist(s, sub, tmp_ext, tmp_dot);
}
/*
 * Match file name @s against extension @sub for the compression
 * extension lists.
 *
 * Thin wrapper around is_extension_exist() with both tmp_ext and tmp_dot
 * enabled.  With tmp_dot set, a ".<sub>" match found inside the name only
 * counts when the extension ends the name or is directly followed by
 * another '.'.
 */
static inline bool is_compress_extension(const unsigned char *s, const char *sub)
{
	const bool tmp_ext = true;
	const bool tmp_dot = true;

	return is_extension_exist(s, sub, tmp_ext, tmp_dot);
}
int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
bool hot, bool set)
{
......@@ -148,7 +162,7 @@ static void set_compress_new_inode(struct f2fs_sb_info *sbi, struct inode *dir,
cold_count = le32_to_cpu(sbi->raw_super->extension_count);
hot_count = sbi->raw_super->hot_ext_count;
for (i = cold_count; i < cold_count + hot_count; i++)
if (is_extension_exist(name, extlist[i], false))
if (is_temperature_extension(name, extlist[i]))
break;
f2fs_up_read(&sbi->sb_lock);
if (i < (cold_count + hot_count))
......@@ -156,12 +170,12 @@ static void set_compress_new_inode(struct f2fs_sb_info *sbi, struct inode *dir,
/* Don't compress unallowed extension. */
for (i = 0; i < noext_cnt; i++)
if (is_extension_exist(name, noext[i], false))
if (is_compress_extension(name, noext[i]))
return;
/* Compress wanting extension. */
for (i = 0; i < ext_cnt; i++) {
if (is_extension_exist(name, ext[i], false)) {
if (is_compress_extension(name, ext[i])) {
set_compress_context(inode);
return;
}
......@@ -189,7 +203,7 @@ static void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode,
cold_count = le32_to_cpu(sbi->raw_super->extension_count);
hot_count = sbi->raw_super->hot_ext_count;
for (i = 0; i < cold_count + hot_count; i++)
if (is_extension_exist(name, extlist[i], true))
if (is_temperature_extension(name, extlist[i]))
break;
f2fs_up_read(&sbi->sb_lock);
......@@ -576,8 +590,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
}
#endif
new = d_splice_alias(inode, dentry);
err = PTR_ERR_OR_ZERO(new);
trace_f2fs_lookup_end(dir, dentry, ino, !new ? -ENOENT : err);
trace_f2fs_lookup_end(dir, !IS_ERR_OR_NULL(new) ? new : dentry,
ino, IS_ERR(new) ? PTR_ERR(new) : err);
return new;
out_iput:
iput(inode);
......
......@@ -925,6 +925,7 @@ static int truncate_node(struct dnode_of_data *dn)
static int truncate_dnode(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct page *page;
int err;
......@@ -932,19 +933,30 @@ static int truncate_dnode(struct dnode_of_data *dn)
return 1;
/* get direct node */
page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
page = f2fs_get_node_page(sbi, dn->nid);
if (PTR_ERR(page) == -ENOENT)
return 1;
else if (IS_ERR(page))
return PTR_ERR(page);
if (IS_INODE(page) || ino_of_node(page) != dn->inode->i_ino) {
f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u",
dn->inode->i_ino, dn->nid, ino_of_node(page));
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE);
f2fs_put_page(page, 1);
return -EFSCORRUPTED;
}
/* Make dnode_of_data for parameter */
dn->node_page = page;
dn->ofs_in_node = 0;
f2fs_truncate_data_blocks(dn);
f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
err = truncate_node(dn);
if (err)
if (err) {
f2fs_put_page(page, 1);
return err;
}
return 1;
}
......@@ -1596,6 +1608,9 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
trace_f2fs_writepage(page, NODE);
if (unlikely(f2fs_cp_error(sbi))) {
/* keep node pages in remount-ro mode */
if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
goto redirty_out;
ClearPageUptodate(page);
dec_page_count(sbi, F2FS_DIRTY_NODES);
unlock_page(page);
......@@ -2063,7 +2078,6 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
struct list_head *head = &sbi->fsync_node_list;
unsigned long flags;
unsigned int cur_seq_id = 0;
int ret2, ret = 0;
while (seq_id && cur_seq_id < seq_id) {
spin_lock_irqsave(&sbi->fsync_node_lock, flags);
......@@ -2084,16 +2098,9 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
f2fs_wait_on_page_writeback(page, NODE, true, false);
put_page(page);
if (ret)
break;
}
ret2 = filemap_check_errors(NODE_MAPPING(sbi));
if (!ret)
ret = ret2;
return ret;
return filemap_check_errors(NODE_MAPPING(sbi));
}
static int f2fs_write_node_pages(struct address_space *mapping,
......@@ -3065,7 +3072,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
struct f2fs_journal *journal = curseg->journal;
struct nat_entry_set *setvec[SETVEC_SIZE];
struct nat_entry_set *setvec[NAT_VEC_SIZE];
struct nat_entry_set *set, *tmp;
unsigned int found;
nid_t set_idx = 0;
......@@ -3098,7 +3105,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
remove_nats_in_journal(sbi);
while ((found = __gang_lookup_nat_set(nm_i,
set_idx, SETVEC_SIZE, setvec))) {
set_idx, NAT_VEC_SIZE, setvec))) {
unsigned idx;
set_idx = setvec[found - 1]->set + 1;
......@@ -3319,8 +3326,9 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i, *next_i;
struct nat_entry *natvec[NATVEC_SIZE];
struct nat_entry_set *setvec[SETVEC_SIZE];
void *vec[NAT_VEC_SIZE];
struct nat_entry **natvec = (struct nat_entry **)vec;
struct nat_entry_set **setvec = (struct nat_entry_set **)vec;
nid_t nid = 0;
unsigned int found;
......@@ -3343,7 +3351,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
/* destroy nat cache */
f2fs_down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
nid, NAT_VEC_SIZE, natvec))) {
unsigned idx;
nid = nat_get_nid(natvec[found - 1]) + 1;
......@@ -3359,8 +3367,9 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
/* destroy nat set cache */
nid = 0;
memset(vec, 0, sizeof(void *) * NAT_VEC_SIZE);
while ((found = __gang_lookup_nat_set(nm_i,
nid, SETVEC_SIZE, setvec))) {
nid, NAT_VEC_SIZE, setvec))) {
unsigned idx;
nid = setvec[found - 1]->set + 1;
......
......@@ -35,8 +35,7 @@
#define DEF_RF_NODE_BLOCKS 0
/* vector size for gang look-up from nat cache that consists of radix tree */
#define NATVEC_SIZE 64
#define SETVEC_SIZE 32
#define NAT_VEC_SIZE 32
/* return value for read_node_page */
#define LOCKED_PAGE 1
......
......@@ -360,21 +360,63 @@ static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi,
return ra_blocks;
}
/*
 * Detect a looped node chain with Floyd's cycle detection algorithm.
 *
 * @sbi:          f2fs superblock info
 * @blkaddr:      block address the fast cursor is compared against
 * @blkaddr_fast: fast cursor; advanced by up to two nodes on every call
 * @is_detecting: detection switch; cleared here once the fast cursor hits
 *                an invalid block address or a non-recoverable dnode, after
 *                which further calls return immediately
 *
 * Return: 0 when no loop was found or detection is switched off, -EINVAL
 * when the fast cursor ends up on @blkaddr, or the error reported by
 * f2fs_get_tmp_page().
 */
static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
		block_t *blkaddr_fast, bool *is_detecting)
{
	unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
	int hops = 2;

	if (!*is_detecting)
		return 0;

	/* the fast cursor walks two nodes per call */
	while (hops-- > 0) {
		struct page *node;

		if (!f2fs_is_valid_blkaddr(sbi, *blkaddr_fast, META_POR))
			goto stop_detecting;

		node = f2fs_get_tmp_page(sbi, *blkaddr_fast);
		if (IS_ERR(node))
			return PTR_ERR(node);

		if (!is_recoverable_dnode(node)) {
			f2fs_put_page(node, 1);
			goto stop_detecting;
		}

		ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, *blkaddr_fast,
						next_blkaddr_of_node(node));

		*blkaddr_fast = next_blkaddr_of_node(node);
		f2fs_put_page(node, 1);

		f2fs_ra_meta_pages_cond(sbi, *blkaddr_fast, ra_blocks);
	}

	if (*blkaddr_fast != blkaddr)
		return 0;

	f2fs_notice(sbi, "%s: Detect looped node chain on blkaddr:%u."
		" Run fsck to fix it.", __func__, blkaddr);
	return -EINVAL;

stop_detecting:
	*is_detecting = false;
	return 0;
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
bool check_only)
{
struct curseg_info *curseg;
struct page *page = NULL;
block_t blkaddr;
unsigned int loop_cnt = 0;
unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
valid_user_blocks(sbi);
block_t blkaddr, blkaddr_fast;
bool is_detecting = true;
int err = 0;
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
blkaddr_fast = blkaddr;
while (1) {
struct fsync_inode_entry *entry;
......@@ -418,10 +460,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
if (err == -ENOENT) {
err = 0;
if (err == -ENOENT)
goto next;
}
f2fs_put_page(page, 1);
break;
}
......@@ -431,25 +471,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
if (IS_INODE(page) && is_dent_dnode(page))
entry->last_dentry = blkaddr;
next:
/* sanity check in order to detect looped node chain */
if (++loop_cnt >= free_blocks ||
blkaddr == next_blkaddr_of_node(page)) {
f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u",
__func__, blkaddr,
next_blkaddr_of_node(page));
f2fs_put_page(page, 1);
err = -EINVAL;
break;
}
ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr,
next_blkaddr_of_node(page));
/* check next segment */
blkaddr = next_blkaddr_of_node(page);
f2fs_put_page(page, 1);
f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
err = sanity_check_node_chain(sbi, blkaddr, &blkaddr_fast,
&is_detecting);
if (err)
break;
}
return err;
}
......
......@@ -1196,6 +1196,45 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len);
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Submit a zone reset bio for discard command @dc.
 *
 * @sbi:       f2fs superblock info
 * @dc:        discard command describing the zone (device, start, length)
 * @flag:      extra request flags OR-ed into REQ_OP_ZONE_RESET
 * @wait_list: list @dc is moved onto before the bio is submitted
 * @issued:    optional counter, incremented when non-NULL
 *
 * The command is marked D_SUBMIT and accounted as queued before the bio is
 * handed to the block layer; completion is reported through
 * f2fs_submit_discard_endio() with @dc as the bio's private data.
 */
static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi,
				   struct discard_cmd *dc, blk_opf_t flag,
				   struct list_head *wait_list,
				   unsigned int *issued)
{
	struct discard_cmd_control *ctl = SM_I(sbi)->dcc_info;
	struct block_device *zdev = dc->bdev;
	struct bio *reset_bio = bio_alloc(zdev, 0, REQ_OP_ZONE_RESET | flag,
					  GFP_NOFS);
	unsigned long irq_state;

	trace_f2fs_issue_reset_zone(zdev, dc->di.start);

	/* state and bio reference count change together under dc->lock */
	spin_lock_irqsave(&dc->lock, irq_state);
	dc->state = D_SUBMIT;
	dc->bio_ref++;
	spin_unlock_irqrestore(&dc->lock, irq_state);

	if (issued != NULL)
		(*issued)++;

	atomic_inc(&ctl->queued_discard);
	dc->queued++;
	list_move_tail(&dc->list, wait_list);

	/* sanity check on discard range */
	__check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len);

	reset_bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start);
	reset_bio->bi_private = dc;
	reset_bio->bi_end_io = f2fs_submit_discard_endio;
	submit_bio(reset_bio);

	atomic_inc(&ctl->issued_discard);
	/* account the zone length, in bytes, as zone reset I/O */
	f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE);
}
#endif
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
......@@ -1217,6 +1256,13 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
return 0;
#ifdef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) {
__submit_zone_reset_cmd(sbi, dc, flag, wait_list, issued);
return 0;
}
#endif
trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);
lstart = dc->di.lstart;
......@@ -1461,6 +1507,19 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
}
}
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Queue a zone reset as a discard command instead of issuing it directly.
 *
 * @sbi:       f2fs superblock info
 * @bdev:      block device the zone belongs to
 * @blkstart:  start block passed to the tracepoint and to
 *             __insert_discard_cmd() as its start argument
 * @lblkstart: start block passed to __insert_discard_cmd() as its logical
 *             start argument
 * @blklen:    length in blocks
 *
 * Emits the f2fs_queue_reset_zone trace event, then inserts the command
 * while holding the discard control cmd_lock mutex.
 */
static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t lblkstart,
		block_t blklen)
{
	struct mutex *cmd_lock;

	trace_f2fs_queue_reset_zone(bdev, blkstart);

	cmd_lock = &SM_I(sbi)->dcc_info->cmd_lock;

	mutex_lock(cmd_lock);
	__insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
	mutex_unlock(cmd_lock);
}
#endif
static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t blkstart, block_t blklen)
{
......@@ -1724,6 +1783,19 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
mutex_lock(&dcc->cmd_lock);
dc = __lookup_discard_cmd(sbi, blkaddr);
#ifdef CONFIG_BLK_DEV_ZONED
if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) {
/* force submit zone reset */
if (dc->state == D_PREP)
__submit_zone_reset_cmd(sbi, dc, REQ_SYNC,
&dcc->wait_list, NULL);
dc->ref++;
mutex_unlock(&dcc->cmd_lock);
/* wait zone reset */
__wait_one_discard_bio(sbi, dc);
return;
}
#endif
if (dc) {
if (dc->state == D_PREP) {
__punch_discard_cmd(sbi, dc, blkaddr);
......@@ -1876,9 +1948,15 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
blkstart, blklen);
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
sector, nr_sects, GFP_NOFS);
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
trace_f2fs_issue_reset_zone(bdev, blkstart);
return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
sector, nr_sects, GFP_NOFS);
}
__queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
return 0;
}
/* For conventional zones, use regular discard if supported */
......@@ -2115,7 +2193,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
len = next_pos - cur_pos;
if (f2fs_sb_has_blkzoned(sbi) ||
(force && len < cpc->trim_minlen))
!force || len < cpc->trim_minlen)
goto skip;
f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
......@@ -4768,17 +4846,17 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
{
unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
block_t zone_block, wp_block, last_valid_block;
unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
int i, s, b, ret;
struct seg_entry *se;
if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
return 0;
wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
wp_block = fdev->start_blk + (zone->wp >> sbi->log_sectors_per_block);
wp_segno = GET_SEGNO(sbi, wp_block);
wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
zone_block = fdev->start_blk + (zone->start >>
sbi->log_sectors_per_block);
zone_segno = GET_SEGNO(sbi, zone_block);
zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
......@@ -4811,39 +4889,52 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
}
/*
* If last valid block is beyond the write pointer, report the
* inconsistency. This inconsistency does not cause write error
* because the zone will not be selected for write operation until
* it get discarded. Just report it.
* The write pointer matches with the valid blocks or
* already points to the end of the zone.
*/
if (last_valid_block >= wp_block) {
f2fs_notice(sbi, "Valid block beyond write pointer: "
"valid block[0x%x,0x%x] wp[0x%x,0x%x]",
GET_SEGNO(sbi, last_valid_block),
GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
wp_segno, wp_blkoff);
if ((last_valid_block + 1 == wp_block) ||
(zone->wp == zone->start + zone->len))
return 0;
}
/*
* If there is no valid block in the zone and if write pointer is
* not at zone start, reset the write pointer.
*/
if (last_valid_block + 1 == zone_block && zone->wp != zone->start) {
if (last_valid_block + 1 == zone_block) {
/*
* If there is no valid block in the zone and if write pointer
* is not at zone start, reset the write pointer.
*/
f2fs_notice(sbi,
"Zone without valid block has non-zero write "
"pointer. Reset the write pointer: wp[0x%x,0x%x]",
wp_segno, wp_blkoff);
ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
zone->len >> log_sectors_per_block);
if (ret) {
zone->len >> sbi->log_sectors_per_block);
if (ret)
f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
fdev->path, ret);
return ret;
}
return ret;
}
return 0;
/*
* If there are valid blocks and the write pointer doesn't
* match with them, we need to report the inconsistency and
* fill the zone till the end to close the zone. This inconsistency
* does not cause write error because the zone will not be selected
* for write operation until it get discarded.
*/
f2fs_notice(sbi, "Valid blocks are not aligned with write pointer: "
"valid block[0x%x,0x%x] wp[0x%x,0x%x]",
GET_SEGNO(sbi, last_valid_block),
GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
wp_segno, wp_blkoff);
ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
zone->len - (zone->wp - zone->start),
GFP_NOFS, 0);
if (ret)
f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)",
fdev->path, ret);
return ret;
}
static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
......@@ -4876,7 +4967,6 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
struct blk_zone zone;
unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
block_t cs_zone_block, wp_block;
unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
sector_t zone_sector;
int err;
......@@ -4888,8 +4978,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
return 0;
/* report zone for the sector the curseg points to */
zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
<< log_sectors_per_block;
zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) <<
sbi->log_sectors_per_block;
err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
report_one_zone_cb, &zone);
if (err != 1) {
......@@ -4901,10 +4991,10 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
return 0;
wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
wp_block = zbd->start_blk + (zone.wp >> sbi->log_sectors_per_block);
wp_segno = GET_SEGNO(sbi, wp_block);
wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
wp_sector_off = zone.wp & GENMASK(sbi->log_sectors_per_block - 1, 0);
if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
wp_sector_off == 0)
......@@ -4931,8 +5021,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
if (!zbd)
return 0;
zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
<< log_sectors_per_block;
zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) <<
sbi->log_sectors_per_block;
err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
report_one_zone_cb, &zone);
if (err != 1) {
......@@ -4950,7 +5040,7 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
"Reset the zone: curseg[0x%x,0x%x]",
type, cs->segno, cs->next_blkoff);
err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
zone.len >> log_sectors_per_block);
zone.len >> sbi->log_sectors_per_block);
if (err) {
f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
zbd->path, err);
......
This diff is collapsed.
This diff is collapsed.
......@@ -528,10 +528,12 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
if (!ipage)
f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
error = lookup_all_xattrs(inode, ipage, index, len, name,
&entry, &base_addr, &base_size, &is_inline);
f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
if (!ipage)
f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
......
......@@ -83,6 +83,7 @@ struct f2fs_xattr_entry {
sizeof(struct f2fs_xattr_header) - \
sizeof(struct f2fs_xattr_entry))
#define MIN_INLINE_XATTR_SIZE (sizeof(struct f2fs_xattr_header) / sizeof(__le32))
#define MAX_INLINE_XATTR_SIZE \
(DEF_ADDRS_PER_INODE - \
F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - \
......
......@@ -103,6 +103,7 @@ enum f2fs_error {
ERROR_INCONSISTENT_SIT,
ERROR_CORRUPTED_VERITY_XATTR,
ERROR_CORRUPTED_XATTR,
ERROR_INVALID_NODE_REFERENCE,
ERROR_MAX,
};
......
......@@ -1512,7 +1512,7 @@ DEFINE_EVENT(f2fs_discard, f2fs_remove_discard,
TP_ARGS(dev, blkstart, blklen)
);
TRACE_EVENT(f2fs_issue_reset_zone,
DECLARE_EVENT_CLASS(f2fs_reset_zone,
TP_PROTO(struct block_device *dev, block_t blkstart),
......@@ -1528,11 +1528,25 @@ TRACE_EVENT(f2fs_issue_reset_zone,
__entry->blkstart = blkstart;
),
TP_printk("dev = (%d,%d), reset zone at block = 0x%llx",
TP_printk("dev = (%d,%d), zone at block = 0x%llx",
show_dev(__entry->dev),
(unsigned long long)__entry->blkstart)
);
/*
 * f2fs_queue_reset_zone: instance of the f2fs_reset_zone event class.
 * Takes the block device and the start block of the zone; emitted from
 * __queue_zone_reset_cmd() before the reset is inserted as a discard command.
 */
DEFINE_EVENT(f2fs_reset_zone, f2fs_queue_reset_zone,
TP_PROTO(struct block_device *dev, block_t blkstart),
TP_ARGS(dev, blkstart)
);
/*
 * f2fs_issue_reset_zone: instance of the f2fs_reset_zone event class.
 * Takes the block device and the start block of the zone; emitted when a
 * zone reset is actually sent to the device, i.e. from
 * __submit_zone_reset_cmd() and from the synchronous path in
 * __f2fs_issue_discard_zone().
 */
DEFINE_EVENT(f2fs_reset_zone, f2fs_issue_reset_zone,
TP_PROTO(struct block_device *dev, block_t blkstart),
TP_ARGS(dev, blkstart)
);
TRACE_EVENT(f2fs_issue_flush,
TP_PROTO(struct block_device *dev, unsigned int nobarrier,
......@@ -1979,6 +1993,7 @@ TRACE_EVENT(f2fs_iostat,
__field(unsigned long long, fs_nrio)
__field(unsigned long long, fs_mrio)
__field(unsigned long long, fs_discard)
__field(unsigned long long, fs_reset_zone)
),
TP_fast_assign(
......@@ -2010,12 +2025,14 @@ TRACE_EVENT(f2fs_iostat,
__entry->fs_nrio = iostat[FS_NODE_READ_IO];
__entry->fs_mrio = iostat[FS_META_READ_IO];
__entry->fs_discard = iostat[FS_DISCARD_IO];
__entry->fs_reset_zone = iostat[FS_ZONE_RESET_IO];
),
TP_printk("dev = (%d,%d), "
"app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu, "
"compr(buffered=%llu, mapped=%llu)], "
"fs [data=%llu, cdata=%llu, node=%llu, meta=%llu, discard=%llu], "
"fs [data=%llu, cdata=%llu, node=%llu, meta=%llu, discard=%llu, "
"reset_zone=%llu], "
"gc [data=%llu, node=%llu], "
"cp [data=%llu, node=%llu, meta=%llu], "
"app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], "
......@@ -2026,6 +2043,7 @@ TRACE_EVENT(f2fs_iostat,
__entry->app_bio, __entry->app_mio, __entry->app_bcdio,
__entry->app_mcdio, __entry->fs_dio, __entry->fs_cdio,
__entry->fs_nio, __entry->fs_mio, __entry->fs_discard,
__entry->fs_reset_zone,
__entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio,
__entry->fs_cp_nio, __entry->fs_cp_mio,
__entry->app_rio, __entry->app_drio, __entry->app_brio,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment