Commit 42612e77 authored by Linus Torvalds

Merge tag 'f2fs-for-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've added some knobs to enhance compression feature
  and harden testing environment. In addition, we've fixed several bugs
  reported from Android devices such as long discarding latency, device
  hanging during quota_sync, etc.

  Enhancements:
   - support lzo-rle algorithm
   - add two ioctls to release and reserve blocks for compression
   - support partial truncation/fiemap on compressed file
   - introduce sysfs entries to attach IO flags explicitly
   - add iostat trace point along with read io stat

  Bug fixes:
   - fix long discard latency
   - flush quota data by f2fs_quota_sync correctly
   - fix to recover parent inode number for power-cut recovery
   - fix lz4/zstd output buffer budget
   - parse checkpoint mount option correctly
   - avoid infinite loop when waiting for flushing node/meta pages
   - manage discard space correctly

  And some refactoring and clean up patches were added"

* tag 'f2fs-for-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (51 commits)
  f2fs: attach IO flags to the missing cases
  f2fs: add node_io_flag for bio flags likewise data_io_flag
  f2fs: remove unused parameter of f2fs_put_rpages_mapping()
  f2fs: handle readonly filesystem in f2fs_ioc_shutdown()
  f2fs: avoid utf8_strncasecmp() with unstable name
  f2fs: don't return vmalloc() memory from f2fs_kmalloc()
  f2fs: fix retry logic in f2fs_write_cache_pages()
  f2fs: fix wrong discard space
  f2fs: compress: don't compress any datas after cp stop
  f2fs: remove unneeded return value of __insert_discard_tree()
  f2fs: fix wrong value of tracepoint parameter
  f2fs: protect new segment allocation in expand_inode_data
  f2fs: code cleanup by removing ifdef macro surrounding
  f2fs: avoid inifinite loop to wait for flushing node pages at cp_error
  f2fs: flush dirty meta pages when flushing them
  f2fs: fix checkpoint=disable:%u%%
  f2fs: compress: fix zstd data corruption
  f2fs: add compressed/gc data read IO stat
  f2fs: fix potential use-after-free issue
  f2fs: compress: don't handle non-compressed data in workqueue
  ...
parents ad57a102 b7b911d5
......@@ -323,3 +323,27 @@ What: /sys/fs/f2fs/<disk>/mounted_time_sec
Date: February 2020
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description: Show the mounted time in secs of this partition.
What: /sys/fs/f2fs/<disk>/data_io_flag
Date: April 2020
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:	Give a way to attach REQ_META and/or REQ_FUA to data writes
		for the given temperature-based bits. The bits indicate:
* REQ_META | REQ_FUA |
* 5 | 4 | 3 | 2 | 1 | 0 |
* Cold | Warm | Hot | Cold | Warm | Hot |
What: /sys/fs/f2fs/<disk>/node_io_flag
Date: June 2020
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:	Give a way to attach REQ_META and/or REQ_FUA to node writes
		for the given temperature-based bits. The bits indicate:
* REQ_META | REQ_FUA |
* 5 | 4 | 3 | 2 | 1 | 0 |
* Cold | Warm | Hot | Cold | Warm | Hot |
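		For example, writing 40 (0b101000) to data_io_flag sets bits 5
		and 3, so cold and hot data writes carry REQ_META while REQ_FUA
		stays unset for every temperature; a hypothetical invocation:
			echo 40 > /sys/fs/f2fs/<disk>/data_io_flag
		The same encoding applies to node_io_flag below.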
What: /sys/fs/f2fs/<disk>/iostat_period_ms
Date: April 2020
Contact: "Daeho Jeong" <daehojeong@google.com>
Description:	Give a way to change the iostat period. 3 seconds by default.
		The iostat tracepoint reports the stats delta accumulated over
		each period.
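		As a usage sketch (the sysfs handler later in this merge
		rejects values outside its MIN/MAX bounds), a hypothetical
		period change:
			echo 500 > /sys/fs/f2fs/<disk>/iostat_period_ms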
......@@ -248,7 +248,7 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "enabl
would be unusable can be viewed at /sys/fs/f2fs/<disk>/unusable
This space is reclaimed once checkpoint=enable.
compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo",
"lz4" and "zstd" algorithm.
"lz4", "zstd" and "lzo-rle" algorithm.
compress_log_size=%u Support configuring compress cluster size, the size will
be 4KB * (1 << %u), 16KB is minimum size, also it's
default size.
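For instance, compress_log_size=2 yields 4KB * (1 << 2) = 16KB clusters,
the minimum and default size noted above; a hypothetical mount line
combining it with the new algorithm (device and mount point are
placeholders):
	mount -t f2fs -o compress_algorithm=lzo-rle,compress_log_size=2 /dev/sdX /mnt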
......
......@@ -127,3 +127,13 @@ config F2FS_FS_ZSTD
default y
help
Support ZSTD compress algorithm, if unsure, say Y.
config F2FS_FS_LZORLE
bool "LZO-RLE compression support"
depends on F2FS_FS_COMPRESSION
depends on F2FS_FS_LZO
select LZO_COMPRESS
select LZO_DECOMPRESS
default y
help
Support LZO-RLE compress algorithm, if unsure, say Y.
// SPDX-License-Identifier: GPL-2.0
/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/acl.h
*
......
......@@ -86,6 +86,8 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
return ERR_PTR(err);
}
f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE);
lock_page(page);
if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
......@@ -220,6 +222,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.is_por = (type == META_POR),
};
struct blk_plug plug;
int err;
if (unlikely(type == META_POR))
fio.op_flags &= ~REQ_META;
......@@ -263,8 +266,11 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
}
fio.page = page;
f2fs_submit_page_bio(&fio);
f2fs_put_page(page, 0);
err = f2fs_submit_page_bio(&fio);
f2fs_put_page(page, err ? 1 : 0);
if (!err)
f2fs_update_iostat(sbi, FS_META_READ_IO, F2FS_BLKSIZE);
}
out:
blk_finish_plug(&plug);
......@@ -889,8 +895,8 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
int i;
int err;
sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks),
GFP_KERNEL);
sbi->ckpt = f2fs_kvzalloc(sbi, array_size(blk_size, cp_blks),
GFP_KERNEL);
if (!sbi->ckpt)
return -ENOMEM;
/*
......@@ -1160,10 +1166,12 @@ static int block_operations(struct f2fs_sb_info *sbi)
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
struct blk_plug plug;
int err = 0, cnt = 0;
blk_start_plug(&plug);
/*
* Let's flush inline_data in dirty node pages.
*/
f2fs_flush_inline_data(sbi);
retry_flush_quotas:
f2fs_lock_all(sbi);
......@@ -1192,7 +1200,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
err = f2fs_sync_dirty_inodes(sbi, DIR_INODE);
if (err)
goto out;
return err;
cond_resched();
goto retry_flush_quotas;
}
......@@ -1208,7 +1216,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
goto out;
return err;
cond_resched();
goto retry_flush_quotas;
}
......@@ -1224,7 +1232,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
if (err) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
goto out;
return err;
}
cond_resched();
goto retry_flush_nodes;
......@@ -1236,8 +1244,6 @@ static int block_operations(struct f2fs_sb_info *sbi)
*/
__prepare_cp_block(sbi);
up_write(&sbi->node_change);
out:
blk_finish_plug(&plug);
return err;
}
......@@ -1260,6 +1266,9 @@ void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type)
if (unlikely(f2fs_cp_error(sbi)))
break;
if (type == F2FS_DIRTY_META)
f2fs_sync_meta_pages(sbi, META, LONG_MAX,
FS_CP_META_IO);
io_schedule_timeout(DEFAULT_IO_TIMEOUT);
}
finish_wait(&sbi->cp_wait, &wait);
......@@ -1553,7 +1562,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
return 0;
f2fs_warn(sbi, "Start checkpoint disabled!");
}
mutex_lock(&sbi->cp_mutex);
if (cpc->reason != CP_RESIZE)
mutex_lock(&sbi->cp_mutex);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
......@@ -1622,7 +1632,8 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_update_time(sbi, CP_TIME);
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
mutex_unlock(&sbi->cp_mutex);
if (cpc->reason != CP_RESIZE)
mutex_unlock(&sbi->cp_mutex);
return err;
}
......
......@@ -65,15 +65,6 @@ static void f2fs_set_compressed_page(struct page *page,
page->mapping = inode->i_mapping;
}
static void f2fs_put_compressed_page(struct page *page)
{
set_page_private(page, (unsigned long)NULL);
ClearPagePrivate(page);
page->mapping = NULL;
unlock_page(page);
put_page(page);
}
static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock)
{
int i;
......@@ -98,8 +89,7 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len)
f2fs_drop_rpages(cc, len, true);
}
static void f2fs_put_rpages_mapping(struct compress_ctx *cc,
struct address_space *mapping,
static void f2fs_put_rpages_mapping(struct address_space *mapping,
pgoff_t start, int len)
{
int i;
......@@ -236,7 +226,12 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc)
if (!cc->private)
return -ENOMEM;
cc->clen = LZ4_compressBound(PAGE_SIZE << cc->log_cluster_size);
/*
 * We do not set cc->clen to LZ4_compressBound(inputsize) to cover the
 * worst-case compression, because the lz4 compressor handles the
 * output budget properly.
 */
cc->clen = cc->rlen - PAGE_SIZE - COMPRESS_HEADER_SIZE;
return 0;
}
......@@ -252,11 +247,9 @@ static int lz4_compress_pages(struct compress_ctx *cc)
len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen,
cc->clen, cc->private);
if (!len) {
printk_ratelimited("%sF2FS-fs (%s): lz4 compress failed\n",
KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id);
return -EIO;
}
if (!len)
return -EAGAIN;
cc->clen = len;
return 0;
}
......@@ -366,6 +359,13 @@ static int zstd_compress_pages(struct compress_ctx *cc)
return -EIO;
}
/*
 * compressed data remains in the intermediate buffer because there is
 * no more space in cbuf.cdata
 */
if (ret)
return -EAGAIN;
cc->clen = outbuf.pos;
return 0;
}
......@@ -451,6 +451,31 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = {
};
#endif
#ifdef CONFIG_F2FS_FS_LZO
#ifdef CONFIG_F2FS_FS_LZORLE
static int lzorle_compress_pages(struct compress_ctx *cc)
{
int ret;
ret = lzorle1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata,
&cc->clen, cc->private);
if (ret != LZO_E_OK) {
printk_ratelimited("%sF2FS-fs (%s): lzo-rle compress failed, ret:%d\n",
KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret);
return -EIO;
}
return 0;
}
static const struct f2fs_compress_ops f2fs_lzorle_ops = {
.init_compress_ctx = lzo_init_compress_ctx,
.destroy_compress_ctx = lzo_destroy_compress_ctx,
.compress_pages = lzorle_compress_pages,
.decompress_pages = lzo_decompress_pages,
};
#endif
#endif
static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = {
#ifdef CONFIG_F2FS_FS_LZO
&f2fs_lzo_ops,
......@@ -467,6 +492,11 @@ static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = {
#else
NULL,
#endif
#if defined(CONFIG_F2FS_FS_LZO) && defined(CONFIG_F2FS_FS_LZORLE)
&f2fs_lzorle_ops,
#else
NULL,
#endif
};
bool f2fs_is_compress_backend_ready(struct inode *inode)
......@@ -476,17 +506,47 @@ bool f2fs_is_compress_backend_ready(struct inode *inode)
return f2fs_cops[F2FS_I(inode)->i_compress_algorithm];
}
static struct page *f2fs_grab_page(void)
static mempool_t *compress_page_pool = NULL;
static int num_compress_pages = 512;
module_param(num_compress_pages, uint, 0444);
MODULE_PARM_DESC(num_compress_pages,
"Number of intermediate compress pages to preallocate");
int f2fs_init_compress_mempool(void)
{
compress_page_pool = mempool_create_page_pool(num_compress_pages, 0);
if (!compress_page_pool)
return -ENOMEM;
return 0;
}
void f2fs_destroy_compress_mempool(void)
{
mempool_destroy(compress_page_pool);
}
static struct page *f2fs_compress_alloc_page(void)
{
struct page *page;
page = alloc_page(GFP_NOFS);
if (!page)
return NULL;
page = mempool_alloc(compress_page_pool, GFP_NOFS);
lock_page(page);
return page;
}
static void f2fs_compress_free_page(struct page *page)
{
if (!page)
return;
set_page_private(page, (unsigned long)NULL);
ClearPagePrivate(page);
page->mapping = NULL;
unlock_page(page);
mempool_free(page, compress_page_pool);
}
static int f2fs_compress_pages(struct compress_ctx *cc)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
......@@ -516,7 +576,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
}
for (i = 0; i < cc->nr_cpages; i++) {
cc->cpages[i] = f2fs_grab_page();
cc->cpages[i] = f2fs_compress_alloc_page();
if (!cc->cpages[i]) {
ret = -ENOMEM;
goto out_free_cpages;
......@@ -561,7 +621,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
vunmap(cc->rbuf);
for (i = nr_cpages; i < cc->nr_cpages; i++) {
f2fs_put_compressed_page(cc->cpages[i]);
f2fs_compress_free_page(cc->cpages[i]);
cc->cpages[i] = NULL;
}
......@@ -581,7 +641,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
out_free_cpages:
for (i = 0; i < cc->nr_cpages; i++) {
if (cc->cpages[i])
f2fs_put_compressed_page(cc->cpages[i]);
f2fs_compress_free_page(cc->cpages[i]);
}
kfree(cc->cpages);
cc->cpages = NULL;
......@@ -788,6 +848,8 @@ static bool cluster_may_compress(struct compress_ctx *cc)
return false;
if (!f2fs_cluster_is_full(cc))
return false;
if (unlikely(f2fs_cp_error(F2FS_I_SB(cc->inode))))
return false;
return __cluster_may_compress(cc);
}
......@@ -879,7 +941,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc,
if (!PageUptodate(page)) {
f2fs_unlock_rpages(cc, i + 1);
f2fs_put_rpages_mapping(cc, mapping, start_idx,
f2fs_put_rpages_mapping(mapping, start_idx,
cc->cluster_size);
f2fs_destroy_compress_ctx(cc);
goto retry;
......@@ -914,7 +976,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc,
unlock_pages:
f2fs_unlock_rpages(cc, i);
release_pages:
f2fs_put_rpages_mapping(cc, mapping, start_idx, i);
f2fs_put_rpages_mapping(mapping, start_idx, i);
f2fs_destroy_compress_ctx(cc);
return ret;
}
......@@ -954,6 +1016,55 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
return first_index;
}
int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock)
{
void *fsdata = NULL;
struct page *pagep;
int log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
pgoff_t start_idx = from >> (PAGE_SHIFT + log_cluster_size) <<
log_cluster_size;
int err;
err = f2fs_is_compressed_cluster(inode, start_idx);
if (err < 0)
return err;
/* truncate normal cluster */
if (!err)
return f2fs_do_truncate_blocks(inode, from, lock);
/* truncate compressed cluster */
err = f2fs_prepare_compress_overwrite(inode, &pagep,
start_idx, &fsdata);
/* should not be a normal cluster */
f2fs_bug_on(F2FS_I_SB(inode), err == 0);
if (err <= 0)
return err;
if (err > 0) {
struct page **rpages = fsdata;
int cluster_size = F2FS_I(inode)->i_cluster_size;
int i;
for (i = cluster_size - 1; i >= 0; i--) {
loff_t start = rpages[i]->index << PAGE_SHIFT;
if (from <= start) {
zero_user_segment(rpages[i], 0, PAGE_SIZE);
} else {
zero_user_segment(rpages[i], from - start,
PAGE_SIZE);
break;
}
}
f2fs_compress_write_end(inode, fsdata, start_idx, true);
}
return 0;
}
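A worked example of the zeroing loop above, assuming 4KB pages and
log_cluster_size=2 (4-page clusters): truncating to from=20KB selects the
cluster of pages 4-7 (start_idx=4). Pages 7, 6 and 5 start at or past from
and are zeroed whole; page 4 (start=16KB) is zeroed from byte
from-start=4096 onward, i.e. not at all, and the loop breaks, leaving bytes
0..20KB of the cluster intact.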
static int f2fs_write_compressed_pages(struct compress_ctx *cc,
int *submitted,
struct writeback_control *wbc,
......@@ -985,7 +1096,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
loff_t psize;
int i, err;
if (!f2fs_trylock_op(sbi))
if (!IS_NOQUOTA(inode) && !f2fs_trylock_op(sbi))
return -EAGAIN;
set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
......@@ -1092,7 +1203,8 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
if (!IS_NOQUOTA(inode))
f2fs_unlock_op(sbi);
spin_lock(&fi->i_size_lock);
if (fi->last_disk_size < psize)
......@@ -1118,7 +1230,8 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
out_put_dnode:
f2fs_put_dnode(&dn);
out_unlock_op:
f2fs_unlock_op(sbi);
if (!IS_NOQUOTA(inode))
f2fs_unlock_op(sbi);
return -EAGAIN;
}
......@@ -1132,7 +1245,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
if (unlikely(bio->bi_status))
mapping_set_error(cic->inode->i_mapping, -EIO);
f2fs_put_compressed_page(page);
f2fs_compress_free_page(page);
dec_page_count(sbi, F2FS_WB_DATA);
......@@ -1293,7 +1406,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
for (i = 0; i < dic->nr_cpages; i++) {
struct page *page;
page = f2fs_grab_page();
page = f2fs_compress_alloc_page();
if (!page)
goto out_free;
......@@ -1313,7 +1426,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
continue;
}
dic->tpages[i] = f2fs_grab_page();
dic->tpages[i] = f2fs_compress_alloc_page();
if (!dic->tpages[i])
goto out_free;
}
......@@ -1335,8 +1448,7 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
continue;
if (!dic->tpages[i])
continue;
unlock_page(dic->tpages[i]);
put_page(dic->tpages[i]);
f2fs_compress_free_page(dic->tpages[i]);
}
kfree(dic->tpages);
}
......@@ -1345,7 +1457,7 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
for (i = 0; i < dic->nr_cpages; i++) {
if (!dic->cpages[i])
continue;
f2fs_put_compressed_page(dic->cpages[i]);
f2fs_compress_free_page(dic->cpages[i]);
}
kfree(dic->cpages);
}
......
......@@ -115,7 +115,8 @@ static enum count_type __read_io_type(struct page *page)
/* postprocessing steps for read bios */
enum bio_post_read_step {
STEP_DECRYPT,
STEP_DECOMPRESS,
STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */
STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */
STEP_VERITY,
};
......@@ -514,6 +515,34 @@ void f2fs_submit_bio(struct f2fs_sb_info *sbi,
__submit_bio(sbi, bio, type);
}
static void __attach_io_flag(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
unsigned int io_flag, fua_flag, meta_flag;
if (fio->type == DATA)
io_flag = sbi->data_io_flag;
else if (fio->type == NODE)
io_flag = sbi->node_io_flag;
else
return;
fua_flag = io_flag & temp_mask;
meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
/*
* data/node io flag bits per temp:
* REQ_META | REQ_FUA |
* 5 | 4 | 3 | 2 | 1 | 0 |
* Cold | Warm | Hot | Cold | Warm | Hot |
*/
if ((1 << fio->temp) & meta_flag)
fio->op_flags |= REQ_META;
if ((1 << fio->temp) & fua_flag)
fio->op_flags |= REQ_FUA;
}
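A worked example of the masking above (assuming NR_TEMP_TYPE == 3, so
temp_mask == 0x7): with node_io_flag = 0x09 (0b001001), fua_flag and
meta_flag both come out as 0b001, so hot node writes gain REQ_FUA and
REQ_META while warm and cold writes are left untouched.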
static void __submit_merged_bio(struct f2fs_bio_info *io)
{
struct f2fs_io_info *fio = &io->fio;
......@@ -521,6 +550,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
if (!io->bio)
return;
__attach_io_flag(fio);
bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
if (is_read_io(fio->op))
......@@ -662,6 +692,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
if (fio->io_wbc && !is_read_io(fio->op))
wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
__attach_io_flag(fio);
bio_set_op_attrs(bio, fio->op, fio->op_flags);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
......@@ -848,6 +879,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
alloc_new:
if (!bio) {
bio = __bio_alloc(fio, BIO_MAX_PAGES);
__attach_io_flag(fio);
bio_set_op_attrs(bio, fio->op, fio->op_flags);
add_bio_entry(fio->sbi, bio, page, fio->temp);
......@@ -968,7 +1000,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
if (f2fs_encrypted_file(inode))
post_read_steps |= 1 << STEP_DECRYPT;
if (f2fs_compressed_file(inode))
post_read_steps |= 1 << STEP_DECOMPRESS;
post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
if (f2fs_need_verity(inode, first_idx))
post_read_steps |= 1 << STEP_VERITY;
......@@ -1011,6 +1043,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
}
ClearPageError(page);
inc_page_count(sbi, F2FS_RD_DATA);
f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
__submit_bio(sbi, bio, DATA);
return 0;
}
......@@ -1809,6 +1842,25 @@ static int f2fs_xattr_fiemap(struct inode *inode,
return (err < 0 ? err : 0);
}
static loff_t max_inode_blocks(struct inode *inode)
{
loff_t result = ADDRS_PER_INODE(inode);
loff_t leaf_count = ADDRS_PER_BLOCK(inode);
/* two direct node blocks */
result += (leaf_count * 2);
/* two indirect node blocks */
leaf_count *= NIDS_PER_BLOCK;
result += (leaf_count * 2);
/* one double indirect node block */
leaf_count *= NIDS_PER_BLOCK;
result += leaf_count;
return result;
}
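For scale, a rough figure assuming the default 4KB block size, where
ADDRS_PER_BLOCK and NIDS_PER_BLOCK are both 1018 and ADDRS_PER_INODE is at
most 923: 923 + 2*1018 + 2*1018^2 + 1018^3 is about 1.06 billion blocks,
roughly 4TB per file. This is the per-inode bound that fiemap below now
uses in place of the filesystem-wide max_file_blocks.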
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
......@@ -1818,6 +1870,8 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
int ret = 0;
bool compr_cluster = false;
unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
ret = f2fs_precache_extents(inode);
......@@ -1852,6 +1906,9 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
memset(&map_bh, 0, sizeof(struct buffer_head));
map_bh.b_size = len;
if (compr_cluster)
map_bh.b_size = blk_to_logical(inode, cluster_size - 1);
ret = get_data_block(inode, start_blk, &map_bh, 0,
F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
if (ret)
......@@ -1862,7 +1919,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
start_blk = next_pgofs;
if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
F2FS_I_SB(inode)->max_file_blocks))
max_inode_blocks(inode)))
goto prep_next;
flags |= FIEMAP_EXTENT_LAST;
......@@ -1874,11 +1931,38 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size, flags);
if (ret)
goto out;
size = 0;
}
if (start_blk > last_blk || ret)
if (start_blk > last_blk)
goto out;
if (compr_cluster) {
compr_cluster = false;
logical = blk_to_logical(inode, start_blk - 1);
phys = blk_to_logical(inode, map_bh.b_blocknr);
size = blk_to_logical(inode, cluster_size);
flags |= FIEMAP_EXTENT_ENCODED;
start_blk += cluster_size - 1;
if (start_blk > last_blk)
goto out;
goto prep_next;
}
if (map_bh.b_blocknr == COMPRESS_ADDR) {
compr_cluster = true;
start_blk++;
goto prep_next;
}
logical = blk_to_logical(inode, start_blk);
phys = blk_to_logical(inode, map_bh.b_blocknr);
size = map_bh.b_size;
......@@ -2016,6 +2100,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
goto submit_and_realloc;
inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
ClearPageError(page);
*last_block_in_bio = block_nr;
goto out;
......@@ -2114,6 +2199,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
for (i = 0; i < dic->nr_cpages; i++) {
struct page *page = dic->cpages[i];
block_t blkaddr;
struct bio_post_read_ctx *ctx;
blkaddr = data_blkaddr(dn.inode, dn.node_page,
dn.ofs_in_node + i + 1);
......@@ -2131,16 +2217,16 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
page->index, for_write);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
bio = NULL;
dic->failed = true;
if (refcount_sub_and_test(dic->nr_cpages - i,
&dic->ref))
&dic->ref)) {
f2fs_decompress_end_io(dic->rpages,
cc->cluster_size, true,
false);
f2fs_free_dic(dic);
f2fs_free_dic(dic);
}
f2fs_put_dnode(&dn);
*bio_ret = bio;
*bio_ret = NULL;
return ret;
}
}
......@@ -2150,7 +2236,14 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
if (bio_add_page(bio, page, blocksize, 0) < blocksize)
goto submit_and_realloc;
/* tag STEP_DECOMPRESS to handle IO in wq */
ctx = bio->bi_private;
if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
inc_page_count(sbi, F2FS_RD_DATA);
f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
ClearPageError(page);
*last_block_in_bio = blkaddr;
}
......@@ -2624,8 +2717,8 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
f2fs_available_free_memory(sbi, BASE_CHECK))))
goto redirty_out;
/* Dentry blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode)) {
/* Dentry/quota blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
fio.need_lock = LOCK_DONE;
err = f2fs_do_write_data_page(&fio);
goto done;
......@@ -2767,7 +2860,6 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
int cycled;
int range_whole = 0;
xa_mark_t tag;
int nwritten = 0;
......@@ -2785,17 +2877,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
if (index == 0)
cycled = 1;
else
cycled = 0;
end = -1;
} else {
index = wbc->range_start >> PAGE_SHIFT;
end = wbc->range_end >> PAGE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
cycled = 1; /* ignore range_cyclic tests */
}
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag = PAGECACHE_TAG_TOWRITE;
......@@ -2960,12 +3047,13 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
}
}
#endif
if ((!cycled && !done) || retry) {
cycled = 1;
if (retry) {
index = 0;
end = writeback_index - 1;
end = -1;
goto retry;
}
if (wbc->range_cyclic && !done)
done_index = 0;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
......@@ -3494,6 +3582,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
} else if (err < 0) {
f2fs_write_failed(mapping, offset + count);
}
} else {
if (err > 0)
f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
}
out:
......@@ -3577,6 +3668,37 @@ static int f2fs_set_data_page_dirty(struct page *page)
return 0;
}
static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
{
#ifdef CONFIG_F2FS_FS_COMPRESSION
struct dnode_of_data dn;
sector_t start_idx, blknr = 0;
int ret;
start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
if (ret)
return 0;
if (dn.data_blkaddr != COMPRESS_ADDR) {
dn.ofs_in_node += block - start_idx;
blknr = f2fs_data_blkaddr(&dn);
if (!__is_valid_data_blkaddr(blknr))
blknr = 0;
}
f2fs_put_dnode(&dn);
return blknr;
#else
return -EOPNOTSUPP;
#endif
}
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
......@@ -3588,6 +3710,9 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
filemap_write_and_wait(mapping);
if (f2fs_compressed_file(inode))
return f2fs_bmap_compress(inode, block);
return generic_block_bmap(mapping, block, get_data_block_bmap);
}
......
......@@ -13,6 +13,7 @@
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include "f2fs.h"
#include "node.h"
......@@ -737,6 +738,10 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
goto put_encrypted_page;
f2fs_put_page(fio.encrypted_page, 0);
f2fs_put_page(page, 1);
f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
f2fs_update_iostat(sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE);
return 0;
put_encrypted_page:
f2fs_put_page(fio.encrypted_page, 1);
......@@ -840,6 +845,10 @@ static int move_data_block(struct inode *inode, block_t bidx,
f2fs_put_page(mpage, 1);
goto up_out;
}
f2fs_update_iostat(fio.sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
f2fs_update_iostat(fio.sbi, FS_GDATA_READ_IO, F2FS_BLKSIZE);
lock_page(mpage);
if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) ||
!PageUptodate(mpage))) {
......@@ -1399,12 +1408,29 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
unsigned int end)
static int free_segment_range(struct f2fs_sb_info *sbi,
unsigned int secs, bool gc_only)
{
int type;
unsigned int segno, next_inuse;
unsigned int segno, next_inuse, start, end;
struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
int gc_mode, gc_type;
int err = 0;
int type;
/* Force block allocation for GC */
MAIN_SECS(sbi) -= secs;
start = MAIN_SECS(sbi) * sbi->segs_per_sec;
end = MAIN_SEGS(sbi) - 1;
mutex_lock(&DIRTY_I(sbi)->seglist_lock);
for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
if (SIT_I(sbi)->last_victim[gc_mode] >= start)
SIT_I(sbi)->last_victim[gc_mode] = 0;
for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
if (sbi->next_victim_seg[gc_type] >= start)
sbi->next_victim_seg[gc_type] = NULL_SEGNO;
mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
/* Move out cursegs from the target range */
for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
......@@ -1417,18 +1443,24 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
down_write(&sbi->gc_lock);
do_garbage_collect(sbi, segno, &gc_list, FG_GC);
up_write(&sbi->gc_lock);
put_gc_inode(&gc_list);
if (get_valid_blocks(sbi, segno, true))
return -EAGAIN;
if (!gc_only && get_valid_blocks(sbi, segno, true)) {
err = -EAGAIN;
goto out;
}
if (fatal_signal_pending(current)) {
err = -ERESTARTSYS;
goto out;
}
}
if (gc_only)
goto out;
err = f2fs_sync_fs(sbi->sb, 1);
err = f2fs_write_checkpoint(sbi, &cpc);
if (err)
return err;
goto out;
next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start);
if (next_inuse <= end) {
......@@ -1436,6 +1468,8 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
next_inuse);
f2fs_bug_on(sbi, 1);
}
out:
MAIN_SECS(sbi) += secs;
return err;
}
......@@ -1481,6 +1515,7 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
MAIN_SECS(sbi) += secs;
FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks);
......@@ -1502,8 +1537,8 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
{
__u64 old_block_count, shrunk_blocks;
struct cp_control cpc = { CP_RESIZE, 0, 0, 0 };
unsigned int secs;
int gc_mode, gc_type;
int err = 0;
__u32 rem;
......@@ -1538,10 +1573,27 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
return -EINVAL;
}
freeze_bdev(sbi->sb->s_bdev);
shrunk_blocks = old_block_count - block_count;
secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
/* stop other GC */
if (!down_write_trylock(&sbi->gc_lock))
return -EAGAIN;
/* stop CP to protect MAIN_SEC in free_segment_range */
f2fs_lock_op(sbi);
err = free_segment_range(sbi, secs, true);
f2fs_unlock_op(sbi);
up_write(&sbi->gc_lock);
if (err)
return err;
set_sbi_flag(sbi, SBI_IS_RESIZEFS);
freeze_super(sbi->sb);
down_write(&sbi->gc_lock);
mutex_lock(&sbi->cp_mutex);
spin_lock(&sbi->stat_lock);
if (shrunk_blocks + valid_user_blocks(sbi) +
sbi->current_reserved_blocks + sbi->unusable_block_count +
......@@ -1550,69 +1602,44 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
else
sbi->user_block_count -= shrunk_blocks;
spin_unlock(&sbi->stat_lock);
if (err) {
thaw_bdev(sbi->sb->s_bdev, sbi->sb);
return err;
}
mutex_lock(&sbi->resize_mutex);
set_sbi_flag(sbi, SBI_IS_RESIZEFS);
mutex_lock(&DIRTY_I(sbi)->seglist_lock);
MAIN_SECS(sbi) -= secs;
for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
if (SIT_I(sbi)->last_victim[gc_mode] >=
MAIN_SECS(sbi) * sbi->segs_per_sec)
SIT_I(sbi)->last_victim[gc_mode] = 0;
for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
if (sbi->next_victim_seg[gc_type] >=
MAIN_SECS(sbi) * sbi->segs_per_sec)
sbi->next_victim_seg[gc_type] = NULL_SEGNO;
mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
if (err)
goto out_err;
err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec,
MAIN_SEGS(sbi) - 1);
err = free_segment_range(sbi, secs, false);
if (err)
goto out;
goto recover_out;
update_sb_metadata(sbi, -secs);
err = f2fs_commit_super(sbi, false);
if (err) {
update_sb_metadata(sbi, secs);
goto out;
goto recover_out;
}
mutex_lock(&sbi->cp_mutex);
update_fs_metadata(sbi, -secs);
clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
set_sbi_flag(sbi, SBI_IS_DIRTY);
mutex_unlock(&sbi->cp_mutex);
err = f2fs_sync_fs(sbi->sb, 1);
err = f2fs_write_checkpoint(sbi, &cpc);
if (err) {
mutex_lock(&sbi->cp_mutex);
update_fs_metadata(sbi, secs);
mutex_unlock(&sbi->cp_mutex);
update_sb_metadata(sbi, secs);
f2fs_commit_super(sbi, false);
}
out:
recover_out:
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");
MAIN_SECS(sbi) += secs;
spin_lock(&sbi->stat_lock);
sbi->user_block_count += shrunk_blocks;
spin_unlock(&sbi->stat_lock);
}
out_err:
mutex_unlock(&sbi->cp_mutex);
up_write(&sbi->gc_lock);
thaw_super(sbi->sb);
clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
mutex_unlock(&sbi->resize_mutex);
thaw_bdev(sbi->sb->s_bdev, sbi->sb);
return err;
}
// SPDX-License-Identifier: GPL-2.0
/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/gc.h
*
......
......@@ -67,22 +67,9 @@ static void str2hashbuf(const unsigned char *msg, size_t len,
*buf++ = pad;
}
static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info,
struct fscrypt_name *fname)
static u32 TEA_hash_name(const u8 *p, size_t len)
{
__u32 hash;
f2fs_hash_t f2fs_hash;
const unsigned char *p;
__u32 in[8], buf[4];
const unsigned char *name = name_info->name;
size_t len = name_info->len;
/* encrypted bigname case */
if (fname && !fname->disk_name.name)
return cpu_to_le32(fname->hash);
if (is_dot_dotdot(name_info))
return 0;
/* Initialize the default seed for the hash checksum functions */
buf[0] = 0x67452301;
......@@ -90,7 +77,6 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info,
buf[2] = 0x98badcfe;
buf[3] = 0x10325476;
p = name;
while (1) {
str2hashbuf(p, len, in, 4);
TEA_transform(buf, in);
......@@ -99,41 +85,43 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info,
break;
len -= 16;
}
hash = buf[0];
f2fs_hash = cpu_to_le32(hash & ~F2FS_HASH_COL_BIT);
return f2fs_hash;
return buf[0] & ~F2FS_HASH_COL_BIT;
}
f2fs_hash_t f2fs_dentry_hash(const struct inode *dir,
const struct qstr *name_info, struct fscrypt_name *fname)
/*
* Compute @fname->hash. For all directories, @fname->disk_name must be set.
* For casefolded directories, @fname->usr_fname must be set, and also
* @fname->cf_name if the filename is valid Unicode.
*/
void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
{
#ifdef CONFIG_UNICODE
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
const struct unicode_map *um = sbi->s_encoding;
int r, dlen;
unsigned char *buff;
struct qstr folded;
const u8 *name = fname->disk_name.name;
size_t len = fname->disk_name.len;
if (!name_info->len || !IS_CASEFOLDED(dir))
goto opaque_seq;
WARN_ON_ONCE(!name);
buff = f2fs_kzalloc(sbi, sizeof(char) * PATH_MAX, GFP_KERNEL);
if (!buff)
return -ENOMEM;
dlen = utf8_casefold(um, name_info, buff, PATH_MAX);
if (dlen < 0) {
kvfree(buff);
goto opaque_seq;
if (is_dot_dotdot(name, len)) {
fname->hash = 0;
return;
}
folded.name = buff;
folded.len = dlen;
r = __f2fs_dentry_hash(&folded, fname);
kvfree(buff);
return r;
opaque_seq:
#ifdef CONFIG_UNICODE
if (IS_CASEFOLDED(dir)) {
/*
* If the casefolded name is provided, hash it instead of the
* on-disk name. If the casefolded name is *not* provided, that
* should only be because the name wasn't valid Unicode, so fall
* back to treating the name as an opaque byte sequence.
*/
WARN_ON_ONCE(!fname->usr_fname->name);
if (fname->cf_name.name) {
name = fname->cf_name.name;
len = fname->cf_name.len;
} else {
name = fname->usr_fname->name;
len = fname->usr_fname->len;
}
}
#endif
return __f2fs_dentry_hash(name_info, fname);
fname->hash = cpu_to_le32(TEA_hash_name(name, len));
}
......@@ -306,15 +306,14 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
}
struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
struct fscrypt_name *fname, struct page **res_page)
const struct f2fs_filename *fname,
struct page **res_page)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
struct f2fs_dir_entry *de;
struct f2fs_dentry_ptr d;
struct page *ipage;
void *inline_dentry;
f2fs_hash_t namehash;
ipage = f2fs_get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage)) {
......@@ -322,12 +321,10 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
return NULL;
}
namehash = f2fs_dentry_hash(dir, &name, fname);
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
de = f2fs_find_target_dentry(fname, namehash, NULL, &d);
de = f2fs_find_target_dentry(&d, fname, NULL);
unlock_page(ipage);
if (de)
*res_page = ipage;
......@@ -444,7 +441,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
while (bit_pos < d.max) {
struct f2fs_dir_entry *de;
struct qstr new_name;
struct f2fs_filename fname;
nid_t ino;
umode_t fake_mode;
......@@ -460,14 +457,19 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
continue;
}
new_name.name = d.filename[bit_pos];
new_name.len = le16_to_cpu(de->name_len);
/*
* We only need the disk_name and hash to move the dentry.
* We don't need the original or casefolded filenames.
*/
memset(&fname, 0, sizeof(fname));
fname.disk_name.name = d.filename[bit_pos];
fname.disk_name.len = le16_to_cpu(de->name_len);
fname.hash = de->hash_code;
ino = le32_to_cpu(de->ino);
fake_mode = f2fs_get_de_type(de) << S_SHIFT;
err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL,
ino, fake_mode);
err = f2fs_add_regular_entry(dir, &fname, NULL, ino, fake_mode);
if (err)
goto punch_dentry_pages;
......@@ -544,7 +546,7 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
struct fscrypt_name fname;
struct f2fs_filename fname;
void *inline_dentry = NULL;
int err = 0;
......@@ -553,19 +555,19 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry)
f2fs_lock_op(sbi);
err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname);
err = f2fs_setup_filename(dir, &dentry->d_name, 0, &fname);
if (err)
goto out;
ipage = f2fs_get_node_page(sbi, dir->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
goto out;
goto out_fname;
}
if (f2fs_has_enough_room(dir, ipage, &fname)) {
f2fs_put_page(ipage, 1);
goto out;
goto out_fname;
}
inline_dentry = inline_data_addr(dir, ipage);
......@@ -573,22 +575,22 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry)
err = do_convert_inline_dir(dir, ipage, inline_dentry);
if (!err)
f2fs_put_page(ipage, 1);
out_fname:
f2fs_free_filename(&fname);
out:
f2fs_unlock_op(sbi);
return err;
}
int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
const struct qstr *orig_name,
struct inode *inode, nid_t ino, umode_t mode)
int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
struct inode *inode, nid_t ino, umode_t mode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct page *ipage;
unsigned int bit_pos;
f2fs_hash_t name_hash;
void *inline_dentry = NULL;
struct f2fs_dentry_ptr d;
int slots = GET_DENTRY_SLOTS(new_name->len);
int slots = GET_DENTRY_SLOTS(fname->disk_name.len);
struct page *page = NULL;
int err = 0;
......@@ -610,8 +612,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
page = f2fs_init_inode_metadata(inode, dir, new_name,
orig_name, ipage);
page = f2fs_init_inode_metadata(inode, dir, fname, ipage);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto fail;
......@@ -620,8 +621,8 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
f2fs_wait_on_page_writeback(ipage, NODE, true, true);
name_hash = f2fs_dentry_hash(dir, new_name, NULL);
f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash,
bit_pos);
set_page_dirty(ipage);
......
......@@ -482,7 +482,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
nid_t ino = -1;
int err = 0;
unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir));
struct fscrypt_name fname;
struct f2fs_filename fname;
trace_f2fs_lookup_start(dir, dentry, flags);
......@@ -491,19 +491,20 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
err = fscrypt_prepare_lookup(dir, dentry, &fname);
err = f2fs_prepare_lookup(dir, dentry, &fname);
if (err == -ENOENT)
goto out_splice;
if (err)
goto out;
de = __f2fs_find_entry(dir, &fname, &page);
fscrypt_free_filename(&fname);
f2fs_free_filename(&fname);
if (!de) {
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto out;
}
err = -ENOENT;
goto out_splice;
}
......@@ -549,7 +550,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
#endif
new = d_splice_alias(inode, dentry);
err = PTR_ERR_OR_ZERO(new);
trace_f2fs_lookup_end(dir, dentry, ino, err);
trace_f2fs_lookup_end(dir, dentry, ino, !new ? -ENOENT : err);
return new;
out_iput:
iput(inode);
......@@ -564,7 +565,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = d_inode(dentry);
struct f2fs_dir_entry *de;
struct page *page;
int err = -ENOENT;
int err;
trace_f2fs_unlink_enter(dir, dentry);
......@@ -1287,9 +1288,7 @@ const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.get_link = f2fs_encrypted_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
#endif
};
const struct inode_operations f2fs_dir_inode_operations = {
......@@ -1307,9 +1306,7 @@ const struct inode_operations f2fs_dir_inode_operations = {
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
.set_acl = f2fs_set_acl,
#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
#endif
.fiemap = f2fs_fiemap,
};
......@@ -1317,9 +1314,7 @@ const struct inode_operations f2fs_symlink_inode_operations = {
.get_link = f2fs_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
#endif
};
const struct inode_operations f2fs_special_inode_operations = {
......@@ -1327,7 +1322,5 @@ const struct inode_operations f2fs_special_inode_operations = {
.setattr = f2fs_setattr,
.get_acl = f2fs_get_acl,
.set_acl = f2fs_set_acl,
#ifdef CONFIG_F2FS_FS_XATTR
.listxattr = f2fs_listxattr,
#endif
};
......@@ -1300,7 +1300,13 @@ static int read_node_page(struct page *page, int op_flags)
}
fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
return f2fs_submit_page_bio(&fio);
err = f2fs_submit_page_bio(&fio);
if (!err)
f2fs_update_iostat(sbi, FS_NODE_READ_IO, F2FS_BLKSIZE);
return err;
}
/*
......@@ -1514,8 +1520,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
trace_f2fs_writepage(page, NODE);
if (unlikely(f2fs_cp_error(sbi)))
if (unlikely(f2fs_cp_error(sbi))) {
if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) {
ClearPageUptodate(page);
dec_page_count(sbi, F2FS_DIRTY_NODES);
unlock_page(page);
return 0;
}
goto redirty_out;
}
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
......@@ -1801,6 +1814,53 @@ static bool flush_dirty_inode(struct page *page)
return true;
}
int f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
{
pgoff_t index = 0;
struct pagevec pvec;
int nr_pages;
int ret = 0;
pagevec_init(&pvec);
while ((nr_pages = pagevec_lookup_tag(&pvec,
NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) {
int i;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
if (!IS_DNODE(page))
continue;
lock_page(page);
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
unlock_page(page);
continue;
}
if (!PageDirty(page)) {
/* someone wrote it for us */
goto continue_unlock;
}
/* flush inline_data, if it's async context. */
if (is_inline_node(page)) {
clear_inline_node(page);
unlock_page(page);
flush_inline_data(sbi, ino_of_node(page));
continue;
}
unlock_page(page);
}
pagevec_release(&pvec);
cond_resched();
}
return ret;
}
int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type)
......@@ -1864,8 +1924,8 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
goto continue_unlock;
}
/* flush inline_data */
if (is_inline_node(page)) {
/* flush inline_data, if it's async context. */
if (do_balance && is_inline_node(page)) {
clear_inline_node(page);
unlock_page(page);
flush_inline_data(sbi, ino_of_node(page));
......@@ -2482,7 +2542,6 @@ void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i, *next;
int nr = nr_shrink;
if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
......@@ -2491,17 +2550,23 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
if (!mutex_trylock(&nm_i->build_lock))
return 0;
spin_lock(&nm_i->nid_list_lock);
list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
if (nr_shrink <= 0 ||
nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
break;
while (nr_shrink && nm_i->nid_cnt[FREE_NID] > MAX_FREE_NIDS) {
struct free_nid *i, *next;
unsigned int batch = SHRINK_NID_BATCH_SIZE;
__remove_free_nid(sbi, i, FREE_NID);
kmem_cache_free(free_nid_slab, i);
nr_shrink--;
spin_lock(&nm_i->nid_list_lock);
list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
if (!nr_shrink || !batch ||
nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
break;
__remove_free_nid(sbi, i, FREE_NID);
kmem_cache_free(free_nid_slab, i);
nr_shrink--;
batch--;
}
spin_unlock(&nm_i->nid_list_lock);
}
spin_unlock(&nm_i->nid_list_lock);
mutex_unlock(&nm_i->build_lock);
return nr - nr_shrink;
......@@ -2928,7 +2993,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
return 0;
nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
nm_i->nat_bits = f2fs_kzalloc(sbi,
nm_i->nat_bits = f2fs_kvzalloc(sbi,
nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
if (!nm_i->nat_bits)
return -ENOMEM;
......@@ -3061,9 +3126,9 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
int i;
nm_i->free_nid_bitmap =
f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *),
nm_i->nat_blocks),
GFP_KERNEL);
f2fs_kvzalloc(sbi, array_size(sizeof(unsigned char *),
nm_i->nat_blocks),
GFP_KERNEL);
if (!nm_i->free_nid_bitmap)
return -ENOMEM;
......
// SPDX-License-Identifier: GPL-2.0
/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/node.h
*
......@@ -15,6 +15,9 @@
#define FREE_NID_PAGES 8
#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
/* size of free nid batch when shrinking */
#define SHRINK_NID_BATCH_SIZE 8
#define DEF_RA_NID_PAGES 0 /* # of nid pages to be readaheaded */
/* maximum readahead size for node during getting data blocks */
......
......@@ -107,13 +107,51 @@ static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
kmem_cache_free(fsync_entry_slab, entry);
}
static int init_recovered_filename(const struct inode *dir,
struct f2fs_inode *raw_inode,
struct f2fs_filename *fname,
struct qstr *usr_fname)
{
int err;
memset(fname, 0, sizeof(*fname));
fname->disk_name.len = le32_to_cpu(raw_inode->i_namelen);
fname->disk_name.name = raw_inode->i_name;
if (WARN_ON(fname->disk_name.len > F2FS_NAME_LEN))
return -ENAMETOOLONG;
if (!IS_ENCRYPTED(dir)) {
usr_fname->name = fname->disk_name.name;
usr_fname->len = fname->disk_name.len;
fname->usr_fname = usr_fname;
}
/* Compute the hash of the filename */
if (IS_CASEFOLDED(dir)) {
err = f2fs_init_casefolded_name(dir, fname);
if (err)
return err;
f2fs_hash_filename(dir, fname);
#ifdef CONFIG_UNICODE
/* Case-sensitive match is fine for recovery */
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
#endif
} else {
f2fs_hash_filename(dir, fname);
}
return 0;
}
static int recover_dentry(struct inode *inode, struct page *ipage,
struct list_head *dir_list)
{
struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
nid_t pino = le32_to_cpu(raw_inode->i_pino);
struct f2fs_dir_entry *de;
struct fscrypt_name fname;
struct f2fs_filename fname;
struct qstr usr_fname;
struct page *page;
struct inode *dir, *einode;
struct fsync_inode_entry *entry;
......@@ -132,16 +170,9 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
}
dir = entry->inode;
memset(&fname, 0, sizeof(struct fscrypt_name));
fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
fname.disk_name.name = raw_inode->i_name;
if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
WARN_ON(1);
err = -ENAMETOOLONG;
err = init_recovered_filename(dir, raw_inode, &fname, &usr_fname);
if (err)
goto out;
}
retry:
de = __f2fs_find_entry(dir, &fname, &page);
if (de && inode->i_ino == le32_to_cpu(de->ino))
......
......@@ -1029,9 +1029,9 @@ static void f2fs_submit_discard_endio(struct bio *bio)
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
unsigned long flags;
dc->error = blk_status_to_errno(bio->bi_status);
spin_lock_irqsave(&dc->lock, flags);
if (!dc->error)
dc->error = blk_status_to_errno(bio->bi_status);
dc->bio_ref--;
if (!dc->bio_ref && dc->state == D_SUBMIT) {
dc->state = D_DONE;
......@@ -1101,7 +1101,6 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
} else if (discard_type == DPOLICY_FSTRIM) {
dpolicy->io_aware = false;
} else if (discard_type == DPOLICY_UMOUNT) {
dpolicy->max_requests = UINT_MAX;
dpolicy->io_aware = false;
/* we need to issue all to keep CP_TRIMMED_FLAG */
dpolicy->granularity = 1;
......@@ -1215,12 +1214,14 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
len = total_len;
}
if (!err && len)
if (!err && len) {
dcc->undiscard_blks -= len;
__update_discard_tree_range(sbi, bdev, lstart, start, len);
}
return err;
}
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
static void __insert_discard_tree(struct f2fs_sb_info *sbi,
struct block_device *bdev, block_t lstart,
block_t start, block_t len,
struct rb_node **insert_p,
......@@ -1229,7 +1230,6 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct rb_node **p;
struct rb_node *parent = NULL;
struct discard_cmd *dc = NULL;
bool leftmost = true;
if (insert_p && insert_parent) {
......@@ -1241,12 +1241,8 @@ static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
lstart, &leftmost);
do_insert:
dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
__attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
p, leftmost);
if (!dc)
return NULL;
return dc;
}
static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
......@@ -1463,6 +1459,8 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
return issued;
}
static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy);
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
......@@ -1471,12 +1469,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
int i, issued = 0;
int i, issued;
bool io_interrupted = false;
if (dpolicy->timeout)
f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
retry:
issued = 0;
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
if (dpolicy->timeout &&
f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
......@@ -1523,6 +1523,11 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
break;
}
if (dpolicy->type == DPOLICY_UMOUNT && issued) {
__wait_all_discard_cmd(sbi, dpolicy);
goto retry;
}
if (!issued && io_interrupted)
issued = -1;
......@@ -3102,6 +3107,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
type = CURSEG_COLD_DATA;
}
/*
* We need to wait for node_write to avoid block allocation during
* checkpoint. This can only happen to quota writes which can cause
* the below discard race condition.
*/
if (IS_DATASEG(type))
down_write(&sbi->node_write);
down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
......@@ -3167,6 +3180,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
up_read(&SM_I(sbi)->curseg_lock);
if (IS_DATASEG(type))
up_write(&sbi->node_write);
if (put_pin_sem)
up_read(&sbi->pin_sem);
}
......
// SPDX-License-Identifier: GPL-2.0
/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/segment.h
*
......
......@@ -285,6 +285,22 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).s_resgid));
}
static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi)
{
if (!F2FS_OPTION(sbi).unusable_cap_perc)
return;
if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
F2FS_OPTION(sbi).unusable_cap = sbi->user_block_count;
else
F2FS_OPTION(sbi).unusable_cap = (sbi->user_block_count / 100) *
F2FS_OPTION(sbi).unusable_cap_perc;
f2fs_info(sbi, "Adjust unusable cap for checkpoint=disable = %u / %u%%",
F2FS_OPTION(sbi).unusable_cap,
F2FS_OPTION(sbi).unusable_cap_perc);
}
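For illustration: mounting with checkpoint=disable:10%% on a filesystem
whose user_block_count is 26214400 (100GB of 4KB blocks) yields
unusable_cap = 262144 * 10 = 2621440 blocks. Storing the percentage and
recomputing it here is what lets checkpoint=disable:%u%% stay correct
across remounts, when user_block_count may have changed.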
static void init_once(void *foo)
{
struct f2fs_inode_info *fi = (struct f2fs_inode_info *) foo;
......@@ -471,11 +487,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (!name)
return -ENOMEM;
if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
if (!strcmp(name, "on")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
} else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
} else if (!strcmp(name, "off")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF;
} else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
} else if (!strcmp(name, "sync")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC;
} else {
kvfree(name);
......@@ -635,16 +651,14 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (!name)
return -ENOMEM;
if (strlen(name) == 8 &&
!strncmp(name, "adaptive", 8)) {
if (!strcmp(name, "adaptive")) {
if (f2fs_sb_has_blkzoned(sbi)) {
f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature");
kvfree(name);
return -EINVAL;
}
F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
} else if (strlen(name) == 3 &&
!strncmp(name, "lfs", 3)) {
} else if (!strcmp(name, "lfs")) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
} else {
kvfree(name);
......@@ -769,14 +783,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
name = match_strdup(&args[0]);
if (!name)
return -ENOMEM;
if (strlen(name) == 10 &&
!strncmp(name, "user-based", 10)) {
if (!strcmp(name, "user-based")) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER;
} else if (strlen(name) == 3 &&
!strncmp(name, "off", 3)) {
} else if (!strcmp(name, "off")) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
} else if (strlen(name) == 8 &&
!strncmp(name, "fs-based", 8)) {
} else if (!strcmp(name, "fs-based")) {
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
} else {
kvfree(name);
......@@ -789,11 +800,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (!name)
return -ENOMEM;
if (strlen(name) == 7 &&
!strncmp(name, "default", 7)) {
if (!strcmp(name, "default")) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
} else if (strlen(name) == 5 &&
!strncmp(name, "reuse", 5)) {
} else if (!strcmp(name, "reuse")) {
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
} else {
kvfree(name);
......@@ -805,14 +814,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
name = match_strdup(&args[0]);
if (!name)
return -ENOMEM;
if (strlen(name) == 5 &&
!strncmp(name, "posix", 5)) {
if (!strcmp(name, "posix")) {
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
} else if (strlen(name) == 6 &&
!strncmp(name, "strict", 6)) {
} else if (!strcmp(name, "strict")) {
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_STRICT;
} else if (strlen(name) == 9 &&
!strncmp(name, "nobarrier", 9)) {
} else if (!strcmp(name, "nobarrier")) {
F2FS_OPTION(sbi).fsync_mode =
FSYNC_MODE_NOBARRIER;
} else {
......@@ -832,12 +838,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
if (arg < 0 || arg > 100)
return -EINVAL;
if (arg == 100)
F2FS_OPTION(sbi).unusable_cap =
sbi->user_block_count;
else
F2FS_OPTION(sbi).unusable_cap =
(sbi->user_block_count / 100) * arg;
F2FS_OPTION(sbi).unusable_cap_perc = arg;
set_opt(sbi, DISABLE_CHECKPOINT);
break;
case Opt_checkpoint_disable_cap:
......@@ -860,17 +861,18 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
name = match_strdup(&args[0]);
if (!name)
return -ENOMEM;
if (strlen(name) == 3 && !strcmp(name, "lzo")) {
if (!strcmp(name, "lzo")) {
F2FS_OPTION(sbi).compress_algorithm =
COMPRESS_LZO;
} else if (strlen(name) == 3 &&
!strcmp(name, "lz4")) {
} else if (!strcmp(name, "lz4")) {
F2FS_OPTION(sbi).compress_algorithm =
COMPRESS_LZ4;
} else if (strlen(name) == 4 &&
!strcmp(name, "zstd")) {
} else if (!strcmp(name, "zstd")) {
F2FS_OPTION(sbi).compress_algorithm =
COMPRESS_ZSTD;
} else if (!strcmp(name, "lzo-rle")) {
F2FS_OPTION(sbi).compress_algorithm =
COMPRESS_LZORLE;
} else {
kfree(name);
return -EINVAL;
......@@ -1330,7 +1332,8 @@ static int f2fs_statfs_project(struct super_block *sb,
limit >>= sb->s_blocksize_bits;
if (limit && buf->f_blocks > limit) {
curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
curblock = (dquot->dq_dqb.dqb_curspace +
dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
buf->f_blocks = limit;
buf->f_bfree = buf->f_bavail =
(buf->f_blocks > curblock) ?
......@@ -1465,6 +1468,9 @@ static inline void f2fs_show_compress_options(struct seq_file *seq,
case COMPRESS_ZSTD:
algtype = "zstd";
break;
case COMPRESS_LZORLE:
algtype = "lzo-rle";
break;
}
seq_printf(seq, ",compress_algorithm=%s", algtype);
......@@ -1880,6 +1886,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
limit_reserve_root(sbi);
adjust_unusable_cap_perc(sbi);
*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
return 0;
restore_gc:
......@@ -3062,7 +3069,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
FDEV(devi).nr_blkz++;
FDEV(devi).blkz_seq = f2fs_kzalloc(sbi,
FDEV(devi).blkz_seq = f2fs_kvzalloc(sbi,
BITS_TO_LONGS(FDEV(devi).nr_blkz)
* sizeof(unsigned long),
GFP_KERNEL);
......@@ -3449,7 +3456,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
init_rwsem(&sbi->gc_lock);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
mutex_init(&sbi->resize_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
......@@ -3460,6 +3466,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* init iostat info */
spin_lock_init(&sbi->iostat_lock);
sbi->iostat_enable = false;
sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
for (i = 0; i < NR_PAGE_TYPE; i++) {
int n = (i == META) ? 1: NR_TEMP_TYPE;
......@@ -3557,6 +3564,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sbi->reserved_blocks = 0;
sbi->current_reserved_blocks = 0;
limit_reserve_root(sbi);
adjust_unusable_cap_perc(sbi);
for (i = 0; i < NR_INODE_TYPE; i++) {
INIT_LIST_HEAD(&sbi->inode_list[i]);
......@@ -3927,7 +3935,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_bioset();
if (err)
goto free_bio_entry_cache;
err = f2fs_init_compress_mempool();
if (err)
goto free_bioset;
return 0;
free_bioset:
f2fs_destroy_bioset();
free_bio_entry_cache:
f2fs_destroy_bio_entry_cache();
free_post_read:
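
The new f2fs_init_compress_mempool()/f2fs_destroy_compress_mempool() pair joins the usual init_f2fs_fs() unwind chain above. A hedged sketch of the pair, assuming a fixed-size page pool (num_compress_pages is an assumed tunable):

	#include <linux/mempool.h>

	static int num_compress_pages = 512;	/* assumed pool size */
	static mempool_t *compress_page_pool;

	/* Sketch: preallocated pages for intermediate (de)compression
	 * buffers, so the IO path need not allocate under pressure. */
	int f2fs_init_compress_mempool(void)
	{
		compress_page_pool =
			mempool_create_page_pool(num_compress_pages, 0);

		return compress_page_pool ? 0 : -ENOMEM;
	}

	void f2fs_destroy_compress_mempool(void)
	{
		mempool_destroy(compress_page_pool);
	}
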
......@@ -3955,6 +3968,7 @@ static int __init init_f2fs_fs(void)
static void __exit exit_f2fs_fs(void)
{
f2fs_destroy_compress_mempool();
f2fs_destroy_bioset();
f2fs_destroy_bio_entry_cache();
f2fs_destroy_post_read_processing();
......
......@@ -15,6 +15,7 @@
#include "f2fs.h"
#include "segment.h"
#include "gc.h"
#include <trace/events/f2fs.h>
static struct proc_dir_entry *f2fs_proc_root;
......@@ -372,7 +373,6 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return count;
}
if (!strcmp(a->attr.name, "iostat_enable")) {
sbi->iostat_enable = !!t;
if (!sbi->iostat_enable)
......@@ -380,6 +380,15 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return count;
}
if (!strcmp(a->attr.name, "iostat_period_ms")) {
if (t < MIN_IOSTAT_PERIOD_MS || t > MAX_IOSTAT_PERIOD_MS)
return -EINVAL;
spin_lock(&sbi->iostat_lock);
sbi->iostat_period_ms = (unsigned int)t;
spin_unlock(&sbi->iostat_lock);
return count;
}
*ui = (unsigned int)t;
return count;
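
For the iostat_period_ms handler above, a minimal userspace sketch that sets a 10-second window ("sda1" is an assumed example device):

	#include <stdio.h>

	/* Sketch: write a new period, in milliseconds, to the sysfs node.
	 * Out-of-range values are rejected with EINVAL by the handler. */
	int main(void)
	{
		FILE *f = fopen("/sys/fs/f2fs/sda1/iostat_period_ms", "w");

		if (!f)
			return 1;
		fprintf(f, "10000\n");
		return fclose(f) ? 1 : 0;
	}
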
......@@ -538,6 +547,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
......@@ -545,6 +555,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
#endif
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(free_segments);
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
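
data_io_flag and node_io_flag pack one REQ_FUA and one REQ_META bit per write temperature into a single value. A hedged sketch of how the write path could decode them (fio field names assumed from the surrounding f2fs code):

	/* Sketch: attach REQ_META/REQ_FUA based on the write's temperature.
	 * The low NR_TEMP_TYPE bits select FUA per temp; the next group
	 * selects META. */
	static void __attach_io_flag(struct f2fs_io_info *fio,
					unsigned int io_flag)
	{
		unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
		unsigned int fua_flag = io_flag & temp_mask;
		unsigned int meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;

		if ((1 << fio->temp) & meta_flag)
			fio->op_flags |= REQ_META;
		if ((1 << fio->temp) & fua_flag)
			fio->op_flags |= REQ_FUA;
	}
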
......@@ -618,6 +630,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_idle_interval),
ATTR_LIST(umount_discard_timeout),
ATTR_LIST(iostat_enable),
ATTR_LIST(iostat_period_ms),
ATTR_LIST(readdir_ra),
ATTR_LIST(gc_pin_file_thresh),
ATTR_LIST(extension_list),
......@@ -625,6 +638,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(inject_rate),
ATTR_LIST(inject_type),
#endif
ATTR_LIST(data_io_flag),
ATTR_LIST(node_io_flag),
ATTR_LIST(dirty_segments),
ATTR_LIST(free_segments),
ATTR_LIST(unusable),
......@@ -754,6 +769,33 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
return 0;
}
void f2fs_record_iostat(struct f2fs_sb_info *sbi)
{
unsigned long long iostat_diff[NR_IO_TYPE];
int i;
if (time_is_after_jiffies(sbi->iostat_next_period))
return;
/* Need to double-check under the lock */
spin_lock(&sbi->iostat_lock);
if (time_is_after_jiffies(sbi->iostat_next_period)) {
spin_unlock(&sbi->iostat_lock);
return;
}
sbi->iostat_next_period = jiffies +
msecs_to_jiffies(sbi->iostat_period_ms);
for (i = 0; i < NR_IO_TYPE; i++) {
iostat_diff[i] = sbi->rw_iostat[i] -
sbi->prev_rw_iostat[i];
sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
}
spin_unlock(&sbi->iostat_lock);
trace_f2fs_iostat(sbi, iostat_diff);
}
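
f2fs_record_iostat() gates tracing with a lock-free time check that is re-validated under iostat_lock, so concurrent writers emit at most one f2fs_iostat event per period. A simplified sketch of the updater expected to call it on every accounted IO (the in-tree version additionally derives the buffered counters from the write/direct totals):

	/* Sketch: account one IO, then let f2fs_record_iostat() decide
	 * whether a trace period has elapsed. */
	void f2fs_update_iostat(struct f2fs_sb_info *sbi,
				enum iostat_type type,
				unsigned long long io_bytes)
	{
		if (!sbi->iostat_enable)
			return;

		spin_lock(&sbi->iostat_lock);
		sbi->rw_iostat[type] += io_bytes;
		spin_unlock(&sbi->iostat_lock);

		f2fs_record_iostat(sbi);
	}
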
static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
void *offset)
{
......@@ -766,33 +808,58 @@ static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
seq_printf(seq, "time: %-16llu\n", now);
/* print app IOs */
/* print app write IOs */
seq_puts(seq, "[WRITE]\n");
seq_printf(seq, "app buffered: %-16llu\n",
sbi->write_iostat[APP_BUFFERED_IO]);
sbi->rw_iostat[APP_BUFFERED_IO]);
seq_printf(seq, "app direct: %-16llu\n",
sbi->write_iostat[APP_DIRECT_IO]);
sbi->rw_iostat[APP_DIRECT_IO]);
seq_printf(seq, "app mapped: %-16llu\n",
sbi->write_iostat[APP_MAPPED_IO]);
sbi->rw_iostat[APP_MAPPED_IO]);
/* print fs IOs */
/* print fs write IOs */
seq_printf(seq, "fs data: %-16llu\n",
sbi->write_iostat[FS_DATA_IO]);
sbi->rw_iostat[FS_DATA_IO]);
seq_printf(seq, "fs node: %-16llu\n",
sbi->write_iostat[FS_NODE_IO]);
sbi->rw_iostat[FS_NODE_IO]);
seq_printf(seq, "fs meta: %-16llu\n",
sbi->write_iostat[FS_META_IO]);
sbi->rw_iostat[FS_META_IO]);
seq_printf(seq, "fs gc data: %-16llu\n",
sbi->write_iostat[FS_GC_DATA_IO]);
sbi->rw_iostat[FS_GC_DATA_IO]);
seq_printf(seq, "fs gc node: %-16llu\n",
sbi->write_iostat[FS_GC_NODE_IO]);
sbi->rw_iostat[FS_GC_NODE_IO]);
seq_printf(seq, "fs cp data: %-16llu\n",
sbi->write_iostat[FS_CP_DATA_IO]);
sbi->rw_iostat[FS_CP_DATA_IO]);
seq_printf(seq, "fs cp node: %-16llu\n",
sbi->write_iostat[FS_CP_NODE_IO]);
sbi->rw_iostat[FS_CP_NODE_IO]);
seq_printf(seq, "fs cp meta: %-16llu\n",
sbi->write_iostat[FS_CP_META_IO]);
sbi->rw_iostat[FS_CP_META_IO]);
/* print app read IOs */
seq_puts(seq, "[READ]\n");
seq_printf(seq, "app buffered: %-16llu\n",
sbi->rw_iostat[APP_BUFFERED_READ_IO]);
seq_printf(seq, "app direct: %-16llu\n",
sbi->rw_iostat[APP_DIRECT_READ_IO]);
seq_printf(seq, "app mapped: %-16llu\n",
sbi->rw_iostat[APP_MAPPED_READ_IO]);
/* print fs read IOs */
seq_printf(seq, "fs data: %-16llu\n",
sbi->rw_iostat[FS_DATA_READ_IO]);
seq_printf(seq, "fs gc data: %-16llu\n",
sbi->rw_iostat[FS_GDATA_READ_IO]);
seq_printf(seq, "fs compr_data: %-16llu\n",
sbi->rw_iostat[FS_CDATA_READ_IO]);
seq_printf(seq, "fs node: %-16llu\n",
sbi->rw_iostat[FS_NODE_READ_IO]);
seq_printf(seq, "fs meta: %-16llu\n",
sbi->rw_iostat[FS_META_READ_IO]);
/* print other IOs */
seq_puts(seq, "[OTHER]\n");
seq_printf(seq, "fs discard: %-16llu\n",
sbi->write_iostat[FS_DISCARD]);
sbi->rw_iostat[FS_DISCARD]);
return 0;
}
......
// SPDX-License-Identifier: GPL-2.0
/* SPDX-License-Identifier: GPL-2.0 */
/*
* f2fs IO tracer
*
......
// SPDX-License-Identifier: GPL-2.0
/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/f2fs/xattr.h
*
......@@ -136,6 +136,7 @@ extern void f2fs_destroy_xattr_caches(struct f2fs_sb_info *);
#else
#define f2fs_xattr_handlers NULL
#define f2fs_listxattr NULL
static inline int f2fs_setxattr(struct inode *inode, int index,
const char *name, const void *value, size_t size,
struct page *page, int flags)
......@@ -148,11 +149,6 @@ static inline int f2fs_getxattr(struct inode *inode, int index,
{
return -EOPNOTSUPP;
}
static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer,
size_t buffer_size)
{
return -EOPNOTSUPP;
}
static inline int f2fs_init_xattr_caches(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { }
#endif
......
......@@ -50,6 +50,7 @@ TRACE_DEFINE_ENUM(CP_RECOVERY);
TRACE_DEFINE_ENUM(CP_DISCARD);
TRACE_DEFINE_ENUM(CP_TRIMMED);
TRACE_DEFINE_ENUM(CP_PAUSE);
TRACE_DEFINE_ENUM(CP_RESIZE);
#define show_block_type(type) \
__print_symbolic(type, \
......@@ -126,7 +127,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE);
{ CP_RECOVERY, "Recovery" }, \
{ CP_DISCARD, "Discard" }, \
{ CP_PAUSE, "Pause" }, \
{ CP_TRIMMED, "Trimmed" })
{ CP_TRIMMED, "Trimmed" }, \
{ CP_RESIZE, "Resize" })
#define show_fsync_cpreason(type) \
__print_symbolic(type, \
......@@ -154,7 +156,8 @@ TRACE_DEFINE_ENUM(CP_PAUSE);
__print_symbolic(type, \
{ COMPRESS_LZO, "LZO" }, \
{ COMPRESS_LZ4, "LZ4" }, \
{ COMPRESS_ZSTD, "ZSTD" })
{ COMPRESS_ZSTD, "ZSTD" }, \
{ COMPRESS_LZORLE, "LZO-RLE" })
struct f2fs_sb_info;
struct f2fs_io_info;
......@@ -1812,6 +1815,82 @@ DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end,
TP_ARGS(inode, cluster_idx, compressed_size, ret)
);
TRACE_EVENT(f2fs_iostat,
TP_PROTO(struct f2fs_sb_info *sbi, unsigned long long *iostat),
TP_ARGS(sbi, iostat),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long long, app_dio)
__field(unsigned long long, app_bio)
__field(unsigned long long, app_wio)
__field(unsigned long long, app_mio)
__field(unsigned long long, fs_dio)
__field(unsigned long long, fs_nio)
__field(unsigned long long, fs_mio)
__field(unsigned long long, fs_gc_dio)
__field(unsigned long long, fs_gc_nio)
__field(unsigned long long, fs_cp_dio)
__field(unsigned long long, fs_cp_nio)
__field(unsigned long long, fs_cp_mio)
__field(unsigned long long, app_drio)
__field(unsigned long long, app_brio)
__field(unsigned long long, app_rio)
__field(unsigned long long, app_mrio)
__field(unsigned long long, fs_drio)
__field(unsigned long long, fs_gdrio)
__field(unsigned long long, fs_cdrio)
__field(unsigned long long, fs_nrio)
__field(unsigned long long, fs_mrio)
__field(unsigned long long, fs_discard)
),
TP_fast_assign(
__entry->dev = sbi->sb->s_dev;
__entry->app_dio = iostat[APP_DIRECT_IO];
__entry->app_bio = iostat[APP_BUFFERED_IO];
__entry->app_wio = iostat[APP_WRITE_IO];
__entry->app_mio = iostat[APP_MAPPED_IO];
__entry->fs_dio = iostat[FS_DATA_IO];
__entry->fs_nio = iostat[FS_NODE_IO];
__entry->fs_mio = iostat[FS_META_IO];
__entry->fs_gc_dio = iostat[FS_GC_DATA_IO];
__entry->fs_gc_nio = iostat[FS_GC_NODE_IO];
__entry->fs_cp_dio = iostat[FS_CP_DATA_IO];
__entry->fs_cp_nio = iostat[FS_CP_NODE_IO];
__entry->fs_cp_mio = iostat[FS_CP_META_IO];
__entry->app_drio = iostat[APP_DIRECT_READ_IO];
__entry->app_brio = iostat[APP_BUFFERED_READ_IO];
__entry->app_rio = iostat[APP_READ_IO];
__entry->app_mrio = iostat[APP_MAPPED_READ_IO];
__entry->fs_drio = iostat[FS_DATA_READ_IO];
__entry->fs_gdrio = iostat[FS_GDATA_READ_IO];
__entry->fs_cdrio = iostat[FS_CDATA_READ_IO];
__entry->fs_nrio = iostat[FS_NODE_READ_IO];
__entry->fs_mrio = iostat[FS_META_READ_IO];
__entry->fs_discard = iostat[FS_DISCARD];
),
TP_printk("dev = (%d,%d), "
"app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu], "
"fs [data=%llu, node=%llu, meta=%llu, discard=%llu], "
"gc [data=%llu, node=%llu], "
"cp [data=%llu, node=%llu, meta=%llu], "
"app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], "
"fs [data=%llu, (gc_data=%llu, compr_data=%llu), "
"node=%llu, meta=%llu]",
show_dev(__entry->dev), __entry->app_wio, __entry->app_dio,
__entry->app_bio, __entry->app_mio, __entry->fs_dio,
__entry->fs_nio, __entry->fs_mio, __entry->fs_discard,
__entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio,
__entry->fs_cp_nio, __entry->fs_cp_mio,
__entry->app_rio, __entry->app_drio, __entry->app_brio,
__entry->app_mrio, __entry->fs_drio, __entry->fs_gdrio,
__entry->fs_cdrio, __entry->fs_nrio, __entry->fs_mrio)
);
#endif /* _TRACE_F2FS_H */
/* This part must be outside protection */
......