Commit 71644dff authored by Jaegeuk Kim's avatar Jaegeuk Kim

f2fs: add block_age-based extent cache

This patch introduces a runtime hot/cold data separation method
for f2fs, in order to improve the accuracy for data temperature
classification, reduce the garbage collection overhead after
long-term data updates.

Enhanced hot/cold data separation can record data block update
frequency as "age" of the extent per inode, and take use of the age
info to indicate better temperature type for data block allocation:
 - It records total data blocks allocated since mount;
 - When file extent has been updated, it calculate the count of data
blocks allocated since last update as the age of the extent;
 - Before the data block allocated, it searches for the age info and
chooses the suitable segment for allocation.

Test and result:
 - Prepare: create about 30000 files
  * 3% for cold files (with cold file extension like .apk, from 3M to 10M)
  * 50% for warm files (with random file extension like .FcDxq, from 1K
to 4M)
  * 47% for hot files (with hot file extension like .db, from 1K to 256K)
 - create(5%)/random update(90%)/delete(5%) the files
  * total write amount is about 70G
  * fsync will be called for .db files, and buffered write will be used
for other files

The storage of test device is large enough(128G) so that it will not
switch to SSR mode during the test.

Benefit: dirty segment count increment reduce about 14%
 - before: Dirty +21110
 - after:  Dirty +18286
Signed-off-by: default avatarqixiaoyu1 <qixiaoyu1@xiaomi.com>
Signed-off-by: default avatarxiongping1 <xiongping1@xiaomi.com>
Signed-off-by: default avatarJaegeuk Kim <jaegeuk@kernel.org>
parent 72840ccc
...@@ -655,3 +655,17 @@ Description: When space utilization exceeds this, do background DISCARD aggressi ...@@ -655,3 +655,17 @@ Description: When space utilization exceeds this, do background DISCARD aggressi
Does DISCARD forcibly in a period of given min_discard_issue_time when the number Does DISCARD forcibly in a period of given min_discard_issue_time when the number
of discards is not 0 and set discard granularity to 1. of discards is not 0 and set discard granularity to 1.
Default: 80 Default: 80
What: /sys/fs/f2fs/<disk>/hot_data_age_threshold
Date: November 2022
Contact: "Ping Xiong" <xiongping1@xiaomi.com>
Description: When DATA SEPARATION is on, it controls the age threshold to indicate
the data blocks as hot. By default it was initialized as 262144 blocks
(equals to 1GB).
What: /sys/fs/f2fs/<disk>/warm_data_age_threshold
Date: November 2022
Contact: "Ping Xiong" <xiongping1@xiaomi.com>
Description: When DATA SEPARATION is on, it controls the age threshold to indicate
the data blocks as warm. By default it was initialized as 2621440 blocks
(equals to 10GB).
...@@ -347,6 +347,10 @@ memory=%s Control memory mode. This supports "normal" and "low" modes. ...@@ -347,6 +347,10 @@ memory=%s Control memory mode. This supports "normal" and "low" modes.
Because of the nature of low memory devices, in this mode, f2fs Because of the nature of low memory devices, in this mode, f2fs
will try to save memory sometimes by sacrificing performance. will try to save memory sometimes by sacrificing performance.
"normal" mode is the default mode and same as before. "normal" mode is the default mode and same as before.
age_extent_cache Enable an age extent cache based on rb-tree. It records
data block update frequency of the extent per inode, in
order to provide better temperature hints for data block
allocation.
======================== ============================================================ ======================== ============================================================
Debugfs Entries Debugfs Entries
......
...@@ -88,6 +88,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) ...@@ -88,6 +88,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->hit_largest = atomic64_read(&sbi->read_hit_largest); si->hit_largest = atomic64_read(&sbi->read_hit_largest);
si->hit_total[EX_READ] += si->hit_largest; si->hit_total[EX_READ] += si->hit_largest;
/* block age extent_cache only */
si->allocated_data_blocks = atomic64_read(&sbi->allocated_data_blocks);
/* validation check of the segment numbers */ /* validation check of the segment numbers */
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
...@@ -516,6 +519,22 @@ static int stat_show(struct seq_file *s, void *v) ...@@ -516,6 +519,22 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree[EX_READ], si->zombie_tree[EX_READ], si->ext_tree[EX_READ], si->zombie_tree[EX_READ],
si->ext_node[EX_READ]); si->ext_node[EX_READ]);
seq_puts(s, "\nExtent Cache (Block Age):\n");
seq_printf(s, " - Allocated Data Blocks: %llu\n",
si->allocated_data_blocks);
seq_printf(s, " - Hit Count: L1:%llu L2:%llu\n",
si->hit_cached[EX_BLOCK_AGE],
si->hit_rbtree[EX_BLOCK_AGE]);
seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n",
!si->total_ext[EX_BLOCK_AGE] ? 0 :
div64_u64(si->hit_total[EX_BLOCK_AGE] * 100,
si->total_ext[EX_BLOCK_AGE]),
si->hit_total[EX_BLOCK_AGE],
si->total_ext[EX_BLOCK_AGE]);
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree[EX_BLOCK_AGE],
si->zombie_tree[EX_BLOCK_AGE],
si->ext_node[EX_BLOCK_AGE]);
seq_puts(s, "\nBalancing F2FS Async:\n"); seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - DIO (R: %4d, W: %4d)\n", seq_printf(s, " - DIO (R: %4d, W: %4d)\n",
si->nr_dio_read, si->nr_dio_write); si->nr_dio_read, si->nr_dio_write);
...@@ -586,6 +605,8 @@ static int stat_show(struct seq_file *s, void *v) ...@@ -586,6 +605,8 @@ static int stat_show(struct seq_file *s, void *v)
si->cache_mem >> 10); si->cache_mem >> 10);
seq_printf(s, " - read extent cache: %llu KB\n", seq_printf(s, " - read extent cache: %llu KB\n",
si->ext_mem[EX_READ] >> 10); si->ext_mem[EX_READ] >> 10);
seq_printf(s, " - block age extent cache: %llu KB\n",
si->ext_mem[EX_BLOCK_AGE] >> 10);
seq_printf(s, " - paged : %llu KB\n", seq_printf(s, " - paged : %llu KB\n",
si->page_mem >> 10); si->page_mem >> 10);
} }
......
This diff is collapsed.
...@@ -107,6 +107,7 @@ extern const char *f2fs_fault_name[FAULT_MAX]; ...@@ -107,6 +107,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
#define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000 #define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000
#define F2FS_MOUNT_GC_MERGE 0x20000000 #define F2FS_MOUNT_GC_MERGE 0x20000000
#define F2FS_MOUNT_COMPRESS_CACHE 0x40000000 #define F2FS_MOUNT_COMPRESS_CACHE 0x40000000
#define F2FS_MOUNT_AGE_EXTENT_CACHE 0x80000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
...@@ -607,9 +608,22 @@ enum { ...@@ -607,9 +608,22 @@ enum {
/* number of extent info in extent cache we try to shrink */ /* number of extent info in extent cache we try to shrink */
#define READ_EXTENT_CACHE_SHRINK_NUMBER 128 #define READ_EXTENT_CACHE_SHRINK_NUMBER 128
/* number of age extent info in extent cache we try to shrink */
#define AGE_EXTENT_CACHE_SHRINK_NUMBER 128
#define LAST_AGE_WEIGHT 30
#define SAME_AGE_REGION 1024
/*
* Define data block with age less than 1GB as hot data
* define data block with age less than 10GB but more than 1GB as warm data
*/
#define DEF_HOT_DATA_AGE_THRESHOLD 262144
#define DEF_WARM_DATA_AGE_THRESHOLD 2621440
/* extent cache type */ /* extent cache type */
enum extent_type { enum extent_type {
EX_READ, EX_READ,
EX_BLOCK_AGE,
NR_EXTENT_CACHES, NR_EXTENT_CACHES,
}; };
...@@ -637,6 +651,13 @@ struct extent_info { ...@@ -637,6 +651,13 @@ struct extent_info {
unsigned int c_len; unsigned int c_len;
#endif #endif
}; };
/* block age extent_cache */
struct {
/* block age of the extent */
unsigned long long age;
/* last total blocks allocated */
unsigned long long last_blocks;
};
}; };
}; };
...@@ -1653,6 +1674,11 @@ struct f2fs_sb_info { ...@@ -1653,6 +1674,11 @@ struct f2fs_sb_info {
/* for extent tree cache */ /* for extent tree cache */
struct extent_tree_info extent_tree[NR_EXTENT_CACHES]; struct extent_tree_info extent_tree[NR_EXTENT_CACHES];
atomic64_t allocated_data_blocks; /* for block age extent_cache */
/* The threshold used for hot and warm data seperation*/
unsigned int hot_data_age_threshold;
unsigned int warm_data_age_threshold;
/* basic filesystem units */ /* basic filesystem units */
unsigned int log_sectors_per_block; /* log2 sectors per block */ unsigned int log_sectors_per_block; /* log2 sectors per block */
...@@ -3857,6 +3883,8 @@ struct f2fs_stat_info { ...@@ -3857,6 +3883,8 @@ struct f2fs_stat_info {
unsigned long long ext_mem[NR_EXTENT_CACHES]; unsigned long long ext_mem[NR_EXTENT_CACHES];
/* for read extent cache */ /* for read extent cache */
unsigned long long hit_largest; unsigned long long hit_largest;
/* for block age extent cache */
unsigned long long allocated_data_blocks;
int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
int ndirty_data, ndirty_qdata; int ndirty_data, ndirty_qdata;
unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all; unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
...@@ -4168,6 +4196,16 @@ void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn, ...@@ -4168,6 +4196,16 @@ void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi,
int nr_shrink); int nr_shrink);
/* block age extent cache ops */
void f2fs_init_age_extent_tree(struct inode *inode);
bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei);
void f2fs_update_age_extent_cache(struct dnode_of_data *dn);
void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
pgoff_t fofs, unsigned int len);
unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi,
int nr_shrink);
/* /*
* sysfs.c * sysfs.c
*/ */
......
...@@ -619,6 +619,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) ...@@ -619,6 +619,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
dn->inode) + ofs; dn->inode) + ofs;
f2fs_update_read_extent_cache_range(dn, fofs, 0, len); f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
f2fs_update_age_extent_cache_range(dn, fofs, nr_free);
dec_valid_block_count(sbi, dn->inode, nr_free); dec_valid_block_count(sbi, dn->inode, nr_free);
} }
dn->ofs_in_node = ofs; dn->ofs_in_node = ofs;
......
...@@ -480,6 +480,7 @@ static int do_read_inode(struct inode *inode) ...@@ -480,6 +480,7 @@ static int do_read_inode(struct inode *inode)
/* Need all the flag bits */ /* Need all the flag bits */
f2fs_init_read_extent_tree(inode, node_page); f2fs_init_read_extent_tree(inode, node_page);
f2fs_init_age_extent_tree(inode);
f2fs_put_page(node_page, 1); f2fs_put_page(node_page, 1);
......
...@@ -60,7 +60,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) ...@@ -60,7 +60,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
avail_ram = val.totalram - val.totalhigh; avail_ram = val.totalram - val.totalhigh;
/* /*
* give 25%, 25%, 50%, 50%, 50% memory for each components respectively * give 25%, 25%, 50%, 50%, 25%, 25% memory for each components respectively
*/ */
if (type == FREE_NIDS) { if (type == FREE_NIDS) {
mem_size = (nm_i->nid_cnt[FREE_NID] * mem_size = (nm_i->nid_cnt[FREE_NID] *
...@@ -85,14 +85,16 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) ...@@ -85,14 +85,16 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
sizeof(struct ino_entry); sizeof(struct ino_entry);
mem_size >>= PAGE_SHIFT; mem_size >>= PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else if (type == READ_EXTENT_CACHE) { } else if (type == READ_EXTENT_CACHE || type == AGE_EXTENT_CACHE) {
struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; enum extent_type etype = type == READ_EXTENT_CACHE ?
EX_READ : EX_BLOCK_AGE;
struct extent_tree_info *eti = &sbi->extent_tree[etype];
mem_size = (atomic_read(&eti->total_ext_tree) * mem_size = (atomic_read(&eti->total_ext_tree) *
sizeof(struct extent_tree) + sizeof(struct extent_tree) +
atomic_read(&eti->total_ext_node) * atomic_read(&eti->total_ext_node) *
sizeof(struct extent_node)) >> PAGE_SHIFT; sizeof(struct extent_node)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == DISCARD_CACHE) { } else if (type == DISCARD_CACHE) {
mem_size = (atomic_read(&dcc->discard_cmd_cnt) * mem_size = (atomic_read(&dcc->discard_cmd_cnt) *
sizeof(struct discard_cmd)) >> PAGE_SHIFT; sizeof(struct discard_cmd)) >> PAGE_SHIFT;
......
...@@ -147,6 +147,7 @@ enum mem_type { ...@@ -147,6 +147,7 @@ enum mem_type {
DIRTY_DENTS, /* indicates dirty dentry pages */ DIRTY_DENTS, /* indicates dirty dentry pages */
INO_ENTRIES, /* indicates inode entries */ INO_ENTRIES, /* indicates inode entries */
READ_EXTENT_CACHE, /* indicates read extent cache */ READ_EXTENT_CACHE, /* indicates read extent cache */
AGE_EXTENT_CACHE, /* indicates age extent cache */
DISCARD_CACHE, /* indicates memory of cached discard cmds */ DISCARD_CACHE, /* indicates memory of cached discard cmds */
COMPRESS_PAGE, /* indicates memory of cached compressed pages */ COMPRESS_PAGE, /* indicates memory of cached compressed pages */
BASE_CHECK, /* check kernel status */ BASE_CHECK, /* check kernel status */
......
...@@ -453,6 +453,11 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) ...@@ -453,6 +453,11 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
f2fs_shrink_read_extent_tree(sbi, f2fs_shrink_read_extent_tree(sbi,
READ_EXTENT_CACHE_SHRINK_NUMBER); READ_EXTENT_CACHE_SHRINK_NUMBER);
/* try to shrink age extent cache when there is no enough memory */
if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
f2fs_shrink_age_extent_tree(sbi,
AGE_EXTENT_CACHE_SHRINK_NUMBER);
/* check the # of cached NAT entries */ /* check the # of cached NAT entries */
if (!f2fs_available_free_memory(sbi, NAT_ENTRIES)) if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
...@@ -3151,10 +3156,28 @@ static int __get_segment_type_4(struct f2fs_io_info *fio) ...@@ -3151,10 +3156,28 @@ static int __get_segment_type_4(struct f2fs_io_info *fio)
} }
} }
static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_info ei;
if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
if (!ei.age)
return NO_CHECK_TYPE;
if (ei.age <= sbi->hot_data_age_threshold)
return CURSEG_HOT_DATA;
if (ei.age <= sbi->warm_data_age_threshold)
return CURSEG_WARM_DATA;
return CURSEG_COLD_DATA;
}
return NO_CHECK_TYPE;
}
static int __get_segment_type_6(struct f2fs_io_info *fio) static int __get_segment_type_6(struct f2fs_io_info *fio)
{ {
if (fio->type == DATA) { if (fio->type == DATA) {
struct inode *inode = fio->page->mapping->host; struct inode *inode = fio->page->mapping->host;
int type;
if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
return CURSEG_COLD_DATA_PINNED; return CURSEG_COLD_DATA_PINNED;
...@@ -3169,6 +3192,11 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) ...@@ -3169,6 +3192,11 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
} }
if (file_is_cold(inode) || f2fs_need_compress_data(inode)) if (file_is_cold(inode) || f2fs_need_compress_data(inode))
return CURSEG_COLD_DATA; return CURSEG_COLD_DATA;
type = __get_age_segment_type(inode, fio->page->index);
if (type != NO_CHECK_TYPE)
return type;
if (file_is_hot(inode) || if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) || is_inode_flag_set(inode, FI_HOT_DATA) ||
f2fs_is_cow_file(inode)) f2fs_is_cow_file(inode))
...@@ -3287,6 +3315,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, ...@@ -3287,6 +3315,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr)); locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
if (IS_DATASEG(type))
atomic64_inc(&sbi->allocated_data_blocks);
up_write(&sit_i->sentry_lock); up_write(&sit_i->sentry_lock);
if (page && IS_NODESEG(type)) { if (page && IS_NODESEG(type)) {
...@@ -3414,6 +3445,8 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn, ...@@ -3414,6 +3445,8 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn,
struct f2fs_summary sum; struct f2fs_summary sum;
f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR); f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
f2fs_update_age_extent_cache(dn);
set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version); set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
do_write_page(&sum, fio); do_write_page(&sum, fio);
f2fs_update_data_blkaddr(dn, fio->new_blkaddr); f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
......
...@@ -59,6 +59,9 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, ...@@ -59,6 +59,9 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
/* count read extent cache entries */ /* count read extent cache entries */
count += __count_extent_cache(sbi, EX_READ); count += __count_extent_cache(sbi, EX_READ);
/* count block age extent cache entries */
count += __count_extent_cache(sbi, EX_BLOCK_AGE);
/* count clean nat cache entries */ /* count clean nat cache entries */
count += __count_nat_entries(sbi); count += __count_nat_entries(sbi);
...@@ -102,8 +105,11 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink, ...@@ -102,8 +105,11 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
sbi->shrinker_run_no = run_no; sbi->shrinker_run_no = run_no;
/* shrink extent cache entries */
freed += f2fs_shrink_age_extent_tree(sbi, nr >> 2);
/* shrink read extent cache entries */ /* shrink read extent cache entries */
freed += f2fs_shrink_read_extent_tree(sbi, nr >> 1); freed += f2fs_shrink_read_extent_tree(sbi, nr >> 2);
/* shrink clean nat cache entries */ /* shrink clean nat cache entries */
if (freed < nr) if (freed < nr)
...@@ -134,6 +140,8 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi) ...@@ -134,6 +140,8 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
void f2fs_leave_shrinker(struct f2fs_sb_info *sbi) void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
{ {
f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ)); f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ));
f2fs_shrink_age_extent_tree(sbi,
__count_extent_cache(sbi, EX_BLOCK_AGE));
spin_lock(&f2fs_list_lock); spin_lock(&f2fs_list_lock);
list_del_init(&sbi->s_list); list_del_init(&sbi->s_list);
......
...@@ -163,6 +163,7 @@ enum { ...@@ -163,6 +163,7 @@ enum {
Opt_nogc_merge, Opt_nogc_merge,
Opt_discard_unit, Opt_discard_unit,
Opt_memory_mode, Opt_memory_mode,
Opt_age_extent_cache,
Opt_err, Opt_err,
}; };
...@@ -241,6 +242,7 @@ static match_table_t f2fs_tokens = { ...@@ -241,6 +242,7 @@ static match_table_t f2fs_tokens = {
{Opt_nogc_merge, "nogc_merge"}, {Opt_nogc_merge, "nogc_merge"},
{Opt_discard_unit, "discard_unit=%s"}, {Opt_discard_unit, "discard_unit=%s"},
{Opt_memory_mode, "memory=%s"}, {Opt_memory_mode, "memory=%s"},
{Opt_age_extent_cache, "age_extent_cache"},
{Opt_err, NULL}, {Opt_err, NULL},
}; };
...@@ -1257,6 +1259,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) ...@@ -1257,6 +1259,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
} }
kfree(name); kfree(name);
break; break;
case Opt_age_extent_cache:
set_opt(sbi, AGE_EXTENT_CACHE);
break;
default: default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p); p);
...@@ -1958,6 +1963,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) ...@@ -1958,6 +1963,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",extent_cache"); seq_puts(seq, ",extent_cache");
else else
seq_puts(seq, ",noextent_cache"); seq_puts(seq, ",noextent_cache");
if (test_opt(sbi, AGE_EXTENT_CACHE))
seq_puts(seq, ",age_extent_cache");
if (test_opt(sbi, DATA_FLUSH)) if (test_opt(sbi, DATA_FLUSH))
seq_puts(seq, ",data_flush"); seq_puts(seq, ",data_flush");
...@@ -2219,6 +2226,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) ...@@ -2219,6 +2226,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool need_restart_flush = false, need_stop_flush = false; bool need_restart_flush = false, need_stop_flush = false;
bool need_restart_discard = false, need_stop_discard = false; bool need_restart_discard = false, need_stop_discard = false;
bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE); bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE);
bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE);
bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT); bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
bool no_io_align = !F2FS_IO_ALIGNED(sbi); bool no_io_align = !F2FS_IO_ALIGNED(sbi);
bool no_atgc = !test_opt(sbi, ATGC); bool no_atgc = !test_opt(sbi, ATGC);
...@@ -2313,6 +2321,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) ...@@ -2313,6 +2321,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
f2fs_warn(sbi, "switch extent_cache option is not allowed"); f2fs_warn(sbi, "switch extent_cache option is not allowed");
goto restore_opts; goto restore_opts;
} }
/* disallow enable/disable age extent_cache dynamically */
if (no_age_extent_cache == !!test_opt(sbi, AGE_EXTENT_CACHE)) {
err = -EINVAL;
f2fs_warn(sbi, "switch age_extent_cache option is not allowed");
goto restore_opts;
}
if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) { if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) {
err = -EINVAL; err = -EINVAL;
......
...@@ -668,6 +668,24 @@ static ssize_t __sbi_store(struct f2fs_attr *a, ...@@ -668,6 +668,24 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return count; return count;
} }
if (!strcmp(a->attr.name, "hot_data_age_threshold")) {
if (t == 0 || t >= sbi->warm_data_age_threshold)
return -EINVAL;
if (t == *ui)
return count;
*ui = (unsigned int)t;
return count;
}
if (!strcmp(a->attr.name, "warm_data_age_threshold")) {
if (t == 0 || t <= sbi->hot_data_age_threshold)
return -EINVAL;
if (t == *ui)
return count;
*ui = (unsigned int)t;
return count;
}
*ui = (unsigned int)t; *ui = (unsigned int)t;
return count; return count;
...@@ -923,6 +941,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, peak_atomic_write, peak_atomic_write); ...@@ -923,6 +941,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, peak_atomic_write, peak_atomic_write);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_block);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block);
/* For block age extent cache */
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr) #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = { static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_urgent_sleep_time), ATTR_LIST(gc_urgent_sleep_time),
...@@ -1018,6 +1040,8 @@ static struct attribute *f2fs_attrs[] = { ...@@ -1018,6 +1040,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(peak_atomic_write), ATTR_LIST(peak_atomic_write),
ATTR_LIST(committed_atomic_block), ATTR_LIST(committed_atomic_block),
ATTR_LIST(revoked_atomic_block), ATTR_LIST(revoked_atomic_block),
ATTR_LIST(hot_data_age_threshold),
ATTR_LIST(warm_data_age_threshold),
NULL, NULL,
}; };
ATTRIBUTE_GROUPS(f2fs); ATTRIBUTE_GROUPS(f2fs);
......
...@@ -49,6 +49,7 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); ...@@ -49,6 +49,7 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
TRACE_DEFINE_ENUM(CP_PAUSE); TRACE_DEFINE_ENUM(CP_PAUSE);
TRACE_DEFINE_ENUM(CP_RESIZE); TRACE_DEFINE_ENUM(CP_RESIZE);
TRACE_DEFINE_ENUM(EX_READ); TRACE_DEFINE_ENUM(EX_READ);
TRACE_DEFINE_ENUM(EX_BLOCK_AGE);
#define show_block_type(type) \ #define show_block_type(type) \
__print_symbolic(type, \ __print_symbolic(type, \
...@@ -155,6 +156,11 @@ TRACE_DEFINE_ENUM(EX_READ); ...@@ -155,6 +156,11 @@ TRACE_DEFINE_ENUM(EX_READ);
{ COMPRESS_ZSTD, "ZSTD" }, \ { COMPRESS_ZSTD, "ZSTD" }, \
{ COMPRESS_LZORLE, "LZO-RLE" }) { COMPRESS_LZORLE, "LZO-RLE" })
#define show_extent_type(type) \
__print_symbolic(type, \
{ EX_READ, "Read" }, \
{ EX_BLOCK_AGE, "Block Age" })
struct f2fs_sb_info; struct f2fs_sb_info;
struct f2fs_io_info; struct f2fs_io_info;
struct extent_info; struct extent_info;
...@@ -1544,7 +1550,7 @@ TRACE_EVENT(f2fs_lookup_extent_tree_start, ...@@ -1544,7 +1550,7 @@ TRACE_EVENT(f2fs_lookup_extent_tree_start,
TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, type = %s", TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, type = %s",
show_dev_ino(__entry), show_dev_ino(__entry),
__entry->pgofs, __entry->pgofs,
__entry->type == EX_READ ? "Read" : "N/A") show_extent_type(__entry->type))
); );
TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end, TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end,
...@@ -1583,6 +1589,45 @@ TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end, ...@@ -1583,6 +1589,45 @@ TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end,
__entry->blk) __entry->blk)
); );
TRACE_EVENT_CONDITION(f2fs_lookup_age_extent_tree_end,
TP_PROTO(struct inode *inode, unsigned int pgofs,
struct extent_info *ei),
TP_ARGS(inode, pgofs, ei),
TP_CONDITION(ei),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(unsigned int, pgofs)
__field(unsigned int, fofs)
__field(unsigned int, len)
__field(unsigned long long, age)
__field(unsigned long long, blocks)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pgofs = pgofs;
__entry->fofs = ei->fofs;
__entry->len = ei->len;
__entry->age = ei->age;
__entry->blocks = ei->last_blocks;
),
TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
"age_ext_info(fofs: %u, len: %u, age: %llu, blocks: %llu)",
show_dev_ino(__entry),
__entry->pgofs,
__entry->fofs,
__entry->len,
__entry->age,
__entry->blocks)
);
TRACE_EVENT(f2fs_update_read_extent_tree_range, TRACE_EVENT(f2fs_update_read_extent_tree_range,
TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len, TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len,
...@@ -1618,6 +1663,41 @@ TRACE_EVENT(f2fs_update_read_extent_tree_range, ...@@ -1618,6 +1663,41 @@ TRACE_EVENT(f2fs_update_read_extent_tree_range,
__entry->c_len) __entry->c_len)
); );
TRACE_EVENT(f2fs_update_age_extent_tree_range,
TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len,
unsigned long long age,
unsigned long long last_blks),
TP_ARGS(inode, pgofs, len, age, last_blks),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(unsigned int, pgofs)
__field(unsigned int, len)
__field(unsigned long long, age)
__field(unsigned long long, blocks)
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pgofs = pgofs;
__entry->len = len;
__entry->age = age;
__entry->blocks = last_blks;
),
TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
"len = %u, age = %llu, blocks = %llu",
show_dev_ino(__entry),
__entry->pgofs,
__entry->len,
__entry->age,
__entry->blocks)
);
TRACE_EVENT(f2fs_shrink_extent_tree, TRACE_EVENT(f2fs_shrink_extent_tree,
TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt, TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt,
...@@ -1643,7 +1723,7 @@ TRACE_EVENT(f2fs_shrink_extent_tree, ...@@ -1643,7 +1723,7 @@ TRACE_EVENT(f2fs_shrink_extent_tree,
show_dev(__entry->dev), show_dev(__entry->dev),
__entry->node_cnt, __entry->node_cnt,
__entry->tree_cnt, __entry->tree_cnt,
__entry->type == EX_READ ? "Read" : "N/A") show_extent_type(__entry->type))
); );
TRACE_EVENT(f2fs_destroy_extent_tree, TRACE_EVENT(f2fs_destroy_extent_tree,
...@@ -1670,7 +1750,7 @@ TRACE_EVENT(f2fs_destroy_extent_tree, ...@@ -1670,7 +1750,7 @@ TRACE_EVENT(f2fs_destroy_extent_tree,
TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u, type = %s", TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u, type = %s",
show_dev_ino(__entry), show_dev_ino(__entry),
__entry->node_cnt, __entry->node_cnt,
__entry->type == EX_READ ? "Read" : "N/A") show_extent_type(__entry->type))
); );
DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes, DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment