Commit a044bac9 authored by Jaegeuk Kim's avatar Jaegeuk Kim Committed by Stefan Bader

f2fs: use crc and cp version to determine roll-forward recovery

BugLink: https://bugs.launchpad.net/bugs/1818797

commit a468f0ef upstream.

Previously, we used cp_version only to detect recoverable dnodes.
In order to avoid same garbage cp_version, we needed to truncate the next
dnode during checkpoint, resulting in additional discard or data write.
If we can distinguish this by using crc in addition to cp_version, we can
remove this overhead.

There is backward compatibility concern where it changes node_footer layout.
So, this patch introduces a new checkpoint flag, CP_CRC_RECOVERY_FLAG, to
detect new layout. New layout will be activated only when this flag is set.
Signed-off-by: default avatarJaegeuk Kim <jaegeuk@kernel.org>
[bwh: Backported to 4.4:
 - Deleted code is slightly different
 - Adjust context]
Signed-off-by: default avatarBen Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: default avatarJuerg Haefliger <juergh@canonical.com>
Signed-off-by: default avatarKhalid Elmously <khalid.elmously@canonical.com>
parent b2516657
...@@ -902,7 +902,6 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) ...@@ -902,7 +902,6 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{ {
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct f2fs_nm_info *nm_i = NM_I(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
nid_t last_nid = nm_i->next_scan_nid; nid_t last_nid = nm_i->next_scan_nid;
...@@ -911,15 +910,6 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ...@@ -911,15 +910,6 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__u32 crc32 = 0; __u32 crc32 = 0;
int i; int i;
int cp_payload_blks = __cp_payload(sbi); int cp_payload_blks = __cp_payload(sbi);
block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
bool invalidate = false;
/*
* This avoids to conduct wrong roll-forward operations and uses
* metapages, so should be called prior to sync_meta_pages below.
*/
if (discard_next_dnode(sbi, discard_blk))
invalidate = true;
/* Flush all the NAT/SIT pages */ /* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) { while (get_pages(sbi, F2FS_DIRTY_META)) {
...@@ -996,6 +986,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ...@@ -996,6 +986,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
set_ckpt_flags(ckpt, CP_FSCK_FLAG); set_ckpt_flags(ckpt, CP_FSCK_FLAG);
/* set this flag to activate crc|cp_ver for recovery */
set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
/* update SIT/NAT bitmap */ /* update SIT/NAT bitmap */
get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
...@@ -1053,14 +1046,6 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) ...@@ -1053,14 +1046,6 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* wait for previous submitted meta pages writeback */ /* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi); wait_on_all_pages_writeback(sbi);
/*
* invalidate meta page which is used temporarily for zeroing out
* block at the end of warm node chain.
*/
if (invalidate)
invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
discard_blk);
release_dirty_inode(sbi); release_dirty_inode(sbi);
if (unlikely(f2fs_cp_error(sbi))) if (unlikely(f2fs_cp_error(sbi)))
......
...@@ -1780,7 +1780,6 @@ bool is_checkpointed_data(struct f2fs_sb_info *, block_t); ...@@ -1780,7 +1780,6 @@ bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
void release_discard_addrs(struct f2fs_sb_info *); void release_discard_addrs(struct f2fs_sb_info *);
bool discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *, bool); int npages_for_summary_flush(struct f2fs_sb_info *, bool);
void allocate_new_segments(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *);
int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
......
...@@ -212,6 +212,37 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) ...@@ -212,6 +212,37 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
f2fs_change_bit(block_off, nm_i->nat_bitmap); f2fs_change_bit(block_off, nm_i->nat_bitmap);
} }
static inline nid_t ino_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.ino);
}
static inline nid_t nid_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.nid);
}
static inline unsigned int ofs_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
unsigned flag = le32_to_cpu(rn->footer.flag);
return flag >> OFFSET_BIT_SHIFT;
}
static inline __u64 cpver_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le64_to_cpu(rn->footer.cp_ver);
}
static inline block_t next_blkaddr_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.next_blkaddr);
}
static inline void fill_node_footer(struct page *page, nid_t nid, static inline void fill_node_footer(struct page *page, nid_t nid,
nid_t ino, unsigned int ofs, bool reset) nid_t ino, unsigned int ofs, bool reset)
{ {
...@@ -242,40 +273,30 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) ...@@ -242,40 +273,30 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
{ {
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
struct f2fs_node *rn = F2FS_NODE(page); struct f2fs_node *rn = F2FS_NODE(page);
size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
rn->footer.cp_ver = ckpt->checkpoint_ver; if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
__u64 crc = le32_to_cpu(*((__le32 *)
((unsigned char *)ckpt + crc_offset)));
cp_ver |= (crc << 32);
}
rn->footer.cp_ver = cpu_to_le64(cp_ver);
rn->footer.next_blkaddr = cpu_to_le32(blkaddr); rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
} }
static inline nid_t ino_of_node(struct page *node_page) static inline bool is_recoverable_dnode(struct page *page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le32_to_cpu(rn->footer.ino);
}
static inline nid_t nid_of_node(struct page *node_page)
{ {
struct f2fs_node *rn = F2FS_NODE(node_page); struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
return le32_to_cpu(rn->footer.nid); size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
} __u64 cp_ver = cur_cp_version(ckpt);
static inline unsigned int ofs_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
unsigned flag = le32_to_cpu(rn->footer.flag);
return flag >> OFFSET_BIT_SHIFT;
}
static inline unsigned long long cpver_of_node(struct page *node_page)
{
struct f2fs_node *rn = F2FS_NODE(node_page);
return le64_to_cpu(rn->footer.cp_ver);
}
static inline block_t next_blkaddr_of_node(struct page *node_page) if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
{ __u64 crc = le32_to_cpu(*((__le32 *)
struct f2fs_node *rn = F2FS_NODE(node_page); ((unsigned char *)ckpt + crc_offset)));
return le32_to_cpu(rn->footer.next_blkaddr); cp_ver |= (crc << 32);
}
return cpu_to_le64(cp_ver) == cpver_of_node(page);
} }
/* /*
......
...@@ -193,7 +193,6 @@ static void recover_inode(struct inode *inode, struct page *page) ...@@ -193,7 +193,6 @@ static void recover_inode(struct inode *inode, struct page *page)
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{ {
unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg; struct curseg_info *curseg;
struct inode *inode; struct inode *inode;
struct page *page = NULL; struct page *page = NULL;
...@@ -214,7 +213,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) ...@@ -214,7 +213,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
page = get_tmp_page(sbi, blkaddr); page = get_tmp_page(sbi, blkaddr);
if (cp_ver != cpver_of_node(page)) if (!is_recoverable_dnode(page))
break; break;
if (!is_fsync_dnode(page)) if (!is_fsync_dnode(page))
...@@ -483,7 +482,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, ...@@ -483,7 +482,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
struct list_head *dir_list) struct list_head *dir_list)
{ {
unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg; struct curseg_info *curseg;
struct page *page = NULL; struct page *page = NULL;
int err = 0; int err = 0;
...@@ -503,7 +501,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, ...@@ -503,7 +501,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
page = get_tmp_page(sbi, blkaddr); page = get_tmp_page(sbi, blkaddr);
if (cp_ver != cpver_of_node(page)) { if (!is_recoverable_dnode(page)) {
f2fs_put_page(page, 1); f2fs_put_page(page, 1);
break; break;
} }
...@@ -595,31 +593,15 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) ...@@ -595,31 +593,15 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
} }
clear_sbi_flag(sbi, SBI_POR_DOING); clear_sbi_flag(sbi, SBI_POR_DOING);
if (err) { if (err)
bool invalidate = false;
if (discard_next_dnode(sbi, blkaddr))
invalidate = true;
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
sync_meta_pages(sbi, META, LONG_MAX);
/* invalidate temporary meta page */
if (invalidate)
invalidate_mapping_pages(META_MAPPING(sbi),
blkaddr, blkaddr);
set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
mutex_unlock(&sbi->cp_mutex); mutex_unlock(&sbi->cp_mutex);
} else if (need_writecp) {
if (!err && need_writecp) {
struct cp_control cpc = { struct cp_control cpc = {
.reason = CP_RECOVERY, .reason = CP_RECOVERY,
}; };
mutex_unlock(&sbi->cp_mutex);
write_checkpoint(sbi, &cpc); write_checkpoint(sbi, &cpc);
} else {
mutex_unlock(&sbi->cp_mutex);
} }
destroy_fsync_dnodes(&dir_list); destroy_fsync_dnodes(&dir_list);
......
...@@ -519,28 +519,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, ...@@ -519,28 +519,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
} }
bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
int err = -ENOTSUPP;
if (test_opt(sbi, DISCARD)) {
struct seg_entry *se = get_seg_entry(sbi,
GET_SEGNO(sbi, blkaddr));
unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
if (f2fs_test_bit(offset, se->discard_map))
return false;
err = f2fs_issue_discard(sbi, blkaddr, 1);
}
if (err) {
update_meta_page(sbi, NULL, blkaddr);
return true;
}
return false;
}
static void __add_discard_entry(struct f2fs_sb_info *sbi, static void __add_discard_entry(struct f2fs_sb_info *sbi,
struct cp_control *cpc, struct seg_entry *se, struct cp_control *cpc, struct seg_entry *se,
unsigned int start, unsigned int end) unsigned int start, unsigned int end)
......
...@@ -1457,6 +1457,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1457,6 +1457,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
if (need_fsck) if (need_fsck)
set_sbi_flag(sbi, SBI_NEED_FSCK); set_sbi_flag(sbi, SBI_NEED_FSCK);
if (!retry)
goto skip_recovery;
err = recover_fsync_data(sbi, false); err = recover_fsync_data(sbi, false);
if (err < 0) { if (err < 0) {
need_fsck = true; need_fsck = true;
...@@ -1474,7 +1477,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -1474,7 +1477,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_kobj; goto free_kobj;
} }
} }
skip_recovery:
/* recover_fsync_data() cleared this already */ /* recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING); clear_sbi_flag(sbi, SBI_POR_DOING);
......
...@@ -99,6 +99,7 @@ struct f2fs_super_block { ...@@ -99,6 +99,7 @@ struct f2fs_super_block {
/* /*
* For checkpoint * For checkpoint
*/ */
#define CP_CRC_RECOVERY_FLAG 0x00000040
#define CP_FASTBOOT_FLAG 0x00000020 #define CP_FASTBOOT_FLAG 0x00000020
#define CP_FSCK_FLAG 0x00000010 #define CP_FSCK_FLAG 0x00000010
#define CP_ERROR_FLAG 0x00000008 #define CP_ERROR_FLAG 0x00000008
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment