Commit a5131193 authored by Jaegeuk Kim

f2fs: cover more area with nat_tree_lock

There was a subtle bug in nat cache management which causes wrong nid allocation
or wrong block addresses when try_to_free_nats is triggered heavily.
This patch enlarges the previous coverage of nat_tree_lock to avoid the data race.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
parent e0afc4d6
...@@ -262,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, ...@@ -262,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
{ {
struct nat_entry *e; struct nat_entry *e;
down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid); e = __lookup_nat_cache(nm_i, nid);
if (!e) { if (!e) {
e = grab_nat_entry(nm_i, nid); e = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&e->ni, ne); node_info_from_raw_nat(&e->ni, ne);
} }
up_write(&nm_i->nat_tree_lock);
} }
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
...@@ -380,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ...@@ -380,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
memset(&ne, 0, sizeof(struct f2fs_nat_entry)); memset(&ne, 0, sizeof(struct f2fs_nat_entry));
down_write(&nm_i->nat_tree_lock);
/* Check current segment summary */ /* Check current segment summary */
mutex_lock(&curseg->curseg_mutex); mutex_lock(&curseg->curseg_mutex);
i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
...@@ -400,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ...@@ -400,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
cache: cache:
/* cache nat entry */ /* cache nat entry */
cache_nat_entry(NM_I(sbi), nid, &ne); cache_nat_entry(NM_I(sbi), nid, &ne);
up_write(&nm_i->nat_tree_lock);
} }
/* /*
...@@ -1459,13 +1460,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) ...@@ -1459,13 +1460,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
if (build) { if (build) {
/* do not add allocated nids */ /* do not add allocated nids */
down_read(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid); ne = __lookup_nat_cache(nm_i, nid);
if (ne && if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
(!get_nat_flag(ne, IS_CHECKPOINTED) ||
nat_get_blkaddr(ne) != NULL_ADDR)) nat_get_blkaddr(ne) != NULL_ADDR))
allocated = true; allocated = true;
up_read(&nm_i->nat_tree_lock);
if (allocated) if (allocated)
return 0; return 0;
} }
...@@ -1551,6 +1549,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi) ...@@ -1551,6 +1549,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true); META_NAT, true);
down_read(&nm_i->nat_tree_lock);
while (1) { while (1) {
struct page *page = get_current_nat_page(sbi, nid); struct page *page = get_current_nat_page(sbi, nid);
...@@ -1579,6 +1579,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) ...@@ -1579,6 +1579,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
remove_free_nid(nm_i, nid); remove_free_nid(nm_i, nid);
} }
mutex_unlock(&curseg->curseg_mutex); mutex_unlock(&curseg->curseg_mutex);
up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false); nm_i->ra_nid_pages, META_NAT, false);
...@@ -1861,14 +1862,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) ...@@ -1861,14 +1862,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
raw_ne = nat_in_journal(sum, i); raw_ne = nat_in_journal(sum, i);
down_write(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid); ne = __lookup_nat_cache(nm_i, nid);
if (!ne) { if (!ne) {
ne = grab_nat_entry(nm_i, nid); ne = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&ne->ni, &raw_ne); node_info_from_raw_nat(&ne->ni, &raw_ne);
} }
__set_nat_cache_dirty(nm_i, ne); __set_nat_cache_dirty(nm_i, ne);
up_write(&nm_i->nat_tree_lock);
} }
update_nats_in_cursum(sum, -i); update_nats_in_cursum(sum, -i);
mutex_unlock(&curseg->curseg_mutex); mutex_unlock(&curseg->curseg_mutex);
...@@ -1902,7 +1901,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, ...@@ -1902,7 +1901,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
struct f2fs_nat_block *nat_blk; struct f2fs_nat_block *nat_blk;
struct nat_entry *ne, *cur; struct nat_entry *ne, *cur;
struct page *page = NULL; struct page *page = NULL;
struct f2fs_nm_info *nm_i = NM_I(sbi);
/* /*
* there are two steps to flush nat entries: * there are two steps to flush nat entries:
...@@ -1939,12 +1937,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, ...@@ -1939,12 +1937,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
raw_ne = &nat_blk->entries[nid - start_nid]; raw_ne = &nat_blk->entries[nid - start_nid];
} }
raw_nat_from_node_info(raw_ne, &ne->ni); raw_nat_from_node_info(raw_ne, &ne->ni);
down_write(&NM_I(sbi)->nat_tree_lock);
nat_reset_flag(ne); nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne); __clear_nat_cache_dirty(NM_I(sbi), ne);
up_write(&NM_I(sbi)->nat_tree_lock);
if (nat_get_blkaddr(ne) == NULL_ADDR) if (nat_get_blkaddr(ne) == NULL_ADDR)
add_free_nid(sbi, nid, false); add_free_nid(sbi, nid, false);
} }
...@@ -1956,9 +1950,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, ...@@ -1956,9 +1950,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, set->entry_cnt); f2fs_bug_on(sbi, set->entry_cnt);
down_write(&nm_i->nat_tree_lock);
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
up_write(&nm_i->nat_tree_lock);
kmem_cache_free(nat_entry_set_slab, set); kmem_cache_free(nat_entry_set_slab, set);
} }
...@@ -1978,6 +1970,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) ...@@ -1978,6 +1970,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!nm_i->dirty_nat_cnt) if (!nm_i->dirty_nat_cnt)
return; return;
down_write(&nm_i->nat_tree_lock);
/* /*
* if there are no enough space in journal to store dirty nat * if there are no enough space in journal to store dirty nat
* entries, remove all entries from journal and merge them * entries, remove all entries from journal and merge them
...@@ -1986,7 +1981,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) ...@@ -1986,7 +1981,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi); remove_nats_in_journal(sbi);
down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_set(nm_i, while ((found = __gang_lookup_nat_set(nm_i,
set_idx, SETVEC_SIZE, setvec))) { set_idx, SETVEC_SIZE, setvec))) {
unsigned idx; unsigned idx;
...@@ -1995,12 +1989,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) ...@@ -1995,12 +1989,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
__adjust_nat_entry_set(setvec[idx], &sets, __adjust_nat_entry_set(setvec[idx], &sets,
MAX_NAT_JENTRIES(sum)); MAX_NAT_JENTRIES(sum));
} }
up_write(&nm_i->nat_tree_lock);
/* flush dirty nats in nat entry set */ /* flush dirty nats in nat entry set */
list_for_each_entry_safe(set, tmp, &sets, set_list) list_for_each_entry_safe(set, tmp, &sets, set_list)
__flush_nat_entry_set(sbi, set); __flush_nat_entry_set(sbi, set);
up_write(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment