Commit 0fcb9d21 authored by Linus Torvalds

Merge tag 'for-f2fs-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "Most part of the patches include enhancing the stability and
  performance of in-memory extent caches feature.

  In addition, it introduces several new features and configurable
  points:
   - F2FS_GOING_DOWN_METAFLUSH ioctl to test power failures
   - F2FS_IOC_WRITE_CHECKPOINT ioctl to trigger checkpoint by users
   - background_gc=sync mount option to do gc synchronously
   - periodic checkpoints
   - sysfs entry to control readahead blocks for free nids

  And the following bug fixes have been merged.
   - fix SSA corruption by collapse/insert_range
   - correct a couple of gc behaviors
   - fix the results of f2fs_map_blocks
   - fix error case handling of volatile/atomic writes"

* tag 'for-f2fs-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (54 commits)
  f2fs: fix to skip shrinking extent nodes
  f2fs: fix error path of ->symlink
  f2fs: fix to clear GCed flag for atomic written page
  f2fs: don't need to submit bio on error case
  f2fs: fix leakage of inmemory atomic pages
  f2fs: refactor __find_rev_next_{zero}_bit
  f2fs: support fiemap for inline_data
  f2fs: flush dirty data for bmap
  f2fs: relocate the tracepoint for background_gc
  f2fs crypto: fix racing of accessing encrypted page among
  f2fs: export ra_nid_pages to sysfs
  f2fs: readahead for free nids building
  f2fs: support lower priority asynchronous readahead in ra_meta_pages
  f2fs: don't tag REQ_META for temporary non-meta pages
  f2fs: add a tracepoint for f2fs_read_data_pages
  f2fs: set GFP_NOFS for grab_cache_page
  f2fs: fix SSA updates resulting in corruption
  Revert "f2fs: do not skip dentry block writes"
  f2fs: add F2FS_GOING_DOWN_METAFLUSH to test power-failure
  f2fs: merge meta writes as many possible
  ...
parents d000f8d6 beaa57dd
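
The two new ioctls called out in the message above are plain fd-based ioctls. A minimal userspace sketch (illustrative only: the mountpoint path is a placeholder, F2FS_IOC_WRITE_CHECKPOINT's encoding comes from the f2fs.h hunk below, and the F2FS_IOC_SHUTDOWN encoding is assumed from f2fs headers of this era — verify both against your kernel headers):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/types.h>

    #define F2FS_IOCTL_MAGIC          0xf5
    #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
    /* assumption: the shutdown ioctl reuses the XFS-style encoding;
     * the 0x3 METAFLUSH flag is from this series */
    #define F2FS_IOC_SHUTDOWN         _IOR('X', 125, __u32)
    #define F2FS_GOING_DOWN_METAFLUSH 0x3

    int main(void)
    {
        __u32 how = F2FS_GOING_DOWN_METAFLUSH;
        int fd = open("/mnt/f2fs", O_RDONLY);   /* placeholder path */

        if (fd < 0)
            return 1;

        /* trigger a checkpoint from userspace */
        if (ioctl(fd, F2FS_IOC_WRITE_CHECKPOINT, 0) < 0)
            perror("write_checkpoint");

        /* power-failure test: flush meta, then stop further writes */
        if (ioctl(fd, F2FS_IOC_SHUTDOWN, &how) < 0)
            perror("shutdown");
        return 0;
    }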
@@ -80,3 +80,15 @@ Date:		February 2015
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
 Description:
 		Controls the trimming rate in batch mode.
+
+What:		/sys/fs/f2fs/<disk>/cp_interval
+Date:		October 2015
+Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+		Controls the checkpoint timing.
+
+What:		/sys/fs/f2fs/<disk>/ra_nid_pages
+Date:		October 2015
+Contact:	"Chao Yu" <chao2.yu@samsung.com>
+Description:
+		Controls the count of nid pages to be readaheaded.
@@ -102,7 +102,8 @@ background_gc=%s       Turn on/off cleaning operations, namely garbage
                        collection, triggered in background when I/O subsystem is
                        idle. If background_gc=on, it will turn on the garbage
                        collection and if background_gc=off, garbage collection
-                       will be truned off.
+                       will be truned off. If background_gc=sync, it will turn
+                       on synchronous garbage collection running in background.
                        Default value for this option is on. So garbage
                        collection is on by default.
 disable_roll_forward   Disable the roll-forward recovery routine
...
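
The new sync value can be exercised with a normal mount; from a program it is just the data argument of mount(2) (a sketch — device and mountpoint are placeholders, and the call needs root):

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
        /* equivalent to: mount -t f2fs -o background_gc=sync /dev/sdb1 /mnt */
        if (mount("/dev/sdb1", "/mnt", "f2fs", 0, "background_gc=sync") < 0) {
            perror("mount");
            return 1;
        }
        return 0;
    }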
@@ -47,7 +47,8 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 /*
  * We guarantee no failure on the returned page.
  */
-struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
+                            bool is_meta)
 {
     struct address_space *mapping = META_MAPPING(sbi);
     struct page *page;
@@ -58,6 +59,9 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
         .blk_addr = index,
         .encrypted_page = NULL,
     };
+
+    if (unlikely(!is_meta))
+        fio.rw &= ~REQ_META;
 repeat:
     page = grab_cache_page(mapping, index);
     if (!page) {
@@ -91,6 +95,17 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
     return page;
 }
 
+struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+    return __get_meta_page(sbi, index, true);
+}
+
+/* for POR only */
+struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+    return __get_meta_page(sbi, index, false);
+}
+
 bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
 {
     switch (type) {
@@ -125,7 +140,8 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
 /*
  * Readahead CP/NAT/SIT/SSA pages
  */
-int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type)
+int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
+                            int type, bool sync)
 {
     block_t prev_blk_addr = 0;
     struct page *page;
@@ -133,10 +149,13 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
     struct f2fs_io_info fio = {
         .sbi = sbi,
         .type = META,
-        .rw = READ_SYNC | REQ_META | REQ_PRIO,
+        .rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA,
         .encrypted_page = NULL,
     };
+
+    if (unlikely(type == META_POR))
+        fio.rw &= ~REQ_META;
 
     for (; nrpages-- > 0; blkno++) {
         if (!is_valid_blkaddr(sbi, blkno, type))
@@ -196,7 +215,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
     f2fs_put_page(page, 0);
 
     if (readahead)
-        ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
+        ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
 }
 
 static int f2fs_write_meta_page(struct page *page,
@@ -257,7 +276,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
                         long nr_to_write)
 {
     struct address_space *mapping = META_MAPPING(sbi);
-    pgoff_t index = 0, end = LONG_MAX;
+    pgoff_t index = 0, end = LONG_MAX, prev = LONG_MAX;
     struct pagevec pvec;
     long nwritten = 0;
     struct writeback_control wbc = {
@@ -277,6 +296,13 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
         for (i = 0; i < nr_pages; i++) {
             struct page *page = pvec.pages[i];
 
+            if (prev == LONG_MAX)
+                prev = page->index - 1;
+            if (nr_to_write != LONG_MAX && page->index != prev + 1) {
+                pagevec_release(&pvec);
+                goto stop;
+            }
+
             lock_page(page);
 
             if (unlikely(page->mapping != mapping)) {
@@ -297,13 +323,14 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
                 break;
             }
             nwritten++;
+            prev = page->index;
             if (unlikely(nwritten >= nr_to_write))
                 break;
         }
         pagevec_release(&pvec);
         cond_resched();
     }
-
+stop:
     if (nwritten)
         f2fs_submit_merged_bio(sbi, type, WRITE);
@@ -495,7 +522,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
     start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
     orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
 
-    ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP);
+    ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
 
     for (i = 0; i < orphan_blocks; i++) {
         struct page *page = get_meta_page(sbi, start_blk + i);
@@ -1000,6 +1027,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
     start_blk = __start_cp_addr(sbi);
 
+    /* need to wait for end_io results */
+    wait_on_all_pages_writeback(sbi);
+    if (unlikely(f2fs_cp_error(sbi)))
+        return;
+
     /* write out checkpoint buffer at block 0 */
     update_meta_page(sbi, ckpt, start_blk++);
@@ -1109,6 +1141,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
     if (cpc->reason == CP_RECOVERY)
         f2fs_msg(sbi->sb, KERN_NOTICE,
             "checkpoint: version = %llx", ckpt_ver);
+
+    /* do checkpoint periodically */
+    sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval);
 out:
     mutex_unlock(&sbi->cp_mutex);
     trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
...
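
The hunk above only re-arms the periodic-checkpoint deadline; the code that tests cp_expires and issues the checkpoint is not part of this excerpt. A self-contained model of the arm/expire arithmetic (illustrative only; the kernel additionally rounds the deadline up with round_jiffies_up()):

    #include <assert.h>
    #include <stdbool.h>

    #define HZ              100 /* ticks per second on many configs */
    #define DEF_CP_INTERVAL 60  /* default, from the f2fs.h hunk below */

    static unsigned long jiffies;  /* monotonic tick counter */

    /* wrap-safe "a is later than b", same idea as the kernel's time_after() */
    static bool time_after(unsigned long a, unsigned long b)
    {
        return (long)(b - a) < 0;
    }

    int main(void)
    {
        unsigned long cp_expires = jiffies + HZ * DEF_CP_INTERVAL;

        jiffies += HZ * 59;
        assert(!time_after(jiffies, cp_expires));   /* not yet due */

        jiffies += HZ * 2;
        assert(time_after(jiffies, cp_expires));    /* checkpoint is due */
        return 0;
    }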
[collapsed file diff omitted]
@@ -33,11 +33,11 @@ static void update_general_status(struct f2fs_sb_info *sbi)
     int i;
 
     /* validation check of the segment numbers */
-    si->hit_largest = atomic_read(&sbi->read_hit_largest);
-    si->hit_cached = atomic_read(&sbi->read_hit_cached);
-    si->hit_rbtree = atomic_read(&sbi->read_hit_rbtree);
+    si->hit_largest = atomic64_read(&sbi->read_hit_largest);
+    si->hit_cached = atomic64_read(&sbi->read_hit_cached);
+    si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
     si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
-    si->total_ext = atomic_read(&sbi->total_hit_ext);
+    si->total_ext = atomic64_read(&sbi->total_hit_ext);
     si->ext_tree = sbi->total_ext_tree;
     si->ext_node = atomic_read(&sbi->total_ext_node);
     si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
@@ -118,7 +118,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
         }
     }
     dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100);
-    si->bimodal = div_u64(bimodal, dist);
+    si->bimodal = div64_u64(bimodal, dist);
     if (si->dirty_count)
         si->avg_vblocks = div_u64(total_vblocks, ndirty);
     else
@@ -198,9 +198,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
     si->page_mem = 0;
     npages = NODE_MAPPING(sbi)->nrpages;
-    si->page_mem += npages << PAGE_CACHE_SHIFT;
+    si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
     npages = META_MAPPING(sbi)->nrpages;
-    si->page_mem += npages << PAGE_CACHE_SHIFT;
+    si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
 }
 
 static int stat_show(struct seq_file *s, void *v)
@@ -283,12 +283,12 @@ static int stat_show(struct seq_file *s, void *v)
         seq_printf(s, "  - node blocks : %d (%d)\n", si->node_blks,
                 si->bg_node_blks);
         seq_puts(s, "\nExtent Cache:\n");
-        seq_printf(s, "  - Hit Count: L1-1:%d L1-2:%d L2:%d\n",
+        seq_printf(s, "  - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
                 si->hit_largest, si->hit_cached,
                 si->hit_rbtree);
-        seq_printf(s, "  - Hit Ratio: %d%% (%d / %d)\n",
+        seq_printf(s, "  - Hit Ratio: %llu%% (%llu / %llu)\n",
                 !si->total_ext ? 0 :
-                (si->hit_total * 100) / si->total_ext,
+                div64_u64(si->hit_total * 100, si->total_ext),
                 si->hit_total, si->total_ext);
         seq_printf(s, "  - Inner Struct Count: tree: %d, node: %d\n",
                 si->ext_tree, si->ext_node);
@@ -333,13 +333,13 @@ static int stat_show(struct seq_file *s, void *v)
 
         /* memory footprint */
         update_mem_info(si->sbi);
-        seq_printf(s, "\nMemory: %u KB\n",
+        seq_printf(s, "\nMemory: %llu KB\n",
             (si->base_mem + si->cache_mem + si->page_mem) >> 10);
-        seq_printf(s, "  - static: %u KB\n",
+        seq_printf(s, "  - static: %llu KB\n",
                 si->base_mem >> 10);
-        seq_printf(s, "  - cached: %u KB\n",
+        seq_printf(s, "  - cached: %llu KB\n",
                 si->cache_mem >> 10);
-        seq_printf(s, "  - paged : %u KB\n",
+        seq_printf(s, "  - paged : %llu KB\n",
                 si->page_mem >> 10);
     }
     mutex_unlock(&f2fs_stat_mutex);
@@ -378,10 +378,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
     si->sbi = sbi;
     sbi->stat_info = si;
 
-    atomic_set(&sbi->total_hit_ext, 0);
-    atomic_set(&sbi->read_hit_rbtree, 0);
-    atomic_set(&sbi->read_hit_largest, 0);
-    atomic_set(&sbi->read_hit_cached, 0);
+    atomic64_set(&sbi->total_hit_ext, 0);
+    atomic64_set(&sbi->read_hit_rbtree, 0);
+    atomic64_set(&sbi->read_hit_largest, 0);
+    atomic64_set(&sbi->read_hit_cached, 0);
     atomic_set(&sbi->inline_xattr, 0);
     atomic_set(&sbi->inline_inode, 0);
...
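
The atomic_t-to-atomic64_t conversion above is not cosmetic: the hit counters grow for the lifetime of the mount, and the old 32-bit ratio expression (hit_total * 100) / total_ext overflows after roughly 21M lookups. A small demonstration of the failure mode (illustrative only):

    #include <stdio.h>

    int main(void)
    {
        /* at 43M hits, hit_total * 100 no longer fits in a 32-bit int,
         * so the old expression computes garbage (signed overflow) */
        int hit_total = 43000000, total_ext = 50000000;
        unsigned long long hit64 = hit_total, total64 = total_ext;

        printf("32-bit ratio: %d%%\n", (hit_total * 100) / total_ext);
        printf("64-bit ratio: %llu%%\n", (hit64 * 100) / total64);
        return 0;
    }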
@@ -258,7 +258,7 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
     if (f2fs_has_inline_dentry(dir))
         return f2fs_parent_inline_dir(dir, p);
 
-    page = get_lock_data_page(dir, 0);
+    page = get_lock_data_page(dir, 0, false);
     if (IS_ERR(page))
         return NULL;
 
@@ -740,7 +740,7 @@ bool f2fs_empty_dir(struct inode *dir)
         return f2fs_empty_inline_dir(dir);
 
     for (bidx = 0; bidx < nblock; bidx++) {
-        dentry_page = get_lock_data_page(dir, bidx);
+        dentry_page = get_lock_data_page(dir, bidx, false);
         if (IS_ERR(dentry_page)) {
             if (PTR_ERR(dentry_page) == -ENOENT)
                 continue;
@@ -787,7 +787,6 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
         else
             d_type = DT_UNKNOWN;
 
-        /* encrypted case */
         de_name.name = d->filename[bit_pos];
         de_name.len = le16_to_cpu(de->name_len);
 
@@ -795,12 +794,20 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
             int save_len = fstr->len;
             int ret;
 
+            de_name.name = kmalloc(de_name.len, GFP_NOFS);
+            if (!de_name.name)
+                return false;
+
+            memcpy(de_name.name, d->filename[bit_pos], de_name.len);
+
             ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code,
                             &de_name, fstr);
-            de_name = *fstr;
-            fstr->len = save_len;
+            kfree(de_name.name);
             if (ret < 0)
                 return true;
+
+            de_name = *fstr;
+            fstr->len = save_len;
         }
 
         if (!dir_emit(ctx, de_name.name, de_name.len,
@@ -847,7 +854,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
             min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
 
     for (; n < npages; n++) {
-        dentry_page = get_lock_data_page(inode, n);
+        dentry_page = get_lock_data_page(inode, n, false);
         if (IS_ERR(dentry_page))
             continue;
...
@@ -155,11 +155,12 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
     return count - et->count;
 }
 
-static void __drop_largest_extent(struct inode *inode, pgoff_t fofs)
+static void __drop_largest_extent(struct inode *inode,
+                    pgoff_t fofs, unsigned int len)
 {
     struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
 
-    if (largest->fofs <= fofs && largest->fofs + largest->len > fofs)
+    if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs)
         largest->len = 0;
 }
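
The rewritten condition is the standard overlap test for the half-open ranges [fofs, fofs+len) and [largest->fofs, largest->fofs+largest->len); the old form only asked whether the single offset fofs fell inside the largest extent, so a multi-page update starting before the cached extent could overlap it without invalidating it. A self-contained check (illustrative only):

    #include <assert.h>
    #include <stdbool.h>

    /* half-open interval overlap: [a, a+alen) vs [b, b+blen) */
    static bool ranges_overlap(unsigned a, unsigned alen,
                               unsigned b, unsigned blen)
    {
        return a < b + blen && a + alen > b;
    }

    int main(void)
    {
        /* cached largest extent covers pages [100, 110) */
        assert(ranges_overlap(105, 1, 100, 10));  /* single page inside */
        assert(ranges_overlap(95, 10, 100, 10));  /* straddles the start;
                                                     the old fofs-only test
                                                     missed this case */
        assert(!ranges_overlap(110, 5, 100, 10)); /* adjacent, no overlap */
        return 0;
    }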
@@ -168,7 +169,7 @@ void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
     if (!f2fs_may_extent_tree(inode))
         return;
 
-    __drop_largest_extent(inode, fofs);
+    __drop_largest_extent(inode, fofs, 1);
 }
 
 void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
@@ -350,8 +351,7 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
     }
 
     if (en) {
-        if (en->ei.len > et->largest.len)
-            et->largest = en->ei;
+        __try_update_largest_extent(et, en);
         et->cached_en = en;
     }
     return en;
@@ -388,18 +388,17 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
     if (!en)
         return NULL;
 
-    if (en->ei.len > et->largest.len)
-        et->largest = en->ei;
+    __try_update_largest_extent(et, en);
 
     et->cached_en = en;
     return en;
 }
 
-unsigned int f2fs_update_extent_tree_range(struct inode *inode,
+static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
                 pgoff_t fofs, block_t blkaddr, unsigned int len)
 {
     struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
     struct extent_tree *et = F2FS_I(inode)->extent_tree;
-    struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
+    struct extent_node *en = NULL, *en1 = NULL;
     struct extent_node *prev_en = NULL, *next_en = NULL;
     struct extent_info ei, dei, prev;
     struct rb_node **insert_p = NULL, *insert_parent = NULL;
@@ -409,6 +408,8 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
     if (!et)
         return false;
 
+    trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
+
     write_lock(&et->lock);
 
     if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) {
@@ -419,148 +420,99 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
     prev = et->largest;
     dei.len = 0;
 
-    /* we do not guarantee that the largest extent is cached all the time */
-    __drop_largest_extent(inode, fofs);
+    /*
+     * drop largest extent before lookup, in case it's already
+     * been shrunk from extent tree
+     */
+    __drop_largest_extent(inode, fofs, len);
 
     /* 1. lookup first extent node in range [fofs, fofs + len - 1] */
     en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
                     &insert_p, &insert_parent);
-    if (!en) {
-        if (next_en) {
-            en = next_en;
-            f2fs_bug_on(sbi, en->ei.fofs <= pos);
-            pos = en->ei.fofs;
-        } else {
-            /*
-             * skip searching in the tree since there is no
-             * larger extent node in the cache.
-             */
-            goto update_extent;
-        }
-    }
+    if (!en)
+        en = next_en;
 
     /* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */
-    while (en) {
-        struct rb_node *node;
+    while (en && en->ei.fofs < end) {
+        unsigned int org_end;
+        int parts = 0;  /* # of parts current extent split into */
 
-        if (pos >= end)
-            break;
+        next_en = en1 = NULL;
 
         dei = en->ei;
-        en1 = en2 = NULL;
-
-        node = rb_next(&en->rb_node);
-
-        /*
-         * 2.1 there are four cases when we invalidate blkaddr in extent
-         * node, |V: valid address, X: will be invalidated|
-         */
-        /* case#1, invalidate right part of extent node |VVVVVXXXXX| */
-        if (pos > dei.fofs && end >= dei.fofs + dei.len) {
-            en->ei.len = pos - dei.fofs;
-
-            if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
-                __detach_extent_node(sbi, et, en);
-                insert_p = NULL;
-                insert_parent = NULL;
-                goto update;
-            }
-
-            if (__is_extent_same(&dei, &et->largest))
-                et->largest = en->ei;
-            goto next;
-        }
-
-        /* case#2, invalidate left part of extent node |XXXXXVVVVV| */
-        if (pos <= dei.fofs && end < dei.fofs + dei.len) {
-            en->ei.fofs = end;
-            en->ei.blk += end - dei.fofs;
-            en->ei.len -= end - dei.fofs;
-
-            if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
-                __detach_extent_node(sbi, et, en);
-                insert_p = NULL;
-                insert_parent = NULL;
-                goto update;
-            }
-
-            if (__is_extent_same(&dei, &et->largest))
-                et->largest = en->ei;
-            goto next;
-        }
-
-        __detach_extent_node(sbi, et, en);
+        org_end = dei.fofs + dei.len;
+        f2fs_bug_on(sbi, pos >= org_end);
 
-        /*
-         * if we remove node in rb-tree, our parent node pointer may
-         * point the wrong place, discard them.
-         */
-        insert_p = NULL;
-        insert_parent = NULL;
+        if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
+            en->ei.len = pos - en->ei.fofs;
+            prev_en = en;
+            parts = 1;
+        }
 
-        /* case#3, invalidate entire extent node |XXXXXXXXXX| */
-        if (pos <= dei.fofs && end >= dei.fofs + dei.len) {
-            if (__is_extent_same(&dei, &et->largest))
-                et->largest.len = 0;
-            goto update;
-        }
-
-        /*
-         * case#4, invalidate data in the middle of extent node
-         * |VVVXXXXVVV|
-         */
-        if (dei.len > F2FS_MIN_EXTENT_LEN) {
-            unsigned int endofs;
-
-            /* insert left part of split extent into cache */
-            if (pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
-                set_extent_info(&ei, dei.fofs, dei.blk,
-                            pos - dei.fofs);
-                en1 = __insert_extent_tree(sbi, et, &ei,
-                                NULL, NULL);
-            }
-
-            /* insert right part of split extent into cache */
-            endofs = dei.fofs + dei.len;
-            if (endofs - end >= F2FS_MIN_EXTENT_LEN) {
-                set_extent_info(&ei, end,
-                        end - dei.fofs + dei.blk,
-                        endofs - end);
-                en2 = __insert_extent_tree(sbi, et, &ei,
-                                NULL, NULL);
-            }
-        }
-update:
-        /* 2.2 update in global extent list */
+        if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
+            if (parts) {
+                set_extent_info(&ei, end,
+                        end - dei.fofs + dei.blk,
+                        org_end - end);
+                en1 = __insert_extent_tree(sbi, et, &ei,
+                            NULL, NULL);
+                next_en = en1;
+            } else {
+                en->ei.fofs = end;
+                en->ei.blk += end - dei.fofs;
+                en->ei.len -= end - dei.fofs;
+                next_en = en;
+            }
+            parts++;
+        }
+
+        if (!next_en) {
+            struct rb_node *node = rb_next(&en->rb_node);
+
+            next_en = node ?
+                rb_entry(node, struct extent_node, rb_node)
+                : NULL;
+        }
+
+        if (parts)
+            __try_update_largest_extent(et, en);
+        else
+            __detach_extent_node(sbi, et, en);
+
+        /*
+         * if original extent is split into zero or two parts, extent
+         * tree has been altered by deletion or insertion, therefore
+         * invalidate pointers regard to tree.
+         */
+        if (parts != 1) {
+            insert_p = NULL;
+            insert_parent = NULL;
+        }
+
+        /* update in global extent list */
         spin_lock(&sbi->extent_lock);
-        if (en && !list_empty(&en->list))
+        if (!parts && !list_empty(&en->list))
             list_del(&en->list);
         if (en1)
             list_add_tail(&en1->list, &sbi->extent_list);
-        if (en2)
-            list_add_tail(&en2->list, &sbi->extent_list);
         spin_unlock(&sbi->extent_lock);
 
-        /* 2.3 release extent node */
-        if (en)
+        /* release extent node */
+        if (!parts)
             kmem_cache_free(extent_node_slab, en);
-next:
-        en = node ? rb_entry(node, struct extent_node, rb_node) : NULL;
-        next_en = en;
-        if (en)
-            pos = en->ei.fofs;
+
+        en = next_en;
     }
 
-update_extent:
     /* 3. update extent in extent cache */
     if (blkaddr) {
         struct extent_node *den = NULL;
 
         set_extent_info(&ei, fofs, blkaddr, len);
-        en3 = __try_merge_extent_node(sbi, et, &ei, &den,
+        en1 = __try_merge_extent_node(sbi, et, &ei, &den,
                         prev_en, next_en);
-        if (!en3)
-            en3 = __insert_extent_tree(sbi, et, &ei,
+        if (!en1)
+            en1 = __insert_extent_tree(sbi, et, &ei,
                         insert_p, insert_parent);
 
         /* give up extent_cache, if split and small updates happen */
@@ -572,11 +524,11 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
     }
 
     spin_lock(&sbi->extent_lock);
-    if (en3) {
-        if (list_empty(&en3->list))
-            list_add_tail(&en3->list, &sbi->extent_list);
+    if (en1) {
+        if (list_empty(&en1->list))
+            list_add_tail(&en1->list, &sbi->extent_list);
         else
-            list_move_tail(&en3->list, &sbi->extent_list);
+            list_move_tail(&en1->list, &sbi->extent_list);
     }
     if (den && !list_empty(&den->list))
         list_del(&den->list);
@@ -650,6 +602,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
     }
     spin_unlock(&sbi->extent_lock);
 
+    /*
+     * reset ino for searching victims from beginning of global extent tree.
+     */
+    ino = F2FS_ROOT_INO(sbi);
+
     while ((found = radix_tree_gang_lookup(root,
                 (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
         unsigned i;
@@ -663,7 +620,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
             write_unlock(&et->lock);
 
             if (node_cnt + tree_cnt >= nr_shrink)
-                break;
+                goto unlock_out;
         }
     }
 unlock_out:
...
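
In the rewritten invalidation loop above, an overlapped extent leaves at most two survivors, and a survivor is kept only if it is at least F2FS_MIN_EXTENT_LEN pages; the parts counter then decides whether en is shrunk in place, split, or detached and freed. A compact model of that decision (illustrative only; 64 is this era's minimum cached extent length):

    #include <assert.h>

    #define F2FS_MIN_EXTENT_LEN 64  /* minimum cached extent, in pages */

    /* number of surviving pieces when [pos, end) is punched out of
     * the cached extent [fofs, fofs + len) */
    static int split_parts(unsigned fofs, unsigned len,
                           unsigned pos, unsigned end)
    {
        unsigned org_end = fofs + len;
        int parts = 0;

        if (pos > fofs && pos - fofs >= F2FS_MIN_EXTENT_LEN)
            parts++;        /* left remainder survives */
        if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN)
            parts++;        /* right remainder survives */
        return parts;
    }

    int main(void)
    {
        assert(split_parts(0, 256, 64, 128) == 2);  /* split in two */
        assert(split_parts(0, 256, 0, 200) == 0);   /* tail too short */
        assert(split_parts(0, 256, 100, 256) == 1); /* right part gone */
        return 0;
    }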
@@ -19,6 +19,7 @@
 #include <linux/magic.h>
 #include <linux/kobject.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 #include <linux/bio.h>
 
 #ifdef CONFIG_F2FS_CHECK_FS
@@ -52,6 +53,7 @@
 #define F2FS_MOUNT_NOBARRIER        0x00000800
 #define F2FS_MOUNT_FASTBOOT         0x00001000
 #define F2FS_MOUNT_EXTENT_CACHE     0x00002000
+#define F2FS_MOUNT_FORCE_FG_GC      0x00004000
 
 #define clear_opt(sbi, option)  (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
 #define set_opt(sbi, option)    (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -122,6 +124,7 @@ enum {
         (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
 #define BATCHED_TRIM_BLOCKS(sbi)    \
         (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
+#define DEF_CP_INTERVAL             60  /* 60 secs */
 
 struct cp_control {
     int reason;
@@ -230,6 +233,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
 #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
 #define F2FS_IOC_ABORT_VOLATILE_WRITE   _IO(F2FS_IOCTL_MAGIC, 5)
 #define F2FS_IOC_GARBAGE_COLLECT        _IO(F2FS_IOCTL_MAGIC, 6)
+#define F2FS_IOC_WRITE_CHECKPOINT       _IO(F2FS_IOCTL_MAGIC, 7)
 
 #define F2FS_IOC_SET_ENCRYPTION_POLICY                  \
         _IOR('f', 19, struct f2fs_encryption_policy)
@@ -246,6 +250,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
 #define F2FS_GOING_DOWN_FULLSYNC    0x0 /* going down with full sync */
 #define F2FS_GOING_DOWN_METASYNC    0x1 /* going down with metadata */
 #define F2FS_GOING_DOWN_NOSYNC      0x2 /* going down */
+#define F2FS_GOING_DOWN_METAFLUSH   0x3 /* going down with meta flush */
 
 #if defined(__KERNEL__) && defined(CONFIG_COMPAT)
 /*
@@ -492,12 +497,20 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
     return __is_extent_mergeable(cur, front);
 }
 
+static inline void __try_update_largest_extent(struct extent_tree *et,
+                        struct extent_node *en)
+{
+    if (en->ei.len > et->largest.len)
+        et->largest = en->ei;
+}
+
 struct f2fs_nm_info {
     block_t nat_blkaddr;        /* base disk address of NAT */
     nid_t max_nid;              /* maximum possible node ids */
     nid_t available_nids;       /* maximum available node ids */
     nid_t next_scan_nid;        /* the next nid to be scanned */
     unsigned int ram_thresh;    /* control the memory footprint */
+    unsigned int ra_nid_pages;  /* # of nid pages to be readaheaded */
 
     /* NAT cache management */
     struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -724,6 +737,7 @@ struct f2fs_sb_info {
     struct rw_semaphore node_write;     /* locking node writes */
     struct mutex writepages;            /* mutex for writepages() */
     wait_queue_head_t cp_wait;
+    long cp_expires, cp_interval;       /* next expected periodic cp */
 
     struct inode_management im[MAX_INO_ENTRY];      /* manage inode cache */
@@ -787,10 +801,10 @@ struct f2fs_sb_info {
     unsigned int segment_count[2];      /* # of allocated segments */
     unsigned int block_count[2];        /* # of allocated blocks */
     atomic_t inplace_count;             /* # of inplace update */
-    atomic_t total_hit_ext;             /* # of lookup extent cache */
-    atomic_t read_hit_rbtree;           /* # of hit rbtree extent node */
-    atomic_t read_hit_largest;          /* # of hit largest extent node */
-    atomic_t read_hit_cached;           /* # of hit cached extent node */
+    atomic64_t total_hit_ext;           /* # of lookup extent cache */
+    atomic64_t read_hit_rbtree;         /* # of hit rbtree extent node */
+    atomic64_t read_hit_largest;        /* # of hit largest extent node */
+    atomic64_t read_hit_cached;         /* # of hit cached extent node */
     atomic_t inline_xattr;              /* # of inline_xattr inodes */
     atomic_t inline_inode;              /* # of inline_data inodes */
     atomic_t inline_dir;                /* # of inline_dentry inodes */
@@ -1220,6 +1234,24 @@ static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
     return sbi->total_valid_inode_count;
 }
 
+static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
+                        pgoff_t index, bool for_write)
+{
+    if (!for_write)
+        return grab_cache_page(mapping, index);
+    return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
+}
+
+static inline void f2fs_copy_page(struct page *src, struct page *dst)
+{
+    char *src_kaddr = kmap(src);
+    char *dst_kaddr = kmap(dst);
+
+    memcpy(dst_kaddr, src_kaddr, PAGE_SIZE);
+    kunmap(dst);
+    kunmap(src);
+}
+
 static inline void f2fs_put_page(struct page *page, int unlock)
 {
     if (!page)
@@ -1579,6 +1611,26 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
     return S_ISREG(mode);
 }
 
+static inline void *f2fs_kvmalloc(size_t size, gfp_t flags)
+{
+    void *ret;
+
+    ret = kmalloc(size, flags | __GFP_NOWARN);
+    if (!ret)
+        ret = __vmalloc(size, flags, PAGE_KERNEL);
+    return ret;
+}
+
+static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
+{
+    void *ret;
+
+    ret = kzalloc(size, flags | __GFP_NOWARN);
+    if (!ret)
+        ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
+    return ret;
+}
+
 #define get_inode_mode(i) \
     ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
      (F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -1721,6 +1773,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *);
 int create_flush_cmd_control(struct f2fs_sb_info *);
 void destroy_flush_cmd_control(struct f2fs_sb_info *);
 void invalidate_blocks(struct f2fs_sb_info *, block_t);
+bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
 void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
 void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
 void release_discard_addrs(struct f2fs_sb_info *);
@@ -1739,6 +1792,7 @@ void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *,
 void allocate_data_block(struct f2fs_sb_info *, struct page *,
         block_t, block_t *, struct f2fs_summary *, int);
 void f2fs_wait_on_page_writeback(struct page *, enum page_type);
+void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t);
 void write_data_summaries(struct f2fs_sb_info *, block_t);
 void write_node_summaries(struct f2fs_sb_info *, block_t);
 int lookup_journal_in_cursum(struct f2fs_summary_block *,
@@ -1754,8 +1808,9 @@ void destroy_segment_manager_caches(void);
  */
 struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
 struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
+struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t);
 bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
-int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int);
+int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
 void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
 long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
 void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
@@ -1787,9 +1842,9 @@ void set_data_blkaddr(struct dnode_of_data *);
 int reserve_new_block(struct dnode_of_data *);
 int f2fs_get_block(struct dnode_of_data *, pgoff_t);
 int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
-struct page *get_read_data_page(struct inode *, pgoff_t, int);
+struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
 struct page *find_data_page(struct inode *, pgoff_t);
-struct page *get_lock_data_page(struct inode *, pgoff_t);
+struct page *get_lock_data_page(struct inode *, pgoff_t, bool);
 struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
 int do_write_data_page(struct f2fs_io_info *);
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
@@ -1802,7 +1857,7 @@ int f2fs_release_page(struct page *, gfp_t);
 int start_gc_thread(struct f2fs_sb_info *);
 void stop_gc_thread(struct f2fs_sb_info *);
 block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
-int f2fs_gc(struct f2fs_sb_info *);
+int f2fs_gc(struct f2fs_sb_info *, bool);
 void build_gc_manager(struct f2fs_sb_info *);
 
 /*
@@ -1820,7 +1875,8 @@ struct f2fs_stat_info {
     struct f2fs_sb_info *sbi;
     int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
     int main_area_segs, main_area_sections, main_area_zones;
-    int hit_largest, hit_cached, hit_rbtree, hit_total, total_ext;
+    unsigned long long hit_largest, hit_cached, hit_rbtree;
+    unsigned long long hit_total, total_ext;
     int ext_tree, ext_node;
     int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
     int nats, dirty_nats, sits, dirty_sits, fnids;
@@ -1844,7 +1900,7 @@ struct f2fs_stat_info {
     unsigned int segment_count[2];
     unsigned int block_count[2];
     unsigned int inplace_count;
-    unsigned base_mem, cache_mem, page_mem;
+    unsigned long long base_mem, cache_mem, page_mem;
 };
 
 static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
@@ -1857,10 +1913,10 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
 #define stat_inc_bggc_count(sbi)    ((sbi)->bg_gc++)
 #define stat_inc_dirty_dir(sbi)     ((sbi)->n_dirty_dirs++)
 #define stat_dec_dirty_dir(sbi)     ((sbi)->n_dirty_dirs--)
-#define stat_inc_total_hit(sbi)         (atomic_inc(&(sbi)->total_hit_ext))
-#define stat_inc_rbtree_node_hit(sbi)   (atomic_inc(&(sbi)->read_hit_rbtree))
-#define stat_inc_largest_node_hit(sbi)  (atomic_inc(&(sbi)->read_hit_largest))
-#define stat_inc_cached_node_hit(sbi)   (atomic_inc(&(sbi)->read_hit_cached))
+#define stat_inc_total_hit(sbi)         (atomic64_inc(&(sbi)->total_hit_ext))
+#define stat_inc_rbtree_node_hit(sbi)   (atomic64_inc(&(sbi)->read_hit_rbtree))
+#define stat_inc_largest_node_hit(sbi)  (atomic64_inc(&(sbi)->read_hit_largest))
+#define stat_inc_cached_node_hit(sbi)   (atomic64_inc(&(sbi)->read_hit_cached))
 #define stat_inc_inline_xattr(inode)                    \
     do {                                \
         if (f2fs_has_inline_xattr(inode))           \
@@ -1998,6 +2054,8 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
 bool f2fs_empty_inline_dir(struct inode *);
 int f2fs_read_inline_dir(struct file *, struct dir_context *,
                         struct f2fs_str *);
+int f2fs_inline_data_fiemap(struct inode *,
+        struct fiemap_extent_info *, __u64, __u64);
 
 /*
  * shrinker.c
...
[collapsed file diff omitted]
@@ -78,9 +78,12 @@ static int gc_thread_func(void *data)
 
         stat_inc_bggc_count(sbi);
 
         /* if return value is not zero, no victim was selected */
-        if (f2fs_gc(sbi))
+        if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC)))
             wait_ms = gc_th->no_gc_sleep_time;
 
+        trace_f2fs_background_gc(sbi->sb, wait_ms,
+                prefree_segments(sbi), free_segments(sbi));
+
         /* balancing f2fs's metadata periodically */
         f2fs_balance_fs_bg(sbi);
 
@@ -257,6 +260,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
     struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
     struct victim_sel_policy p;
     unsigned int secno, max_cost;
+    unsigned int last_segment = MAIN_SEGS(sbi);
     int nsearched = 0;
 
     mutex_lock(&dirty_i->seglist_lock);
@@ -267,6 +271,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
     p.min_segno = NULL_SEGNO;
     p.min_cost = max_cost = get_max_cost(sbi, &p);
 
+    if (p.max_search == 0)
+        goto out;
+
     if (p.alloc_mode == LFS && gc_type == FG_GC) {
         p.min_segno = check_bg_victims(sbi);
         if (p.min_segno != NULL_SEGNO)
@@ -277,9 +284,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
         unsigned long cost;
         unsigned int segno;
 
-        segno = find_next_bit(p.dirty_segmap, MAIN_SEGS(sbi), p.offset);
-        if (segno >= MAIN_SEGS(sbi)) {
+        segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
+        if (segno >= last_segment) {
             if (sbi->last_victim[p.gc_mode]) {
+                last_segment = sbi->last_victim[p.gc_mode];
                 sbi->last_victim[p.gc_mode] = 0;
                 p.offset = 0;
                 continue;
@@ -327,6 +335,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
                 sbi->cur_victim_sec,
                 prefree_segments(sbi), free_segments(sbi));
     }
+out:
     mutex_unlock(&dirty_i->seglist_lock);
 
     return (p.min_segno == NULL_SEGNO) ? 0 : 1;
@@ -541,7 +550,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
     int err;
 
     /* do not read out */
-    page = grab_cache_page(inode->i_mapping, bidx);
+    page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
     if (!page)
         return;
 
@@ -550,8 +559,16 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
     if (err)
         goto out;
 
-    if (unlikely(dn.data_blkaddr == NULL_ADDR))
+    if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
+        ClearPageUptodate(page);
         goto put_out;
+    }
+
+    /*
+     * don't cache encrypted data into meta inode until previous dirty
+     * data were writebacked to avoid racing between GC and flush.
+     */
+    f2fs_wait_on_page_writeback(page, DATA);
 
     get_node_info(fio.sbi, dn.nid, &ni);
     set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
@@ -580,7 +597,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
         goto put_page_out;
 
     set_page_dirty(fio.encrypted_page);
-    f2fs_wait_on_page_writeback(fio.encrypted_page, META);
+    f2fs_wait_on_page_writeback(fio.encrypted_page, DATA);
     if (clear_page_dirty_for_io(fio.encrypted_page))
         dec_page_count(fio.sbi, F2FS_DIRTY_META);
 
@@ -611,7 +628,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
 {
     struct page *page;
 
-    page = get_lock_data_page(inode, bidx);
+    page = get_lock_data_page(inode, bidx, true);
     if (IS_ERR(page))
         return;
 
@@ -705,7 +722,7 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
             start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
             data_page = get_read_data_page(inode,
-                    start_bidx + ofs_in_node, READA);
+                    start_bidx + ofs_in_node, READA, true);
             if (IS_ERR(data_page)) {
                 iput(inode);
                 continue;
@@ -797,13 +814,12 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
     return nfree;
 }
 
-int f2fs_gc(struct f2fs_sb_info *sbi)
+int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
 {
-    unsigned int segno = NULL_SEGNO;
-    unsigned int i;
-    int gc_type = BG_GC;
-    int nfree = 0;
-    int ret = -1;
+    unsigned int segno, i;
+    int gc_type = sync ? FG_GC : BG_GC;
+    int sec_freed = 0;
+    int ret = -EINVAL;
     struct cp_control cpc;
     struct gc_inode_list gc_list = {
         .ilist = LIST_HEAD_INIT(gc_list.ilist),
@@ -812,12 +828,14 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
     cpc.reason = __get_cp_reason(sbi);
 gc_more:
+    segno = NULL_SEGNO;
+
     if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
         goto stop;
     if (unlikely(f2fs_cp_error(sbi)))
         goto stop;
 
-    if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
+    if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
         gc_type = FG_GC;
         if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
             write_checkpoint(sbi, &cpc);
@@ -830,23 +848,38 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
     /* readahead multi ssa blocks those have contiguous address */
     if (sbi->segs_per_sec > 1)
         ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
-                                META_SSA);
+                            META_SSA, true);
 
-    for (i = 0; i < sbi->segs_per_sec; i++)
-        nfree += do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
+    for (i = 0; i < sbi->segs_per_sec; i++) {
+        /*
+         * for FG_GC case, halt gcing left segments once failed one
+         * of segments in selected section to avoid long latency.
+         */
+        if (!do_garbage_collect(sbi, segno + i, &gc_list, gc_type) &&
+                gc_type == FG_GC)
+            break;
+    }
+
+    if (i == sbi->segs_per_sec && gc_type == FG_GC)
+        sec_freed++;
 
     if (gc_type == FG_GC)
         sbi->cur_victim_sec = NULL_SEGNO;
 
-    if (has_not_enough_free_secs(sbi, nfree))
-        goto gc_more;
+    if (!sync) {
+        if (has_not_enough_free_secs(sbi, sec_freed))
+            goto gc_more;
 
-    if (gc_type == FG_GC)
-        write_checkpoint(sbi, &cpc);
+        if (gc_type == FG_GC)
+            write_checkpoint(sbi, &cpc);
+    }
 stop:
     mutex_unlock(&sbi->gc_mutex);
     put_gc_inode(&gc_list);
+
+    if (sync)
+        ret = sec_freed ? 0 : -EAGAIN;
     return ret;
 }
...
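
The new sync argument is what F2FS_IOC_GARBAGE_COLLECT feeds through: with sync set, f2fs_gc() runs foreground GC and, per the tail of the function above, reports -EAGAIN when no section was freed. A minimal caller sketch (the mountpoint path is a placeholder, and the by-pointer __u32 flag matches the f2fs ioctl handler of this era — verify against your headers):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/types.h>

    #define F2FS_IOCTL_MAGIC         0xf5
    #define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)

    int main(void)
    {
        __u32 sync = 1;     /* 1: synchronous (foreground) GC */
        int fd = open("/mnt/f2fs", O_RDONLY);   /* placeholder path */

        if (fd < 0)
            return 1;
        /* errno == EAGAIN means no section could be freed this round */
        if (ioctl(fd, F2FS_IOC_GARBAGE_COLLECT, &sync) < 0)
            perror("f2fs gc");
        return 0;
    }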
@@ -19,12 +19,6 @@
 #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
 #define LIMIT_FREE_BLOCK    40 /* percentage over invalid + free space */
 
-/*
- * with this macro, we can control the max time we do garbage collection,
- * when user triggers batch mode gc by ioctl.
- */
-#define F2FS_BATCH_GC_MAX_NUM       16
-
 /* Search max. number of dirty segments to select a victim segment */
 #define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
...
@@ -12,6 +12,7 @@
 #include <linux/f2fs_fs.h>
 
 #include "f2fs.h"
+#include "node.h"
 
 bool f2fs_may_inline_data(struct inode *inode)
 {
@@ -274,12 +275,14 @@ bool recover_inline_data(struct inode *inode, struct page *npage)
     if (f2fs_has_inline_data(inode)) {
         ipage = get_node_page(sbi, inode->i_ino);
         f2fs_bug_on(sbi, IS_ERR(ipage));
-        truncate_inline_inode(ipage, 0);
+        if (!truncate_inline_inode(ipage, 0))
+            return false;
         f2fs_clear_inline_inode(inode);
         update_inode(inode, ipage);
         f2fs_put_page(ipage, 1);
     } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
-        truncate_blocks(inode, 0, false);
+        if (truncate_blocks(inode, 0, false))
+            return false;
         goto process_inline;
     }
     return false;
@@ -568,3 +571,38 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
     f2fs_put_page(ipage, 1);
     return 0;
 }
+
+int f2fs_inline_data_fiemap(struct inode *inode,
+        struct fiemap_extent_info *fieinfo, __u64 start, __u64 len)
+{
+    __u64 byteaddr, ilen;
+    __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED |
+        FIEMAP_EXTENT_LAST;
+    struct node_info ni;
+    struct page *ipage;
+    int err = 0;
+
+    ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino);
+    if (IS_ERR(ipage))
+        return PTR_ERR(ipage);
+
+    if (!f2fs_has_inline_data(inode)) {
+        err = -EAGAIN;
+        goto out;
+    }
+
+    ilen = min_t(size_t, MAX_INLINE_DATA, i_size_read(inode));
+    if (start >= ilen)
+        goto out;
+    if (start + len < ilen)
+        ilen = start + len;
+    ilen -= start;
+
+    get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+    byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
+    byteaddr += (char *)inline_data_addr(ipage) - (char *)F2FS_INODE(ipage);
+    err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags);
+out:
+    f2fs_put_page(ipage, 1);
+    return err;
+}
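
With the new handler wired up, FS_IOC_FIEMAP on an inline-data file reports a single extent flagged DATA_INLINE | NOT_ALIGNED | LAST instead of nothing. A minimal query (the file path is a placeholder):

    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>
    #include <linux/fiemap.h>

    int main(void)
    {
        struct fiemap *fm;
        int fd = open("/mnt/f2fs/small_file", O_RDONLY);

        if (fd < 0)
            return 1;

        fm = calloc(1, sizeof(*fm) + sizeof(struct fiemap_extent));
        if (!fm)
            return 1;
        fm->fm_length = ~0ULL;      /* map the whole file */
        fm->fm_extent_count = 1;

        if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
            perror("fiemap");
            return 1;
        }
        if (fm->fm_mapped_extents)
            printf("extent flags: 0x%x\n", fm->fm_extents[0].fe_flags);
        return 0;
    }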
@@ -296,16 +296,12 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
         return 0;
 
     /*
-     * We need to lock here to prevent from producing dirty node pages
+     * We need to balance fs here to prevent from producing dirty node pages
      * during the urgent cleaning time when runing out of free sections.
      */
-    f2fs_lock_op(sbi);
     update_inode_page(inode);
-    f2fs_unlock_op(sbi);
-
-    if (wbc)
-        f2fs_balance_fs(sbi);
+    f2fs_balance_fs(sbi);
 
     return 0;
 }
...
@@ -410,11 +410,14 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
      * If the symlink path is stored into inline_data, there is no
      * performance regression.
      */
-    if (!err)
+    if (!err) {
         filemap_write_and_wait_range(inode->i_mapping, 0, p_len - 1);
 
-    if (IS_DIRSYNC(dir))
-        f2fs_sync_fs(sbi->sb, 1);
+        if (IS_DIRSYNC(dir))
+            f2fs_sync_fs(sbi->sb, 1);
+    } else {
+        f2fs_unlink(dir, dentry);
+    }
 
     kfree(sd);
     f2fs_fname_crypto_free_buffer(&disk_link);
@@ -947,8 +950,13 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cookie)
 
     /* Symlink is encrypted */
     sd = (struct f2fs_encrypted_symlink_data *)caddr;
-    cstr.name = sd->encrypted_path;
     cstr.len = le16_to_cpu(sd->len);
+    cstr.name = kmalloc(cstr.len, GFP_NOFS);
+    if (!cstr.name) {
+        res = -ENOMEM;
+        goto errout;
+    }
+    memcpy(cstr.name, sd->encrypted_path, cstr.len);
 
     /* this is broken symlink case */
     if (cstr.name[0] == 0 && cstr.len == 0) {
@@ -970,6 +978,8 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cookie)
     if (res < 0)
         goto errout;
 
+    kfree(cstr.name);
+
     paddr = pstr.name;
 
     /* Null-terminate the name */
@@ -979,6 +989,7 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cookie)
     page_cache_release(cpage);
     return *cookie = paddr;
 errout:
+    kfree(cstr.name);
     f2fs_fname_crypto_free_buffer(&pstr);
     kunmap(cpage);
     page_cache_release(cpage);
...
@@ -1323,23 +1323,24 @@ static int f2fs_write_node_page(struct page *page,
     nid = nid_of_node(page);
     f2fs_bug_on(sbi, page->index != nid);

-    if (wbc->for_reclaim) {
-        if (!down_read_trylock(&sbi->node_write))
-            goto redirty_out;
-    } else {
-        down_read(&sbi->node_write);
-    }
-
     get_node_info(sbi, nid, &ni);

     /* This page is already truncated */
     if (unlikely(ni.blk_addr == NULL_ADDR)) {
         ClearPageUptodate(page);
         dec_page_count(sbi, F2FS_DIRTY_NODES);
-        up_read(&sbi->node_write);
         unlock_page(page);
         return 0;
     }

+    if (wbc->for_reclaim) {
+        if (!down_read_trylock(&sbi->node_write))
+            goto redirty_out;
+    } else {
+        down_read(&sbi->node_write);
+    }
+
     set_page_writeback(page);
     fio.blk_addr = ni.blk_addr;
     write_node_page(nid, &fio);
@@ -1528,7 +1529,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
         return;

     /* readahead nat pages to be scanned */
-    ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
+    ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
+                            META_NAT, true);

     while (1) {
         struct page *page = get_current_nat_page(sbi, nid);
@@ -1558,6 +1560,9 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
         remove_free_nid(nm_i, nid);
     }
     mutex_unlock(&curseg->curseg_mutex);
+
+    ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
+                    nm_i->ra_nid_pages, META_NAT, false);
 }

 /*
@@ -1803,10 +1808,10 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
         nrpages = min(last_offset - i, bio_blocks);

         /* readahead node pages */
-        ra_meta_pages(sbi, addr, nrpages, META_POR);
+        ra_meta_pages(sbi, addr, nrpages, META_POR, true);

         for (idx = addr; idx < addr + nrpages; idx++) {
-            struct page *page = get_meta_page(sbi, idx);
+            struct page *page = get_tmp_page(sbi, idx);

             rn = F2FS_NODE(page);
             sum_entry->nid = rn->footer.nid;
@@ -2000,6 +2005,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
     nm_i->fcnt = 0;
     nm_i->nat_cnt = 0;
     nm_i->ram_thresh = DEF_RAM_THRESHOLD;
+    nm_i->ra_nid_pages = DEF_RA_NID_PAGES;

     INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
     INIT_LIST_HEAD(&nm_i->free_nid_list);
...
@@ -14,9 +14,11 @@
 /* node block offset on the NAT area dedicated to the given start node id */
 #define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK)

-/* # of pages to perform readahead before building free nids */
+/* # of pages to perform synchronous readahead before building free nids */
 #define FREE_NID_PAGES 4

+#define DEF_RA_NID_PAGES 4 /* # of nid pages to be readaheaded */
+
 /* maximum readahead size for node during getting data blocks */
 #define MAX_RA_NODE 128
...
@@ -180,7 +180,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
     curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
     blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

-    ra_meta_pages(sbi, blkaddr, 1, META_POR);
+    ra_meta_pages(sbi, blkaddr, 1, META_POR, true);

     while (1) {
         struct fsync_inode_entry *entry;
@@ -188,7 +188,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
         if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
             return 0;

-        page = get_meta_page(sbi, blkaddr);
+        page = get_tmp_page(sbi, blkaddr);

         if (cp_ver != cpver_of_node(page))
             break;
@@ -383,15 +383,11 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
     start = start_bidx_of_node(ofs_of_node(page), fi);
     end = start + ADDRS_PER_PAGE(page, fi);

-    f2fs_lock_op(sbi);
-
     set_new_dnode(&dn, inode, NULL, NULL, 0);

     err = get_dnode_of_data(&dn, start, ALLOC_NODE);
-    if (err) {
-        f2fs_unlock_op(sbi);
+    if (err)
         goto out;
-    }

     f2fs_wait_on_page_writeback(dn.node_page, NODE);
@@ -456,7 +452,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
     set_page_dirty(dn.node_page);
 err:
     f2fs_put_dnode(&dn);
-    f2fs_unlock_op(sbi);
 out:
     f2fs_msg(sbi->sb, KERN_NOTICE,
         "recover_data: ino = %lx, recovered = %d blocks, err = %d",
@@ -485,7 +480,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
         ra_meta_pages_cond(sbi, blkaddr);

-        page = get_meta_page(sbi, blkaddr);
+        page = get_tmp_page(sbi, blkaddr);

         if (cp_ver != cpver_of_node(page)) {
             f2fs_put_page(page, 1);
@@ -570,7 +565,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
     /* truncate meta pages to be used by the recovery */
     truncate_inode_pages_range(META_MAPPING(sbi),
-            MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
+            (loff_t)MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);

     if (err) {
         truncate_inode_pages_final(NODE_MAPPING(sbi));
...
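
The (loff_t) cast in the last hunk is the whole fix: MAIN_BLKADDR() evaluates to a 32-bit block_t, so the shift by PAGE_CACHE_SHIFT used to be performed in 32-bit arithmetic and could wrap before being widened to the 64-bit offset argument. A standalone illustration of the same pitfall, using stand-in types and an assumed 12-bit page shift:

    #include <stdio.h>
    #include <stdint.h>

    typedef uint32_t block_t;   /* stand-in for f2fs's 32-bit block_t */
    typedef int64_t loff64_t;   /* stand-in for the kernel's loff_t */

    int main(void)
    {
        block_t main_blkaddr = 0x00400000; /* block address past 16 GiB in bytes */
        int page_shift = 12;               /* 4 KB pages */

        /* buggy: the shift wraps modulo 2^32 before the implicit widening */
        loff64_t bad = main_blkaddr << page_shift;
        /* fixed: widen first, then shift in 64-bit arithmetic */
        loff64_t good = (loff64_t)main_blkaddr << page_shift;

        printf("bad  = %lld\n", (long long)bad);   /* prints 0 */
        printf("good = %lld\n", (long long)good);  /* prints 17179869184 */
        return 0;
    }
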
This diff is collapsed.
@@ -137,10 +137,12 @@ enum {
 /*
  * BG_GC means the background cleaning job.
  * FG_GC means the on-demand cleaning job.
+ * FORCE_FG_GC means on-demand cleaning job in background.
  */
 enum {
     BG_GC = 0,
-    FG_GC
+    FG_GC,
+    FORCE_FG_GC,
 };

 /* for a function parameter to select a victim segment */
...
@@ -213,8 +213,10 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
 F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval);

 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -231,6 +233,8 @@ static struct attribute *f2fs_attrs[] = {
     ATTR_LIST(max_victim_search),
     ATTR_LIST(dir_level),
     ATTR_LIST(ram_thresh),
+    ATTR_LIST(ra_nid_pages),
+    ATTR_LIST(cp_interval),
     NULL,
 };
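
With these attributes registered, both new knobs become writable at runtime under /sys/fs/f2fs/<disk>/. A small sketch of tuning them from userspace (the disk name sdb1 and the chosen values are arbitrary examples, not recommendations):

    #include <stdio.h>

    /* Write one integer value to an f2fs sysfs attribute.
     * The disk name used below is hypothetical; substitute your device. */
    static int f2fs_set_attr(const char *disk, const char *attr, int val)
    {
        char path[256];
        FILE *f;

        snprintf(path, sizeof(path), "/sys/fs/f2fs/%s/%s", disk, attr);
        f = fopen(path, "w");
        if (!f)
            return -1;
        fprintf(f, "%d\n", val);
        return fclose(f);
    }

    int main(void)
    {
        /* read ahead more NAT pages per build_free_nids() pass */
        f2fs_set_attr("sdb1", "ra_nid_pages", 16);
        /* stretch the periodic checkpoint interval (seconds) */
        f2fs_set_attr("sdb1", "cp_interval", 120);
        return 0;
    }
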
@@ -292,11 +296,16 @@ static int parse_options(struct super_block *sb, char *options)
     if (!name)
         return -ENOMEM;
-    if (strlen(name) == 2 && !strncmp(name, "on", 2))
+    if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
         set_opt(sbi, BG_GC);
-    else if (strlen(name) == 3 && !strncmp(name, "off", 3))
+        clear_opt(sbi, FORCE_FG_GC);
+    } else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
         clear_opt(sbi, BG_GC);
-    else {
+        clear_opt(sbi, FORCE_FG_GC);
+    } else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
+        set_opt(sbi, BG_GC);
+        set_opt(sbi, FORCE_FG_GC);
+    } else {
         kfree(name);
         return -EINVAL;
     }
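
From the caller's side, the new keyword is simply passed in the mount data string. For illustration, a hedged mount(2) call (device and mount point are hypothetical; requires root):

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
        /* request synchronous background GC at mount time */
        if (mount("/dev/sdb1", "/mnt/f2fs", "f2fs", 0,
                  "background_gc=sync") < 0) {
            perror("mount");
            return 1;
        }
        return 0;
    }
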
@@ -631,10 +640,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 {
     struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);

-    if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))
-        seq_printf(seq, ",background_gc=%s", "on");
-    else
-        seq_printf(seq, ",background_gc=%s", "off");
+    if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) {
+        if (test_opt(sbi, FORCE_FG_GC))
+            seq_printf(seq, ",background_gc=%s", "sync");
+        else
+            seq_printf(seq, ",background_gc=%s", "on");
+    } else {
+        seq_printf(seq, ",background_gc=%s", "off");
+    }
     if (test_opt(sbi, DISABLE_ROLL_FORWARD))
         seq_puts(seq, ",disable_roll_forward");
     if (test_opt(sbi, DISCARD))
@@ -742,6 +755,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
     int err, active_logs;
     bool need_restart_gc = false;
     bool need_stop_gc = false;
+    bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);

     sync_filesystem(sb);
@@ -767,6 +781,14 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
     if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
         goto skip;

+    /* disallow enable/disable extent_cache dynamically */
+    if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
+        err = -EINVAL;
+        f2fs_msg(sbi->sb, KERN_WARNING,
+                "switch extent_cache option is not allowed");
+        goto restore_opts;
+    }
+
     /*
      * We stop the GC thread if FS is mounted as RO
      * or if background_gc = off is passed in mount
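
Given this check, a remount that tries to flip extent_cache is rejected with EINVAL. A minimal sketch (mount point hypothetical; the filesystem is assumed to have been mounted without extent_cache, so asking for it on remount must fail):

    #include <errno.h>
    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
        /* attempt to enable extent_cache on a live mount */
        if (mount(NULL, "/mnt/f2fs", NULL, MS_REMOUNT, "extent_cache") < 0 &&
            errno == EINVAL)
            fprintf(stderr, "remount rejected: extent_cache cannot be switched\n");
        return 0;
    }
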
@@ -996,6 +1018,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
         atomic_set(&sbi->nr_pages[i], 0);

     sbi->dir_level = DEF_DIR_LEVEL;
+    sbi->cp_interval = DEF_CP_INTERVAL;
     clear_sbi_flag(sbi, SBI_NEED_FSCK);

     INIT_LIST_HEAD(&sbi->s_list);
@@ -1332,6 +1355,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
         f2fs_commit_super(sbi, true);
     }

+    sbi->cp_expires = round_jiffies_up(jiffies);
+
     return 0;

 free_kobj:
...
@@ -514,6 +514,34 @@ TRACE_EVENT(f2fs_map_blocks,
         __entry->ret)
 );

+TRACE_EVENT(f2fs_background_gc,
+
+    TP_PROTO(struct super_block *sb, long wait_ms,
+            unsigned int prefree, unsigned int free),
+
+    TP_ARGS(sb, wait_ms, prefree, free),
+
+    TP_STRUCT__entry(
+        __field(dev_t, dev)
+        __field(long, wait_ms)
+        __field(unsigned int, prefree)
+        __field(unsigned int, free)
+    ),
+
+    TP_fast_assign(
+        __entry->dev = sb->s_dev;
+        __entry->wait_ms = wait_ms;
+        __entry->prefree = prefree;
+        __entry->free = free;
+    ),
+
+    TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u",
+        show_dev(__entry),
+        __entry->wait_ms,
+        __entry->prefree,
+        __entry->free)
+);
 TRACE_EVENT(f2fs_get_victim,

     TP_PROTO(struct super_block *sb, int type, int gc_type,
@@ -1000,6 +1028,32 @@ TRACE_EVENT(f2fs_writepages,
         __entry->for_sync)
 );

+TRACE_EVENT(f2fs_readpages,
+
+    TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage),
+
+    TP_ARGS(inode, page, nrpage),
+
+    TP_STRUCT__entry(
+        __field(dev_t, dev)
+        __field(ino_t, ino)
+        __field(pgoff_t, start)
+        __field(unsigned int, nrpage)
+    ),
+
+    TP_fast_assign(
+        __entry->dev = inode->i_sb->s_dev;
+        __entry->ino = inode->i_ino;
+        __entry->start = page->index;
+        __entry->nrpage = nrpage;
+    ),
+
+    TP_printk("dev = (%d,%d), ino = %lu, start = %lu nrpage = %u",
+        show_dev_ino(__entry),
+        (unsigned long)__entry->start,
+        __entry->nrpage)
+);
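
Once compiled in, this event is controlled like any other tracepoint through tracefs. A sketch that enables it (assuming tracefs is reachable at the conventional debugfs path; on some systems it is /sys/kernel/tracing instead):

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
        /* enable the f2fs_readpages event in the f2fs trace system */
        int fd = open("/sys/kernel/debug/tracing/events/f2fs/"
                      "f2fs_readpages/enable", O_WRONLY);

        if (fd < 0)
            return 1;
        if (write(fd, "1", 1) != 1) {
            close(fd);
            return 1;
        }
        close(fd);
        return 0;   /* events can then be read back from trace_pipe */
    }
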
 TRACE_EVENT(f2fs_write_checkpoint,

     TP_PROTO(struct super_block *sb, int reason, char *msg),
@@ -1132,17 +1186,19 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
         __entry->len)
 );

-TRACE_EVENT(f2fs_update_extent_tree,
+TRACE_EVENT(f2fs_update_extent_tree_range,

-    TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr),
+    TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr,
+                        unsigned int len),

-    TP_ARGS(inode, pgofs, blkaddr),
+    TP_ARGS(inode, pgofs, blkaddr, len),

     TP_STRUCT__entry(
         __field(dev_t, dev)
         __field(ino_t, ino)
         __field(unsigned int, pgofs)
         __field(u32, blk)
+        __field(unsigned int, len)
     ),

     TP_fast_assign(
@@ -1150,12 +1206,15 @@ TRACE_EVENT(f2fs_update_extent_tree,
         __entry->ino = inode->i_ino;
         __entry->pgofs = pgofs;
         __entry->blk = blkaddr;
+        __entry->len = len;
     ),

-    TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, blkaddr = %u",
+    TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
+        "blkaddr = %u, len = %u",
         show_dev_ino(__entry),
         __entry->pgofs,
-        __entry->blk)
+        __entry->blk,
+        __entry->len)
 );

 TRACE_EVENT(f2fs_shrink_extent_tree,
...