Commit 0fcb9d21 authored by Linus Torvalds

Merge tag 'for-f2fs-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "Most part of the patches include enhancing the stability and
  performance of in-memory extent caches feature.

  In addition, it introduces several new features and configurable
  points:
   - F2FS_GOING_DOWN_METAFLUSH ioctl to test power failures
   - F2FS_IOC_WRITE_CHECKPOINT ioctl to trigger checkpoint by users
   - background_gc=sync mount option to do gc synchronously
   - periodic checkpoints
   - sysfs entry to control readahead blocks for free nids

  And the following bug fixes have been merged.
   - fix SSA corruption by collapse/insert_range
   - correct a couple of gc behaviors
   - fix the results of f2fs_map_blocks
   - fix error case handling of volatile/atomic writes"

* tag 'for-f2fs-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (54 commits)
  f2fs: fix to skip shrinking extent nodes
  f2fs: fix error path of ->symlink
  f2fs: fix to clear GCed flag for atomic written page
  f2fs: don't need to submit bio on error case
  f2fs: fix leakage of inmemory atomic pages
  f2fs: refactor __find_rev_next_{zero}_bit
  f2fs: support fiemap for inline_data
  f2fs: flush dirty data for bmap
  f2fs: relocate the tracepoint for background_gc
  f2fs crypto: fix racing of accessing encrypted page among
  f2fs: export ra_nid_pages to sysfs
  f2fs: readahead for free nids building
  f2fs: support lower priority asynchronous readahead in ra_meta_pages
  f2fs: don't tag REQ_META for temporary non-meta pages
  f2fs: add a tracepoint for f2fs_read_data_pages
  f2fs: set GFP_NOFS for grab_cache_page
  f2fs: fix SSA updates resulting in corruption
  Revert "f2fs: do not skip dentry block writes"
  f2fs: add F2FS_GOING_DOWN_METAFLUSH to test power-failure
  f2fs: merge meta writes as many possible
  ...
parents d000f8d6 beaa57dd
......@@ -80,3 +80,15 @@ Date: February 2015
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the trimming rate in batch mode.
What: /sys/fs/f2fs/<disk>/cp_interval
Date: October 2015
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the checkpoint timing.
What: /sys/fs/f2fs/<disk>/ra_nid_pages
Date: October 2015
Contact: "Chao Yu" <chao2.yu@samsung.com>
Description:
Controls the number of nid pages to read ahead.
......@@ -102,7 +102,8 @@ background_gc=%s Turn on/off cleaning operations, namely garbage
collection, triggered in background when I/O subsystem is
idle. If background_gc=on, it will turn on the garbage
collection and if background_gc=off, garbage collection
will be turned off.
will be turned off. If background_gc=sync, it will turn
on synchronous garbage collection running in background.
Default value for this option is on. So garbage
collection is on by default.
disable_roll_forward Disable the roll-forward recovery routine
......
......@@ -47,7 +47,8 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
/*
* We guarantee no failure on the returned page.
*/
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
bool is_meta)
{
struct address_space *mapping = META_MAPPING(sbi);
struct page *page;
......@@ -58,6 +59,9 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
.blk_addr = index,
.encrypted_page = NULL,
};
if (unlikely(!is_meta))
fio.rw &= ~REQ_META;
repeat:
page = grab_cache_page(mapping, index);
if (!page) {
......@@ -91,6 +95,17 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
return page;
}
/*
 * Fetch the meta page at @index via __get_meta_page() with is_meta set,
 * i.e. the request flags are left untouched.
 */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page = __get_meta_page(sbi, index, true);

	return page;
}
/*
 * for POR only: fetch the page at @index via __get_meta_page() with is_meta
 * cleared, so the read is not tagged as a meta request.
 */
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page = __get_meta_page(sbi, index, false);

	return page;
}
bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
{
switch (type) {
......@@ -125,7 +140,8 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
/*
* Readahead CP/NAT/SIT/SSA pages
*/
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type)
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync)
{
block_t prev_blk_addr = 0;
struct page *page;
......@@ -133,10 +149,13 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type
struct f2fs_io_info fio = {
.sbi = sbi,
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO,
.rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA,
.encrypted_page = NULL,
};
if (unlikely(type == META_POR))
fio.rw &= ~REQ_META;
for (; nrpages-- > 0; blkno++) {
if (!is_valid_blkaddr(sbi, blkno, type))
......@@ -196,7 +215,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
f2fs_put_page(page, 0);
if (readahead)
ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR);
ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
}
static int f2fs_write_meta_page(struct page *page,
......@@ -257,7 +276,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write)
{
struct address_space *mapping = META_MAPPING(sbi);
pgoff_t index = 0, end = LONG_MAX;
pgoff_t index = 0, end = LONG_MAX, prev = LONG_MAX;
struct pagevec pvec;
long nwritten = 0;
struct writeback_control wbc = {
......@@ -277,6 +296,13 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
if (prev == LONG_MAX)
prev = page->index - 1;
if (nr_to_write != LONG_MAX && page->index != prev + 1) {
pagevec_release(&pvec);
goto stop;
}
lock_page(page);
if (unlikely(page->mapping != mapping)) {
......@@ -297,13 +323,14 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
break;
}
nwritten++;
prev = page->index;
if (unlikely(nwritten >= nr_to_write))
break;
}
pagevec_release(&pvec);
cond_resched();
}
stop:
if (nwritten)
f2fs_submit_merged_bio(sbi, type, WRITE);
......@@ -495,7 +522,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP);
ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
for (i = 0; i < orphan_blocks; i++) {
struct page *page = get_meta_page(sbi, start_blk + i);
......@@ -1000,6 +1027,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk = __start_cp_addr(sbi);
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
return;
/* write out checkpoint buffer at block 0 */
update_meta_page(sbi, ckpt, start_blk++);
......@@ -1109,6 +1141,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (cpc->reason == CP_RECOVERY)
f2fs_msg(sbi->sb, KERN_NOTICE,
"checkpoint: version = %llx", ckpt_ver);
/* do checkpoint periodically */
sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval);
out:
mutex_unlock(&sbi->cp_mutex);
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
......
This diff is collapsed.
......@@ -33,11 +33,11 @@ static void update_general_status(struct f2fs_sb_info *sbi)
int i;
/* validation check of the segment numbers */
si->hit_largest = atomic_read(&sbi->read_hit_largest);
si->hit_cached = atomic_read(&sbi->read_hit_cached);
si->hit_rbtree = atomic_read(&sbi->read_hit_rbtree);
si->hit_largest = atomic64_read(&sbi->read_hit_largest);
si->hit_cached = atomic64_read(&sbi->read_hit_cached);
si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
si->total_ext = atomic_read(&sbi->total_hit_ext);
si->total_ext = atomic64_read(&sbi->total_hit_ext);
si->ext_tree = sbi->total_ext_tree;
si->ext_node = atomic_read(&sbi->total_ext_node);
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
......@@ -118,7 +118,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
}
}
dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100);
si->bimodal = div_u64(bimodal, dist);
si->bimodal = div64_u64(bimodal, dist);
if (si->dirty_count)
si->avg_vblocks = div_u64(total_vblocks, ndirty);
else
......@@ -198,9 +198,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->page_mem = 0;
npages = NODE_MAPPING(sbi)->nrpages;
si->page_mem += npages << PAGE_CACHE_SHIFT;
si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->page_mem += npages << PAGE_CACHE_SHIFT;
si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
}
static int stat_show(struct seq_file *s, void *v)
......@@ -283,12 +283,12 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
si->bg_node_blks);
seq_puts(s, "\nExtent Cache:\n");
seq_printf(s, " - Hit Count: L1-1:%d L1-2:%d L2:%d\n",
seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
si->hit_largest, si->hit_cached,
si->hit_rbtree);
seq_printf(s, " - Hit Ratio: %d%% (%d / %d)\n",
seq_printf(s, " - Hit Ratio: %llu%% (%llu / %llu)\n",
!si->total_ext ? 0 :
(si->hit_total * 100) / si->total_ext,
div64_u64(si->hit_total * 100, si->total_ext),
si->hit_total, si->total_ext);
seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n",
si->ext_tree, si->ext_node);
......@@ -333,13 +333,13 @@ static int stat_show(struct seq_file *s, void *v)
/* memory footprint */
update_mem_info(si->sbi);
seq_printf(s, "\nMemory: %u KB\n",
seq_printf(s, "\nMemory: %llu KB\n",
(si->base_mem + si->cache_mem + si->page_mem) >> 10);
seq_printf(s, " - static: %u KB\n",
seq_printf(s, " - static: %llu KB\n",
si->base_mem >> 10);
seq_printf(s, " - cached: %u KB\n",
seq_printf(s, " - cached: %llu KB\n",
si->cache_mem >> 10);
seq_printf(s, " - paged : %u KB\n",
seq_printf(s, " - paged : %llu KB\n",
si->page_mem >> 10);
}
mutex_unlock(&f2fs_stat_mutex);
......@@ -378,10 +378,10 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
si->sbi = sbi;
sbi->stat_info = si;
atomic_set(&sbi->total_hit_ext, 0);
atomic_set(&sbi->read_hit_rbtree, 0);
atomic_set(&sbi->read_hit_largest, 0);
atomic_set(&sbi->read_hit_cached, 0);
atomic64_set(&sbi->total_hit_ext, 0);
atomic64_set(&sbi->read_hit_rbtree, 0);
atomic64_set(&sbi->read_hit_largest, 0);
atomic64_set(&sbi->read_hit_cached, 0);
atomic_set(&sbi->inline_xattr, 0);
atomic_set(&sbi->inline_inode, 0);
......
......@@ -258,7 +258,7 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p)
if (f2fs_has_inline_dentry(dir))
return f2fs_parent_inline_dir(dir, p);
page = get_lock_data_page(dir, 0);
page = get_lock_data_page(dir, 0, false);
if (IS_ERR(page))
return NULL;
......@@ -740,7 +740,7 @@ bool f2fs_empty_dir(struct inode *dir)
return f2fs_empty_inline_dir(dir);
for (bidx = 0; bidx < nblock; bidx++) {
dentry_page = get_lock_data_page(dir, bidx);
dentry_page = get_lock_data_page(dir, bidx, false);
if (IS_ERR(dentry_page)) {
if (PTR_ERR(dentry_page) == -ENOENT)
continue;
......@@ -787,7 +787,6 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
else
d_type = DT_UNKNOWN;
/* encrypted case */
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
......@@ -795,12 +794,20 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
int save_len = fstr->len;
int ret;
de_name.name = kmalloc(de_name.len, GFP_NOFS);
if (!de_name.name)
return false;
memcpy(de_name.name, d->filename[bit_pos], de_name.len);
ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code,
&de_name, fstr);
de_name = *fstr;
fstr->len = save_len;
kfree(de_name.name);
if (ret < 0)
return true;
de_name = *fstr;
fstr->len = save_len;
}
if (!dir_emit(ctx, de_name.name, de_name.len,
......@@ -847,7 +854,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n);
dentry_page = get_lock_data_page(inode, n, false);
if (IS_ERR(dentry_page))
continue;
......
......@@ -155,11 +155,12 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
return count - et->count;
}
static void __drop_largest_extent(struct inode *inode, pgoff_t fofs)
static void __drop_largest_extent(struct inode *inode,
pgoff_t fofs, unsigned int len)
{
struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
if (largest->fofs <= fofs && largest->fofs + largest->len > fofs)
if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs)
largest->len = 0;
}
......@@ -168,7 +169,7 @@ void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
if (!f2fs_may_extent_tree(inode))
return;
__drop_largest_extent(inode, fofs);
__drop_largest_extent(inode, fofs, 1);
}
void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
......@@ -350,8 +351,7 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
}
if (en) {
if (en->ei.len > et->largest.len)
et->largest = en->ei;
__try_update_largest_extent(et, en);
et->cached_en = en;
}
return en;
......@@ -388,18 +388,17 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
if (!en)
return NULL;
if (en->ei.len > et->largest.len)
et->largest = en->ei;
__try_update_largest_extent(et, en);
et->cached_en = en;
return en;
}
unsigned int f2fs_update_extent_tree_range(struct inode *inode,
static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
pgoff_t fofs, block_t blkaddr, unsigned int len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
struct extent_node *en = NULL, *en1 = NULL;
struct extent_node *prev_en = NULL, *next_en = NULL;
struct extent_info ei, dei, prev;
struct rb_node **insert_p = NULL, *insert_parent = NULL;
......@@ -409,6 +408,8 @@ unsigned int f2fs_update_extent_tree_range(struct inode *inode,
if (!et)
return false;
trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
write_lock(&et->lock);
if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) {
......@@ -419,148 +420,99 @@ unsigned int f2fs_update_extent_tree_range(struct inode *inode,
prev = et->largest;
dei.len = 0;
/* we do not guarantee that the largest extent is cached all the time */
__drop_largest_extent(inode, fofs);
/*
* drop largest extent before lookup, in case it's already
* been shrunk from extent tree
*/
__drop_largest_extent(inode, fofs, len);
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
&insert_p, &insert_parent);
if (!en) {
if (next_en) {
en = next_en;
f2fs_bug_on(sbi, en->ei.fofs <= pos);
pos = en->ei.fofs;
} else {
/*
* skip searching in the tree since there is no
* larger extent node in the cache.
*/
goto update_extent;
}
}
if (!en)
en = next_en;
/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
while (en) {
struct rb_node *node;
while (en && en->ei.fofs < end) {
unsigned int org_end;
int parts = 0; /* # of parts current extent split into */
if (pos >= end)
break;
next_en = en1 = NULL;
dei = en->ei;
en1 = en2 = NULL;
org_end = dei.fofs + dei.len;
f2fs_bug_on(sbi, pos >= org_end);
node = rb_next(&en->rb_node);
if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
en->ei.len = pos - en->ei.fofs;
prev_en = en;
parts = 1;
}
/*
* 2.1 there are four cases when we invalidate blkaddr in extent
* node, |V: valid address, X: will be invalidated|
*/
/* case#1, invalidate right part of extent node |VVVVVXXXXX| */
if (pos > dei.fofs && end >= dei.fofs + dei.len) {
en->ei.len = pos - dei.fofs;
if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
__detach_extent_node(sbi, et, en);
insert_p = NULL;
insert_parent = NULL;
goto update;
if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
if (parts) {
set_extent_info(&ei, end,
end - dei.fofs + dei.blk,
org_end - end);
en1 = __insert_extent_tree(sbi, et, &ei,
NULL, NULL);
next_en = en1;
} else {
en->ei.fofs = end;
en->ei.blk += end - dei.fofs;
en->ei.len -= end - dei.fofs;
next_en = en;
}
if (__is_extent_same(&dei, &et->largest))
et->largest = en->ei;
goto next;
parts++;
}
/* case#2, invalidate left part of extent node |XXXXXVVVVV| */
if (pos <= dei.fofs && end < dei.fofs + dei.len) {
en->ei.fofs = end;
en->ei.blk += end - dei.fofs;
en->ei.len -= end - dei.fofs;
if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
__detach_extent_node(sbi, et, en);
insert_p = NULL;
insert_parent = NULL;
goto update;
}
if (!next_en) {
struct rb_node *node = rb_next(&en->rb_node);
if (__is_extent_same(&dei, &et->largest))
et->largest = en->ei;
goto next;
next_en = node ?
rb_entry(node, struct extent_node, rb_node)
: NULL;
}
__detach_extent_node(sbi, et, en);
if (parts)
__try_update_largest_extent(et, en);
else
__detach_extent_node(sbi, et, en);
/*
* if we remove node in rb-tree, our parent node pointer may
* point the wrong place, discard them.
* if original extent is split into zero or two parts, extent
* tree has been altered by deletion or insertion, therefore
* invalidate the pointers into the tree.
*/
insert_p = NULL;
insert_parent = NULL;
/* case#3, invalidate entire extent node |XXXXXXXXXX| */
if (pos <= dei.fofs && end >= dei.fofs + dei.len) {
if (__is_extent_same(&dei, &et->largest))
et->largest.len = 0;
goto update;
if (parts != 1) {
insert_p = NULL;
insert_parent = NULL;
}
/*
* case#4, invalidate data in the middle of extent node
* |VVVXXXXVVV|
*/
if (dei.len > F2FS_MIN_EXTENT_LEN) {
unsigned int endofs;
/* insert left part of split extent into cache */
if (pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
set_extent_info(&ei, dei.fofs, dei.blk,
pos - dei.fofs);
en1 = __insert_extent_tree(sbi, et, &ei,
NULL, NULL);
}
/* insert right part of split extent into cache */
endofs = dei.fofs + dei.len;
if (endofs - end >= F2FS_MIN_EXTENT_LEN) {
set_extent_info(&ei, end,
end - dei.fofs + dei.blk,
endofs - end);
en2 = __insert_extent_tree(sbi, et, &ei,
NULL, NULL);
}
}
update:
/* 2.2 update in global extent list */
/* update in global extent list */
spin_lock(&sbi->extent_lock);
if (en && !list_empty(&en->list))
if (!parts && !list_empty(&en->list))
list_del(&en->list);
if (en1)
list_add_tail(&en1->list, &sbi->extent_list);
if (en2)
list_add_tail(&en2->list, &sbi->extent_list);
spin_unlock(&sbi->extent_lock);
/* 2.3 release extent node */
if (en)
/* release extent node */
if (!parts)
kmem_cache_free(extent_node_slab, en);
next:
en = node ? rb_entry(node, struct extent_node, rb_node) : NULL;
next_en = en;
if (en)
pos = en->ei.fofs;
en = next_en;
}
update_extent:
/* 3. update extent in extent cache */
if (blkaddr) {
struct extent_node *den = NULL;
set_extent_info(&ei, fofs, blkaddr, len);
en3 = __try_merge_extent_node(sbi, et, &ei, &den,
en1 = __try_merge_extent_node(sbi, et, &ei, &den,
prev_en, next_en);
if (!en3)
en3 = __insert_extent_tree(sbi, et, &ei,
if (!en1)
en1 = __insert_extent_tree(sbi, et, &ei,
insert_p, insert_parent);
/* give up extent_cache, if split and small updates happen */
......@@ -572,11 +524,11 @@ unsigned int f2fs_update_extent_tree_range(struct inode *inode,
}
spin_lock(&sbi->extent_lock);
if (en3) {
if (list_empty(&en3->list))
list_add_tail(&en3->list, &sbi->extent_list);
if (en1) {
if (list_empty(&en1->list))
list_add_tail(&en1->list, &sbi->extent_list);
else
list_move_tail(&en3->list, &sbi->extent_list);
list_move_tail(&en1->list, &sbi->extent_list);
}
if (den && !list_empty(&den->list))
list_del(&den->list);
......@@ -650,6 +602,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
}
spin_unlock(&sbi->extent_lock);
/*
* reset ino for searching victims from beginning of global extent tree.
*/
ino = F2FS_ROOT_INO(sbi);
while ((found = radix_tree_gang_lookup(root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
......@@ -663,7 +620,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
write_unlock(&et->lock);
if (node_cnt + tree_cnt >= nr_shrink)
break;
goto unlock_out;
}
}
unlock_out:
......
......@@ -19,6 +19,7 @@
#include <linux/magic.h>
#include <linux/kobject.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
#ifdef CONFIG_F2FS_CHECK_FS
......@@ -52,6 +53,7 @@
#define F2FS_MOUNT_NOBARRIER 0x00000800
#define F2FS_MOUNT_FASTBOOT 0x00001000
#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
#define F2FS_MOUNT_FORCE_FG_GC 0x00004000
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
......@@ -122,6 +124,7 @@ enum {
(SM_I(sbi)->trim_sections * (sbi)->segs_per_sec)
#define BATCHED_TRIM_BLOCKS(sbi) \
(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
#define DEF_CP_INTERVAL 60 /* 60 secs */
struct cp_control {
int reason;
......@@ -230,6 +233,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)
#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
#define F2FS_IOC_SET_ENCRYPTION_POLICY \
_IOR('f', 19, struct f2fs_encryption_policy)
......@@ -246,6 +250,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
#define F2FS_GOING_DOWN_FULLSYNC 0x0 /* going down with full sync */
#define F2FS_GOING_DOWN_METASYNC 0x1 /* going down with metadata */
#define F2FS_GOING_DOWN_NOSYNC 0x2 /* going down */
#define F2FS_GOING_DOWN_METAFLUSH 0x3 /* going down with meta flush */
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
......@@ -492,12 +497,20 @@ static inline bool __is_front_mergeable(struct extent_info *cur,
return __is_extent_mergeable(cur, front);
}
/*
 * Record @en's extent as the largest extent of @et when it is strictly
 * longer than the one currently recorded; otherwise leave @et untouched.
 */
static inline void __try_update_largest_extent(struct extent_tree *et,
						struct extent_node *en)
{
	if (en->ei.len <= et->largest.len)
		return;

	et->largest = en->ei;
}
struct f2fs_nm_info {
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
nid_t available_nids; /* maximum available node ids */
nid_t next_scan_nid; /* the next nid to be scanned */
unsigned int ram_thresh; /* control the memory footprint */
unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
......@@ -724,6 +737,7 @@ struct f2fs_sb_info {
struct rw_semaphore node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
wait_queue_head_t cp_wait;
long cp_expires, cp_interval; /* next expected periodic cp */
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
......@@ -787,10 +801,10 @@ struct f2fs_sb_info {
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
atomic_t inplace_count; /* # of inplace update */
atomic_t total_hit_ext; /* # of lookup extent cache */
atomic_t read_hit_rbtree; /* # of hit rbtree extent node */
atomic_t read_hit_largest; /* # of hit largest extent node */
atomic_t read_hit_cached; /* # of hit cached extent node */
atomic64_t total_hit_ext; /* # of lookup extent cache */
atomic64_t read_hit_rbtree; /* # of hit rbtree extent node */
atomic64_t read_hit_largest; /* # of hit largest extent node */
atomic64_t read_hit_cached; /* # of hit cached extent node */
atomic_t inline_xattr; /* # of inline_xattr inodes */
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
......@@ -1220,6 +1234,24 @@ static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi)
return sbi->total_valid_inode_count;
}
/*
 * Grab the page of @mapping at @index.  When @for_write is set the page is
 * obtained through grab_cache_page_write_begin() with AOP_FLAG_NOFS;
 * otherwise plain grab_cache_page() is used.
 */
static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
						pgoff_t index, bool for_write)
{
	if (for_write)
		return grab_cache_page_write_begin(mapping, index,
							AOP_FLAG_NOFS);

	return grab_cache_page(mapping, index);
}
/* Copy PAGE_SIZE bytes from @src to @dst, mapping both pages with kmap(). */
static inline void f2fs_copy_page(struct page *src, struct page *dst)
{
	char *from = kmap(src);
	char *to = kmap(dst);

	memcpy(to, from, PAGE_SIZE);

	/* unmap in the reverse order of mapping */
	kunmap(dst);
	kunmap(src);
}
static inline void f2fs_put_page(struct page *page, int unlock)
{
if (!page)
......@@ -1579,6 +1611,26 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
return S_ISREG(mode);
}
/*
 * Allocate @size bytes.  kmalloc() is tried first with __GFP_NOWARN added
 * to @flags; if it fails, the result of __vmalloc() with the caller's
 * @flags is returned instead.
 */
static inline void *f2fs_kvmalloc(size_t size, gfp_t flags)
{
	void *buf = kmalloc(size, flags | __GFP_NOWARN);

	if (buf)
		return buf;

	return __vmalloc(size, flags, PAGE_KERNEL);
}
/*
 * Zeroing counterpart of the kmalloc-then-vmalloc allocator: kzalloc() is
 * tried first with __GFP_NOWARN added to @flags; if it fails, the result
 * of __vmalloc() with @flags | __GFP_ZERO is returned instead.
 */
static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
{
	void *buf = kzalloc(size, flags | __GFP_NOWARN);

	if (buf)
		return buf;

	return __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
}
#define get_inode_mode(i) \
((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
......@@ -1721,6 +1773,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *);
int create_flush_cmd_control(struct f2fs_sb_info *);
void destroy_flush_cmd_control(struct f2fs_sb_info *);
void invalidate_blocks(struct f2fs_sb_info *, block_t);
bool is_checkpointed_data(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
void release_discard_addrs(struct f2fs_sb_info *);
......@@ -1739,6 +1792,7 @@ void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *,
void allocate_data_block(struct f2fs_sb_info *, struct page *,
block_t, block_t *, struct f2fs_summary *, int);
void f2fs_wait_on_page_writeback(struct page *, enum page_type);
void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t);
void write_data_summaries(struct f2fs_sb_info *, block_t);
void write_node_summaries(struct f2fs_sb_info *, block_t);
int lookup_journal_in_cursum(struct f2fs_summary_block *,
......@@ -1754,8 +1808,9 @@ void destroy_segment_manager_caches(void);
*/
struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t);
struct page *get_tmp_page(struct f2fs_sb_info *, pgoff_t);
bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int);
int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
......@@ -1787,9 +1842,9 @@ void set_data_blkaddr(struct dnode_of_data *);
int reserve_new_block(struct dnode_of_data *);
int f2fs_get_block(struct dnode_of_data *, pgoff_t);
int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
struct page *get_read_data_page(struct inode *, pgoff_t, int);
struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
struct page *find_data_page(struct inode *, pgoff_t);
struct page *get_lock_data_page(struct inode *, pgoff_t);
struct page *get_lock_data_page(struct inode *, pgoff_t, bool);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
int do_write_data_page(struct f2fs_io_info *);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
......@@ -1802,7 +1857,7 @@ int f2fs_release_page(struct page *, gfp_t);
int start_gc_thread(struct f2fs_sb_info *);
void stop_gc_thread(struct f2fs_sb_info *);
block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
int f2fs_gc(struct f2fs_sb_info *);
int f2fs_gc(struct f2fs_sb_info *, bool);
void build_gc_manager(struct f2fs_sb_info *);
/*
......@@ -1820,7 +1875,8 @@ struct f2fs_stat_info {
struct f2fs_sb_info *sbi;
int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
int main_area_segs, main_area_sections, main_area_zones;
int hit_largest, hit_cached, hit_rbtree, hit_total, total_ext;
unsigned long long hit_largest, hit_cached, hit_rbtree;
unsigned long long hit_total, total_ext;
int ext_tree, ext_node;
int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
int nats, dirty_nats, sits, dirty_sits, fnids;
......@@ -1844,7 +1900,7 @@ struct f2fs_stat_info {
unsigned int segment_count[2];
unsigned int block_count[2];
unsigned int inplace_count;
unsigned base_mem, cache_mem, page_mem;
unsigned long long base_mem, cache_mem, page_mem;
};
static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
......@@ -1857,10 +1913,10 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
#define stat_inc_total_hit(sbi) (atomic_inc(&(sbi)->total_hit_ext))
#define stat_inc_rbtree_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_rbtree))
#define stat_inc_largest_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_largest))
#define stat_inc_cached_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_cached))
#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext))
#define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree))
#define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest))
#define stat_inc_cached_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_cached))
#define stat_inc_inline_xattr(inode) \
do { \
if (f2fs_has_inline_xattr(inode)) \
......@@ -1998,6 +2054,8 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
bool f2fs_empty_inline_dir(struct inode *);
int f2fs_read_inline_dir(struct file *, struct dir_context *,
struct f2fs_str *);
int f2fs_inline_data_fiemap(struct inode *,
struct fiemap_extent_info *, __u64, __u64);
/*
* shrinker.c
......
This diff is collapsed.
......@@ -78,9 +78,12 @@ static int gc_thread_func(void *data)
stat_inc_bggc_count(sbi);
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi))
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC)))
wait_ms = gc_th->no_gc_sleep_time;
trace_f2fs_background_gc(sbi->sb, wait_ms,
prefree_segments(sbi), free_segments(sbi));
/* balancing f2fs's metadata periodically */
f2fs_balance_fs_bg(sbi);
......@@ -257,6 +260,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct victim_sel_policy p;
unsigned int secno, max_cost;
unsigned int last_segment = MAIN_SEGS(sbi);
int nsearched = 0;
mutex_lock(&dirty_i->seglist_lock);
......@@ -267,6 +271,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
p.min_segno = NULL_SEGNO;
p.min_cost = max_cost = get_max_cost(sbi, &p);
if (p.max_search == 0)
goto out;
if (p.alloc_mode == LFS && gc_type == FG_GC) {
p.min_segno = check_bg_victims(sbi);
if (p.min_segno != NULL_SEGNO)
......@@ -277,9 +284,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
unsigned long cost;
unsigned int segno;
segno = find_next_bit(p.dirty_segmap, MAIN_SEGS(sbi), p.offset);
if (segno >= MAIN_SEGS(sbi)) {
segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
if (segno >= last_segment) {
if (sbi->last_victim[p.gc_mode]) {
last_segment = sbi->last_victim[p.gc_mode];
sbi->last_victim[p.gc_mode] = 0;
p.offset = 0;
continue;
......@@ -327,6 +335,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
sbi->cur_victim_sec,
prefree_segments(sbi), free_segments(sbi));
}
out:
mutex_unlock(&dirty_i->seglist_lock);
return (p.min_segno == NULL_SEGNO) ? 0 : 1;
......@@ -541,7 +550,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
int err;
/* do not read out */
page = grab_cache_page(inode->i_mapping, bidx);
page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
if (!page)
return;
......@@ -550,8 +559,16 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
if (err)
goto out;
if (unlikely(dn.data_blkaddr == NULL_ADDR))
if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
ClearPageUptodate(page);
goto put_out;
}
/*
* don't cache encrypted data into meta inode until previous dirty
* data were writebacked to avoid racing between GC and flush.
*/
f2fs_wait_on_page_writeback(page, DATA);
get_node_info(fio.sbi, dn.nid, &ni);
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
......@@ -580,7 +597,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
goto put_page_out;
set_page_dirty(fio.encrypted_page);
f2fs_wait_on_page_writeback(fio.encrypted_page, META);
f2fs_wait_on_page_writeback(fio.encrypted_page, DATA);
if (clear_page_dirty_for_io(fio.encrypted_page))
dec_page_count(fio.sbi, F2FS_DIRTY_META);
......@@ -611,7 +628,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
{
struct page *page;
page = get_lock_data_page(inode, bidx);
page = get_lock_data_page(inode, bidx, true);
if (IS_ERR(page))
return;
......@@ -705,7 +722,7 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
data_page = get_read_data_page(inode,
start_bidx + ofs_in_node, READA);
start_bidx + ofs_in_node, READA, true);
if (IS_ERR(data_page)) {
iput(inode);
continue;
......@@ -797,13 +814,12 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
return nfree;
}
int f2fs_gc(struct f2fs_sb_info *sbi)
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
{
unsigned int segno = NULL_SEGNO;
unsigned int i;
int gc_type = BG_GC;
int nfree = 0;
int ret = -1;
unsigned int segno, i;
int gc_type = sync ? FG_GC : BG_GC;
int sec_freed = 0;
int ret = -EINVAL;
struct cp_control cpc;
struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist),
......@@ -812,12 +828,14 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
cpc.reason = __get_cp_reason(sbi);
gc_more:
segno = NULL_SEGNO;
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
if (unlikely(f2fs_cp_error(sbi)))
goto stop;
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
gc_type = FG_GC;
if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
write_checkpoint(sbi, &cpc);
......@@ -830,23 +848,38 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
/* readahead multi ssa blocks those have contiguous address */
if (sbi->segs_per_sec > 1)
ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
META_SSA);
META_SSA, true);
for (i = 0; i < sbi->segs_per_sec; i++)
nfree += do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
for (i = 0; i < sbi->segs_per_sec; i++) {
/*
* for FG_GC case, halt gcing left segments once failed one
* of segments in selected section to avoid long latency.
*/
if (!do_garbage_collect(sbi, segno + i, &gc_list, gc_type) &&
gc_type == FG_GC)
break;
}
if (i == sbi->segs_per_sec && gc_type == FG_GC)
sec_freed++;
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
if (has_not_enough_free_secs(sbi, nfree))
goto gc_more;
if (!sync) {
if (has_not_enough_free_secs(sbi, sec_freed))
goto gc_more;
if (gc_type == FG_GC)
write_checkpoint(sbi, &cpc);
if (gc_type == FG_GC)
write_checkpoint(sbi, &cpc);
}
stop:
mutex_unlock(&sbi->gc_mutex);
put_gc_inode(&gc_list);
if (sync)
ret = sec_freed ? 0 : -EAGAIN;
return ret;
}
......
......@@ -19,12 +19,6 @@
#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
/*
* with this macro, we can control the max time we do garbage collection,
* when user triggers batch mode gc by ioctl.
*/
#define F2FS_BATCH_GC_MAX_NUM 16
/* Search max. number of dirty segments to select a victim segment */
#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
......
......@@ -12,6 +12,7 @@
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
bool f2fs_may_inline_data(struct inode *inode)
{
......@@ -274,12 +275,14 @@ bool recover_inline_data(struct inode *inode, struct page *npage)
if (f2fs_has_inline_data(inode)) {
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
truncate_inline_inode(ipage, 0);
if (!truncate_inline_inode(ipage, 0))
return false;
f2fs_clear_inline_inode(inode);
update_inode(inode, ipage);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
truncate_blocks(inode, 0, false);
if (truncate_blocks(inode, 0, false))
return false;
goto process_inline;
}
return false;
......@@ -568,3 +571,38 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
f2fs_put_page(ipage, 1);
return 0;
}
/*
 * f2fs_inline_data_fiemap - report an inode's inline data as one fiemap extent
 * @inode:   inode being mapped
 * @fieinfo: fiemap state handed through to fiemap_fill_next_extent()
 * @start:   first logical byte of the requested range
 * @len:     length in bytes of the requested range
 *
 * Returns the PTR_ERR() value if the inode's node page cannot be obtained,
 * -EAGAIN if the inode holds no inline data, 0 if @start lies at or beyond
 * the inline length, and otherwise whatever fiemap_fill_next_extent()
 * returns.
 */
int f2fs_inline_data_fiemap(struct inode *inode,
		struct fiemap_extent_info *fieinfo, __u64 start, __u64 len)
{
	const __u32 extent_flags = FIEMAP_EXTENT_DATA_INLINE |
				FIEMAP_EXTENT_NOT_ALIGNED |
				FIEMAP_EXTENT_LAST;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct node_info nat_info;
	struct page *node_page;
	__u64 phys_addr;
	__u64 extent_len;
	int ret = 0;

	node_page = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);

	if (!f2fs_has_inline_data(inode)) {
		ret = -EAGAIN;
		goto release;
	}

	/* The inline area holds at most MAX_INLINE_DATA bytes of the file. */
	extent_len = min_t(size_t, MAX_INLINE_DATA, i_size_read(inode));
	if (start >= extent_len)
		goto release;

	/* Clip the extent to [start, start + len) within the inline bytes. */
	if (start + len < extent_len)
		extent_len = start + len;
	extent_len -= start;

	/*
	 * Physical position: byte address of the inode's node block plus the
	 * offset of the inline data area within the on-disk inode.
	 */
	get_node_info(sbi, inode->i_ino, &nat_info);
	phys_addr = (__u64)nat_info.blk_addr << inode->i_sb->s_blocksize_bits;
	phys_addr += (char *)inline_data_addr(node_page) -
				(char *)F2FS_INODE(node_page);

	ret = fiemap_fill_next_extent(fieinfo, start, phys_addr, extent_len,
					extent_flags);
release:
	/* Every path past the successful lookup drops the node page here. */
	f2fs_put_page(node_page, 1);
	return ret;
}
......@@ -296,16 +296,12 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
return 0;
/*
* We need to lock here to prevent from producing dirty node pages
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when running out of free sections.
*/
f2fs_lock_op(sbi);
update_inode_page(inode);
f2fs_unlock_op(sbi);
if (wbc)
f2fs_balance_fs(sbi);
f2fs_balance_fs(sbi);
return 0;
}
......
......@@ -410,11 +410,14 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
* If the symlink path is stored into inline_data, there is no
* performance regression.
*/
if (!err)
if (!err) {
filemap_write_and_wait_range(inode->i_mapping, 0, p_len - 1);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
} else {
f2fs_unlink(dir, dentry);
}
kfree(sd);
f2fs_fname_crypto_free_buffer(&disk_link);
......@@ -947,8 +950,13 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
/* Symlink is encrypted */
sd = (struct f2fs_encrypted_symlink_data *)caddr;
cstr.name = sd->encrypted_path;
cstr.len = le16_to_cpu(sd->len);
cstr.name = kmalloc(cstr.len, GFP_NOFS);
if (!cstr.name) {
res = -ENOMEM;
goto errout;
}
memcpy(cstr.name, sd->encrypted_path, cstr.len);
/* this is broken symlink case */
if (cstr.name[0] == 0 && cstr.len == 0) {
......@@ -970,6 +978,8 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
if (res < 0)
goto errout;
kfree(cstr.name);
paddr = pstr.name;
/* Null-terminate the name */
......@@ -979,6 +989,7 @@ static const char *f2fs_encrypted_follow_link(struct dentry *dentry, void **cook
page_cache_release(cpage);
return *cookie = paddr;
errout:
kfree(cstr.name);
f2fs_fname_crypto_free_buffer(&pstr);
kunmap(cpage);
page_cache_release(cpage);
......
......@@ -1323,23 +1323,24 @@ static int f2fs_write_node_page(struct page *page,
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
if (wbc->for_reclaim) {
if (!down_read_trylock(&sbi->node_write))
goto redirty_out;
} else {
down_read(&sbi->node_write);
}
get_node_info(sbi, nid, &ni);
/* This page is already truncated */
if (unlikely(ni.blk_addr == NULL_ADDR)) {
ClearPageUptodate(page);
dec_page_count(sbi, F2FS_DIRTY_NODES);
up_read(&sbi->node_write);
unlock_page(page);
return 0;
}
if (wbc->for_reclaim) {
if (!down_read_trylock(&sbi->node_write))
goto redirty_out;
} else {
down_read(&sbi->node_write);
}
set_page_writeback(page);
fio.blk_addr = ni.blk_addr;
write_node_page(nid, &fio);
......@@ -1528,7 +1529,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
return;
/* readahead nat pages to be scanned */
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
......@@ -1558,6 +1560,9 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
remove_free_nid(nm_i, nid);
}
mutex_unlock(&curseg->curseg_mutex);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
}
/*
......@@ -1803,10 +1808,10 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
nrpages = min(last_offset - i, bio_blocks);
/* readahead node pages */
ra_meta_pages(sbi, addr, nrpages, META_POR);
ra_meta_pages(sbi, addr, nrpages, META_POR, true);
for (idx = addr; idx < addr + nrpages; idx++) {
struct page *page = get_meta_page(sbi, idx);
struct page *page = get_tmp_page(sbi, idx);
rn = F2FS_NODE(page);
sum_entry->nid = rn->footer.nid;
......@@ -2000,6 +2005,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
nm_i->fcnt = 0;
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
......
......@@ -14,9 +14,11 @@
/* node block offset on the NAT area dedicated to the given start node id */
#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK)
/* # of pages to perform readahead before building free nids */
/* # of pages to perform synchronous readahead before building free nids */
#define FREE_NID_PAGES 4
#define DEF_RA_NID_PAGES 4 /* # of nid pages to be readaheaded */
/* maximum readahead size for node during getting data blocks */
#define MAX_RA_NODE 128
......
......@@ -180,7 +180,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
ra_meta_pages(sbi, blkaddr, 1, META_POR);
ra_meta_pages(sbi, blkaddr, 1, META_POR, true);
while (1) {
struct fsync_inode_entry *entry;
......@@ -188,7 +188,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
page = get_meta_page(sbi, blkaddr);
page = get_tmp_page(sbi, blkaddr);
if (cp_ver != cpver_of_node(page))
break;
......@@ -383,15 +383,11 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
start = start_bidx_of_node(ofs_of_node(page), fi);
end = start + ADDRS_PER_PAGE(page, fi);
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err) {
f2fs_unlock_op(sbi);
if (err)
goto out;
}
f2fs_wait_on_page_writeback(dn.node_page, NODE);
......@@ -456,7 +452,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
set_page_dirty(dn.node_page);
err:
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
out:
f2fs_msg(sbi->sb, KERN_NOTICE,
"recover_data: ino = %lx, recovered = %d blocks, err = %d",
......@@ -485,7 +480,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
ra_meta_pages_cond(sbi, blkaddr);
page = get_meta_page(sbi, blkaddr);
page = get_tmp_page(sbi, blkaddr);
if (cp_ver != cpver_of_node(page)) {
f2fs_put_page(page, 1);
......@@ -570,7 +565,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
/* truncate meta pages to be used by the recovery */
truncate_inode_pages_range(META_MAPPING(sbi),
MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
(loff_t)MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
if (err) {
truncate_inode_pages_final(NODE_MAPPING(sbi));
......
This diff is collapsed.
......@@ -137,10 +137,12 @@ enum {
/*
* BG_GC means the background cleaning job.
* FG_GC means the on-demand cleaning job.
* FORCE_FG_GC means on-demand cleaning job in background.
*/
enum {
BG_GC = 0,
FG_GC
FG_GC,
FORCE_FG_GC,
};
/* for a function parameter to select a victim segment */
......
......@@ -213,8 +213,10 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
......@@ -231,6 +233,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(max_victim_search),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
ATTR_LIST(cp_interval),
NULL,
};
......@@ -292,11 +296,16 @@ static int parse_options(struct super_block *sb, char *options)
if (!name)
return -ENOMEM;
if (strlen(name) == 2 && !strncmp(name, "on", 2))
if (strlen(name) == 2 && !strncmp(name, "on", 2)) {
set_opt(sbi, BG_GC);
else if (strlen(name) == 3 && !strncmp(name, "off", 3))
clear_opt(sbi, FORCE_FG_GC);
} else if (strlen(name) == 3 && !strncmp(name, "off", 3)) {
clear_opt(sbi, BG_GC);
else {
clear_opt(sbi, FORCE_FG_GC);
} else if (strlen(name) == 4 && !strncmp(name, "sync", 4)) {
set_opt(sbi, BG_GC);
set_opt(sbi, FORCE_FG_GC);
} else {
kfree(name);
return -EINVAL;
}
......@@ -631,10 +640,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
{
struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb);
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC))
seq_printf(seq, ",background_gc=%s", "on");
else
if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) {
if (test_opt(sbi, FORCE_FG_GC))
seq_printf(seq, ",background_gc=%s", "sync");
else
seq_printf(seq, ",background_gc=%s", "on");
} else {
seq_printf(seq, ",background_gc=%s", "off");
}
if (test_opt(sbi, DISABLE_ROLL_FORWARD))
seq_puts(seq, ",disable_roll_forward");
if (test_opt(sbi, DISCARD))
......@@ -742,6 +755,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
int err, active_logs;
bool need_restart_gc = false;
bool need_stop_gc = false;
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
sync_filesystem(sb);
......@@ -767,6 +781,14 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
if (f2fs_readonly(sb) && (*flags & MS_RDONLY))
goto skip;
/* disallow enable/disable extent_cache dynamically */
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
err = -EINVAL;
f2fs_msg(sbi->sb, KERN_WARNING,
"switch extent_cache option is not allowed");
goto restore_opts;
}
/*
* We stop the GC thread if FS is mounted as RO
* or if background_gc = off is passed in mount
......@@ -996,6 +1018,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
atomic_set(&sbi->nr_pages[i], 0);
sbi->dir_level = DEF_DIR_LEVEL;
sbi->cp_interval = DEF_CP_INTERVAL;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
INIT_LIST_HEAD(&sbi->s_list);
......@@ -1332,6 +1355,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
f2fs_commit_super(sbi, true);
}
sbi->cp_expires = round_jiffies_up(jiffies);
return 0;
free_kobj:
......
......@@ -514,6 +514,34 @@ TRACE_EVENT(f2fs_map_blocks,
__entry->ret)
);
/*
 * f2fs_background_gc - tracepoint carrying a wait time and two counters.
 *
 * Stores the device number taken from sb->s_dev together with the
 * caller-supplied wait_ms, prefree and free values, and prints them as
 * "dev = (..), wait_ms = .., prefree = .., free = ..".
 *
 * NOTE(review): going by the names, wait_ms is presumably the GC thread's
 * sleep interval and prefree/free are segment counts -- confirm against
 * the call site.
 */
TRACE_EVENT(f2fs_background_gc,
TP_PROTO(struct super_block *sb, long wait_ms,
unsigned int prefree, unsigned int free),
TP_ARGS(sb, wait_ms, prefree, free),
/* Fields recorded for each event. */
TP_STRUCT__entry(
__field(dev_t, dev)
__field(long, wait_ms)
__field(unsigned int, prefree)
__field(unsigned int, free)
),
/* Copy the arguments into the event record; only s_dev is read from sb. */
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->wait_ms = wait_ms;
__entry->prefree = prefree;
__entry->free = free;
),
TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u",
show_dev(__entry),
__entry->wait_ms,
__entry->prefree,
__entry->free)
);
TRACE_EVENT(f2fs_get_victim,
TP_PROTO(struct super_block *sb, int type, int gc_type,
......@@ -1000,6 +1028,32 @@ TRACE_EVENT(f2fs_writepages,
__entry->for_sync)
);
/*
 * f2fs_readpages - tracepoint taking an inode, a page and a page count.
 *
 * Records the device (inode->i_sb->s_dev), the inode number (inode->i_ino),
 * the index of the given page as "start", and nrpage.
 *
 * NOTE(review): presumably @page is the first page of a read batch of
 * @nrpage pages -- the caller is not visible here; confirm.
 */
TRACE_EVENT(f2fs_readpages,
TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage),
TP_ARGS(inode, page, nrpage),
/* Fields recorded for each event. */
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(pgoff_t, start)
__field(unsigned int, nrpage)
),
/* "start" is the page's index, not a byte offset. */
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->start = page->index;
__entry->nrpage = nrpage;
),
TP_printk("dev = (%d,%d), ino = %lu, start = %lu nrpage = %u",
show_dev_ino(__entry),
(unsigned long)__entry->start,
__entry->nrpage)
);
TRACE_EVENT(f2fs_write_checkpoint,
TP_PROTO(struct super_block *sb, int reason, char *msg),
......@@ -1132,17 +1186,19 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
__entry->len)
);
TRACE_EVENT(f2fs_update_extent_tree,
TRACE_EVENT(f2fs_update_extent_tree_range,
TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr),
TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr,
unsigned int len),
TP_ARGS(inode, pgofs, blkaddr),
TP_ARGS(inode, pgofs, blkaddr, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(ino_t, ino)
__field(unsigned int, pgofs)
__field(u32, blk)
__field(unsigned int, len)
),
TP_fast_assign(
......@@ -1150,12 +1206,15 @@ TRACE_EVENT(f2fs_update_extent_tree,
__entry->ino = inode->i_ino;
__entry->pgofs = pgofs;
__entry->blk = blkaddr;
__entry->len = len;
),
TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, blkaddr = %u",
TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
"blkaddr = %u, len = %u",
show_dev_ino(__entry),
__entry->pgofs,
__entry->blk)
__entry->blk,
__entry->len)
);
TRACE_EVENT(f2fs_shrink_extent_tree,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment